Skip to content

Commit

Permalink
wip: import aes_sources.ini into EndangermentSource
Browse files Browse the repository at this point in the history
TODO: adds bibfile and bibkey to languoids.jsonl; needs checksum update
TODO: config reading needs to be extended to raw or drop equivalence
TODO: don't write bibfile and bibkey into files but keep direct reference
  • Loading branch information
xflr6 committed Mar 17, 2024
1 parent d1bdcb7 commit c61e318
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 4 deletions.
3 changes: 2 additions & 1 deletion tests/reads_repo/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,8 @@ def test_languoid_tree(treedb, child_id, parent_id, kwargs, expected):
r" source_id=\d+ date=datetime\.datetime\([^)]+\)>",
id='model=Endangerment'),
pytest.param(_models.EndangermentSource, _models.EndangermentSource.name == 'E22',
r"<EndangermentSource id=\d+ name='E22' bibitem_id=None pages=None>",
r"<EndangermentSource id=\d+ name='E22' full_name='Ethnologue 22'"
r" bibitem_id=\d+ pages=None url='https://www.ethnologue.com/'>",
id='model=EndangermentSource(name=E22)'),
pytest.param(_models.EthnologueComment, None,
r"<EthnologueComment languoid_id='\w+' isohid='[^']+' comment_type='[^']+'"
Expand Down
39 changes: 39 additions & 0 deletions treedb/import_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,13 +98,30 @@ def key_to_params(key):
def params_to_key(params):
    """Return the items of ``params`` as a tuple of pairs in sorted order.

    The result is independent of the insertion order of the mapping.
    """
    pairs = list(params.items())
    pairs.sort()
    return tuple(pairs)

def to_tree_reference(self):
    """Return a new map of the same class keyed by source name only.

    Every key of this map is read as a sequence of ``(field, value)`` pairs.
    In the copy, ``bibfile``, ``bibkey`` and ``pages`` are reset to ``None``
    and only ``name`` is carried over; the stored primary keys are kept.
    The new instance is created with the same ``conn`` as this one.
    """
    blank_head = (('bibfile', None), ('bibkey', None))
    blank_tail = (('pages', None),)

    result = self.__class__(conn=self.conn)
    result.update((blank_head + (('name', dict(old_key)['name']),) + blank_tail,
                   pk)
                  for old_key, pk in self.items())
    return result

insert_languoid_levels(conn)

insert_macroareas(conn)

insert_endangermentstatus(conn, bibitem_ids=bibitem_ids)

es_ids = EndangermentSourceMap(conn=conn)
insert_endangerment_sources(conn, es_ids=es_ids)
es_ids = es_ids.to_tree_reference()

insert_languoids(conn,
languoids=languoids,
Expand Down Expand Up @@ -199,6 +216,28 @@ def insert_endangermentstatus(conn, /, *, bibitem_ids,
conn.execute(sa.insert(EndangermentStatus), params)


def insert_endangerment_sources(conn, /, *, es_ids,
                                config_file='aes_sources.ini'):
    """Look up every endangerment source section of ``config_file`` in ``es_ids``.

    Each section of the loaded config yields one parameter set:

    - ``name``: the section name
    - ``full_name``: the section's ``name`` option (``None`` if blank)
    - ``url``: the optional ``url`` option (``None`` if missing or blank)
    - ``bibfile``/``bibkey``: the two halves of the optional ``reference_id``
      option, which must have the form ``<bibfile>:<bibkey>``

    The parameter set is turned into a key with ``es_ids.params_to_key()`` and
    looked up in ``es_ids``.
    NOTE(review): presumably the map inserts the row on a missing key and
    returns its primary key -- confirm against the map implementation.

    Raises:
        ValueError: if a non-blank ``reference_id`` is not ``<bibfile>:<bibkey>``
            with both parts non-empty.
    """
    log.info('insert endangerment sources from: %r', config_file)
    endangerment_sources = Config.load(filename=config_file, bind=conn)

    log.debug('insert %d endangerment sources', len(endangerment_sources))
    for section, es in endangerment_sources.items():
        params = {'name': section,
                  'full_name': es['name'].strip() or None,
                  'url': es.get('url', '').strip() or None}

        reference_id = es.get('reference_id', '').strip()
        if reference_id:
            # Split the stripped value so surrounding whitespace cannot leak
            # into bibfile or bibkey.
            bibfile, sep, bibkey = reference_id.partition(':')
            # Explicit check instead of assert: config input must also be
            # validated when Python runs with -O.
            if not (bibfile and sep and bibkey):
                raise ValueError(f'invalid reference_id in section {section!r}'
                                 f' of {config_file!r}: {reference_id!r}'
                                 ' (expected <bibfile>:<bibkey>)')
            params.update(bibfile=bibfile, bibkey=bibkey)

        key = es_ids.params_to_key(params)
        pk = es_ids[key]
        # Internal invariant: the lookup must yield a positive primary key.
        assert pk > 0


def insert_languoids(conn, /, *, languoids, bibitem_ids, es_ids):
log.info('insert languoids')

Expand Down
13 changes: 10 additions & 3 deletions treedb/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1042,18 +1042,25 @@ class EndangermentSource:
id = Column(Integer, primary_key=True)
name = Column(Text, CheckConstraint("name != ''"), nullable=False, unique=True)

full_name = Column(Text, CheckConstraint("full_name != ''"), unique=True)

bibitem_id = Column(ForeignKey('bibitem.id'))
pages = Column(Text, CheckConstraint("pages != ''"))
pages = Column(Text, CheckConstraint("pages != ''")) # only direct references

url = Column(Text, CheckConstraint("url != ''"))

__table_args__ = (UniqueConstraint(bibitem_id, pages),
CheckConstraint('(bibitem_id IS NULL) = (pages IS NULL)'))
CheckConstraint('(full_name IS NULL) OR (pages IS NULL)'),
CheckConstraint('(bibitem_id IS NOT NULL) OR (pages IS NULL)'))

def __repr__(self):
return (f'<{self.__class__.__name__}'
f' id={self.id!r}'
f' name={self.name!r}'
f' full_name={self.full_name!r}'
f' bibitem_id={self.bibitem_id!r}'
f' pages={self.pages!r}>')
f' pages={self.pages!r}'
f' url={self.url!r}>')

bibitem = relationship('Bibitem',
back_populates='endangermentsources')
Expand Down

0 comments on commit c61e318

Please sign in to comment.