Skip to content

Commit

Permalink
Equivalence/Ortholog fixes
Browse files (browse the repository at this point in the history)
  • Loading branch information
wshayes committed Sep 7, 2018
1 parent 5338444 commit 91a2a00
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 11 deletions.
4 changes: 2 additions & 2 deletions bel/db/arangodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,12 +210,12 @@ def batch_load_docs(db, doc_iterator, on_duplicate='replace'):
if counter % batch_size == 0:
log.debug(f'Bulk import arangodb: {counter}')
for cname in docs:
collections[cname].import_bulk(docs[cname], on_duplicate=on_duplicate)
collections[cname].import_bulk(docs[cname], on_duplicate=on_duplicate, halt_on_error=False)
docs[cname] = []

log.debug(f'Bulk import arangodb: {counter}')
for cname in docs:
collections[cname].import_bulk(docs[cname], on_duplicate=on_duplicate)
collections[cname].import_bulk(docs[cname], on_duplicate=on_duplicate, halt_on_error=False)
docs[cname] = []


Expand Down
38 changes: 32 additions & 6 deletions bel/resources/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,21 @@ def load_terms(fo: IO, metadata: dict, forceupdate: bool):
belns_db = arangodb.get_belns_handle(arango_client)
arangodb.batch_load_docs(belns_db, terms_iterator_for_arangodb(fo, version))

# TODO - delete old equivalences based on namespace and version
# delete resources matching namespace and NOT current version

log.info('Load namespace equivalences', elapsed=timer.elapsed, namespace=metadata['metadata']['namespace'])
log.info('Loaded namespace equivalences', elapsed=timer.elapsed, namespace=metadata['metadata']['namespace'])

# Clean up old entries
remove_old_equivalence_edges = f'''
FOR edge in equivalence_edges
FILTER edge.source == "{metadata["metadata"]["namespace"]}" AND edge.version != "{version}"
REMOVE edge IN equivalence_edges
'''
remove_old_equivalence_nodes = f'''
FOR node in equivalence_nodes
FILTER node.source == "{metadata["metadata"]["namespace"]}" AND node.version != "{version}"
REMOVE node IN equivalence_nodes
'''
arangodb.aql_query(belns_db, remove_old_equivalence_edges)
arangodb.aql_query(belns_db, remove_old_equivalence_nodes)

# Add metadata to resource metadata collection
metadata['_key'] = f"Namespace_{metadata['metadata']['namespace']}"
Expand Down Expand Up @@ -113,8 +124,23 @@ def terms_iterator_for_arangodb(fo, version):

(ns, val) = term_id.split(':', maxsplit=1)

yield (arangodb.equiv_nodes_name, {'_key': term_key, 'name': term_id, 'namespace': ns, 'source': source, 'version': version})
yield (arangodb.equiv_nodes_name, {'_key': term_key, 'name': term_id, 'primary': True, 'namespace': ns, 'source': source, 'version': version})

# Create Alt ID equivalences (to support other database equivalences using non-preferred Namespace IDs)
for alt_id in term['alt_ids']:
alt_id_key = arangodb.arango_id_to_key(alt_id)
yield (arangodb.equiv_nodes_name, {'_key': alt_id_key, 'name': alt_id, 'namespace': ns, 'source': source, 'version': version})

arango_edge = {
'_from': f"{arangodb.equiv_nodes_name}/{term_key}",
'_to': f"{arangodb.equiv_nodes_name}/{alt_id_key}",
'_key': bel.utils._create_hash(f'{term_id}>>{alt_id}'),
'type': 'equivalent_to',
'source': source,
'version': version,
}

# Cross-DB equivalences
for eqv in term['equivalences']:
(ns, val) = eqv.split(':', maxsplit=1)
eqv_key = arangodb.arango_id_to_key(eqv)
Expand All @@ -124,7 +150,7 @@ def terms_iterator_for_arangodb(fo, version):
arango_edge = {
'_from': f"{arangodb.equiv_nodes_name}/{term_key}",
'_to': f"{arangodb.equiv_nodes_name}/{eqv_key}",
'_key': bel.utils._create_hash(f'{term_key}>>{eqv_key}'),
'_key': bel.utils._create_hash(f'{term_id}>>{eqv}'),
'type': 'equivalent_to',
'source': source,
'version': version,
Expand Down
18 changes: 15 additions & 3 deletions bel/resources/ortholog.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,22 @@ def load_orthologs(fo: IO, metadata: dict):
belns_db = arangodb.get_belns_handle(arango_client)
arangodb.batch_load_docs(belns_db, orthologs_iterator(fo, version))

# TODO - delete old orthologs based on source and version

log.info('Load orthologs', elapsed=timer.elapsed, source=metadata['metadata']['source'])

# Clean up old entries
remove_old_ortholog_edges = f'''
FOR edge in ortholog_edges
FILTER edge.source == "{metadata["metadata"]["source"]}" AND edge.version != "{version}"
REMOVE edge IN ortholog_edges
'''
remove_old_ortholog_nodes = f'''
FOR node in ortholog_nodes
FILTER node.source == "{metadata["metadata"]["source"]}" AND node.version != "{version}"
REMOVE node IN ortholog_nodes
'''
arangodb.aql_query(belns_db, remove_old_ortholog_edges)
arangodb.aql_query(belns_db, remove_old_ortholog_nodes)

# Add metadata to resource metadata collection
metadata['_key'] = f"Orthologs_{metadata['metadata']['source']}"
try:
Expand Down Expand Up @@ -83,7 +95,7 @@ def orthologs_iterator(fo, version):
arango_edge = {
'_from': f"{arangodb.ortholog_nodes_name}/{subj_key}",
'_to': f"{arangodb.ortholog_nodes_name}/{obj_key}",
'_key': bel.utils._create_hash(f'{subj_key}>>{obj_key}'),
'_key': bel.utils._create_hash(f'{subj_id}>>{obj_id}'),
'type': 'ortholog_to',
'source': source,
'version': version,
Expand Down

0 comments on commit 91a2a00

Please sign in to comment.