Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
wshayes committed Sep 12, 2018
1 parent 77a96fd commit 20e3859
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 35 deletions.
4 changes: 2 additions & 2 deletions bel/db/arangodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,12 +208,12 @@ def batch_load_docs(db, doc_iterator, on_duplicate='replace'):
docs[collection_name].append(doc)

if counter % batch_size == 0:
log.debug(f'Bulk import arangodb: {counter}')
log.info(f'Bulk import arangodb: {counter}')
for cname in docs:
collections[cname].import_bulk(docs[cname], on_duplicate=on_duplicate, halt_on_error=False)
docs[cname] = []

log.debug(f'Bulk import arangodb: {counter}')
log.info(f'Bulk import arangodb: {counter}')
for cname in docs:
collections[cname].import_bulk(docs[cname], on_duplicate=on_duplicate, halt_on_error=False)
docs[cname] = []
Expand Down
66 changes: 36 additions & 30 deletions bel/resources/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,19 +73,21 @@ def load_terms(fo: IO, metadata: dict, forceupdate: bool):
with timy.Timer('Load Term Equivalences') as timer:
arango_client = arangodb.get_client()
belns_db = arangodb.get_belns_handle(arango_client)
arangodb.batch_load_docs(belns_db, terms_iterator_for_arangodb(fo, version))
arangodb.batch_load_docs(belns_db, terms_iterator_for_arangodb(fo, version), on_duplicate='update')

log.info('Loaded namespace equivalences', elapsed=timer.elapsed, namespace=metadata['metadata']['namespace'])

# Clean up old entries
remove_old_equivalence_edges = f'''
FOR edge in equivalence_edges
FILTER edge.source == "{metadata["metadata"]["namespace"]}" AND edge.version != "{version}"
FILTER edge.source != "{metadata["metadata"]["namespace"]}"
FILTER edge.version == "{version}"
REMOVE edge IN equivalence_edges
'''
remove_old_equivalence_nodes = f'''
FOR node in equivalence_nodes
FILTER node.source == "{metadata["metadata"]["namespace"]}" AND node.version != "{version}"
FILTER node.source != "{metadata["metadata"]["namespace"]}"
FILTER node.version == "{version}"
REMOVE node IN equivalence_nodes
'''
arangodb.aql_query(belns_db, remove_old_equivalence_edges)
Expand Down Expand Up @@ -127,35 +129,39 @@ def terms_iterator_for_arangodb(fo, version):
yield (arangodb.equiv_nodes_name, {'_key': term_key, 'name': term_id, 'primary': True, 'namespace': ns, 'source': source, 'version': version})

# Create Alt ID nodes/equivalences (to support other database equivalences using non-preferred Namespace IDs)
for alt_id in term['alt_ids']:
alt_id_key = arangodb.arango_id_to_key(alt_id)
yield (arangodb.equiv_nodes_name, {'_key': alt_id_key, 'name': alt_id, 'namespace': ns, 'source': source, 'version': version})

arango_edge = {
'_from': f"{arangodb.equiv_nodes_name}/{term_key}",
'_to': f"{arangodb.equiv_nodes_name}/{alt_id_key}",
'_key': bel.utils._create_hash(f'{term_id}>>{alt_id}'),
'type': 'equivalent_to',
'source': source,
'version': version,
}
if 'alt_ids' in term:
for alt_id in term['alt_ids']:
# log.info(f'Added {alt_id} equivalence')
alt_id_key = arangodb.arango_id_to_key(alt_id)
yield (arangodb.equiv_nodes_name, {'_key': alt_id_key, 'name': alt_id, 'namespace': ns, 'source': source, 'version': version})

arango_edge = {
'_from': f"{arangodb.equiv_nodes_name}/{term_key}",
'_to': f"{arangodb.equiv_nodes_name}/{alt_id_key}",
'_key': bel.utils._create_hash(f'{term_id}>>{alt_id}'),
'type': 'equivalent_to',
'source': source,
'version': version,
}
yield (arangodb.equiv_edges_name, arango_edge)

# Cross-DB equivalences
for eqv in term['equivalences']:
(ns, val) = eqv.split(':', maxsplit=1)
eqv_key = arangodb.arango_id_to_key(eqv)

yield (arangodb.equiv_nodes_name, {'_key': eqv_key, 'name': eqv, 'namespace': ns, 'source': source, 'version': version})

arango_edge = {
'_from': f"{arangodb.equiv_nodes_name}/{term_key}",
'_to': f"{arangodb.equiv_nodes_name}/{eqv_key}",
'_key': bel.utils._create_hash(f'{term_id}>>{eqv}'),
'type': 'equivalent_to',
'source': source,
'version': version,
}
yield (arangodb.equiv_edges_name, arango_edge)
if 'equivalences' in term:
for eqv in term['equivalences']:
(ns, val) = eqv.split(':', maxsplit=1)
eqv_key = arangodb.arango_id_to_key(eqv)

yield (arangodb.equiv_nodes_name, {'_key': eqv_key, 'name': eqv, 'namespace': ns, 'source': source, 'version': version})

arango_edge = {
'_from': f"{arangodb.equiv_nodes_name}/{term_key}",
'_to': f"{arangodb.equiv_nodes_name}/{eqv_key}",
'_key': bel.utils._create_hash(f'{term_id}>>{eqv}'),
'type': 'equivalent_to',
'source': source,
'version': version,
}
yield (arangodb.equiv_edges_name, arango_edge)


def terms_iterator_for_elasticsearch(fo: IO, index_name: str):
Expand Down
8 changes: 5 additions & 3 deletions bel/resources/ortholog.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,21 @@ def load_orthologs(fo: IO, metadata: dict):
with timy.Timer('Load Orthologs') as timer:
arango_client = arangodb.get_client()
belns_db = arangodb.get_belns_handle(arango_client)
arangodb.batch_load_docs(belns_db, orthologs_iterator(fo, version))
arangodb.batch_load_docs(belns_db, orthologs_iterator(fo, version), on_duplicate='update')

log.info('Load orthologs', elapsed=timer.elapsed, source=metadata['metadata']['source'])

# Clean up old entries
remove_old_ortholog_edges = f'''
FOR edge in ortholog_edges
FILTER edge.source == "{metadata["metadata"]["source"]}" AND edge.version != "{version}"
FILTER edge.source != "{metadata["metadata"]["source"]}"
FILTER edge.version == "{version}"
REMOVE edge IN ortholog_edges
'''
remove_old_ortholog_nodes = f'''
FOR node in ortholog_nodes
FILTER node.source == "{metadata["metadata"]["source"]}" AND node.version != "{version}"
FILTER node.source != "{metadata["metadata"]["source"]}"
FILTER node.version == "{version}"
REMOVE node IN ortholog_nodes
'''
arangodb.aql_query(belns_db, remove_old_ortholog_edges)
Expand Down

0 comments on commit 20e3859

Please sign in to comment.