In [1]:
import os

from kgx import Transformer, NeoTransformer



In [3]:
# Monarch-Lite

# Credentials can be found from 'Registry of Biolink-compatible Neo4 instances' spreadsheet.
# They are read from environment variables so that real secrets never need to be
# typed into (and saved with) the notebook. Export MONARCH_NEO4J_URI,
# MONARCH_NEO4J_USERNAME and MONARCH_NEO4J_PASSWORD before starting the kernel;
# an unset variable falls back to '' (the placeholder value this cell used before).
monarch_uri = os.environ.get('MONARCH_NEO4J_URI', '')
monarch_username = os.environ.get('MONARCH_NEO4J_USERNAME', '')
monarch_password = os.environ.get('MONARCH_NEO4J_PASSWORD', '')

# Initialize NeoTransformer
monarch_lite_transformer = NeoTransformer(uri=monarch_uri, username=monarch_username, password=monarch_password)

# Define filters: only gene -[contributes_to]-> disease associations are requested
monarch_lite_transformer.set_filter("subject_category", "gene")
monarch_lite_transformer.set_filter("object_category", "disease")
monarch_lite_transformer.set_filter("edge_label", "contributes_to")

# Load nodes and edges from remote Monarch-Lite Neo4j instance.
# The recorded log shows the query being paged with SKIP/LIMIT in steps of 1000.
# NOTE(review): start/end presumably bound the range of records fetched — confirm
# against the kgx NeoTransformer.load documentation.
start = 0
end = 20000
monarch_lite_transformer.load(start=start, end=end)
monarch_lite_transformer.report()

2018-06-08 18:41:48,943 root       DEBUG      MATCH (n:gene:disease ) RETURN n SKIP 0 LIMIT 1000;
2018-06-08 18:41:49,036 root       DEBUG      MATCH (s:gene)-[p:contributes_to ]->(o:disease)
            RETURN s,p,o
            SKIP 0 LIMIT 1000;
2018-06-08 18:41:49,629 root       DEBUG      MATCH (s:gene)-[p:contributes_to ]->(o:disease)
            RETURN s,p,o
            SKIP 1000 LIMIT 1000;
2018-06-08 18:41:50,189 root       DEBUG      MATCH (s:gene)-[p:contributes_to ]->(o:disease)
            RETURN s,p,o
            SKIP 2000 LIMIT 1000;
2018-06-08 18:41:50,742 root       DEBUG      MATCH (s:gene)-[p:contributes_to ]->(o:disease)
            RETURN s,p,o
            SKIP 3000 LIMIT 1000;
2018-06-08 18:41:51,339 root       DEBUG      MATCH (s:gene)-[p:contributes_to ]->(o:disease)
            RETURN s,p,o
            SKIP 4000 LIMIT 1000;
2018-06-08 18:41:52,163 root       DEBUG      MATCH (s:gene)-[p:contributes_to ]->(o:disease)
            RETURN s,p,o
            SKIP 5000

In [None]:
# SemMedDB

# Credentials can be found from 'Registry of Biolink-compatible Neo4 instances' spreadsheet.
# They are read from environment variables so that real secrets never need to be
# typed into (and saved with) the notebook. Export SEMMEDDB_NEO4J_URI,
# SEMMEDDB_NEO4J_USERNAME and SEMMEDDB_NEO4J_PASSWORD before starting the kernel;
# an unset variable falls back to '' (the placeholder value this cell used before).
semmeddb_uri = os.environ.get('SEMMEDDB_NEO4J_URI', '')
semmeddb_username = os.environ.get('SEMMEDDB_NEO4J_USERNAME', '')
semmeddb_password = os.environ.get('SEMMEDDB_NEO4J_PASSWORD', '')

# Initialize NeoTransformer
semmeddb_transformer = NeoTransformer(uri=semmeddb_uri, username=semmeddb_username, password=semmeddb_password)

# Define filters: only gene -[coexists_with]-> chemical_substance associations are requested
semmeddb_transformer.set_filter("subject_category", "gene")
semmeddb_transformer.set_filter("object_category", "chemical_substance")
semmeddb_transformer.set_filter("edge_label", "coexists_with")

# Load nodes and edges from remote SemMedDB Neo4j instance.
# The recorded log shows the query being paged with SKIP/LIMIT in steps of 1000.
# NOTE(review): start/end presumably bound the range of records fetched — confirm
# against the kgx NeoTransformer.load documentation.
start = 0
end = 20000
semmeddb_transformer.load(start=start, end=end)
semmeddb_transformer.report()

In [4]:
# First, we perform a remap operation to bring SemMedDB gene nodes into the HGNC
# namespace, so that gene nodes can line up with the HGNC-identified gene nodes
# when the two graphs are merged.
# NOTE(review): the arguments look like (node category, node property holding
# alternative identifiers, identifier prefix to select) — confirm against the
# kgx remap_node_identifier signature.
# NOTE(review): semmeddb_transformer must already be loaded when this runs; the
# cell that loads it has no execution count in the saved notebook, so verify
# that Restart & Run All reproduces the recorded output.
semmeddb_transformer.remap_node_identifier("gene", "xrefs", "HGNC:")

2018-06-08 18:42:20,559 root       DEBUG      MATCH (n:gene:chemical_substance ) RETURN n SKIP 0 LIMIT 1000;
2018-06-08 18:42:20,710 root       DEBUG      MATCH (s:gene)-[p:coexists_with ]->(o:chemical_substance)
            RETURN s,p,o
            SKIP 0 LIMIT 1000;
2018-06-08 18:42:21,426 root       DEBUG      MATCH (s:gene)-[p:coexists_with ]->(o:chemical_substance)
            RETURN s,p,o
            SKIP 1000 LIMIT 1000;
2018-06-08 18:42:22,070 root       DEBUG      MATCH (s:gene)-[p:coexists_with ]->(o:chemical_substance)
            RETURN s,p,o
            SKIP 2000 LIMIT 1000;
2018-06-08 18:42:22,796 root       DEBUG      MATCH (s:gene)-[p:coexists_with ]->(o:chemical_substance)
            RETURN s,p,o
            SKIP 3000 LIMIT 1000;
2018-06-08 18:42:23,322 root       DEBUG      MATCH (s:gene)-[p:coexists_with ]->(o:chemical_substance)
            RETURN s,p,o
            SKIP 4000 LIMIT 1000;
Total number of nodes: 2375
Total number of edges: 3162


In [5]:
# Next, combine the Monarch-Lite graph and the SemMedDB graph into one graph
# owned by a fresh, empty Transformer.
merged_transformer = Transformer()
graphs = [
    monarch_lite_transformer.graph,
    semmeddb_transformer.graph,
]
merged_transformer.merge(graphs)

In [6]:
# Pick a gene that is present in both source graphs: RAD51 (HGNC:9817).
# Each entry yielded by nodes(data=True) is a (node key, attribute dict) pair;
# keep the pairs whose 'id' attribute matches and take the first one.
rad51_matches = [
    (node_key, node_attrs)
    for node_key, node_attrs in merged_transformer.graph.nodes(data=True)
    if node_attrs['id'] == 'HGNC:9817'
]
rad51_gene_node = rad51_matches[0]
rad51_gene_node

('HGNC:9817',
 {'name': 'RAD51 gene',
  'xrefs': ['NCI:C104711', 'OMIM:179617', 'MTH:NOCODE', 'HGNC:HGNC:9817'],
  'id': 'HGNC:9817',
  'umls_type': ['Gene or Genome'],
  'category': 'gene',
  'labels': ['gene']})

In [7]:
# Collect every edge that leaves the RAD51 gene node, together with its edge data.
rad51_node_key = rad51_gene_node[0]
out_edges = merged_transformer.graph.out_edges(rad51_node_key, data=True)

# In the merged graph this node has 5 edges going to disease nodes
# (from Monarch-Lite) and 7 edges going to chemical_substance nodes.
out_edges

[('HGNC:9817',
  'MONDO:0019391',
  {'is_defined_by': 'Monarch',
   'provided_by': ['https://data.monarchinitiative.org/ttl/orphanet.ttl'],
   'evidence': ['http://purl.obolibrary.org/obo/ECO_0000322'],
   'qualifier': 'direct',
   'publications': [],
   'relation': 'RO:0002326',
   'subject': 'HGNC:9817',
   'object': 'MONDO:0019391',
   'id': 3966452,
   'type': 'contributes_to',
   'predicate': 'contributes_to'}),
 ('HGNC:9817',
  'MONDO:0013790',
  {'is_defined_by': 'Monarch',
   'provided_by': ['https://data.monarchinitiative.org/ttl/omim.ttl',
    'https://data.monarchinitiative.org/ttl/clinvar.ttl'],
   'evidence': ['http://purl.obolibrary.org/obo/ECO_0000220'],
   'qualifier': 'direct',
   'publications': ['http://www.ncbi.nlm.nih.gov/pubmed/22305526',
    'http://www.ncbi.nlm.nih.gov/pubmed/21242494',
    'http://www.ncbi.nlm.nih.gov/pubmed/27830107',
    'http://www.ncbi.nlm.nih.gov/pubmed/25813273'],
   'relation': 'RO:0002326',
   'subject': 'HGNC:9817',
   'object': 'MONDO