In [1]:
pip install neo4j


[33mYou are using pip version 10.0.1, however version 20.2b1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
import os

from neo4j import GraphDatabase

## Connect to the DB

#### Note: if you don't pass a database name to `driver.session()`, the session uses the default database (`neo4j`)

In [3]:
# Bolt URI of the Neo4j server the driver connects to (a local instance here).
uri = "bolt://localhost:7687"

In [4]:
# Credentials come from the NEO4J_USER / NEO4J_PASSWORD environment variables so
# that real secrets never have to be written into the notebook; when they are
# unset the original demo login ("neo4j" / "neo") is used.
driver = GraphDatabase.driver(
    uri,
    auth=(
        os.environ.get("NEO4J_USER", "neo4j"),
        os.environ.get("NEO4J_PASSWORD", "neo"),
    ),
)

In [5]:
# Open a session bound to the "lsci" database; the cells below run all of their
# transactions through this one session object.
# NOTE(review): no session.close() / driver.close() is visible in this section — confirm
# the resources are released at the end of the notebook.
session = driver.session(database="lsci")

## Initialise n10s

In [6]:
def init_n10s_config(tx):
    """Initialise the n10s graph configuration and print the resulting settings.

    Calls ``n10s.graphconfig.init`` with ``handleVocabUris`` set to ``'IGNORE'``
    and prints the ``param`` / ``value`` pair of every record that comes back.

    Args:
        tx: transaction object exposing ``run(query)``.
    """
    cypher = "CALL n10s.graphconfig.init({ handleVocabUris : 'IGNORE'})"
    for setting in tx.run(cypher):
        print(setting["param"], setting["value"])

In [12]:
# Run the n10s configuration step in a write transaction; the settings it returns
# are printed below.
# NOTE(review): `write_transaction` is deprecated in the 5.x Python driver in favour of
# `execute_write` — confirm against the installed driver version.
session.write_transaction(init_n10s_config)

handleVocabUris IGNORE
handleMultival OVERWRITE
handleRDFTypes LABELS
keepLangTag False
multivalPropList None
keepCustomDataTypes False
customDataTypePropList None
applyNeo4jNaming False
classLabel Class
subClassOfRel SCO
dataTypePropertyLabel Property
objectPropertyLabel Relationship
subPropertyOfRel SPO
domainRel DOMAIN
rangeRel RANGE


## Define functions to run SPARQL queries on RDF databases 

SPARQL queries to be run against the Wikidata and MeSH SPARQL endpoints

In [7]:
# SPARQL CONSTRUCT query for the Wikidata endpoint.
# WHERE: every ?virus that reaches wd:Q808 through one or more wdt:P171 hops, together
# with its direct wdt:P171 parent and its English rdfs:label; optionally its wdt:P486
# code and wdt:P6366 id, each turned into a URI (id.nlm.nih.gov/mesh/ and
# ma-graph.org/entity/ respectively).
# CONSTRUCT: neo:Virus resources carrying neo:name, neo:HAS_PARENT,
# neo:RELATED_FIELD_OF_STUDY and neo:SAME_AS_MESH_DESCRIPTOR; the parent is typed
# neo:Virus as well.
# The wd:, wdt: and rdfs: prefixes are not declared in the query — presumably the
# Wikidata query service predefines them.
wikidata_virus_taxon = """PREFIX neo: <neo://voc#>
CONSTRUCT {
 ?virus a neo:Virus; neo:name ?virusName ;
          neo:HAS_PARENT ?parentVirus ;
          neo:RELATED_FIELD_OF_STUDY ?msAcademicUri ;
          neo:SAME_AS_MESH_DESCRIPTOR ?meshUri .
  ?parentVirus a neo:Virus .
  }
WHERE {
  ?virus wdt:P171+ wd:Q808	 ;
          wdt:P171 ?parentVirus;
          rdfs:label ?virusName ;
          filter(lang(?virusName) = "en") .
  
  optional { ?virus wdt:P486 ?meshCode . 
             bind(URI(concat("http://id.nlm.nih.gov/mesh/",?meshCode))  as ?meshUri) }
  optional { ?virus wdt:P6366 ?msAcademic .  
             bind(URI(concat("http://ma-graph.org/entity/",?msAcademic))  as ?msAcademicUri) }
        
} """

In [8]:
# SPARQL CONSTRUCT query for the MeSH endpoint.
# WHERE: mesh:D014780 itself plus every descriptor that reaches it through zero or
# more meshv:broaderDescriptor hops, with its rdfs:label and (optionally) its direct
# broader descriptor.
# CONSTRUCT: resources typed neo:MeshDescriptor and neo:Virus carrying neo:name and
# neo:HAS_BROADER_DESCRIPTOR.
mesh_virus_taxon ="""PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX meshv: <http://id.nlm.nih.gov/mesh/vocab#>
PREFIX mesh: <http://id.nlm.nih.gov/mesh/>
PREFIX neo: <neo://voc#>

CONSTRUCT {
?s a neo:MeshDescriptor, neo:Virus ; 
     neo:name ?name ;
     neo:HAS_BROADER_DESCRIPTOR ?parentDescriptor .
}
FROM <http://id.nlm.nih.gov/mesh>
WHERE { 
  {
    ?s meshv:broaderDescriptor* mesh:D014780 #viruses 
  }
  
  ?s rdfs:label ?name .
  
  optional {
    ?s meshv:broaderDescriptor ?parentDescriptor .
  }
  
} """

In [9]:
# SPARQL CONSTRUCT query for the Wikidata endpoint.
# WHERE: every ?dis matching wdt:P31/wdt:P279* wd:Q18123741 with an English rdfs:label.
# Optional parts: its wdt:P828 value (emitted as neo:CAUSED_BY), a wdt:P279 parent that
# itself matches the same class path (neo:HAS_PARENT), and three external ids rebuilt as
# URIs — wdt:P486 (MeSH), wdt:P6366 (ma-graph.org) and wdt:P699, whose ":" is replaced
# by "_" to form a purl.obolibrary.org/obo/ URI.
# CONSTRUCT: neo:InfectiousDisease resources with those properties and links.
# The wd:, wdt: and rdfs: prefixes are not declared in the query — presumably the
# Wikidata query service predefines them.
wikidata_disease_taxon = """PREFIX neo: <neo://voc#>
construct {
  ?dis a neo:InfectiousDisease ;
     neo:name ?disName ;
     neo:CAUSED_BY ?cause ;
     neo:HAS_PARENT ?parentDisease ;
     neo:RELATED_FIELD_OF_STUDY ?msAcademicUri ;
     neo:SAME_AS_MESH_DESCRIPTOR ?meshUri ;
     neo:SAME_AS_DISEASE_ONTO ?diseaseOntoUri .
}
where { 
  ?dis wdt:P31/wdt:P279* wd:Q18123741 ;
       rdfs:label ?disName . filter(lang(?disName) = "en")

  optional { ?dis wdt:P828 ?cause }
  optional { ?dis wdt:P279 ?parentDisease .
             ?parentDisease wdt:P31/wdt:P279* wd:Q18123741 }
  optional { ?dis wdt:P486 ?meshCode . bind(URI(concat("http://id.nlm.nih.gov/mesh/",?meshCode))  as ?meshUri) }
  optional { ?dis wdt:P6366 ?msAcademic .  bind(URI(concat("http://ma-graph.org/entity/",?msAcademic))  as ?msAcademicUri) }
  optional { ?dis wdt:P699 ?diseaseOntoId .  bind(URI(concat("http://purl.obolibrary.org/obo/",REPLACE(?diseaseOntoId, ":", "_")))  as ?diseaseOntoUri) }
} """

In [10]:
def run_wikidata_query(tx, sparql):
    """Import the result of a SPARQL CONSTRUCT query on Wikidata through n10s.

    Calls ``n10s.rdf.import.fetch`` on the Wikidata query endpoint, appending the
    URL-encoded query text (bound to the ``$sparql`` Cypher parameter) and asking
    for N-Triples through an ``Accept: text/plain`` header. The import summary of
    every returned record is printed.

    Args:
        tx: transaction object exposing ``run(query, **params)``.
        sparql: text of the SPARQL query to send to Wikidata.
    """
    cypher = (
        " CALL n10s.rdf.import.fetch( "
        "'https://query.wikidata.org/sparql?query=' + apoc.text.urlencode($sparql), "
        " 'N-Triples',{ headerParams: { Accept: 'text/plain'}}) "
        " YIELD terminationStatus, triplesLoaded, triplesParsed, namespaces, extraInfo "
        " RETURN terminationStatus, triplesLoaded, triplesParsed, extraInfo"
    )
    for summary in tx.run(cypher, sparql=sparql):
        print(
            "result: ", summary["terminationStatus"],
            ", loadedTriples: ", summary["triplesLoaded"],
            ", parsedTriples: ", summary["triplesParsed"],
            ", extra info: ", summary["extraInfo"],
        )

In [11]:
def run_mesh_query(tx, sparql):
    """Import the result of a SPARQL CONSTRUCT query on MeSH through n10s.

    Calls ``n10s.rdf.import.fetch`` on the MeSH SPARQL endpoint (requested with
    ``format=TURTLE``), appending the URL-encoded query text bound to the
    ``$sparql`` Cypher parameter, and parses the response as Turtle. The import
    summary of every returned record is printed.

    Args:
        tx: transaction object exposing ``run(query, **params)``.
        sparql: text of the SPARQL query to send to MeSH.
    """
    cypher = (
        " CALL n10s.rdf.import.fetch( "
        "'https://id.nlm.nih.gov/mesh/sparql?format=TURTLE&query=' + apoc.text.urlencode($sparql), "
        " 'Turtle') "
        " YIELD terminationStatus, triplesLoaded, triplesParsed, namespaces, extraInfo "
        " RETURN terminationStatus, triplesLoaded, triplesParsed, extraInfo"
    )
    for summary in tx.run(cypher, sparql=sparql):
        print(
            "result: ", summary["terminationStatus"],
            ", loadedTriples: ", summary["triplesLoaded"],
            ", parsedTriples: ", summary["triplesParsed"],
            ", extra info: ", summary["extraInfo"],
        )

## Run Imports from SPARQL Construct queries 

In [13]:
# Import the triples produced by the `wikidata_virus_taxon` CONSTRUCT query.
session.write_transaction(run_wikidata_query, wikidata_virus_taxon)

result:  OK , loadedTriples:  35380 , parsedTriples:  35380 , extra info:  


In [14]:
# Import the triples produced by the `mesh_virus_taxon` CONSTRUCT query.
session.write_transaction(run_mesh_query, mesh_virus_taxon)

result:  OK , loadedTriples:  2308 , parsedTriples:  2308 , extra info:  


In [15]:
# Import the triples produced by the `wikidata_disease_taxon` CONSTRUCT query.
session.write_transaction(run_wikidata_query, wikidata_disease_taxon)

result:  OK , loadedTriples:  2972 , parsedTriples:  2972 , extra info:  


In [16]:
# MeSH descriptor ids used as roots for the disease import: each one is substituted
# into the `{id}` placeholder of `mesh_disease_taxon`, one query per id.
# Presumably the top-level MeSH disease categories (going by the variable name) — verify.
mesh_disease_top_level = ['D007239','D009369','D009140','D004066','D009057','D012140','D010038','D009422',
                          'D005128','D052801','D005261','D002318','D006425','D009358','D017437','D009750',
                          'D004700','D007154','D007280','D000820','D013568','D009784','D064419','D014947']

In [17]:
# SPARQL CONSTRUCT *template* for the MeSH endpoint, meant to be filled in with
# str.format: `{id}` is the only placeholder (the root descriptor id); every doubled
# brace is an escaped literal brace of the SPARQL text.
# WHERE: the root descriptor plus everything that reaches it through zero or more
# meshv:broaderDescriptor hops. rdfs:label and meshv:dateEstablished are both
# required, so descriptors lacking either are not returned; the direct broader
# descriptor is optional.
# CONSTRUCT: resources typed neo:MeshDescriptor and neo:Disease.
mesh_disease_taxon = """PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX meshv: <http://id.nlm.nih.gov/mesh/vocab#>
PREFIX mesh: <http://id.nlm.nih.gov/mesh/>
PREFIX neo: <neo://voc#>

CONSTRUCT {{
?s a neo:MeshDescriptor, neo:Disease; 
     neo:name ?name ;
     neo:HAS_BROADER_DESCRIPTOR ?parentDescriptor;
     meshv:dateEstablished ?date.
}}
FROM <http://id.nlm.nih.gov/mesh>
WHERE {{ 
  {{
    ?s meshv:broaderDescriptor* mesh:{id} 
  }}
  
  ?s rdfs:label ?name ; 
     meshv:dateEstablished ?date .
  
  optional {{
    ?s meshv:broaderDescriptor ?parentDescriptor .
  }}
  
}} """

Run multiple SPARQL queries (one per root element id)

In [18]:
# One import per root descriptor id: fill the template's `{id}` placeholder and run
# the resulting SPARQL query against MeSH, each in its own write transaction.
for did in mesh_disease_top_level:
    print("disease id: ", did)
    session.write_transaction(run_mesh_query, mesh_disease_taxon.format( id= did))

disease id:  D007239
result:  OK , loadedTriples:  4546 , parsedTriples:  4546 , extra info:  
disease id:  D009369
result:  OK , loadedTriples:  3947 , parsedTriples:  3947 , extra info:  
disease id:  D009140
result:  OK , loadedTriples:  2261 , parsedTriples:  2261 , extra info:  
disease id:  D004066
result:  OK , loadedTriples:  1844 , parsedTriples:  1844 , extra info:  
disease id:  D009057
result:  OK , loadedTriples:  1263 , parsedTriples:  1263 , extra info:  
disease id:  D012140
result:  OK , loadedTriples:  1434 , parsedTriples:  1434 , extra info:  
disease id:  D010038
result:  OK , loadedTriples:  760 , parsedTriples:  760 , extra info:  
disease id:  D009422
result:  OK , loadedTriples:  5725 , parsedTriples:  5725 , extra info:  
disease id:  D005128
result:  OK , loadedTriples:  1565 , parsedTriples:  1565 , extra info:  
disease id:  D052801
result:  OK , loadedTriples:  1532 , parsedTriples:  1532 , extra info:  
disease id:  D005261
result:  OK , loadedTriples:  2

Run several SPARQL queries, this time driven by the URIs of the `InfectiousDisease` nodes

In [20]:
def run_wikidata_foreach_disease(tx):
    """Import, for every ``InfectiousDisease`` node, related compounds from Wikidata.

    A single Cypher statement matches each ``:InfectiousDisease`` node, splices
    the node's ``uri`` into a SPARQL CONSTRUCT query (string concatenation done
    inside Cypher) and passes that query, URL-encoded, to
    ``n10s.rdf.import.fetch`` on the Wikidata endpoint, requesting N-Triples.

    The SPARQL query binds the disease as ``?id``, follows ``wdt:P2176`` to
    items that are ``wdt:P31 wd:Q11173`` and have an English label, and emits
    them as ``neo:ChemicalCompound`` with ``neo:USED_FOR_DISEASE``; MeSH codes
    (``wdt:P486``) and ``wdt:P3780`` products (``neo:PharmaProduct``) are
    optional. One summary line is printed per returned record.

    Args:
        tx: transaction object exposing ``run(query)``.
    """
    cypher = """MATCH (id:InfectiousDisease) 
                        WITH id.uri as uri
                        WITH uri, '

                                PREFIX neo: <neo://voc#>
                                construct {
                                  ?chemCompound a neo:ChemicalCompound ;
                                      neo:USED_FOR_DISEASE ?id;
                                      neo:ACTIVE_INGREDIENT_IN ?pharmaProduct;
                                      neo:SAME_AS_MESH_DESCRIPTOR ?meshUri ;            
                                      neo:name ?chemCompoundName . 

                                  ?pharmaProduct a neo:PharmaProduct ;
                                     neo:name ?pharmaProductName . 
                                }
                                where { 
                                  bind(<' + uri + '> as ?id)
                                  ?id wdt:P2176 ?chemCompound .
                                  ?chemCompound wdt:P31 wd:Q11173 ;
                                       rdfs:label ?chemCompoundName . 
                                       filter(lang(?chemCompoundName) = "en")

                                  optional { ?chemCompound wdt:P486 ?meshCode . 
                                        bind(URI(concat("http://id.nlm.nih.gov/mesh/",?meshCode))  as ?meshUri) }    

                                  optional { ?chemCompound wdt:P3780 ?pharmaProduct.
                                             ?pharmaProduct rdfs:label ?pharmaProductName . 
                                            filter(lang(?pharmaProductName) = "en")
                                           }
                                }
                        '
                        AS query
                        CALL n10s.rdf.import.fetch(
                          "https://query.wikidata.org/sparql?query=" + apoc.text.urlencode(query),
                          "N-Triples",
                          { headerParams: { Accept: "text/plain"}})
                        YIELD terminationStatus, triplesLoaded, triplesParsed, namespaces, extraInfo
                        RETURN uri, terminationStatus, triplesLoaded, triplesParsed, namespaces, extraInfo"""
    for summary in tx.run(cypher):
        print(
            "uri", summary["uri"], "result: ", summary["terminationStatus"],
            ", loadedTriples: ", summary["triplesLoaded"],
            ", parsedTriples: ", summary["triplesParsed"],
            ", extra info: ", summary["extraInfo"],
        )

In [21]:
# Fetch from Wikidata once per InfectiousDisease node, all inside one write transaction.
session.write_transaction(run_wikidata_foreach_disease)
# This takes a couple of minutes; leave it running.

uri http://www.wikidata.org/entity/Q4143044 result:  OK , loadedTriples:  0 , parsedTriples:  0 , extra info:  
uri http://www.wikidata.org/entity/Q55790471 result:  OK , loadedTriples:  0 , parsedTriples:  0 , extra info:  
uri http://www.wikidata.org/entity/Q220188 result:  OK , loadedTriples:  0 , parsedTriples:  0 , extra info:  
uri http://www.wikidata.org/entity/Q1077505 result:  OK , loadedTriples:  37 , parsedTriples:  37 , extra info:  
uri http://www.wikidata.org/entity/Q749342 result:  OK , loadedTriples:  0 , parsedTriples:  0 , extra info:  
uri http://www.wikidata.org/entity/Q55790457 result:  OK , loadedTriples:  0 , parsedTriples:  0 , extra info:  
uri http://www.wikidata.org/entity/Q480700 result:  OK , loadedTriples:  39 , parsedTriples:  39 , extra info:  
uri http://www.wikidata.org/entity/Q18554739 result:  OK , loadedTriples:  0 , parsedTriples:  0 , extra info:  
uri http://www.wikidata.org/entity/Q55790462 result:  OK , loadedTriples:  0 , parsedTriples:  0 , e

uri http://www.wikidata.org/entity/Q2544887 result:  OK , loadedTriples:  0 , parsedTriples:  0 , extra info:  
uri http://www.wikidata.org/entity/Q4226389 result:  OK , loadedTriples:  0 , parsedTriples:  0 , extra info:  
uri http://www.wikidata.org/entity/Q4417986 result:  OK , loadedTriples:  0 , parsedTriples:  0 , extra info:  
uri http://www.wikidata.org/entity/Q2603200 result:  OK , loadedTriples:  0 , parsedTriples:  0 , extra info:  
uri http://www.wikidata.org/entity/Q18554637 result:  OK , loadedTriples:  0 , parsedTriples:  0 , extra info:  
uri http://www.wikidata.org/entity/Q5062121 result:  OK , loadedTriples:  0 , parsedTriples:  0 , extra info:  
uri http://www.wikidata.org/entity/Q3540280 result:  OK , loadedTriples:  0 , parsedTriples:  0 , extra info:  
uri http://www.wikidata.org/entity/Q618032 result:  OK , loadedTriples:  0 , parsedTriples:  0 , extra info:  
uri http://www.wikidata.org/entity/Q55788209 result:  OK , loadedTriples:  0 , parsedTriples:  0 , extra

uri http://www.wikidata.org/entity/Q12195 result:  OK , loadedTriples:  0 , parsedTriples:  0 , extra info:  
uri http://www.wikidata.org/entity/Q2589418 result:  OK , loadedTriples:  0 , parsedTriples:  0 , extra info:  
uri http://www.wikidata.org/entity/Q55790882 result:  OK , loadedTriples:  0 , parsedTriples:  0 , extra info:  
uri http://www.wikidata.org/entity/Q2458539 result:  OK , loadedTriples:  144 , parsedTriples:  144 , extra info:  
uri http://www.wikidata.org/entity/Q11549084 result:  OK , loadedTriples:  0 , parsedTriples:  0 , extra info:  
uri http://www.wikidata.org/entity/Q1897162 result:  OK , loadedTriples:  0 , parsedTriples:  0 , extra info:  
uri http://www.wikidata.org/entity/Q797668 result:  OK , loadedTriples:  49 , parsedTriples:  49 , extra info:  
uri http://www.wikidata.org/entity/Q55787063 result:  OK , loadedTriples:  0 , parsedTriples:  0 , extra info:  
uri http://www.wikidata.org/entity/Q2012642 result:  OK , loadedTriples:  0 , parsedTriples:  0 , 

uri http://www.wikidata.org/entity/Q55783814 result:  OK , loadedTriples:  0 , parsedTriples:  0 , extra info:  
uri http://www.wikidata.org/entity/Q153356 result:  OK , loadedTriples:  98 , parsedTriples:  98 , extra info:  
uri http://www.wikidata.org/entity/Q18975460 result:  OK , loadedTriples:  0 , parsedTriples:  0 , extra info:  
uri http://www.wikidata.org/entity/Q55789100 result:  OK , loadedTriples:  0 , parsedTriples:  0 , extra info:  
uri http://www.wikidata.org/entity/Q1348374 result:  OK , loadedTriples:  0 , parsedTriples:  0 , extra info:  
uri http://www.wikidata.org/entity/Q5656856 result:  OK , loadedTriples:  0 , parsedTriples:  0 , extra info:  
uri http://www.wikidata.org/entity/Q1048084 result:  OK , loadedTriples:  7 , parsedTriples:  7 , extra info:  
uri http://www.wikidata.org/entity/Q1346131 result:  OK , loadedTriples:  0 , parsedTriples:  0 , extra info:  
uri http://www.wikidata.org/entity/Q1992236 result:  OK , loadedTriples:  14 , parsedTriples:  14 , 

In [22]:
# MeSH descriptor ids used as roots for the chemicals import: each one is substituted
# into the `{id}` placeholder of `mesh_chem_taxon`, one query per id.
# Presumably top-level MeSH chemical/drug categories (going by the variable name) — verify.
mesh_chem_top_level = ['D009930','D006571','D011083','D046911','D006730','D045762','D002241',
                       'D008055','D000602','D009706','D045424','D001685','D001697','D004364','D020164']

In [23]:
# SPARQL CONSTRUCT *template* for the MeSH endpoint, meant to be filled in with
# str.format: `{id}` is the only placeholder (the root descriptor id); every doubled
# brace is an escaped literal brace of the SPARQL text.
# WHERE: the root descriptor plus everything that reaches it through zero or more
# meshv:broaderDescriptor hops. rdfs:label and meshv:dateEstablished are required;
# the direct broader descriptor and meshv:pharmacologicalAction are optional.
# CONSTRUCT: resources typed neo:MeshDescriptor and neo:ChemicalCompound. The template
# used to type them neo:Disease (copied from the disease template), which mixed the
# chemical descriptors in with the diseases; neo:ChemicalCompound is the label the
# Wikidata chemical-compound import in this notebook already uses.
mesh_chem_taxon = """PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX meshv: <http://id.nlm.nih.gov/mesh/vocab#>
PREFIX mesh: <http://id.nlm.nih.gov/mesh/>
PREFIX neo: <neo://voc#>

CONSTRUCT {{
?s a neo:MeshDescriptor, neo:ChemicalCompound; 
     neo:name ?name ;
     neo:HAS_BROADER_DESCRIPTOR ?parentDescriptor;
     meshv:PHARMACOLOGICAL_ACTION ?pharmAction ;
     meshv:dateEstablished ?date.
}}
FROM <http://id.nlm.nih.gov/mesh>
WHERE {{ 
  {{
    ?s meshv:broaderDescriptor* mesh:{id} 
  }}
  
  ?s rdfs:label ?name ; 
     meshv:dateEstablished ?date .
  
  optional {{
    ?s meshv:broaderDescriptor ?parentDescriptor .
  }}
  
  optional {{
    ?s meshv:pharmacologicalAction ?pharmAction .
  }}
  
}} """

In [24]:
# One import per root descriptor id: fill the template's `{id}` placeholder and run
# the resulting SPARQL query against MeSH, each in its own write transaction.
# NOTE(review): two roots in the recorded output report exactly 10001 loaded triples, which
# looks like a result-size cap on the endpoint — confirm those imports are complete.
for cid in mesh_chem_top_level:
    print("chem id: ", cid)
    session.write_transaction(run_mesh_query, mesh_chem_taxon.format( id= cid))

chem id:  D009930
result:  OK , loadedTriples:  10001 , parsedTriples:  10001 , extra info:  
chem id:  D006571
result:  OK , loadedTriples:  9973 , parsedTriples:  9973 , extra info:  
chem id:  D011083
result:  OK , loadedTriples:  4577 , parsedTriples:  4577 , extra info:  
chem id:  D046911
result:  OK , loadedTriples:  2177 , parsedTriples:  2177 , extra info:  
chem id:  D006730
result:  OK , loadedTriples:  1668 , parsedTriples:  1668 , extra info:  
chem id:  D045762
result:  OK , loadedTriples:  9855 , parsedTriples:  9855 , extra info:  
chem id:  D002241
result:  OK , loadedTriples:  5828 , parsedTriples:  5828 , extra info:  
chem id:  D008055
result:  OK , loadedTriples:  2447 , parsedTriples:  2447 , extra info:  
chem id:  D000602
result:  OK , loadedTriples:  10001 , parsedTriples:  10001 , extra info:  
chem id:  D009706
result:  OK , loadedTriples:  1935 , parsedTriples:  1935 , extra info:  
chem id:  D045424
result:  OK , loadedTriples:  1553 , parsedTriples:  1553 

## Import Ontology

In [25]:
def run_import_disease_onto(tx):
    """Load the Disease Ontology (DOID) and cross-reference its classes to MeSH.

    Runs three statements on ``tx`` in order and prints what each one returns:

    1. ``n10s.onto.import.fetch`` on the DOID OWL file, parsed as RDF/XML.
    2. Adds the ``DO_Disease`` label to every ``:Class`` node in the graph.
    3. Streams the same file with ``n10s.rdf.stream.fetch``, keeps the
       ``oboInOwl#hasDbXref`` triples whose object starts with ``MESH:`` and,
       where both ``:Resource`` nodes already exist, merges a
       ``SAME_AS_MESH_DESCRIPTOR`` relationship from the ontology resource to
       the MeSH resource (the 5-character ``MESH:`` prefix is dropped to
       build the MeSH URI).

    Args:
        tx: transaction object exposing ``run(query, **params)``.
    """
    doid_owl = "http://purl.obolibrary.org/obo/doid.owl"

    # Step 1: classes, properties, relationships and hierarchies.
    import_cypher = "CALL n10s.onto.import.fetch($ontourl,'RDF/XML')"
    for summary in tx.run(import_cypher, ontourl=doid_owl):
        print(
            "result: ", summary["terminationStatus"],
            ", loadedTriples: ", summary["triplesLoaded"],
            ", parsedTriples: ", summary["triplesParsed"],
            ", extra info: ", summary["extraInfo"],
        )

    # Step 2: extra label on the class nodes.
    label_cypher = "MATCH (c:Class) SET c:DO_Disease RETURN count(c) as classCount"
    for row in tx.run(label_cypher):
        print("labels added: ", row["classCount"])

    # Step 3: cross-references to MeSH.
    xref_cypher = """CALL n10s.rdf.stream.fetch($ontourl,'RDF/XML', { limit : 999999}) YIELD subject, predicate, object 
              WHERE predicate = 'http://www.geneontology.org/formats/oboInOwl#hasDbXref' 
                AND object STARTS WITH 'MESH:'
              MATCH (doe:Resource { uri: subject}),
                    (mesh:Resource { uri: 'http://id.nlm.nih.gov/mesh/' + substring(object,5)})
                MERGE (doe)-[:SAME_AS_MESH_DESCRIPTOR]->(mesh)
                RETURN count(doe) as xrefItems """
    for row in tx.run(xref_cypher, ontourl=doid_owl):
        print("#crossreferenced items: ", row["xrefItems"])

In [26]:
# Import the Disease Ontology, label its classes and add the MeSH cross-references,
# all within one write transaction.
session.write_transaction(run_import_disease_onto)

result:  OK , loadedTriples:  39697 , parsedTriples:  222730 , extra info:  
labels added:  12593
#crossreferenced items:  2803
