Install dependencies

In [1]:
!pip install neo4j
!pip install requests

Collecting neo4j
  Downloading neo4j-4.4.1.tar.gz (89 kB)
[?25l[K     |███▊                            | 10 kB 18.1 MB/s eta 0:00:01[K     |███████▍                        | 20 kB 8.9 MB/s eta 0:00:01[K     |███████████                     | 30 kB 5.6 MB/s eta 0:00:01[K     |██████████████▊                 | 40 kB 5.5 MB/s eta 0:00:01[K     |██████████████████▍             | 51 kB 5.3 MB/s eta 0:00:01[K     |██████████████████████          | 61 kB 6.1 MB/s eta 0:00:01[K     |█████████████████████████▊      | 71 kB 6.6 MB/s eta 0:00:01[K     |█████████████████████████████▍  | 81 kB 6.8 MB/s eta 0:00:01[K     |████████████████████████████████| 89 kB 2.5 MB/s 
Building wheels for collected packages: neo4j
  Building wheel for neo4j (setup.py) ... [?25l[?25hdone
  Created wheel for neo4j: filename=neo4j-4.4.1-py3-none-any.whl size=114783 sha256=38762c2c75b16cbe1d56b6dbcfd5b41b51240fc5eb78d2786260e05e16e91986
  Stored in directory: /root/.cache/pip/wheels/fd/15/02/8379f

First create the FHIR data types; everything that follows depends on them. Note that this cell first deletes all existing nodes and relationships in the database.


In [51]:
import json
import os
import re
from getpass import getpass

import requests
from neo4j import GraphDatabase

def clear(tx):
    """Empty the graph: remove every node along with its relationships.

    Args:
        tx: object exposing ``run(query)``; it is passed in by
            ``session.write_transaction`` when used as a transaction function.
    """
    wipe_all = "MATCH (n) DETACH DELETE n"
    tx.run(wipe_all)

# Connection settings are read from the environment so that no credentials are
# stored in the notebook. If NEO4J_PASSWORD is not set the password is asked
# for interactively.
NEO4J_URI = os.environ.get("NEO4J_URI", "neo4j://3.83.17.192:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: ")

driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

# Download the node and relationship definitions. raise_for_status() makes a
# failed download stop here instead of surfacing as a JSON decoding error.
response = requests.get('https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/fhir_data_type_nodes.json', timeout=30)
response.raise_for_status()
the_nodes = response.json()
response = requests.get('https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/fhir_data_type_relationships.json', timeout=30)
response.raise_for_status()
the_relationships = response.json()

# Labels and relationship types cannot be passed as Cypher parameters, so they
# are interpolated into the query text. Because they come from a downloaded
# file, only plain identifiers are accepted.
identifier = re.compile(r"[A-Za-z_][A-Za-z0-9_]*")

with driver.session() as session:
    # Start from an empty database.
    session.write_transaction(clear)

    # One batch per label: every entry in the list becomes a node whose
    # properties are set from the entry.
    for key, value in the_nodes.items():
        if not identifier.fullmatch(key):
            raise ValueError("Unsafe node label: %r" % (key,))
        query = "UNWIND $nodes as data CREATE (n:%s) SET n = data;" % (key)
        print(query)
        # consume() waits for the statement to finish so errors surface here.
        session.run(query, nodes=value).consume()

    # One batch per relationship type: the end points are looked up by uri.
    for key, value in the_relationships.items():
        if not identifier.fullmatch(key):
            raise ValueError("Unsafe relationship type: %r" % (key,))
        query = "UNWIND $rels as data MATCH (n {uri: data.from}) MATCH (m {uri: data.to}) CREATE (n)-[:%s]->(m)" % (key)
        print(query)
        session.run(query, rels=value).consume()

# The driver is deliberately left open: the following cells open further
# sessions on it.

UNWIND $nodes as data CREATE (n:FHIR) SET n = data;
UNWIND $nodes as data CREATE (n:WEB_SOURCE) SET n = data;
UNWIND $nodes as data CREATE (n:FHIR_DATA_TYPE) SET n = data;
UNWIND $nodes as data CREATE (n:FHIR_DATA_TYPE_PROPERTY) SET n = data;
UNWIND $rels as data MATCH (n {uri: data.from}) MATCH (m {uri: data.to}) CREATE (n)-[:FROM_SOURCE]->(m)
UNWIND $rels as data MATCH (n {uri: data.from}) MATCH (m {uri: data.to}) CREATE (n)-[:HAS_DATA_TYPE]->(m)
UNWIND $rels as data MATCH (n {uri: data.from}) MATCH (m {uri: data.to}) CREATE (n)-[:HAS_DATA_TYPE_PROPERTY]->(m)


Now check that the data loaded correctly: the FHIR version should be 4.0.1.

In [3]:
# Read back the version property of every FHIR node and print it.
with driver.session() as session:
  query = """MATCH (n:FHIR) Return n.version as version"""
  for record in session.run(query):
    print("Version: ", record["version"])

# The driver is deliberately left open: the following cells open further
# sessions on it.

Version:  4.0.1


In [52]:
# Download the canonical model node and relationship definitions.
# raise_for_status() makes a failed download stop here instead of surfacing as
# a JSON decoding error.
response = requests.get('https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/canonical_model_nodes.json', timeout=30)
response.raise_for_status()
the_nodes = response.json()
response = requests.get('https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/canonical_model_relationships.json', timeout=30)
response.raise_for_status()
the_relationships = response.json()

# Labels and relationship types cannot be passed as Cypher parameters, so they
# are interpolated into the query text. Because they come from a downloaded
# file, only plain identifiers are accepted.
identifier = re.compile(r"[A-Za-z_][A-Za-z0-9_]*")

with driver.session() as session:
    # One batch per label: every entry in the list becomes a node whose
    # properties are set from the entry.
    for key, value in the_nodes.items():
        if not identifier.fullmatch(key):
            raise ValueError("Unsafe node label: %r" % (key,))
        query = "UNWIND $nodes as data CREATE (n:%s) SET n = data;" % (key)
        print(query)
        # consume() waits for the statement to finish so errors surface here.
        session.run(query, nodes=value).consume()

    # One batch per relationship type: the end points are looked up by uri.
    for key, value in the_relationships.items():
        if not identifier.fullmatch(key):
            raise ValueError("Unsafe relationship type: %r" % (key,))
        query = "UNWIND $rels as data MATCH (n {uri: data.from}) MATCH (m {uri: data.to}) CREATE (n)-[:%s]->(m)" % (key)
        print(query)
        session.run(query, rels=value).consume()

# The driver is deliberately left open: the following cells open further
# sessions on it.

UNWIND $nodes as data CREATE (n:CANONICAL_NODE) SET n = data;
UNWIND $rels as data MATCH (n {uri: data.from}) MATCH (m {uri: data.to}) CREATE (n)-[:CONSISTS_OF]->(m)
UNWIND $rels as data MATCH (n {uri: data.from}) MATCH (m {uri: data.to}) CREATE (n)-[:IS_A]->(m)


Check a few relationships exist in the canonical model. Should get three results.

* THERAPEUTIC INTERVENTION
* OBSERVATION
* ADVERSE EVENT



In [5]:
# List the names of all nodes that have a relationship pointing at the
# canonical node named LOCATION.
with driver.session() as session:
  query = """MATCH (t)-[]->(n:CANONICAL_NODE) WHERE n.name="LOCATION" RETURN t.name as name"""
  for record in session.run(query):
    print("Linked to Location: ", record["name"])

# The driver is deliberately left open: the following cell opens further
# sessions on it.

Linked to Location:  ADVERSE EVENT
Linked to Location:  THERAPEUTIC INTERVENTION
Linked to Location:  OBSERVATION


Now create the data type nodes for the canonical model. For each canonical node that references a data type, copy the properties of that data type to the canonical node. Give each canonical leaf a unique id. These nodes should also have a C code reference providing a definition for the data item.


In [50]:
# Collected as {"id": <node id of a cloned node>, "uri": <new uri>} and applied
# in one batch once all cloning is done.
uri_data = []

# Clones the HAS_DATA_TYPE_PROPERTY subgraph below a data type, using the
# canonical node as stand-in for the data type node, and returns the clones.
# The two URIs are passed as query parameters rather than formatted into the
# query text.
clone_query = """MATCH (n:CANONICAL_NODE{uri:$canonical}), (dt:FHIR_DATA_TYPE{uri:$data_type})
      CALL apoc.path.subgraphAll(dt, {relationshipFilter:'HAS_DATA_TYPE_PROPERTY>'})
      YIELD nodes, relationships
      CALL apoc.refactor.cloneSubgraph(
        nodes,
        [rel in relationships WHERE type(rel) = 'HAS_DATA_TYPE_PROPERTY'],
        { standinNodes:[[dt, n]] })
      YIELD input, output, error
      RETURN output;
    """

with driver.session() as session:
  query = """MATCH (n:CANONICAL_NODE)-[:IS_A]->(dt:FHIR_DATA_TYPE) RETURN n.uri as canonical, dt.uri as data_type"""
  # Read all pairs first so no result is still open when further queries are
  # run on the same session.
  pairs = [(record["canonical"], record["data_type"]) for record in session.run(query)]

  for canonical, data_type in pairs:
    inner_result = session.run(clone_query, canonical=canonical, data_type=data_type)
    for inner_record in inner_result:
      node = inner_record["output"]
      # The new uri is the canonical node's uri plus the clone's name.
      uri_data.append({"id": node.id, "uri": "%s#%s" % (canonical, node["name"]) })
    print ("Duplicated for: ", canonical)

# The driver stays open here because the next session still needs it.
with driver.session() as session:
  query = """UNWIND $uri_data AS d
    MATCH (n) WHERE ID(n)=d.id
    SET n.uri = d.uri;"""
  # consume() waits for the update to finish so errors surface here.
  session.run(query, uri_data=uri_data).consume()
driver.close()
print ("URIs set.")



Duplicated for:  http://id.d4k.dk/dataset/canonical/n8
Duplicated for:  http://id.d4k.dk/dataset/canonical/n9
Duplicated for:  http://id.d4k.dk/dataset/canonical/n18
URIs set.
