Install dependencies

In [105]:
!pip install neo4j
!pip install requests



First create the FHIR data types. Need this before anything else.


In [113]:
import getpass
import json
import os
import urllib.request

import requests
from neo4j import GraphDatabase

# Connection settings are read from the environment so that credentials are not
# stored in the notebook; the password is prompted for when NEO4J_PASSWORD is unset.
# NOTE(review): the password that used to be committed here should be rotated.
neo4j_uri = os.environ.get("NEO4J_URI", "neo4j://3.83.17.192:7687")
neo4j_user = os.environ.get("NEO4J_USER", "neo4j")
neo4j_password = os.environ.get("NEO4J_PASSWORD") or getpass.getpass("Neo4j password: ")
driver = GraphDatabase.driver(neo4j_uri, auth=(neo4j_user, neo4j_password))

# Base URL that the CSV file names passed to file_load() are appended to.
load_file_dir = "https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/"

def clear(tx):
    """Run the batched APOC statement that detach-deletes every matched node.

    `tx` only needs a `run(query)` method; the statement is handed to it unchanged
    and nothing is returned.
    """
    wipe_statement = "CALL apoc.periodic.iterate('MATCH (n) RETURN n', 'DETACH DELETE n', {batchSize:1000})"
    tx.run(wipe_statement)

def file_load(load_files):
  """Import a batch of CSV files with a single `apoc.import.csv` call.

  Args:
    load_files: iterable of dicts. Every dict has a "filename", which is appended
      to `load_file_dir`. A dict with a "label" key is passed as a node file with
      that label; any other dict must have a "type" key and is passed as a
      relationship file of that type.

  Raises:
    KeyError: if an entry lacks "filename", or has neither "label" nor "type".

  The generated Cypher statement, the result object and every returned record are
  printed. The shared module-level `driver` is deliberately left open, because
  later cells keep using it.
  """
  nodes = []
  relationships = []
  for file_item in load_files:
    filename = "%s%s" % (load_file_dir, file_item["filename"])
    if "label" in file_item:
      nodes.append("{ fileName: '%s', labels: ['%s'] }" % (filename, file_item["label"]) )
    else:
      relationships.append("{ fileName: '%s', type: '%s' }" % (filename, file_item["type"]) )
  query = """CALL apoc.import.csv( [%s], [%s], {stringIds: false})""" % (", ".join(nodes), ", ".join(relationships))
  print(query)
  with driver.session() as session:
    result = session.run(query)
    print(result)
    for record in result:
      print(record)

# Start from an empty database. The shared driver stays open for the cells below.
with driver.session() as session:
    # execute_write is the current name of the managed-write API; older driver
    # releases only provide write_transaction, so fall back to it when needed.
    run_write = getattr(session, "execute_write", None) or session.write_transaction
    run_write(clear)

print("Ready ...")


Ready ...


In [114]:
# Stage 1 load list: entries with a "label" are node files, entries with a "type"
# are relationship files (see file_load).
stage_1_files = [
    dict(label="FHIR", filename="stage_1_fhir_nodes.csv"),
    dict(label="WEB_SOURCE", filename="stage_1_web_source_nodes.csv"),
    dict(label="FHIR_DATA_TYPE", filename="stage_1_fhir_data_type_nodes.csv"),
    dict(label="FHIR_DATA_TYPE_PROPERTY", filename="stage_1_fhir_data_type_property_nodes.csv"),
    dict(type="HAS_DATA_TYPE_PROPERTY", filename="stage_1_has_data_type_property_relationships.csv"),
    dict(type="FROM_SOURCE", filename="stage_1_from_source_relationships.csv"),
    dict(type="HAS_DATA_TYPE", filename="stage_1_has_data_type_relationships.csv"),
]

file_load(stage_1_files)

CALL apoc.import.csv( [{ fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_1_fhir_nodes.csv', labels: ['FHIR'] }, { fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_1_web_source_nodes.csv', labels: ['WEB_SOURCE'] }, { fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_1_fhir_data_type_nodes.csv', labels: ['FHIR_DATA_TYPE'] }, { fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_1_fhir_data_type_property_nodes.csv', labels: ['FHIR_DATA_TYPE_PROPERTY'] }], [{ fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_1_has_data_type_property_relationships.csv', type: 'HAS_DATA_TYPE_PROPERTY' }, { fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_1_from_source_relationships.csv', type: 'FROM_SOURCE' }, { fileName: 'https://raw.

Now check that the data loaded OK. Check the FHIR version; it should be 4.0.1.

In [115]:
# Read back the version property of the loaded FHIR node(s).
# The shared driver is not closed here because later cells still use it.
with driver.session() as session:
  query = """MATCH (n:FHIR) Return n.version as version"""
  result = session.run(query)
  for record in result:
    print("Version: ", record["version"])

Version:  4.0.1


In [116]:
# Stage 2 load list: canonical model nodes first, then the relationship files.
stage_2_files = [
    dict(label="CANONICAL_MODEL", filename="stage_2_canonical_model_nodes.csv"),
    dict(label="CANONICAL_NODE", filename="stage_2_canonical_node_nodes.csv"),
    dict(label="CANONICAL_DATA_TYPE", filename="stage_2_canonical_data_type_nodes.csv"),
    dict(label="OTHER_SOURCE", filename="stage_2_other_source_nodes.csv"),
    dict(type="CONSISTS_OF", filename="stage_2_consists_of_relationships.csv"),
    dict(type="FROM_SOURCE", filename="stage_2_from_source_relationships.csv"),
    dict(type="HAS_SUB_MODEL", filename="stage_2_has_sub_model_relationships.csv"),
    dict(type="HAS_DATA_TYPE", filename="stage_2_has_data_type_relationships.csv"),
]

file_load(stage_2_files)

CALL apoc.import.csv( [{ fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_2_canonical_model_nodes.csv', labels: ['CANONICAL_MODEL'] }, { fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_2_canonical_node_nodes.csv', labels: ['CANONICAL_NODE'] }, { fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_2_canonical_data_type_nodes.csv', labels: ['CANONICAL_DATA_TYPE'] }, { fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_2_other_source_nodes.csv', labels: ['OTHER_SOURCE'] }], [{ fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_2_consists_of_relationships.csv', type: 'CONSISTS_OF' }, { fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_2_from_source_relationships.csv', type: 'FROM_SOURCE' }, { fileName: 'https://raw.gith

Check a few relationships exist in the canonical model. Should get three results.

* THERAPEUTIC INTERVENTION
* OBSERVATION
* ADVERSE EVENT



In [117]:
# List the two-step paths that end at the canonical node named LOCATION.
# The shared driver is not closed here because later cells still use it.
with driver.session() as session:
  query = """MATCH (r)-[]->(t)-[]->(n:CANONICAL_NODE) WHERE n.name="LOCATION" RETURN r.name as root_name, t.name as name"""
  result = session.run(query)
  for record in result:
    print("%s -> %s -> Location: " % (record["root_name"], record["name"]))

CANONICAL MODEL -> OBSERVATION -> Location: 
CANONICAL MODEL -> ADVERSE EVENT -> Location: 
CANONICAL MODEL -> THERAPEUTIC INTERVENTION -> Location: 


Now link the canonical nodes and the data types using the name as the key.

In [118]:
# Link every canonical data type to the FHIR data type whose name equals its
# data_type property. The shared driver stays open for the cells below.
with driver.session() as session:
  # Read all URIs up front so the outer result is complete before further
  # statements are run on the same session.
  uri_query = """MATCH (n:CANONICAL_NODE)-[]->(m:CANONICAL_DATA_TYPE) RETURN m.uri as uri"""
  uris = [record["uri"] for record in session.run(uri_query)]

  # The URI is passed as a query parameter rather than formatted into the Cypher
  # text. MERGE keeps a re-run from adding a second IS_A between the same nodes.
  link_query = """MATCH (n:CANONICAL_DATA_TYPE{uri: $uri})
      WITH n
      MATCH (dt:FHIR_DATA_TYPE{name: n.data_type})
      MERGE (n)-[:IS_A]->(dt)
      RETURN n.data_type as cn_name, dt.name as dt_name;
  """
  for uri in uris:
    print(uri)
    for linked in session.run(link_query, uri=uri):
      print("%s, %s linked" % (linked["cn_name"], linked["dt_name"]))

http://id.d4k.dk/dataset/canonical/n8/coding
coding, coding linked
http://id.d4k.dk/dataset/canonical/n9/coding
coding, coding linked
http://id.d4k.dk/dataset/canonical/n18/coding
coding, coding linked


Now create the data type nodes for the canonical model. For each canonical node that references a data type, copy the properties for that data type to the canonical node. Give each canonical leaf a unique id. These nodes should also have a C code reference providing a definition for the data item.


In [119]:
# For every canonical data type linked to a FHIR data type, clone the FHIR
# HAS_DATA_TYPE_PROPERTY subgraph onto it, then give each clone its own URI.
# The shared driver stays open for the cells below.
uri_data = []
with driver.session() as session:
  # Read the (canonical, data type) URI pairs up front so the outer result is
  # complete before the cloning statements run on the same session.
  pair_query = """MATCH (n:CANONICAL_DATA_TYPE)-[:IS_A]->(dt:FHIR_DATA_TYPE) RETURN n.uri as canonical, dt.uri as data_type"""
  pairs = [(record["canonical"], record["data_type"]) for record in session.run(pair_query)]

  # Both URIs are passed as query parameters rather than formatted into the text.
  clone_query = """MATCH (n:CANONICAL_DATA_TYPE{uri: $canonical}), (dt:FHIR_DATA_TYPE{uri: $data_type})
      CALL apoc.path.subgraphAll(dt, {relationshipFilter:'HAS_DATA_TYPE_PROPERTY>'})
      YIELD nodes, relationships
      CALL apoc.refactor.cloneSubgraph(
        nodes,
        [rel in relationships WHERE type(rel) = 'HAS_DATA_TYPE_PROPERTY'],
        { standinNodes:[[dt, n]] })
      YIELD input, output, error
      RETURN output;
  """
  for canonical, data_type in pairs:
    for cloned in session.run(clone_query, canonical=canonical, data_type=data_type):
      node = cloned["output"]
      # NOTE(review): node.id / ID(n) are the legacy numeric ids; newer driver and
      # server releases may prefer element ids - confirm before upgrading.
      uri_data.append({"id": node.id, "uri": "%s#%s" % (canonical, node["name"]) })
    print ("Duplicated for: ", canonical)

with driver.session() as session:
  query = """UNWIND $uri_data AS d
    MATCH (n) WHERE ID(n)=d.id
    SET n.uri = d.uri;"""
  # consume() waits for the write to finish so that any error is raised here.
  session.run(query, uri_data=uri_data).consume()
print ("URIs set.")



Duplicated for:  http://id.d4k.dk/dataset/canonical/n8/coding
Duplicated for:  http://id.d4k.dk/dataset/canonical/n9/coding
Duplicated for:  http://id.d4k.dk/dataset/canonical/n18/coding
URIs set.


In [120]:
# Stage 3 load list: SKOS concept / concept scheme nodes and their relationships.
stage_3_files = [
    dict(label="API_SOURCE", filename="stage_3_api_source_nodes.csv"),
    dict(label="SKOS_CONCEPT", filename="stage_3_skos_concept_nodes.csv"),
    dict(label="SKOS_CONCEPT_SCHEME", filename="stage_3_skos_concept_scheme_nodes.csv"),
    dict(type="FROM_SOURCE", filename="stage_3_from_source_relationships.csv"),
    dict(type="SKOS_HAS_TOP_CONCEPT", filename="stage_3_skos_has_top_concept_relationships.csv"),
    dict(type="SKOS_NARROWER", filename="stage_3_skos_narrower_relationships.csv"),
]

file_load(stage_3_files)


CALL apoc.import.csv( [{ fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_3_api_source_nodes.csv', labels: ['API_SOURCE'] }, { fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_3_skos_concept_nodes.csv', labels: ['SKOS_CONCEPT'] }, { fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_3_skos_concept_scheme_nodes.csv', labels: ['SKOS_CONCEPT_SCHEME'] }], [{ fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_3_from_source_relationships.csv', type: 'FROM_SOURCE' }, { fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_3_skos_has_top_concept_relationships.csv', type: 'SKOS_HAS_TOP_CONCEPT' }, { fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_3_skos_narrower_relationships.csv', type: 'SKOS_NARROWER' }], {stringIds: false}

Do a quick test on the CT. Check AGEU and its links

In [121]:
# Show the concepts reachable from the SKOS concept whose notation is AGEU.
# The query takes no parameters, so none are passed, and the shared driver is
# left open for the cells below.
with driver.session() as session:
  query = """MATCH (cs)-[]->(c1:SKOS_CONCEPT)-[]->(c2) WHERE c1.notation = 'AGEU' RETURN DISTINCT cs.version as version, c1.identifier as cl_identifier, c2.identifier as cli_identifier, c2.notation as cli_notation"""
  result = session.run(query)
  for record in result:
    print ("%s: %s, %s, %s" % (record["version"], record["cl_identifier"], record["cli_identifier"], record["cli_notation"]))


2021-12-17: C66781, C29846, MONTHS
2021-12-17: C66781, C25301, DAYS
2021-12-17: C66781, C29848, YEARS
2021-12-17: C66781, C29844, WEEKS
2021-12-17: C66781, C25529, HOURS


Stages 4 to 9 are the CT files for the other areas (ADaM, Protocol, CDASH, etc.). They are not loaded at the moment; the CT had to be split into separate stages because of its size and the limited RAM on the Neo4j server.

In [122]:
# Stage 10 load list: BC template nodes and their relationships.
stage_10_files = [
    dict(label="OTHER_SOURCE", filename="stage_10_other_source_nodes.csv"),
    dict(label="BC_DATA_TYPE", filename="stage_10_bc_data_type_nodes.csv"),
    dict(label="BC_ITEM", filename="stage_10_bc_item_nodes.csv"),
    dict(label="BC_TEMPLATE", filename="stage_10_bc_template_nodes.csv"),
    dict(type="FROM_SOURCE", filename="stage_10_from_source_relationships.csv"),
    dict(type="HAS_DATA_TYPE", filename="stage_10_has_data_type_relationships.csv"),
    dict(type="HAS_IDENTIFIER", filename="stage_10_has_identifier_relationships.csv"),
    dict(type="HAS_ITEM", filename="stage_10_has_item_relationships.csv"),
]

file_load(stage_10_files)

CALL apoc.import.csv( [{ fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_10_other_source_nodes.csv', labels: ['OTHER_SOURCE'] }, { fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_10_bc_data_type_nodes.csv', labels: ['BC_DATA_TYPE'] }, { fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_10_bc_item_nodes.csv', labels: ['BC_ITEM'] }, { fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_10_bc_template_nodes.csv', labels: ['BC_TEMPLATE'] }], [{ fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_10_from_source_relationships.csv', type: 'FROM_SOURCE' }, { fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_10_has_data_type_relationships.csv', type: 'HAS_DATA_TYPE' }, { fileName: 'https://raw.githubusercontent.com/data4kno

Now add in the data type nodes for the templates. Copy from the data types to the templates.

In [123]:
# Step 1: link every BC data type to the FHIR data type with the same name.
# The shared driver stays open throughout because later cells still use it.
with driver.session() as session:
  # Read all URIs up front so the outer result is complete before further
  # statements are run on the same session.
  uri_query = """MATCH (m:BC_DATA_TYPE) RETURN m.uri as uri"""
  uris = [record["uri"] for record in session.run(uri_query)]

  # The URI is passed as a query parameter rather than formatted into the Cypher
  # text. MERGE keeps a re-run from adding a second IS_A between the same nodes.
  link_query = """MATCH (n:BC_DATA_TYPE{uri: $uri})
      WITH n
      MATCH (dt:FHIR_DATA_TYPE{name: n.name})
      MERGE (n)-[:IS_A]->(dt)
      RETURN n.name as bc_name, dt.name as dt_name;
  """
  for uri in uris:
    print(uri)
    for linked in session.run(link_query, uri=uri):
      print("%s, %s linked" % (linked["bc_name"], linked["dt_name"]))

# Step 2: clone the FHIR HAS_DATA_TYPE_PROPERTY subgraph onto each linked BC data type.
uri_data = []
with driver.session() as session:
  pair_query = """MATCH (n:BC_DATA_TYPE)-[:IS_A]->(dt:FHIR_DATA_TYPE) RETURN n.uri as bc_data_type, dt.uri as data_type"""
  pairs = [(record["bc_data_type"], record["data_type"]) for record in session.run(pair_query)]

  # Both URIs are passed as query parameters rather than formatted into the text.
  clone_query = """MATCH (n:BC_DATA_TYPE{uri: $bc_data_type}), (dt:FHIR_DATA_TYPE{uri: $data_type})
      CALL apoc.path.subgraphAll(dt, {relationshipFilter:'HAS_DATA_TYPE_PROPERTY>'})
      YIELD nodes, relationships
      CALL apoc.refactor.cloneSubgraph(
        nodes,
        [rel in relationships WHERE type(rel) = 'HAS_DATA_TYPE_PROPERTY'],
        { standinNodes:[[dt, n]] })
      YIELD input, output, error
      RETURN output;
  """
  for bc_data_type, data_type in pairs:
    for cloned in session.run(clone_query, bc_data_type=bc_data_type, data_type=data_type):
      node = cloned["output"]
      print(node)
      # NOTE(review): node.id / ID(n) are the legacy numeric ids; newer driver and
      # server releases may prefer element ids - confirm before upgrading.
      uri_data.append({"id": node.id, "uri": "%s#%s" % (bc_data_type, node["name"]) })
    print ("Duplicated for: ", bc_data_type)

# Step 3: give every cloned node a URI derived from its BC data type and its name.
with driver.session() as session:
  query = """UNWIND $uri_data AS d
    MATCH (n) WHERE ID(n)=d.id
    SET n.uri = d.uri;"""
  # consume() waits for the write to finish so that any error is raised here.
  session.run(query, uri_data=uri_data).consume()
print ("URIs set.")

http://id.d4k.dk/dataset/bc_template/base_observation/test/coding
coding, coding linked
http://id.d4k.dk/dataset/bc_template/base_observation/position/coding
coding, coding linked
http://id.d4k.dk/dataset/bc_template/base_observation/site_of_administration/coding
coding, coding linked
http://id.d4k.dk/dataset/bc_template/base_observation/laterality/coding
coding, coding linked
http://id.d4k.dk/dataset/bc_template/base_observation/method/coding
coding, coding linked
http://id.d4k.dk/dataset/bc_template/base_observation/date_time/datetime
http://id.d4k.dk/dataset/bc_template/base_observation/result/quantity
quantity, quantity linked
http://id.d4k.dk/dataset/bc_template/base_observation/result/coding
coding, coding linked
http://id.d4k.dk/dataset/bc_template/base_laboratory/test/coding
coding, coding linked
http://id.d4k.dk/dataset/bc_template/base_laboratory/position/coding
coding, coding linked
http://id.d4k.dk/dataset/bc_template/base_laboratory/site_of_administration/coding
coding, co

In [124]:
# Stage 11 load list: BC instance nodes and their relationships.
stage_11_files = [
    dict(label="OTHER_SOURCE", filename="stage_11_other_source_nodes.csv"),
    dict(label="BC_VALUE_SET", filename="stage_11_bc_value_set_nodes.csv"),
    dict(label="BC_DATA_PROPERTY", filename="stage_11_bc_data_property_nodes.csv"),
    dict(label="BC_DATA_TYPE", filename="stage_11_bc_data_type_nodes.csv"),
    dict(label="BC_ITEM", filename="stage_11_bc_item_nodes.csv"),
    dict(label="BC_INSTANCE", filename="stage_11_bc_instance_nodes.csv"),
    dict(type="FROM_SOURCE", filename="stage_11_from_source_relationships.csv"),
    dict(type="HAS_RESPONSE", filename="stage_11_has_response_relationships.csv"),
    dict(type="HAS_DATA_PROPERTY", filename="stage_11_has_data_property_relationships.csv"),
    dict(type="HAS_DATA_TYPE", filename="stage_11_has_data_type_relationships.csv"),
    dict(type="HAS_IDENTIFIER", filename="stage_11_has_identifier_relationships.csv"),
    dict(type="HAS_ITEM", filename="stage_11_has_item_relationships.csv"),
]

file_load(stage_11_files)

CALL apoc.import.csv( [{ fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_11_other_source_nodes.csv', labels: ['OTHER_SOURCE'] }, { fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_11_bc_value_set_nodes.csv', labels: ['BC_VALUE_SET'] }, { fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_11_bc_data_property_nodes.csv', labels: ['BC_DATA_PROPERTY'] }, { fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_11_bc_data_type_nodes.csv', labels: ['BC_DATA_TYPE'] }, { fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_11_bc_item_nodes.csv', labels: ['BC_ITEM'] }, { fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_11_bc_instance_nodes.csv', labels: ['BC_INSTANCE'] }], [{ fileName: 'https://raw.githubusercontent.com/data4kno

In [125]:
# List the HAS_RESPONSE targets of every BC data property.
# The shared driver is not closed here because later cells still use it.
with driver.session() as session:
  query = """MATCH (n:BC_DATA_PROPERTY)-[:HAS_RESPONSE]->(d) RETURN n.name as name, d.cl as cl, d.cli as cli, d.uri as uri"""
  result = session.run(query)
  for record in result:
    print ("%s: [%s, %s], uri=%s" % (record["name"], record["cl"], record["cli"], record["uri"]))

code: [c66741, c25208], uri=http://id.d4k.dk/dataset/bc_instance/weight/test/coding/code/c66741-c25208
code: [c66770, c28252], uri=http://id.d4k.dk/dataset/bc_instance/weight/result/quantity/code/c66770-c28252
code: [c66770, c48531], uri=http://id.d4k.dk/dataset/bc_instance/weight/result/quantity/code/c66770-c48531


Need to
- Link to the template
- Link the BC to any CT

In [127]:
 stage_12_files = [    
    { "label": "ENDPOINT", "filename": "stage_12_endpoint_nodes.csv" },
    { "label": "STUDY_DATA", "filename": "stage_12_study_data_nodes.csv" },
    { "label": "PROCEDURE", "filename": "stage_12_procedure_nodes.csv" },
    { "label": "ACTIVITY", "filename": "stage_12_activity_nodes.csv" },
    { "label": "WORKFLOW_ITEM", "filename": "stage_12_workflow_item_nodes.csv" },
    { "label": "VISIT", "filename": "stage_12_visit_nodes.csv" },
    { "label": "STUDY_CELL", "filename": "stage_12_study_cell_nodes.csv" },
    { "label": "RULE", "filename": "stage_12_rule_nodes.csv" },
    { "label": "STUDY_ELEMENT", "filename": "stage_12_study_element_nodes.csv" },
    { "label": "EPOCH", "filename": "stage_12_study_epoch_nodes.csv" },
    { "label": "STUDY_ARM", "filename": "stage_12_study_arm_nodes.csv" },
    { "label": "OBJECTIVE", "filename": "stage_12_objective_nodes.csv" },
    { "label": "CODE", "filename": "stage_12_code_nodes.csv" },
    { "label": "INVESTIGATIONAL_INTERVENTIONS", "filename": "stage_12_investigational_interventions_nodes.csv" },
    { "label": "POPULATION", "filename": "stage_12_population_nodes.csv" },
    { "label": "STUDY_DESIGN", "filename": "stage_12_study_design_nodes.csv" },
    { "label": "INDICATION", "filename": "stage_12_indication_nodes.csv" },
    { "label": "STUDY_PROTOCOL", "filename": "stage_12_study_protocol_nodes.csv" },
    { "label": "STUDY_PHASE", "filename": "stage_12_study_phase_nodes.csv" },
    { "label": "STUDY_TYPE", "filename": "stage_12_study_type_nodes.csv" },
    { "label": "STUDY_IDENTIFIER", "filename": "stage_12_study_identifier_nodes.csv" },
    { "label": "STUDY", "filename": "stage_12_study_nodes.csv" },
    { "type": "HAS_CODED", "filename": "stage_12_has_coded_relationships.csv" },
    { "type": "HAS_ENDPOINT", "filename": "stage_12_has_endpoint_relationships.csv" },
    { "type": "HAS_STUDY_DATA", "filename": "stage_12_has_study_data_relationships.csv" },
    { "type": "HAS_PROCEDURE", "filename": "stage_12_has_procedure_relationships.csv" },
    { "type": "HAS_PREVIOUS_ACTIVITY", "filename": "stage_12_has_previous_activity_relationships.csv" },
    { "type": "HAS_PREVIOUS_WORKFLOW", "filename": "stage_12_has_previous_workflow_relationships.csv" },
    { "type": "USED_IN_VISIT", "filename": "stage_12_used_in_visit_relationships.csv" },
    { "type": "HAS_ACTIVITY", "filename": "stage_12_has_activity_relationships.csv" },
    { "type": "HAS_VISIT", "filename": "stage_12_has_visit_relationships.csv" },
    { "type": "HAS_END_RULE", "filename": "stage_12_has_end_rule_relationships.csv" },
    { "type": "HAS_START_RULE", "filename": "stage_12_has_start_rule_relationships.csv" },
    { "type": "HAS_ELEMENT", "filename": "stage_12_has_element_relationships.csv" },
    { "type": "HAS_EPOCH", "filename": "stage_12_has_epoch_relationships.csv" },
    { "type": "HAS_ARM", "filename": "stage_12_has_arm_relationships.csv" },
    { "type": "HAS_CELL", "filename": "stage_12_has_cell_relationships.csv" },
    { "type": "HAS_OBJECTIVE", "filename": "stage_12_has_objective_relationships.csv" },
    { "type": "HAS_INDICATION", "filename": "stage_12_has_indication_relationships.csv" },
    { "type": "HAS_INVESTIGATIONAL_INTERVENTION", "filename": "stage_12_has_investigational_intervention_relationships.csv" },
    { "type": "HAS_POPULATION", "filename": "stage_12_has_population_relationships.csv" },
    { "type": "HAS_STUDY_DESIGN", "filename": "stage_12_has_study_design_relationships.csv" },
    { "type": "HAS_PROTOCOL", "filename": "stage_12_has_protocol_relationships.csv" },
    { "type": "HAS_STUDY_PHASE", "filename": "stage_12_has_study_phase_relationships.csv" },
    { "type": "HAS_STUDY_TYPE", "filename": "stage_12_has_study_type_relationships.csv" },
    { "type": "HAS_IDENTIFIER", "filename": "stage_12_has_identifier_relationships.csv" }
  ]

file_load(stage_12_files)

CALL apoc.import.csv( [{ fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_12_endpoint_nodes.csv', labels: ['ENDPOINT'] }, { fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_12_study_data_nodes.csv', labels: ['STUDY_DATA'] }, { fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_12_procedure_nodes.csv', labels: ['PROCEDURE'] }, { fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_12_activity_nodes.csv', labels: ['ACTIVITY'] }, { fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_12_workflow_item_nodes.csv', labels: ['WORKFLOW_ITEM'] }, { fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data/stage_12_visit_nodes.csv', labels: ['VISIT'] }, { fileName: 'https://raw.githubusercontent.com/data4knowledge/biomedical_concepts/main/data

So, next ...
- Duplicate the BC and link to the study via the Study Data element
- We could link the Study to CT
- Add more BCs
- Add a second study