In [None]:
import pandas as pd
from py2neo import Graph
import os

# Load every SAS transport (.xpt) file in the ADaM directory into a DataFrame.
#
# Side effects relied on by later cells:
#   * `dataset_list` holds the dataset names (file name up to the first '.').
#   * each DataFrame is published as a global variable named after its dataset.

# assign directory
directory = './phuse-scripts/data/adam/TDF_ADaM_v1.0/'
# giving file extension
ext = ('.xpt')
# create empty list to dataset names
dataset_list = list()

# iterate over all files with SAS extension; sorted() makes the order of
# `dataset_list` reproducible, since os.listdir() returns entries in
# arbitrary order
for files in sorted(os.listdir(directory)):
    if not files.endswith(ext):
        continue
    # dataset name = everything before the first '.' in the file name
    m = files.split(sep='.')[0]
    if not m:
        # a file literally named ".xpt" has no usable dataset name.
        # (The previous check, `type(m) != 'NoneType'`, compared a type with a
        # string and was therefore always true.)
        continue
    # append dataset name to list
    dataset_list.append(m)
    # assign datasetname to dataframe name (not a good practice, but the
    # following cells refer to the frames by these global names)
    globals()[m] = pd.read_sas(os.path.join(directory, files), format='xport')

# neo4j connection
# Connection settings can be overridden through the environment so that real
# credentials do not have to be committed with the notebook; the previous
# hard-coded values remain the defaults.
port = os.environ.get("NEO4J_PORT", "7687")
user = os.environ.get("NEO4J_USER", "neo4j")
pswd = os.environ.get("NEO4J_PASSWORD", "adam")

# Make sure the database is started first, otherwise attempt to connect will fail
try:
    graph = Graph('bolt://localhost:' + port, auth=(user, pswd))
    print('SUCCESS: Connected to the Neo4j Database.')
except Exception as e:
    print('ERROR: Could not connect to the Neo4j Database. See console for details.')
    # Stop the run and surface the underlying error.
    raise SystemExit(e)


In [8]:
# Build the clinical-trial graph from the ADaM data frames loaded in the first
# cell (adsl, adae, adlbc, adlbh, advs, adadas, adcibc) using the `graph`
# connection.
#
# NOTE: patient and measurement nodes are CREATEd (not MERGEd), so this cell
# expects an empty database; re-running it on a populated one duplicates them.


def _clean(value):
    """Return ``value`` in a form that is stored consistently in Neo4j.

    * ``bytes`` (character columns arrive as bytes, which is why the earlier
      version of this cell had to call ``str(x, 'utf-8')``) are decoded to
      ``str`` so that every node/lookup uses the same text type.
    * float NaN (missing numeric value) becomes ``None``.
    * anything else is returned unchanged.
    """
    if isinstance(value, (bytes, bytearray)):
        return bytes(value).decode('utf-8')
    if isinstance(value, float) and value != value:  # NaN is the only float != itself
        return None
    return value


def _props(**values):
    """Clean every value and drop missing ones, giving a map for ``SET x = $map``."""
    cleaned = {key: _clean(val) for key, val in values.items()}
    return {key: val for key, val in cleaned.items() if val is not None}


def _merge_nodes(series, label, key):
    """Ensure one ``(:label {key: value})`` node exists per distinct non-null value.

    ``label`` and ``key`` are interpolated into the query text (Cypher cannot
    parameterise them) and must therefore be constants from this cell, never data.
    """
    query = f'''
      MERGE (:{label} {{{key}: $value}})
    '''
    for value in series.dropna().unique():
        graph.run(query, parameters={'value': _clean(value)})


def _load_measurements(frame, label, rel_type, dataset,
                       constant_props=None, column_props=None, change_column='CHG'):
    """Create one measurement node per row of ``frame`` and link it.

    Each row produces ``(patient)-[rel_type]->(node)<-[:MEASURED_IN_VISIT]-(visit)``.
    The node is created even when the patient or visit cannot be found; in that
    case it is simply left unlinked.

    Parameters
    ----------
    frame : DataFrame with at least USUBJID, VISIT, PARAM and AVAL columns.
    label : node label (constant, interpolated into the query).
    rel_type : patient->node relationship type (constant, interpolated).
    dataset : value stored in the node's ``Dataset`` property.
    constant_props : extra node properties with fixed values.
    column_props : extra node properties read per row, ``{property: column}``.
    change_column : column stored as ``ChangeFromBaseline`` on the relationship;
        ``None`` stores the constant 0 instead.
    """
    constant_props = constant_props or {}
    column_props = column_props or {}
    # Node and relationships are written in a single statement, so the node
    # never has to be looked up again by comparing all of its properties.
    query = f'''
      CREATE (m:{label})
      SET m = $props
      WITH m
      MATCH (p:Patient {{USUBJID: $USUBJID}}), (v:Visit {{Name: $VISIT}})
      CREATE (p)-[rel:{rel_type}]->(m)<-[:MEASURED_IN_VISIT]-(v)
      SET rel = $rel_props
    '''
    for row in frame.itertuples():
        values = {
            'USUBJID': row.USUBJID,
            'VISIT': row.VISIT,
            'Parameter': row.PARAM,
            'Value': row.AVAL,
            'Dataset': dataset,
        }
        values.update(constant_props)
        values.update({prop: getattr(row, column) for prop, column in column_props.items()})
        change = 0 if change_column is None else getattr(row, change_column)
        graph.run(query, parameters={
            'props': _props(**values),
            'USUBJID': _clean(row.USUBJID),
            'VISIT': _clean(row.VISIT),
            'rel_props': _props(ChangeFromBaseline=change),
        })


# Creating patient nodes
for row in adsl.itertuples():
    graph.run('''
      CREATE (p:Patient)
      SET p = $props
    ''', parameters={'props': _props(USUBJID=row.USUBJID, AGE=row.AGE, ARM=row.ARM,
                                     SEX=row.SEX, BMI=row.BMIBL)})

# Creating treatment Nodes (one per distinct ARM)
_merge_nodes(adsl['ARM'], 'Treatment', 'Name')

# creating patient-treatment relationship
# (values are passed as parameters instead of being pasted into the query text)
for row in adsl.itertuples():
    graph.run('''
      MATCH (p:Patient {USUBJID: $USUBJID}), (t:Treatment {Name: $ARM})
      MERGE (p)-[r:WAS_TREATED]->(t)
      SET r.Dose = $Dose
    ''', parameters={'USUBJID': _clean(row.USUBJID), 'ARM': _clean(row.ARM),
                     'Dose': _clean(row.TRT01PN)})

# Creating Adverse Event Nodes (one per distinct AETERM)
_merge_nodes(adae['AETERM'], 'AdverseEvent', 'Term')

# Creating patient-adverseevent relationship
# MERGE keeps a single relationship per patient/term pair; its properties are
# overwritten by each later row for the same pair.
for row in adae.itertuples():
    graph.run('''
      MATCH (p:Patient {USUBJID: $USUBJID}), (ae:AdverseEvent {Term: $Term})
      MERGE (p)-[r:HAD_ADVERSE_EVENT]->(ae)
      SET r = $props
    ''', parameters={'USUBJID': _clean(row.USUBJID), 'Term': _clean(row.AETERM),
                     'props': _props(Severity=row.AESEV, Type=row.AEBODSYS)})

# Creating Visit Nodes (chemistry visits)
_merge_nodes(adlbc['VISIT'], 'Visit', 'Name')

# Creating patient-visit relationships
# One statement per distinct patient/visit pair rather than one per lab row.
patient_visits = adlbc[['USUBJID', 'VISIT']].drop_duplicates()
for usubjid, visit in patient_visits.itertuples(index=False, name=None):
    graph.run('''
      MATCH (p:Patient {USUBJID: $USUBJID}), (v:Visit {Name: $Visit})
      MERGE (p)-[:HAD_VISIT]->(v)
    ''', parameters={'USUBJID': _clean(usubjid), 'Visit': _clean(visit)})

# Laboratory nodes take these two properties from the data itself
lab_columns = {'Laboratory': 'PARCAT1', 'Reference': 'LBNRIND'}

# creating chemistry laboratory parameters and their patient/visit relationships
_load_measurements(adlbc, 'Parameter', 'MEASURED_LABPARAMETER', 'adlbc',
                   column_props=lab_columns)

# creating remaining visit nodes, then hematology parameters and relationships
_merge_nodes(adlbh['VISIT'], 'Visit', 'Name')
_load_measurements(adlbh, 'Parameter', 'MEASURED_LABPARAMETER', 'adlbh',
                   column_props=lab_columns)

# creating remaining visit nodes, then vital sign nodes and relationships
# NOTE(review): the Dataset tag is 'adlvs' although the frame is `advs`; kept
# unchanged because existing queries may filter on it - confirm before renaming.
_merge_nodes(advs['VISIT'], 'Visit', 'Name')
_load_measurements(advs, 'VitalSign', 'MEASURED_VITALSIGN', 'adlvs',
                   constant_props={'Laboratory': 'VS', 'Reference': ''})

# creating remaining visit nodes, then ADAS nodes and relationships
_merge_nodes(adadas['VISIT'], 'Visit', 'Name')
_load_measurements(adadas, 'Endpoint', 'ASSESSED_ENDPOINT', 'adadas',
                   constant_props={'EndpointName': 'ADAS-Cog', 'Reference': ''})

# creating remaining visit nodes, then CIBC nodes and relationships
# (ChangeFromBaseline is stored as the constant 0 for this dataset)
_merge_nodes(adcibc['VISIT'], 'Visit', 'Name')
_load_measurements(adcibc, 'Endpoint', 'ASSESSED_ENDPOINT', 'adcibc',
                   constant_props={'EndpointName': 'CIBC Score', 'Reference': ''},
                   change_column=None)