#Import Python libraries
##rdflib - https://pypi.python.org/pypi/rdflib

In [1]:
import os
import rdflib as rdf
#import csv for reading csv files
import csv
from rdflib.namespace import XSD
import pandas as pd
import uuid

#Create new RDF graph

In [2]:
#empty in-memory RDF graph; the helper functions and modelling cells below add triples to it
g = rdf.Graph()

#Add namespaces
## Add a namespace for each one in the object model

In [3]:
#each Namespace object builds full URIs by item access, e.g. prov["Entity"]
prov = rdf.Namespace("http://www.w3.org/ns/prov#")
#NOTE(review): host is spelled "ncitt" - confirm this is the intended URI before changing it
ncit = rdf.Namespace("http://ncitt.ncit.nih.gov/")
#namespace under which every node minted by this notebook is created
nidash = rdf.Namespace("http://purl.org/nidash/nidm/")
xnat = rdf.Namespace("https://www.nitrc.org/projects/fcon_1000/")
#rdfs is declared here but is not passed to g.bind() in the binding cell
rdfs = rdf.Namespace("http://www.w3.org/2000/01/rdf-schema#")
dicom = rdf.Namespace("http://neurolex.org/wiki/Category:DICOM_term/")
dct = rdf.Namespace("http://purl.org/dc/terms/")
dctypes = rdf.Namespace("http://purl.org/dc/dcmitype/")
dcat = rdf.Namespace("http://www.w3.org/ns/dcat#")

#Bind namespaces to graph

In [4]:
#register every prefix/namespace pair on the graph so serializations use short prefixes
for prefix, namespace in [('prov', prov), ('ncit', ncit), ('nidash', nidash), ('xnat', xnat),
                          ('dicom', dicom), ('dct', dct), ('dctypes', dctypes), ('dcat', dcat)]:
    g.bind(prefix, namespace)
#show all namespaces the graph now knows about
list(g.namespaces())

[('xml', rdflib.term.URIRef(u'http://www.w3.org/XML/1998/namespace')),
 ('ncit', rdflib.term.URIRef(u'http://ncitt.ncit.nih.gov/')),
 ('rdfs', rdflib.term.URIRef(u'http://www.w3.org/2000/01/rdf-schema#')),
 ('prov', rdflib.term.URIRef(u'http://www.w3.org/ns/prov#')),
 ('nidash', rdflib.term.URIRef(u'http://purl.org/nidash/nidm/')),
 ('dctypes', rdflib.term.URIRef(u'http://purl.org/dc/dcmitype/')),
 ('rdf', rdflib.term.URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#')),
 ('dcat', rdflib.term.URIRef(u'http://www.w3.org/ns/dcat#')),
 ('xsd', rdflib.term.URIRef(u'http://www.w3.org/2001/XMLSchema#')),
 ('xnat', rdflib.term.URIRef(u'https://www.nitrc.org/projects/fcon_1000/')),
 ('dct', rdflib.term.URIRef(u'http://purl.org/dc/terms/')),
 ('dicom',
  rdflib.term.URIRef(u'http://neurolex.org/wiki/Category:DICOM_term/'))]

   #Create functions to create element nodes

In [5]:
def nidm_add_assessment_data(g,row,acq_assessment_uri, assessment_datadict_uri, data_dictionary):
    """Add one acquired assessment record to the graph.

    g                        -- graph the triples are added to
    row                      -- mapping indexed by the keys of data_dictionary
                                (the caller passes a spreadsheet row)
    acq_assessment_uri       -- local name (within nidash) of the new acquisition node
    assessment_datadict_uri  -- local name (within nidash) of the assessment /
                                data dictionary this record is typed as
    data_dictionary          -- maps a column key in row to the local name of the
                                property that stores that column's value
    """
    acq_node = nidash[acq_assessment_uri]
    g.add((acq_node, rdf.RDF.type, nidash["AssessmentAcquisitionObject"]))
    g.add((acq_node, rdf.RDF.type, prov["Entity"]))
    #type the record with the data dictionary handed in by the caller; the previous
    #code read the notebook-global assessment_uri and ignored this parameter
    g.add((acq_node, rdf.RDF.type, nidash[assessment_datadict_uri]))

    #for each variable in the data dictionary, store its acquired value from this data acquisition
    for key in data_dictionary:
        g.add((acq_node, nidash[data_dictionary[key]], rdf.Literal(row[key])))
 

In [6]:
def nidm_create_assessment(g, assessment_uri, assessment_namem, description):
    """Create the node that represents an assessment (data structure) in the graph.

    g                 -- graph the triples are added to
    assessment_uri    -- local name (within nidash) of the assessment node
    assessment_namem  -- human-readable name, stored as the node's label
                         (the parameter keeps its original spelling so existing
                         keyword callers are unaffected)
    description       -- free-text description stored on the node
    """
    assessment_node = nidash[assessment_uri]
    g.add((assessment_node, rdf.RDF.type, nidash["DataStructure"]))
    g.add((assessment_node, rdf.RDF.type, prov["Collection"]))
    #use the argument that was passed in; the previous code referenced the name
    #assessment_name, which is not a parameter and resolved to a notebook global
    g.add((assessment_node,prov["label"],rdf.Literal(assessment_namem)))
    g.add((assessment_node,prov["description"],rdf.Literal(description)))

In [7]:
def nidm_add_codedproperty(g,valueset_id, codedvalue_id, code, label):
    """Add one coded value to the graph and make it a member of a value set.

    g              -- graph the triples are added to
    valueset_id    -- local name (within nidash) of the value-set node
    codedvalue_id  -- local name (within nidash) of the coded-value node
    code           -- the code stored on the coded-value node
    label          -- the label stored on the coded-value node
    """
    coded_node = nidash[codedvalue_id]
    #the coded value is typed both as a CodedProperty and as an RDF property
    for rdf_class in (nidash["CodedProperty"], rdf.RDF.Property):
        g.add((coded_node, rdf.RDF.type, rdf_class))
    g.add((coded_node, nidash["code"], rdf.Literal(code)))
    g.add((coded_node, prov["label"], rdf.Literal(label)))
    #register the coded value as a member of its value set
    g.add((nidash[valueset_id], prov["hadMember"], coded_node))

In [8]:
def nidm_create_investigation(g, uid, expid,name):
    """Add an investigation node to the graph.

    g      -- graph the triples are added to
    uid    -- local name (within nidash) of the investigation node
    expid  -- accepted but not used by this function
    name   -- project name, stored as an English-tagged ProjectID literal
    """
    investigation = nidash[uid]
    for rdf_class in (dctypes["Dataset"], nidash["Investigation"], prov["Entity"]):
        g.add((investigation, rdf.RDF.type, rdf_class))
    g.add((investigation, xnat["ProjectID"], rdf.Literal(name, lang='en')))
def nidm_add_investigation_metadata(g, uid, name, description, baseuri):
    """Attach title, description, publisher and access URL to an investigation node.

    g            -- graph the triples are added to
    uid          -- local name (within nidash) of the investigation node
    name         -- stored as an English-tagged title literal
    description  -- stored as an English-tagged description literal
    baseuri      -- URI used for both the publisher and the access URL
    """
    investigation = nidash[uid]
    g.add((investigation, dct["title"], rdf.Literal(name, lang='en')))
    g.add((investigation, dct["description"], rdf.Literal(description, lang='en')))
    #publisher and access URL both point at the same URI
    for predicate in (dct["publisher"], dcat["accessURL"]):
        g.add((investigation, predicate, rdf.URIRef(baseuri)))
def nidm_add_session_person(g, uid, subjid, session_activity_uid, session_collection_uid):
    """Add a person node carrying a subject ID and the Participant role.

    g                       -- graph the triples are added to
    uid                     -- local name (within nidash) of the person node
    subjid                  -- subject identifier stored on the person node
    session_activity_uid    -- accepted but not used by this function
    session_collection_uid  -- accepted but not used by this function
    """
    person = nidash[uid]
    participant_role = nidash["Participant"]
    g.add((person, rdf.RDF.type, prov["Person"]))
    g.add((person, ncit["subjectID"], rdf.Literal(subjid)))
    #describe the shared Participant role node, then give the person that role
    nidm_add_role(g, participant_role, "Participant")
    g.add((person, prov["hadRole"], participant_role))
def nidm_add_role(g, uid, role):
    """Type a node as a role and label it.

    g     -- graph the triples are added to
    uid   -- the role node itself (used directly as the triple subject,
             not looked up in the nidash namespace)
    role  -- text stored as the node's label
    """
    role_triples = (
        (uid, rdf.RDF.type, prov["Role"]),
        (uid, prov["label"], rdf.Literal(role)),
    )
    for triple in role_triples:
        g.add(triple)

#Read in Excel spreadsheet and convert to RDF graph

In [9]:
#load spreadsheet
#filename_prefix is also the base name of the .ttl, .dot and .png files written further down
filename_prefix = "SimpleData"
#the workbook is opened via a relative path: <filename_prefix>.xlsx
xls = pd.ExcelFile(filename_prefix+".xlsx")
#parse sheet 0; the result is iterated row by row in the modelling cell below
variables = xls.parse(0)

#Create simple data dictionary

In [10]:



#name of the assessment; a fresh UUID makes the assessment's local name unique per run
assessment_name = "SimpleData"
assessment_uri = "%s_%s" % (assessment_name, str(uuid.uuid1()))

#add the assessment node that the data dictionary elements will be attached to
nidm_create_assessment(g, assessment_uri, assessment_name, "XNAT query response document")


#describe the spreadsheet columns with a small data dictionary
#SimpleData has only age, gender, and handedness for interesting assessment measures

#Age: a plain data element
age_node = nidash[assessment_uri+"_Age"]
for rdf_class in (nidash["DataElement"], rdf.RDF.Property, ncit["Age"]):
    g.add((age_node, rdf.RDF.type, rdf_class))
g.add((age_node, prov["label"], rdf.Literal("Age")))
g.add((age_node, nidash["DataType"], rdf.Literal(XSD.integer)))
#the assessment has the Age element as a member
g.add((nidash[assessment_uri], prov["hadMember"], age_node))

#Gender: a coded data element that points at its own value set
gender_node = nidash[assessment_uri+"_Gender"]
gender_valueset_node = nidash[assessment_uri+"_Gender_ValueSet"]
for rdf_class in (nidash["CodedDataElement"], rdf.RDF.Property, ncit["Gender"]):
    g.add((gender_node, rdf.RDF.type, rdf_class))
g.add((gender_node, prov["label"], rdf.Literal("Gender")))
g.add((gender_node, nidash["DataType"], rdf.Literal(XSD.string)))
g.add((gender_node, nidash["ValueSet"], gender_valueset_node))
#the value set is both a collection and a ValueSet
for rdf_class in (prov["Collection"], nidash["ValueSet"]):
    g.add((gender_valueset_node, rdf.RDF.type, rdf_class))
#the assessment has the Gender element as a member
g.add((nidash[assessment_uri], prov["hadMember"], gender_node))

#create coded-property for Genders
#labels for the gender codes this notebook knows about
gender_labels = {'M': 'Male', 'F': 'Female'}
#get unique genders from data frame; missing cells are dropped because they
#cannot be concatenated into a node name
genders = variables['M/F'].dropna().unique()
#for each unique gender create a coded property
for gender in genders:
    #unknown codes keep their raw value as label instead of being labelled 'Female'
    label = gender_labels.get(gender, gender)
    nidm_add_codedproperty(g,assessment_uri+"_Gender_ValueSet", assessment_uri+"_Gender_"+gender, gender, label)

#Handedness: a second coded data element with its own value set
hand_node = nidash[assessment_uri+"_Hand"]
hand_valueset_node = nidash[assessment_uri+"_Hand_ValueSet"]
for rdf_class in (nidash["CodedDataElement"], rdf.RDF.Property, ncit["HandDom"]):
    g.add((hand_node, rdf.RDF.type, rdf_class))
g.add((hand_node, prov["label"], rdf.Literal("Handedness")))
g.add((hand_node, nidash["DataType"], rdf.Literal(XSD.string)))
g.add((hand_node, nidash["ValueSet"], hand_valueset_node))
#the assessment has the Handedness element as a member
g.add((nidash[assessment_uri], prov["hadMember"], hand_node))
#the value set is both a collection and a ValueSet
for rdf_class in (prov["Collection"], nidash["ValueSet"]):
    g.add((hand_valueset_node, rdf.RDF.type, rdf_class))
#one coded property per distinct value in the Hand column; the value is used as both code and label
hands = variables['Hand'].unique()
for hand in hands:
    nidm_add_codedproperty(g, assessment_uri+"_Hand_ValueSet", assessment_uri+"_Hand_"+hand, hand, hand)

#map each spreadsheet column header to the local name of its data-dictionary element;
#used when modelling the acquired values of every row
data_dict = {
    'Age': assessment_uri+"_Age",
    'Hand': assessment_uri+"_Hand",
    'M/F': assessment_uri+"_Gender",
}

    

#Parse/model imaging and acquired assessment data from file

In [11]:
#create investigation dictionary in case there are multiple projects in spreadsheet
#create investigation collection and activity dictionaries as well
inv_dict = {}
inv_collect_dict = {}
inv_activity_dict = {}
scanner_dict = {}

#create subject dictionary in case there are multiple measurements from a subject
subj_dict = {}

#iterate over the rows in the data file; each row yields one assessment record and one image
for index, row in variables.iterrows():
    project = row["Project"]
    subject = row["Subject"]

    #if this is a new investigation then add the UUID to dictionary and create entity
    #("in" is used instead of dict.has_key, which only exists on Python 2)
    if project not in inv_dict:
        #create investigation UUID
        inv_uri = "Investigation_" + str(uuid.uuid1())
        #keep UUID -> uniqueid mappings for experiments in dictionary in case there are multiple experiments
        inv_dict[project] = inv_uri
        #add investigation using HID experiment ID
        nidm_create_investigation(g, inv_uri,inv_dict[project],project)
        #add other metadata
        nidm_add_investigation_metadata(g, inv_uri, project, "1000 Functional Connectomes Project (FCP)", "http://fcon_1000.projects.nitrc.org/")

        #create an investigation collection / investigation activity
        collect_uri = "InvestigationCollection_" + str(uuid.uuid1())
        inv_collect_dict[project] = collect_uri
        #add collection/activities to graph
        g.add((nidash[collect_uri], rdf.RDF.type, prov["Collection"]))
        #add label for debugging/model evaluation
        g.add((nidash[collect_uri],prov["label"], rdf.Literal("Investigation Collection")))
        #associate investigation activity with collection
        g.add((nidash[inv_uri], prov["wasAssociatedWith"], nidash[collect_uri]))
    else:
        inv_uri = inv_dict[project]
        #look the collection up again: without this, collect_uri would still hold the
        #collection of whichever project was created most recently
        collect_uri = inv_collect_dict[project]

    activity_uri = "AssessmentCollectionActivity_"+str(uuid.uuid1())
    inv_activity_dict[project] = activity_uri
    #add collection/activities to graph
    g.add((nidash[activity_uri], rdf.RDF.type, prov["Activity"]))
    #add label for debugging/model evaluation
    g.add((nidash[activity_uri],prov["label"], rdf.Literal("Assessment Data Collection Activity")))
    #associate assessment collection activity with investigation collection
    g.add((nidash[collect_uri],prov["hadMember"],nidash[activity_uri]))

    #add elements to RDF graph for variable definitions
    acq_assessment_uri = assessment_name + "_" + str(uuid.uuid1())
    nidm_add_assessment_data(g,row,acq_assessment_uri, assessment_uri, data_dict)

    #associate assessment data with project acquisition activity
    g.add((nidash[acq_assessment_uri], prov["wasGeneratedBy"], nidash[activity_uri]))

    #if new subject add subject entity
    if subject not in subj_dict:
        #get subjectID and store in NIDM-Experiment graph
        subj_uid = subject + "_" + str(uuid.uuid1())
        subj_dict[subject] = subj_uid
        nidm_add_session_person(g,subj_uid, subject, inv_activity_dict[project],inv_collect_dict[project])
    else:
        subj_uid = subj_dict[subject]

    #associate person with this row's collection and assessment activity; this is done for
    #every row (not only a subject's first one) so that repeat measurements of a subject
    #stay reachable from that subject
    g.add((nidash[inv_collect_dict[project]], prov["wasAttributedTo"], nidash[subj_uid]))
    g.add((nidash[inv_activity_dict[project]], prov["wasAssociatedWith"], nidash[subj_uid]))

    #associate assessment data with investigation collection
    g.add((nidash[inv_collect_dict[project]],prov["hadMember"], nidash[acq_assessment_uri]))


    #Imaging data
    #create acquisition activity (activity_uri now refers to the imaging activity)
    activity_uri = "ImagingCollectionActivity_"+str(uuid.uuid1())
    g.add((nidash[activity_uri], rdf.RDF.type, prov["Activity"]))
    #associate with investigation collection
    g.add((nidash[inv_collect_dict[project]],prov["hadMember"], nidash[activity_uri]))
    #create scanner agent if not already created, note scanners key'd by Site+FieldStrength
    scanner_key = row["Acquisition Site"]+'_'+row["Field Strength"]
    if scanner_key not in scanner_dict:
        scanner_uri = scanner_key+'_'+str(uuid.uuid1())
        scanner_dict[scanner_key] = scanner_uri
        g.add((nidash[scanner_uri],rdf.RDF.type, prov["Agent"]))
        #add scanner metadata
        g.add((nidash[scanner_uri],dicom["MagneticFieldStrength"],rdf.Literal(row["Field Strength"])))
        g.add((nidash[scanner_uri],ncit["SiteID"], rdf.Literal(row["Acquisition Site"])))
    #associate scanner agent with acquisition object
    #(the "Used" spelling is kept because the sample queries match on prov:Used)
    g.add((nidash[activity_uri],prov["Used"], nidash[scanner_dict[scanner_key]]))
    #associate subject agent with acquisition object
    g.add((nidash[activity_uri],prov["wasAssociatedWith"], nidash[subj_dict[subject]]))

    #entity for image
    image_uri = "MRImage_"+str(uuid.uuid1())
    g.add((nidash[image_uri], rdf.RDF.type, prov["Entity"]))
    g.add((nidash[image_uri], prov["atLocation"], rdf.URIRef(row["File Path"])))
    #associate image acquisition entity with acquisition activity
    g.add((nidash[image_uri], prov["wasGeneratedBy"], nidash[activity_uri]))

In [12]:
#show the graph as Turtle; the call form of print matches the query cells below
#and is valid on both Python 2 and Python 3
print(g.serialize(format='turtle'))

@prefix dcat: <http://www.w3.org/ns/dcat#> .
@prefix dct: <http://purl.org/dc/terms/> .
@prefix dctypes: <http://purl.org/dc/dcmitype/> .
@prefix dicom: <http://neurolex.org/wiki/Category:DICOM_term/> .
@prefix ncit: <http://ncitt.ncit.nih.gov/> .
@prefix nidash: <http://purl.org/nidash/nidm/> .
@prefix prov: <http://www.w3.org/ns/prov#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xnat: <https://www.nitrc.org/projects/fcon_1000/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

nidash:Investigation_22015d61-5148-11e6-b205-6c4008b8f03e a dctypes:Dataset,
        nidash:Investigation,
        prov:Entity ;
    dct:description "1000 Functional Connectomes Project (FCP)"@en ;
    dct:publisher <http://fcon_1000.projects.nitrc.org/> ;
    dct:title "fcon_1000"@en ;
    dcat:accessURL <http://fcon_1000.projects.nitrc.org/> ;
    prov:wasAssociatedWith 

#Save RDF Turtle file to disk

In [13]:
#serialize() returns bytes on some rdflib/Python combinations and text on others,
#so normalise to UTF-8 bytes and write the file in binary mode
turtle_doc = g.serialize(format='turtle')
if not isinstance(turtle_doc, bytes):
    turtle_doc = turtle_doc.encode('utf-8')
with open(filename_prefix+".ttl",'wb') as f:
    f.write(turtle_doc)

#Create DOT file from RDF graph

In [14]:
#rdf2dot is imported in this cell rather than in the import cell at the top of the notebook
from rdflib.tools import rdf2dot
#write graph g in DOT form to <filename_prefix>.dot
with open(filename_prefix+".dot",'w') as f:
    #the return value is kept in s; no later cell shown here reads it
    s = rdf2dot.rdf2dot(g, f)

In [19]:
#pydot is imported in this cell rather than in the import cell at the top of the notebook
import pydot
#keep the pydot result under its own name so the RDF graph g is not overwritten
dot_graphs = pydot.graph_from_dot_file(filename_prefix+".dot")
#depending on the pydot release this is a single graph or a list of graphs
dot_graph = dot_graphs[0] if isinstance(dot_graphs, (list, tuple)) else dot_graphs
#render the DOT graph to <filename_prefix>.png
dot_graph.write_png(filename_prefix+'.png')

1.0.29


#Sample Queries

In [16]:
#rdflib is re-imported so this cell can be run without the import cell at the top
import rdflib as rdf 
#reload data
#the file name is hard-coded here; it matches filename_prefix+".ttl" written by the save cell
g=rdf.Graph().parse("SimpleData.ttl",format='turtle')

In [17]:
#list image locations for subjects whose recorded age is greater than 12, ordered by age
#the ncit: and prov: prefixes are not declared in the query text itself
age_image_query = """select ?age ?img_loc
    where {
        ?agedd a ncit:Age .
        ?entity ?agedd ?age ;
            prov:wasGeneratedBy/prov:wasAssociatedWith ?subject .
        ?img_entity prov:wasGeneratedBy/prov:wasAssociatedWith ?subject ;
            prov:atLocation ?img_loc .
        filter (?age > 12)     
    }
    order by ?age
    """
qres = g.query(age_image_query)
print("Age \t Image URL\n")
#each result row carries the two selected variables in select order
for age, img_loc in qres:
    print ("%s \t %s" % (age, img_loc))

Age 	 Image URL

13 	 http://www.nitrc.org/ir/data/experiments/xnat_E01002/scans/anat_mprage_anonymized/resources/7062/files/scan_mprage_anonymized.nii.gz
13 	 http://www.nitrc.org/ir/data/experiments/xnat_E00016/scans/anat_mprage_anonymized/resources/11956/files/scan_mprage_anonymized.nii.gz
13 	 http://www.nitrc.org/ir/data/experiments/xnat_E00013/scans/anat_mprage_anonymized/resources/11948/files/scan_mprage_anonymized.nii.gz
13 	 http://www.nitrc.org/ir/data/experiments/xnat_E00975/scans/anat_mprage_anonymized/resources/6981/files/scan_mprage_anonymized.nii.gz
13 	 http://www.nitrc.org/ir/data/experiments/xnat_E00005/scans/anat_mprage_anonymized/resources/11924/files/scan_mprage_anonymized.nii.gz
14 	 http://www.nitrc.org/ir/data/experiments/xnat_E00011/scans/anat_mprage_anonymized/resources/11942/files/scan_mprage_anonymized.nii.gz
14 	 http://www.nitrc.org/ir/data/experiments/xnat_E00981/scans/anat_mprage_anonymized/resources/6999/files/scan_mprage_anonymized.nii.gz
14 	 http://w

In [18]:
#get site, subject ID, age, gender and handedness for subjects scanned in AnnArbor
#(no image URL is selected by this query)
annarbor_query = """select ?site ?subject ?age ?gender ?hand
    where {
        ?age_type a ncit:Age .
        ?gender_type a ncit:Gender .
        ?hand_type a ncit:HandDom .
        ?entity ?age_type ?age ;
            ?gender_type ?gender ;
            ?hand_type ?hand ;
            prov:wasGeneratedBy/prov:wasAssociatedWith ?subject_entity .
        ?subject_entity ncit:subjectID ?subject .
        ?activity prov:wasAssociatedWith ?subject_entity ;
            prov:Used ?site_entity .
        ?site_entity ncit:SiteID ?site .
        filter (?site = 'AnnArbor')
    } """
qres = g.query(annarbor_query)

#one output line per result row, columns in select order
row_format = "%s \t %s  \t %s \t %s \t %s"
print("Site \t\t SubjectID \t\t Age \t Gender \t Hand \n")
for row in qres:
    print (row_format % row)

Site 		 SubjectID 		 Age 	 Gender 	 Hand 

AnnArbor 	 AnnArbor_sub49687  	 13 	 M 	 left
AnnArbor 	 AnnArbor_sub82334  	 14 	 M 	 right
AnnArbor 	 AnnArbor_sub20317  	 15 	 M 	 right
AnnArbor 	 AnnArbor_sub38614  	 14 	 M 	 right
AnnArbor 	 AnnArbor_sub87745  	 14 	 M 	 right
AnnArbor 	 AnnArbor_sub86367  	 15 	 M 	 right
AnnArbor 	 AnnArbor_sub96621  	 15 	 M 	 right
AnnArbor 	 AnnArbor_sub46727  	 13 	 F 	 right
AnnArbor 	 AnnArbor_sub16960  	 13 	 M 	 right
