#Import Python libraries
##rdflib - https://pypi.python.org/pypi/rdflib

In [1]:
import os
import rdflib as rdf
#import csv for reading csv files
import csv

#Create new RDF graph

In [2]:
g = rdf.Graph()

#Add namespaces
## Add a namespace for each one in the object model

In [3]:
nidm = rdf.Namespace("http://nidm.nidash.org/")
prov = rdf.Namespace("http://www.w3.org/ns/prov#")
ncit = rdf.Namespace("http://ncitt.ncit.nih.gov/")
nidash = rdf.Namespace("http://purl.org/nidash/nidm/")
fbirn = rdf.Namespace("http://www.birncommunity.org/collaborators/function-birn/")
list(g.namespaces())

[('xml', rdflib.term.URIRef(u'http://www.w3.org/XML/1998/namespace')),
 ('rdf', rdflib.term.URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#')),
 ('xsd', rdflib.term.URIRef(u'http://www.w3.org/2001/XMLSchema#')),
 ('rdfs', rdflib.term.URIRef(u'http://www.w3.org/2000/01/rdf-schema#'))]

#Bind namespaces to graph

In [4]:
g.bind('nidm', nidm)
g.bind('prov', prov)
g.bind('ncit', ncit)
g.bind('nidash', nidash)
g.bind('fbirn', fbirn)
list(g.namespaces())

[('xml', rdflib.term.URIRef(u'http://www.w3.org/XML/1998/namespace')),
 ('fbirn',
  rdflib.term.URIRef(u'http://www.birncommunity.org/collaborators/function-birn/')),
 ('ncit', rdflib.term.URIRef(u'http://ncitt.ncit.nih.gov/')),
 ('rdfs', rdflib.term.URIRef(u'http://www.w3.org/2000/01/rdf-schema#')),
 ('prov', rdflib.term.URIRef(u'http://www.w3.org/ns/prov#')),
 ('nidash', rdflib.term.URIRef(u'http://purl.org/nidash/nidm/')),
 ('nidm', rdflib.term.URIRef(u'http://nidm.nidash.org/')),
 ('rdf', rdflib.term.URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#')),
 ('xsd', rdflib.term.URIRef(u'http://www.w3.org/2001/XMLSchema#'))]

    #Create function to create element nodes

In [5]:
def nidm_add_elements(g,line,assessment_name):
    #Create new node for each element with metadata consistent with NIDM-Experiment assessment acquistion
    g.add((nidash[assessment_name+line["SCORENAME"]], rdf.RDF.type, nidash["DataElement"]))
    g.add((nidash[assessment_name+line["SCORENAME"]], rdf.RDF.type, rdf.RDF.Property))
    g.add((nidash[assessment_name+line["SCORENAME"]], rdf.RDF.type, rdf.RDF.Property))
    g.add((nidash[assessment_name+line["SCORENAME"]], prov["label"], rdf.Literal(line["SCORENAME"])))
    g.add((nidash[assessment_name+line["SCORENAME"]], nidash["DataType"], rdf.Literal(line["SCORETYPE"])))
    g.add((nidash[assessment_name+line["SCORENAME"]], nidash["Question"], rdf.Literal(line["QUESTION"])))
    g.add((nidash[assessment_name+line["SCORENAME"]], nidash["Sequence"], rdf.Literal(line["SCORESEQ"])))   
    #make association with assessment
    g.add((nidash[assessment_name],prov["hadMember"], nidash[assessment_name+line["SCORENAME"]]))

In [6]:
def nidm_create_assessment(g, assessment_name):
    g.add((nidash[assessment_name], rdf.RDF.type, nidash["DataStructure"]))
    g.add((nidash[assessment_name],prov["label"],rdf.Literal(assessment_name)))

In [7]:
def nidm_add_codedproperty(g,line,valueset_id, codedvalue_id):
    g.add((nidash[codedvalue_id], rdf.RDF.type, nidash["CodedProperty"]))
    g.add((nidash[codedvalue_id], rdf.RDF.type, rdf.RDF.Property))
    g.add((nidash[codedvalue_id],nidash["code"], rdf.Literal(line["SCORECODE"])))
    g.add((nidash[codedvalue_id], prov["label"], rdf.Literal(line["SCORELABEL"])))
    g.add((nidash[valueset_id], prov["hadMember"], nidash[codedvalue_id]))
    

#Read in CSV file and convert to RDF graph

In [8]:
import pandas as pd
variables = pd.read_csv("FBIRNPhaseII_DEMOGRAPHICS_Variables.csv")
value_sets = pd.read_csv("FBIRNPhaseII_DEMOGRAPHICS_ValueSets.csv")
assessment_name = "FBIRNPhaseIIDemographics"
#iterate over the variables in the data dictionary
for index, row in variables.iterrows():
    #add elements to RDF graph for variable definitions
    nidm_add_elements(g,row,assessment_name) 
    #print "looking for " + row['SCORENAME']
    #look for value sets in data dictionary that match the variable
    query_sets = value_sets[value_sets['SCORENAME'].str.contains(row['SCORENAME'])]
    #iterate over the value set rows and add to RDF graph
    for query_index, query_row in query_sets.iterrows():
        #print query_row['SCORECODE'] + "," + query_row['SCORELABEL'] 
        #add attribute to coded data element for valueset
        valueset_id = assessment_name+row["SCORENAME"]+"ValueSet"
        g.add((nidash[assessment_name+row["SCORENAME"]], nidash["ValueSet"], nidash[valueset_id]))
        #create value set collection
        g.add((nidash[valueset_id], rdf.RDF.type, prov["Collection"]))
        g.add((nidash[valueset_id], rdf.RDF.type, nidash["ValueSet"]))
        nidm_add_codedproperty(g, query_row, valueset_id ,row["SCORENAME"] + "_" + query_row["SCORECODE"] )
        


In [9]:
print g.serialize(format='turtle')

@prefix fbirn: <http://www.birncommunity.org/collaborators/function-birn/> .
@prefix ncit: <http://ncitt.ncit.nih.gov/> .
@prefix nidash: <http://purl.org/nidash/nidm/> .
@prefix nidm: <http://nidm.nidash.org/> .
@prefix prov: <http://www.w3.org/ns/prov#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

nidash:FBIRNPhaseIIDemographics prov:hadMember nidash:FBIRNPhaseIIDemographicsAge,
        nidash:FBIRNPhaseIIDemographicsEducation,
        nidash:FBIRNPhaseIIDemographicsEthnicity,
        nidash:FBIRNPhaseIIDemographicsFathersEducation,
        nidash:FBIRNPhaseIIDemographicsFathersOccupation,
        nidash:FBIRNPhaseIIDemographicsGender,
        nidash:FBIRNPhaseIIDemographicsHandedness,
        nidash:FBIRNPhaseIIDemographicsLivingArrangement,
        nidash:FBIRNPhaseIIDemographicsMaritalStatus,
        nid

#Save RDF Turtle file to disk

In [10]:
with open("FBIRNPhaseII_DEMOGRAPHICS_DataDictionary.ttl",'w') as f:
    f.write(g.serialize(format='turtle'))

#Create RDF graph from DOT file

In [11]:
from rdflib.tools import rdf2dot
with open("FBIRNPhaseII_DEMOGRAPHICS_DataDictionary.dot",'w') as f:
    s = rdf2dot.rdf2dot(g, f)


In [12]:
import pydot
g = pydot.graph_from_dot_file("FBIRNPhaseII_DEMOGRAPHICS_DataDictionary.dot")
g.write_png('"FBIRNPhaseII_DEMOGRAPHICS_DataDictionary.png')

True