# An example of integrating two RDF datasets
- Read in the original datasets converted to RDF
- Create a mapping between a few measures in in each file
- Query the integrated dataset

In [1]:
import os
import rdflib as rdf
#import csv for reading csv files
import csv

#Read in Datasets Expressed in Turtle Format

In [2]:
abide_graph = rdf.Graph()
abide_graph_parse = abide_graph.parse('../abide/ABIDE_to_NIDM_RDFLib.ttl', format='turtle')

fbirn_graph = rdf.Graph()
fbirn_graph_parse = fbirn_graph.parse('../fbirn/FBIRN_to_NIDM_RDFLib.ttl', format='turtle')

#SPARQL query for FIQ measures across graphs
- Step 1: Figure out that ABIDE_FIQ and FBIRN FSIQ are the same thing based on looking up the terms
- Step 2: Create a union graph
- Step 3: Perform SPARQL Query

In [3]:
#Step 2:
union_graph = abide_graph_parse + fbirn_graph_parse

In [4]:
#step 3a: Add namespaces from each graph
nidm = rdf.URIRef("http://nidm.nidash.org/")
prov = rdf.URIRef("http://www.w3.org/ns/prov#")
ncit = rdf.URIRef("http://ncitt.ncit.nih.gov/")
nidash = rdf.URIRef("http://purl.org/nidash/nidm/")
fbirn = rdf.URIRef("http://www.birncommunity.org/collaborators/function-birn/")
abide = rdf.URIRef("http://fcon_1000.projects.nitrc.org/indi/abide/")

In [5]:
#Step 3:
qres = union_graph.query(
    """SELECT DISTINCT *
       WHERE {
          ?agent a prov:Agent ;
          ncit:subjectID ?subjectid .
       }""")

for row in qres:
    print("%s has_subject_id %s" % row)

303851049 has_subject_id http://purl.org/nidash/nidm/agent_303851049
CMU_50646 has_subject_id http://purl.org/nidash/nidm/agent_CMU_50646
CMU_50643 has_subject_id http://purl.org/nidash/nidm/agent_CMU_50643
301882920 has_subject_id http://purl.org/nidash/nidm/agent_301882920
CMU_50647 has_subject_id http://purl.org/nidash/nidm/agent_CMU_50647
303269784 has_subject_id http://purl.org/nidash/nidm/agent_303269784
CMU_50645 has_subject_id http://purl.org/nidash/nidm/agent_CMU_50645
305738011 has_subject_id http://purl.org/nidash/nidm/agent_305738011
CMU_50644 has_subject_id http://purl.org/nidash/nidm/agent_CMU_50644
304253859 has_subject_id http://purl.org/nidash/nidm/agent_304253859


# Examine the term definitions
- look at the definitions for each term and try to figure out if ter is a direct mapping or not
- if there is a direct mapping, we can create a simple mapping file to query across both datasets

In [6]:
abide_terms = rdf.Graph()
abide_terms.parse('../abide/abide_terms.ttl', format='turtle')
print abide_terms.serialize(format='turtle')

@prefix abide: <http://fcon_1000.projects.nitrc.org/indi/abide/> .
@prefix ncit: <http://ncitt.ncit.nih.gov/> .
@prefix nidash: <http://purl.org/nidash/nidm/> .
@prefix nidm: <http://nidm.nidash.org/> .
@prefix prov: <http://www.w3.org/ns/prov#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

abide:ABIDE_FSIQ a prov:Entity ;
    abide:form "WASI" ;
    abide:term "ABIDE_FIQ" ;
    prov:definition "FIQ Standard Score" ;
    prov:label "ABIDE vocabulary term" .

abide:ABIDE_PIQ a prov:Entity ;
    abide:form "WASI" ;
    abide:term "ABIDE_PIQ" ;
    prov:definition "PIQ Standard Score" ;
    prov:label "ABIDE vocabulary term" .

abide:ABIDE_VIQ a prov:Entity ;
    abide:form "WASI" ;
    abide:term "ABIDE_VIQ" ;
    prov:definition "VIQ Standard Score" ;
    prov:label "ABIDE vocabulary term" .




In [7]:
fbirn_terms = rdf.Graph()
fbirn_terms.parse('../fbirn/fbirn_terms.ttl', format='turtle')
print fbirn_terms.serialize(format='turtle')

@prefix fbirn: <http://www.birncommunity.org/collaborators/function-birn/> .
@prefix ncit: <http://ncitt.ncit.nih.gov/> .
@prefix nidash: <http://purl.org/nidash/nidm/> .
@prefix nidm: <http://nidm.nidash.org/> .
@prefix prov: <http://www.w3.org/ns/prov#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

fbirn:FSIQ a prov:Entity ;
    fbirn:form "North American Adult Reading Test (NAART)" ;
    fbirn:term "FSIQ" ;
    prov:definition "Estimated Full Scale IQ = 127.8 - .78 * errors" ;
    prov:label "FBIRN vocabulary term" .

fbirn:PIQ a prov:Entity ;
    fbirn:form "North American Adult Reading Test (NAART)" ;
    fbirn:term "PIQ" ;
    prov:definition "Estimated Performance IQ = 119.4 - .42 * errors" ;
    prov:label "FBIRN vocabulary term" .

fbirn:VIQ a prov:Entity ;
    fbirn:form "North American Adult Reading

# Example Mapping
- We see that the term `ABIDE FSIQ` matches `FBIRN FIQ`
- Now we can encode this knowlede by creating a parent term that matches the two together

In [8]:
mapping = """
@prefix abide: <http://fcon_1000.projects.nitrc.org/indi/abide/> .
@prefix fbirn: <http://www.birncommunity.org/collaborators/function-birn/> .
@prefix ncit: <http://ncitt.ncit.nih.gov/> .
@prefix nidash: <http://purl.org/nidash/nidm/> .
@prefix nidm: <http://nidm.nidash.org/> .
@prefix prov: <http://www.w3.org/ns/prov#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

# Declare our general term for mapping
nidm:FSIQ a prov:Entity ;
    nidm:form "North American Adult Reading Test (NAART)" ;
    nidm:term "FSIQ" ;
    prov:definition "Estimated Full Scale IQ = 127.8 - .78 * errors" ;
    prov:label "Generic vocabulary term." .

# Now include that these are subclasses of this 
abide:ABIDE_FIQ a prov:Entity ;
    rdfs:subClassOf nidm:FSIQ ;
    abide:form "WASI" ;
    abide:term "ABIDE_FIQ" ;
    prov:definition "FIQ Standard Score" ;
    prov:label "ABIDE vocabulary term" .
    
fbirn:FSIQ a prov:Entity ;
    rdfs:subClassOf nidm:FSIQ ;
    fbirn:form "North American Adult Reading Test (NAART)" ;
    fbirn:term "FSIQ" ;
    prov:definition "Estimated Full Scale IQ = 127.8 - .78 * errors" ;
    prov:label "FBIRN vocabulary term" ."""

In [9]:
gmap = rdf.Graph()

In [11]:
gmap.parse(data=mapping, format='turtle')
print gmap.serialize(format='turtle')

@prefix abide: <http://fcon_1000.projects.nitrc.org/indi/abide/> .
@prefix fbirn: <http://www.birncommunity.org/collaborators/function-birn/> .
@prefix ncit: <http://ncitt.ncit.nih.gov/> .
@prefix nidash: <http://purl.org/nidash/nidm/> .
@prefix nidm: <http://nidm.nidash.org/> .
@prefix prov: <http://www.w3.org/ns/prov#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

abide:ABIDE_FIQ a prov:Entity ;
    abide:form "WASI" ;
    abide:term "ABIDE_FIQ" ;
    rdfs:subClassOf nidm:FSIQ ;
    prov:definition "FIQ Standard Score" ;
    prov:label "ABIDE vocabulary term" .

fbirn:FSIQ a prov:Entity ;
    fbirn:form "North American Adult Reading Test (NAART)" ;
    fbirn:term "FSIQ" ;
    rdfs:subClassOf nidm:FSIQ ;
    prov:definition "Estimated Full Scale IQ = 127.8 - .78 * errors" ;
    prov:label "FBIRN vocabulary te

# Query for Mapping
- now that we have created a mapping we can query for the list of mapped terms
- Secondly, we can use this mapping to filter our query results over actual data

In [12]:
qres = gmap.query(
    """SELECT DISTINCT ?subclasses
       WHERE { 
              ?subclasses rdfs:subClassOf nidm:FSIQ .
              }""")
print qres.serialize(format='csv')

subclasses
http://fcon_1000.projects.nitrc.org/indi/abide/ABIDE_FIQ
http://www.birncommunity.org/collaborators/function-birn/FSIQ



## Add the Mappings to the Joinined Datasets

In [15]:
mapped_graph = union_graph + gmap

## Now use the mappings to filter the results!

In [17]:
qres = mapped_graph.query(
    """SELECT DISTINCT ?s ?subclasses ?o 
       WHERE { 
              ?subclasses rdfs:subClassOf nidm:FSIQ .
              ?s ?subclasses ?o
              }""")
print qres.serialize(format='csv')

s,subclasses,o
http://purl.org/nidash/nidm/entity_CMU_50645,http://fcon_1000.projects.nitrc.org/indi/abide/ABIDE_FIQ,124
http://purl.org/nidash/nidm/entity_CMU_50643,http://fcon_1000.projects.nitrc.org/indi/abide/ABIDE_FIQ,123
http://purl.org/nidash/nidm/entity_CMU_50647,http://fcon_1000.projects.nitrc.org/indi/abide/ABIDE_FIQ,104
http://purl.org/nidash/nidm/entity_CMU_50646,http://fcon_1000.projects.nitrc.org/indi/abide/ABIDE_FIQ,108
http://purl.org/nidash/nidm/entity_CMU_50644,http://fcon_1000.projects.nitrc.org/indi/abide/ABIDE_FIQ,107
http://purl.org/nidash/nidm/entity_303851049,http://www.birncommunity.org/collaborators/function-birn/FSIQ,117.66
http://purl.org/nidash/nidm/entity_301882920,http://www.birncommunity.org/collaborators/function-birn/FSIQ,118.44
http://purl.org/nidash/nidm/entity_304253859,http://www.birncommunity.org/collaborators/function-birn/FSIQ,112.2
http://purl.org/nidash/nidm/entity_303269784,http://www.birncommunity.org/collaborators/function-birn/FSI