# An example of integrating two RDF datasets
- Read in the original datasets converted to RDF
- Create a mapping between a few measures in each file
- Query the integrated dataset

In [1]:
import os
import rdflib
#import csv for reading csv files
import csv

#Read in Datasets Expressed in Turtle Format

In [16]:
# Load each Turtle source into its own graph. The *_parse names keep whatever
# Graph.parse() returns (rdflib documents this as the graph that was filled).
simpledata_graph = rdflib.Graph()
braintx_graph = rdflib.Graph()

simpledata_graph_parse = simpledata_graph.parse(source='SimpleData.ttl', format='turtle')
braintx_graph_parse = braintx_graph.parse(source='data2.ttl', format='turtle')

In [17]:

# Combine the two parsed graphs into one graph to query across both datasets
# (rdflib documents `+` on graphs as a set-theoretic union of their triples).
union_graph = simpledata_graph_parse + braintx_graph_parse

In [18]:
# Step 3a: namespaces for the vocabularies used by the two source graphs.
# One definition per prefix, so there is a single place to change an IRI.

# W3C core vocabularies
rdf = rdflib.Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
rdfs = rdflib.Namespace("http://www.w3.org/2000/01/rdf-schema#")
xml = rdflib.Namespace("http://www.w3.org/XML/1998/namespace")
xsd = rdflib.Namespace("http://www.w3.org/2001/XMLSchema#")
prov = rdflib.Namespace("http://www.w3.org/ns/prov#")
dcat = rdflib.Namespace("http://www.w3.org/ns/dcat#")
skos = rdflib.Namespace("http://www.w3.org/2004/02/skos/core#")

# Dublin Core, FOAF and VoID
dc = rdflib.Namespace("http://purl.org/dc/elements/1.1/")
dct = rdflib.Namespace("http://purl.org/dc/terms/")
dctypes = rdflib.Namespace("http://purl.org/dc/dcmitype/")
foaf = rdflib.Namespace("http://xmlns.com/foaf/0.1/")
void = rdflib.Namespace("http://rdfs.org/ns/void#")

# Dataset-specific vocabularies
nidm = rdflib.Namespace("http://nidm.nidash.org/")
ncit = rdflib.Namespace("http://ncitt.ncit.nih.gov/")
dicom = rdflib.Namespace("http://neurolex.org/wiki/Category:DICOM_term/")
xnat = rdflib.Namespace("https://www.nitrc.org/projects/fcon_1000/")
metadb_catalog = rdflib.Namespace("http://metadb.riken.jp/db/DBcatalog/")


In [19]:
# Register the prefixes that the SPARQL queries below use for the two
# dataset classes (void:Dataset and dctypes:Dataset).
for prefix, namespace in (('void', void), ('dctypes', dctypes)):
    union_graph.bind(prefix, namespace)

#Adding Additional Integration Triples to Union Graph

In [20]:
# List every subject in the union graph that is typed as void:Dataset.
void_dataset_sparql = """ select *
    where {
        ?s a void:Dataset .
    
    }"""
qres = union_graph.query(void_dataset_sparql)

# Only ?s is selected, so each result row holds exactly one value.
for (dataset,) in qres:
    print("%s a void:Dataset" % (dataset,))

http://metadb.riken.jp/db/BrainTx a void:Dataset


In [21]:
# List every subject in the union graph that is typed as dctypes:Dataset.
dctypes_dataset_sparql = """ select *
    where {
        ?s a dctypes:Dataset .
    
    }"""
qres = union_graph.query(dctypes_dataset_sparql)

# Only ?s is selected, so each result row holds exactly one value.
for (dataset,) in qres:
    print("%s a dctypes:Dataset" % (dataset,))

http://purl.org/nidash/nidm/Investigation_22015d61-5148-11e6-b205-6c4008b8f03e a dctypes:Dataset


In [22]:
# Bridge the two dataset classes in the union graph: every dctypes:Dataset is
# additionally typed void:Dataset and every void:Dataset is additionally typed
# dctypes:Dataset, so a query for either class matches subjects from both sources.
ns = {
    'void': 'http://rdfs.org/ns/void#',
    'dctypes': 'http://purl.org/dc/dcmitype/',
}
# Two update operations separated by ';' in a single SPARQL Update request.
query = """
            INSERT {?s a void:Dataset .}
            WHERE {?s a dctypes:Dataset .}
            ;
            INSERT {?s a dctypes:Dataset .}
            WHERE {?s a void:Dataset .}
              """
union_graph.update(query, initNs=ns)

In [23]:
#print union_graph.serialize(format='turtle')

In [25]:
# Fetch the reference and dct:description of every dctypes:Dataset in the
# union graph.
description_sparql = """SELECT DISTINCT ?s ?description
       WHERE {
          ?s a dctypes:Dataset ;
              dct:description ?description .
       }"""
qres = union_graph.query(description_sparql)

print ("Dataset Ref \t Description \n")
# Each row is (?s, ?description), matching the SELECT clause.
for dataset_ref, description in qres:
    print("%s \t %s" % (dataset_ref, description))

Dataset Ref 	 Description 

http://purl.org/nidash/nidm/Investigation_22015d61-5148-11e6-b205-6c4008b8f03e 	 1000 Functional Connectomes Project (FCP)
http://metadb.riken.jp/db/BrainTx 	 The Brain Transcriptome Database (BrainTx) project aims to create an integrated platform to visualize and analyze our original transcriptome data and publicly accessible transcriptome data related to the genetics that underlie the development, function, and dysfunction stages and states of the brain. BrainTx was developed from the Cerebellar Development Transcriptome Database (CDT-DB). Information about CDT-DB can be downloaded using the "Download" option in the menu bar.


In [26]:
# Serialize the union graph to a Turtle file.
# rdflib writes the file itself via `destination`. Writing the return value of
# serialize() through a text-mode file handle fails with a TypeError on
# rdflib versions whose serialize() returns bytes, and otherwise depends on
# the platform's default text encoding.
union_graph.serialize(destination="SimpleData+data2.ttl", format='turtle')

#Second example using skos relationships

In [48]:
# Re-read both Turtle files and rebuild the union graph for this example,
# presumably so it starts without the rdf:type triples inserted into the
# previous union graph.
simpledata_graph = rdflib.Graph()
braintx_graph = rdflib.Graph()

simpledata_graph_parse = simpledata_graph.parse(source='SimpleData.ttl', format='turtle')
braintx_graph_parse = braintx_graph.parse(source='data2.ttl', format='turtle')

union_graph = simpledata_graph_parse + braintx_graph_parse

# Register the prefixes for the two dataset classes on the new union graph.
for prefix, namespace in (('void', void), ('dctypes', dctypes)):
    union_graph.bind(prefix, namespace)



In [49]:
# Instead of asserting that the classes are the same, link instances of each
# class to the other two classes with skos:relatedTo. For this example the
# three classes treated as related are void:Dataset, dctypes:Dataset and
# metadb_catalog:RIKENorganization.
# NOTE(review): the SKOS core vocabulary defines skos:related, not
# skos:relatedTo. Confirm the predicate is intentional before changing it,
# since the query that reads these triples must use the same one.
ns = {
    'void': 'http://rdfs.org/ns/void#',
    'skos': 'http://www.w3.org/2004/02/skos/core#',
    'dctypes': 'http://purl.org/dc/dcmitype/',
    'metadb_catalog': 'http://metadb.riken.jp/db/DBcatalog/',
}
# Six update operations separated by ';': one per ordered pair of classes.
query = """
            INSERT {?s skos:relatedTo void:Dataset .}
            WHERE {?s a dctypes:Dataset .}
            ;
            INSERT {?s skos:relatedTo dctypes:Dataset .}
            WHERE {?s a void:Dataset .}
            ;
            INSERT {?s skos:relatedTo dctypes:Dataset .}
            WHERE {?s a metadb_catalog:RIKENorganization .}
            ;
            INSERT {?s skos:relatedTo void:Dataset .}
            WHERE {?s a metadb_catalog:RIKENorganization .}
            ;
            INSERT {?s skos:relatedTo metadb_catalog:RIKENorganization .}
            WHERE {?s a dctypes:Dataset .}
            ;
            INSERT {?s skos:relatedTo metadb_catalog:RIKENorganization .}
            WHERE {?s a void:Dataset .}
           """
union_graph.update(query, initNs=ns)

In [76]:
# Fetch subjects that are either related to dctypes:Dataset through
# skos:relatedTo or typed dctypes:Dataset directly, together with a
# description when one is available. The description predicates are OPTIONAL,
# so a subject with none of them still appears, with ?description unbound.
related_sparql = """SELECT DISTINCT ?s ?description
       WHERE {
          {?s skos:relatedTo dctypes:Dataset .
          OPTIONAL{ ?s dct:description ?description .}
          OPTIONAL{ ?s dc:description ?description .}
          OPTIONAL{ ?s metadb_catalog:centerDescription ?description .}}
          union
          {?s a dctypes:Dataset .
          OPTIONAL{ ?s dct:description ?description .}
          OPTIONAL{ ?s dc:description ?description .}}
        
         }"""
qres = union_graph.query(related_sparql)

print ("Dataset or Organizations \t Description \n")
# Each row is (?s, ?description), matching the SELECT clause.
for resource, description in qres:
    print("%s \t %s" % (resource, description))

Dataset or Organizations 	 Description 

http://metadb.riken.jp/db/resource/organisation/riken 	 None
http://metadb.riken.jp/db/resource/organisation/riken_labs_accc 	 None
http://metadb.riken.jp/db/resource/organisation/riken_labs_bsi 	 Brain science is valuable not only for the advancement of science but also because it can greatly impact our society and economy. To meet these expectations, the Brain Science Institute (BSI) was established in 1997 as part of RIKEN, an independent research institution supported by the Japanese government. BSI has a mission to produce innovative research and technology leading to scientific discoveries of the brain. In addition, BSI aims to develop domestic and international brain researchers by creating an environment that will integrate various intellectual disciplines and from that convergence find solutions that will ultimately benefit society in the realms of medicine, engineering, business, and education.In striving toward this goal, BSI has beco

In [15]:
# Serialize the union graph of the skos example to a Turtle file.
# rdflib writes the file itself via `destination`. Writing the return value of
# serialize() through a text-mode file handle fails with a TypeError on
# rdflib versions whose serialize() returns bytes, and otherwise depends on
# the platform's default text encoding.
union_graph.serialize(destination="SimpleData+data2_skosExample.ttl", format='turtle')