In [2]:
import os

import pandas as pd

In [39]:
# Location of the interactome edge list (srep35241-s3.txt) on the publisher's server.
url = 'https://media.nature.com/full/nature-assets/srep/2016/161017/srep35241/extref/srep35241-s3.txt'
# The file is tab-separated. The first 25 lines are skipped so that parsing starts
# at the header row (presumably they are a free-text preamble -- confirm against
# the raw file if the upstream layout ever changes).
interactome_df = pd.read_csv(
    filepath_or_buffer=url,
    sep='\t',
    skiprows=25,
)

In [40]:
# Preview the first five rows to see how the header line was parsed.
interactome_df.head(n=5)

Unnamed: 0,# gene_ID_1,gene_ID_2,data_source(s)
0,1394,2778,literature
1,100290337,4214,literature
2,122704,54460,complexes
3,4790,79155,binary;literature;signaling
4,2597,70,signaling


In [41]:
# Replace the raw header names with short snake_case ones. The keys have to match
# the parsed header exactly, including the '# ' prefix and the leading space.
interactome_df = interactome_df.rename(
    columns={
        '# gene_ID_1': 'gene_id_1',
        ' gene_ID_2': 'gene_id_2',
        'data_source(s)': 'source',
    }
)

In [42]:
# Keep a local tab-separated copy of the cleaned edge list (no index column).
tsv_path = 'data/external/human_interactome.tsv'
# to_csv does not create missing parent directories, so make sure the target
# folder exists; otherwise this cell fails on a fresh checkout.
os.makedirs(os.path.dirname(tsv_path), exist_ok=True)
interactome_df.to_csv(tsv_path, sep='\t', index=False)

In [43]:
# Confirm that the three columns now carry the new names.
interactome_df.head(n=5)

Unnamed: 0,gene_id_1,gene_id_2,source
0,1394,2778,literature
1,100290337,4214,literature
2,122704,54460,complexes
3,4790,79155,binary;literature;signaling
4,2597,70,signaling


In [44]:
from rdflib import Graph, URIRef, Literal, RDF, ConjunctiveGraph
def to_rdf(g, df, column_types, row_uri):
    """
    Add every row of a DataFrame to an RDF graph, one subject per row.

    The index label of a row is used as the subject URI. Each subject gets an
    ``rdf:type`` triple pointing at ``row_uri`` plus one triple per column,
    with the column name as predicate URI and the cell value as object.

    Parameters
    ----------
    g : rdflib.Graph
        Graph that receives the triples; it is modified in place.
    df : pandas.DataFrame
        Table to convert. Index labels and column names are passed to
        ``URIRef`` as-is, so they should already be URI strings.
    column_types : dict
        Dictionary mapping each column name of ``df`` to its type. Cells of
        columns mapped to ``'URI'`` become ``URIRef`` objects; any other value
        makes the cells ``Literal`` objects.
    row_uri : str
        URI used as the ``rdf:type`` value of every row.

    Returns
    -------
    rdflib.Graph
        The same graph object ``g`` that was passed in.

    Raises
    ------
    KeyError
        If a column of ``df`` has no entry in ``column_types``.
    """
    row_class = URIRef(row_uri)

    for index, series in df.iterrows():
        # Build the subject once per row instead of once per triple.
        subject = URIRef(index)
        g.add((subject, RDF.type, row_class))
        # Series.iteritems() was removed in pandas 2.0; items() yields the same
        # (column, value) pairs and also exists in older pandas releases.
        for column, value in series.items():
            predicate = URIRef(column)
            if column_types[column] == 'URI':
                g.add((subject, predicate, URIRef(value)))
            else:
                g.add((subject, predicate, Literal(value)))

    return g

In [47]:
# Label every row with its own resource URI of the form
# '<prefix><gene_id_1>_<gene_id_2>'. This has to run while the gene columns still
# hold the bare ids, i.e. before they are turned into ncbigene URIs.
interactome_df.index = (
    'http://bio2rdf.org/openpredict_resource:'
    + interactome_df['gene_id_1'].astype(str)
    + '_'
    + interactome_df['gene_id_2'].astype(str)
)

In [48]:
# Turn the first gene id into a bio2rdf ncbigene URI string.
interactome_df['gene_id_1'] = 'http://bio2rdf.org/ncbigene:' + interactome_df['gene_id_1'].astype(str)

In [49]:
# Turn the second gene id into a bio2rdf ncbigene URI string.
interactome_df['gene_id_2'] = 'http://bio2rdf.org/ncbigene:' + interactome_df['gene_id_2'].astype(str)

In [50]:
# Inspect the URI-valued index and gene columns.
interactome_df.head(n=5)

Unnamed: 0,gene_id_1,gene_id_2,source
http://bio2rdf.org/openpredict_resource:1394_2778,http://bio2rdf.org/ncbigene:1394,http://bio2rdf.org/ncbigene:2778,literature
http://bio2rdf.org/openpredict_resource:100290337_4214,http://bio2rdf.org/ncbigene:100290337,http://bio2rdf.org/ncbigene:4214,literature
http://bio2rdf.org/openpredict_resource:122704_54460,http://bio2rdf.org/ncbigene:122704,http://bio2rdf.org/ncbigene:54460,complexes
http://bio2rdf.org/openpredict_resource:4790_79155,http://bio2rdf.org/ncbigene:4790,http://bio2rdf.org/ncbigene:79155,binary;literature;signaling
http://bio2rdf.org/openpredict_resource:2597_70,http://bio2rdf.org/ncbigene:2597,http://bio2rdf.org/ncbigene:70,signaling


In [51]:
# Column names double as predicate URIs when the frame is converted to RDF.
interactome_df = interactome_df.rename(
    columns={'gene_id_1': 'http://bio2rdf.org/irefindex_vocabulary:interactor_a'}
)

In [52]:
# Second interactor column: rename it to its predicate URI.
interactome_df = interactome_df.rename(
    columns={'gene_id_2': 'http://bio2rdf.org/irefindex_vocabulary:interactor_b'}
)

In [53]:
# Evidence/source column: rename it to its predicate URI.
interactome_df = interactome_df.rename(
    columns={'source': 'http://bio2rdf.org/irefindex_vocabulary:source'}
)

In [55]:
# How each column's cells are emitted: the two interactor columns hold URIs,
# the source column holds plain literal text.
column_types = {
    'http://bio2rdf.org/irefindex_vocabulary:interactor_a': 'URI',
    'http://bio2rdf.org/irefindex_vocabulary:interactor_b': 'URI',
    'http://bio2rdf.org/irefindex_vocabulary:source': 'Literal',
}

# Identifier given to the graph; the provenance step describes this same URI.
graphURI = URIRef('http://fairworkflows.org/openpredict_resource:fairworkflows.dataset.openpredict.interactome.R1')
g = ConjunctiveGraph(identifier=graphURI)

# Every row is typed with the given EDAM topic URI.
g = to_rdf(g, interactome_df, column_types, row_uri='http://edamontology.org/topic_0128')

In [56]:
from rdflib import Namespace
import datetime
DC = Namespace("http://purl.org/dc/terms/")
def addProvanace(g, graphURI):
    """
    Add provenance triples for the generated graph and its source file to ``g``.

    Three resources are described: ``graphURI`` (typed as a Dataset and linked
    to its distribution), the N-Quads distribution file on GitHub (title,
    format, creation time, creator, homepage, license, rights, source) and the
    original text file the data was taken from (title, type, homepage,
    retrieval time, format, rights, publisher).

    Parameters
    ----------
    g : rdflib.Graph
        Graph that receives the triples; it is modified in place.
    graphURI : rdflib.URIRef
        Identifier of the graph being described.

    Returns
    -------
    rdflib.Graph
        The same graph object ``g`` that was passed in.
    """
    # A single timestamp (local time at call) serves both 'created' and 'retrievedOn'.
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    datasetURI = URIRef('https://github.com/fair-workflows/openpredict/data/rdf/human_interactome.nq')
    sourcedatasetURI = URIRef('https://media.nature.com/full/nature-assets/srep/2016/161017/srep35241/extref/srep35241-s3.txt')

    # DC terms are looked up with [] throughout: DC is a str subclass, so
    # attribute access such as DC.title or DC.format would hit str methods.
    statements = [
        # The graph itself.
        (graphURI, RDF.type, DC['Dataset']),
        (graphURI, URIRef('http://www.w3.org/ns/dcat#distribution'), datasetURI),
        # The generated N-Quads distribution.
        (datasetURI, DC['title'], Literal('RDF Version of the Human Interactome')),
        (datasetURI, DC['format'], Literal('application/n-quads')),
        (datasetURI, DC['created'], Literal(timestamp)),
        (datasetURI, DC['creator'], Literal('https://github.com/fair-workflows/openpredict/HumanInteractome.ipynb')),
        (datasetURI, DC['homepage'], URIRef('https://github.com/fair-workflows/openpredict/')),
        (datasetURI, DC['license'], URIRef('http://creativecommons.org/licenses/by/3.0/')),
        (datasetURI, DC['rights'], Literal('use-share-modify')),
        (datasetURI, DC['rights'], Literal('by-attribution')),
        (datasetURI, DC['rights'], Literal('restricted-by-source-license')),
        (datasetURI, DC['source'], sourcedatasetURI),
        # The upstream source file.
        (sourcedatasetURI, DC['title'], Literal('The Human Interactome used in Uncovering Disease-Disease Relationships Through The Human Interactome  (srep35241-s3.txt)')),
        (sourcedatasetURI, RDF.type, URIRef('http://www.w3.org/ns/dcat#Distribution')),
        (sourcedatasetURI, DC['homepage'], URIRef('https://dx.doi.org/10.1126%2Fscience.1257601')),
        (sourcedatasetURI, URIRef('http://purl.org/pav/retrievedOn'), Literal(timestamp)),
        (sourcedatasetURI, DC['format'], Literal('text')),
        (sourcedatasetURI, DC['rights'], URIRef('https://creativecommons.org/publicdomain/mark/1.0/')),
        (sourcedatasetURI, DC['publisher'], Literal('https://science.sciencemag.org/')),
        (sourcedatasetURI, DC['rights'], Literal('use')),
        (sourcedatasetURI, DC['rights'], Literal('allow-commercial-purposes')),
    ]

    for statement in statements:
        g.add(statement)

    return g

In [57]:
# addProvanace adds its triples to g and returns that same graph.
g = addProvanace(g=g, graphURI=graphURI)

In [58]:
# Write the graph (interactions plus provenance) as N-Quads.
nquads_path = 'data/rdf/human_interactome.nq'
# Create the output folder first: opening the destination fails on a fresh
# checkout when 'data/rdf' does not exist yet.
os.makedirs(os.path.dirname(nquads_path), exist_ok=True)
g.serialize(nquads_path, format='nquads')