In [1]:
import pandas as pd
from csv import reader
from src.util import utils
from src.util.utils import Dataset, DataResource
from rdflib import Graph, URIRef, Literal, RDF, ConjunctiveGraph
from rdflib import Namespace
import datetime


In [2]:
# Flatten the OMIM -> MeSH annotation dump into [omim_id, mesh_id] pairs.
omim_annots = []
with open('data/external/meshAnnotationsFromBioPorttalUsingOMIMDesc.txt', 'r') as annotation_file:
    for raw_line in annotation_file:
        fields = raw_line.strip().split()
        # Only lines with exactly two whitespace-separated fields are usable.
        if len(fields) == 2:
            omim_id, mesh_field = fields
            mesh_terms = mesh_field.split(',')
            # The first comma-separated entry is deliberately skipped
            # (presumably a placeholder / leading comma in the dump -- TODO confirm).
            omim_annots.extend([omim_id, mesh_id] for mesh_id in mesh_terms[1:])

In [3]:
# Tabulate the parsed pairs: one row per (disease id, annotation id).
pair_columns = ['disease', 'annotation']
mim2mesh_df = pd.DataFrame(data=omim_annots, columns=pair_columns)

In [4]:
# Preview the first rows to sanity-check the parsed (disease, annotation) pairs.
mim2mesh_df.head()

Unnamed: 0,disease,annotation
0,100050,D005121
1,100050,Q000002
2,100050,D014918
3,100050,D005220
4,100050,D009679


In [5]:
# Rebuild the frame from the raw pairs (so this cell can be re-run on its own),
# expand both id columns into full URIs, index by disease, and label the
# annotation column with the URI of the predicate it represents.
mim2mesh_df = (
    pd.DataFrame(omim_annots, columns=['disease', 'annotation'])
    .assign(
        disease=lambda frame: frame['disease'].map(
            lambda omim_id: 'http://bio2rdf.org/omim:' + str(omim_id)
        ),
        annotation=lambda frame: frame['annotation'].map(
            lambda mesh_id: 'http://bio2rdf.org/mesh:' + str(mesh_id)
        ),
    )
    .set_index('disease', drop=True)
    .rename(columns={'annotation': 'http://semanticscience.org/resource/SIO_000255'})
)



In [6]:
# Preview the URI-expanded frame: index is the disease URI, the single column is the annotation URI.
mim2mesh_df.head()

Unnamed: 0_level_0,http://semanticscience.org/resource/SIO_000255
disease,Unnamed: 1_level_1
http://bio2rdf.org/omim:100050,http://bio2rdf.org/mesh:D005121
http://bio2rdf.org/omim:100050,http://bio2rdf.org/mesh:Q000002
http://bio2rdf.org/omim:100050,http://bio2rdf.org/mesh:D014918
http://bio2rdf.org/omim:100050,http://bio2rdf.org/mesh:D005220
http://bio2rdf.org/omim:100050,http://bio2rdf.org/mesh:D009679


In [8]:
# Identifier of the graph that receives the OMIM -> MeSH annotation statements.
graphURI = URIRef('http://w3id.org/fairworkflows/dataset.openpredict.meshannot_bioportal.R1')

# Per-column type hints passed to utils.to_rdf (presumably makes the annotation
# values come out as URI nodes rather than literals -- confirm in src.util.utils).
column_types = {'http://semanticscience.org/resource/SIO_000255': 'URI'}

g = utils.to_rdf(
    ConjunctiveGraph(identifier=graphURI),
    mim2mesh_df,
    column_types,
    'http://bio2rdf.org/omim_vocabulary:Phenotype',
)



In [9]:
# Write the annotation graph to disk in N-Quads format (path is relative to the working directory).
g.serialize('data/rdf/omim_mesh_bioportal.nq', format='nquads')

In [10]:
def addMetaData(g, graphURI):
    """Describe the dataset, its source file and its RDF distribution as metadata.

    Three resources are built in sequence, each on the graph returned by the
    previous resource's ``toRDF()``: the dataset itself, the distribution for
    the original annotation file, and the distribution for the generated RDF.

    Args:
        g: Graph handed to ``Dataset`` as the starting graph.
        graphURI: URI used as the ``qname`` of all three resources and as the
            URI of the dataset resource.

    Returns:
        Whatever ``rdf_dist.toRDF()`` returns (presumably the graph holding
        all three descriptions -- confirm against ``src.util.utils``).
    """
    # One timestamp for the whole run so retrieval and creation dates agree.
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # --- dataset description -------------------------------------------------
    data_source = Dataset(qname=graphURI, graph=g)
    data_source.setURI(graphURI)
    data_source.setTitle('Mesh Annotations for OMIM diseases')
    data_source.setDescription('This dataset contains the MeSH terms appeared in OMIM disease description. The annotation was done using BioPortal API.')
    data_source.setPublisher('https://www.maastrichtuniversity.nl/research/institute-data-science')
    data_source.setPublisherName('Institute of Data Science')
    data_source.addRight('use-share-modify')
    data_source.addTheme('http://www.wikidata.org/entity/Q199897')
    data_source.addTheme('http://www.wikidata.org/entity/Q857525')
    data_source.setLicense('http://creativecommons.org/licenses/by/4.0/')
    data_source.setVersion('1.0')

    # --- distribution of the original (source) annotation file ----------------
    data_dist = DataResource(qname=graphURI, graph=data_source.toRDF())
    # Fixed: the scheme separator was 'http:/' (single slash), a malformed URI.
    data_dist.setURI('http://w3id.org/fairworkflows/dataset.openpredict.meshannot_bioportal/version/1/source')
    data_dist.setDownloadURL('https://github.com/fair-workflows/openpredict/blob/master/data/external/meshAnnotationsFromBioPorttalUsingOMIMDesc.txt')
    data_dist.setTitle('Mesh Annotations using BioPortal API')
    data_dist.setLicense('http://creativecommons.org/licenses/by/4.0/')
    data_dist.setVersion('1.0')
    # Fixed: the registered media type is 'text/tab-separated-values' (plural).
    data_dist.setFormat('text/tab-separated-values')
    data_dist.setMediaType('text/tab-separated-values')
    data_dist.setPublisher('https://github.com/fair-workflows/openpredict')
    data_dist.addRight('use-share-modify')
    data_dist.setRetrievedDate(timestamp)
    data_dist.setDataset(data_source.getURI())

    # --- distribution of the generated RDF -------------------------------------
    rdf_dist = DataResource(qname=graphURI, graph=data_dist.toRDF())
    # Fixed: same 'http:/' -> 'http://' correction as above.
    rdf_dist.setURI('http://w3id.org/fairworkflows/dataset.openpredict.meshannot_bioportal/version/1/rdf/data')
    rdf_dist.setTitle('RDF Version of the MESH Annotations for OMIM diseases using BioPortal')
    # NOTE(review): CC BY 3.0 here while the dataset and source use 4.0 -- confirm this is intended.
    rdf_dist.setLicense('http://creativecommons.org/licenses/by/3.0/')
    rdf_dist.setVersion('1.0')
    rdf_dist.setFormat('application/n-quads')
    rdf_dist.setMediaType('application/n-quads')
    rdf_dist.addRight('use-share-modify')
    rdf_dist.addRight('by-attribution')
    rdf_dist.addRight('restricted-by-source-license')
    rdf_dist.setCreateDate(timestamp)
    rdf_dist.setCreator('https://github.com/fair-workflows/openpredict/src/RDFConversionOfMeshAnnotation-BioPortal.py')
    # NOTE(review): points at a '.nq.gz' file, while the notebook writes an uncompressed '.nq' -- confirm.
    rdf_dist.setDownloadURL('https://github.com/fair-workflows/openpredict/blob/master/data/rdf/omim_mesh_bioportal.nq.gz')
    rdf_dist.setDataset(data_dist.getURI())

    return rdf_dist.toRDF()



In [11]:
# Build the metadata on a fresh graph with the same identifier.
# Note: this rebinds `g`, replacing the annotation graph created earlier.
g = addMetaData(ConjunctiveGraph(identifier=graphURI), graphURI)


In [12]:
# Persist the metadata graph as N-Quads and report where it was written.
outfile = 'data/rdf/omim_mesh_bioportal_metadata.nq'
g.serialize(destination=outfile, format='nquads')
print(f'RDF is generated at {outfile}')

RDF is generated at data/rdf/omim_mesh_bioportal_metadata.nq
