## Set up
First you need to install GraphDB locally on you machine, create a repo where you load the needed data


In [69]:
from rdflib import ConjunctiveGraph, Namespace
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas as pd
import numpy as np

print("Loading graph ...", end="")
g = ConjunctiveGraph()
g.parse('https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl', format='xml')
#g.parse('../edamontology/EDAM_dev.owl', format='xml')
g.bind('edam', Namespace('http://edamontology.org#'))
print("done!")
print(str(len(g)) + ' triples in the EDAM triple store')

def remoteQuery(query, endpoint):
    endpoint.setQuery(query)
    try:
        result = endpoint.queryAndConvert()
        pd.set_option("display.max_rows",None,"display.max_colwidth",5000,"display.width",5000,)
        df = pd.DataFrame(result['results']['bindings'])
        df = df.applymap(lambda x: x['value'])
        return df
        #return (result['results']['bindings'])
    except Exception as e:
        print(e)


Loading graph ...done!
38255 triples in the EDAM triple store



### With a GraphDB SPARQL endpoint

ep_no_inference points to a SPARQL end point repository in Graphdb where the automatic inferences were deactivated

The input for SPARQLWrapper is the link to the GraphDB repository where you loaded the appropriate data set, in this example the dev version of edam was loaded in the GrapphDB repository

In [70]:
ep_edam = SPARQLWrapper("http://llamothe-HP-EliteBook-x360-1040-G8-Notebook-PC:7200/repositories/EDAM")
#ep_edam = SPARQLWrapper("http://llamothe-HP-EliteBook-x360-1040-G8-Notebook-PC:7200/repositories/biotools") #edam loaded as well in my biotools repo
ep_edam.setReturnFormat(JSON)

In [71]:
ep_no_inference = SPARQLWrapper("http://llamothe-HP-EliteBook-x360-1040-G8-Notebook-PC:7200/repositories/EDAM_no_inference")
ep_no_inference.setReturnFormat(JSON)

## Graphdb

In [72]:
#this query gets all has_topic relation
q= """
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>
PREFIX edam:<http://edamontology.org/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT DISTINCT ?entity ?label ?topic 
WHERE {
    ?entity rdfs:subClassOf ?restriction . 
    ?restriction rdf:type owl:Restriction ; 
            owl:onProperty edam:has_topic  ; 
            owl:someValuesFrom ?topic.
    ?entity rdfs:label ?label .
    ?topic rdfs:label ?tlabel
    }
"""

In [73]:
print(f"nb data/operation with has_topic property: {len(remoteQuery(query=q, endpoint=ep_edam))}")
display(remoteQuery(query=q, endpoint=ep_edam).head(20))

nb data/operation with has_topic property: 1208


Unnamed: 0,entity,label,topic
0,http://edamontology.org/data_0582,Ontology,http://edamontology.org/topic_0089
1,http://edamontology.org/data_0858,Sequence signature matches,http://edamontology.org/topic_0160
2,http://edamontology.org/data_0860,Sequence signature data,http://edamontology.org/topic_0160
3,http://edamontology.org/data_1353,Sequence motif,http://edamontology.org/topic_0160
4,http://edamontology.org/data_1354,Sequence profile,http://edamontology.org/topic_0160
5,http://edamontology.org/data_1361,Position frequency matrix,http://edamontology.org/topic_0160
6,http://edamontology.org/data_2854,Position-specific scoring matrix,http://edamontology.org/topic_0160
7,http://edamontology.org/data_1362,Position weight matrix,http://edamontology.org/topic_0160
8,http://edamontology.org/data_1363,Information content matrix,http://edamontology.org/topic_0160
9,http://edamontology.org/data_1365,Fingerprint,http://edamontology.org/topic_0160


In [74]:
print(f"nb data/operation with has_topic property: {len(remoteQuery(query=q, endpoint=ep_no_inference))}")
display(remoteQuery(query=q, endpoint=ep_no_inference).head(20))

nb data/operation with has_topic property: 221


Unnamed: 0,entity,label,topic
0,http://edamontology.org/data_0582,Ontology,http://edamontology.org/topic_0089
1,http://edamontology.org/data_0860,Sequence signature data,http://edamontology.org/topic_0160
2,http://edamontology.org/data_0863,Sequence alignment,http://edamontology.org/topic_0080
3,http://edamontology.org/data_0872,Phylogenetic tree,http://edamontology.org/topic_0084
4,http://edamontology.org/data_0880,RNA secondary structure,http://edamontology.org/topic_0097
5,http://edamontology.org/data_0883,Structure,http://edamontology.org/topic_0081
6,http://edamontology.org/data_0886,Structure alignment,http://edamontology.org/topic_0081
7,http://edamontology.org/data_0889,Structural profile,http://edamontology.org/topic_0081
8,http://edamontology.org/data_0906,Protein interaction data,http://edamontology.org/topic_0128
9,http://edamontology.org/data_0907,Protein family report,http://edamontology.org/topic_0623


## RDFLIB


In [75]:
results = g.query(q)
print(f"nb data/operation with has_topic property: {len(results)} ")
before_inf = pd.DataFrame(columns=["entity","label","topic"])
for r in results:
        pd.set_option("display.max_rows",None,"display.max_colwidth",5000,"display.width",5000,)
        concept = pd.DataFrame([[r["entity"], r["label"], r["topic"]]],columns=["entity","label","topic"])
        before_inf=pd.concat([before_inf, concept], ignore_index=True)
#df.index = np.arange(1, len(df) + 1)
display(before_inf.head(20))

nb data/operation with has_topic property: 221 


Unnamed: 0,entity,label,topic
0,http://edamontology.org/data_0582,Ontology,http://edamontology.org/topic_0089
1,http://edamontology.org/data_0860,Sequence signature data,http://edamontology.org/topic_0160
2,http://edamontology.org/data_0863,Sequence alignment,http://edamontology.org/topic_0080
3,http://edamontology.org/data_0872,Phylogenetic tree,http://edamontology.org/topic_0084
4,http://edamontology.org/data_0880,RNA secondary structure,http://edamontology.org/topic_0097
5,http://edamontology.org/data_0883,Structure,http://edamontology.org/topic_0081
6,http://edamontology.org/data_0886,Structure alignment,http://edamontology.org/topic_0081
7,http://edamontology.org/data_0889,Structural profile,http://edamontology.org/topic_0081
8,http://edamontology.org/data_0906,Protein interaction data,http://edamontology.org/topic_0128
9,http://edamontology.org/data_0907,Protein family report,http://edamontology.org/topic_0623


In [76]:
q2 = """
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>
PREFIX edam:<http://edamontology.org/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

CONSTRUCT { 
    ?children rdfs:subClassOf ?restriction . 
    ?restriction rdf:type owl:Restriction ; 
            owl:onProperty edam:has_topic  ; 
            owl:someValuesFrom ?topic. 
}
WHERE {
    ?parent rdfs:subClassOf ?restriction . 
    ?restriction rdf:type owl:Restriction ; 
            owl:onProperty edam:has_topic  ; 
            owl:someValuesFrom ?topic.
    ?children rdfs:subClassOf+ ?parent .
    }
    
    
"""

In [77]:
results = g.query(q2)
print(f"nb constructed has_topic property: {len(results)} ")
df= pd.DataFrame(columns=["entity","property","object"])
for r in results:
        pd.set_option("display.max_rows",None,"display.max_colwidth",5000,"display.width",5000,)
        concept = pd.DataFrame([[r[0],r[1],r[2]]],columns=["entity","property","object"])
        df=pd.concat([df, concept], ignore_index=True)
        g.add((r[0],r[1],r[2]))
#df.index = np.arange(1, len(df) + 1)
#display(df.head(20))

nb constructed has_topic property: 1376 


In [78]:
results = g.query(q)
print(f"nb data/operation with has_topic property: {len(results)} ")
after_inf = pd.DataFrame(columns=["entity","label","topic"])
for r in results:
        pd.set_option("display.max_rows",None,"display.max_colwidth",5000,"display.width",5000,)
        concept = pd.DataFrame([[r["entity"], r["label"], r["topic"]]],columns=["entity","label","topic"])
        after_inf=pd.concat([after_inf, concept], ignore_index=True)
#df.index = np.arange(1, len(df) + 1)
display(after_inf.head(20))

nb data/operation with has_topic property: 1208 


Unnamed: 0,entity,label,topic
0,http://edamontology.org/data_0582,Ontology,http://edamontology.org/topic_0089
1,http://edamontology.org/data_0860,Sequence signature data,http://edamontology.org/topic_0160
2,http://edamontology.org/data_1362,Position weight matrix,http://edamontology.org/topic_0160
3,http://edamontology.org/data_2071,Sequence motif (protein),http://edamontology.org/topic_0160
4,http://edamontology.org/data_1353,Sequence motif,http://edamontology.org/topic_0160
5,http://edamontology.org/data_2070,Sequence motif (nucleic acid),http://edamontology.org/topic_0160
6,http://edamontology.org/data_2854,Position-specific scoring matrix,http://edamontology.org/topic_0160
7,http://edamontology.org/data_1361,Position frequency matrix,http://edamontology.org/topic_0160
8,http://edamontology.org/data_1363,Information content matrix,http://edamontology.org/topic_0160
9,http://edamontology.org/data_0858,Sequence signature matches,http://edamontology.org/topic_0160


In [80]:
if len(before_inf.index) == len(remoteQuery(query=q, endpoint=ep_no_inference).index):

    equal = True

    for i in range(0,len(before_inf.index)-1) : 
        for j in range(0,2):
            # print(str(before_inf.iat[i,j]),remoteQuery(query=q, endpoint=ep_no_inference).iat[i,j] )
            if str(before_inf.iat[i,j]) != remoteQuery(query=q, endpoint=ep_no_inference).iat[i,j] :
                equal = False
    print(equal)

True


In [81]:
no_inf_comp=before_inf.compare(remoteQuery(query=q, endpoint=ep_no_inference))
no_inf_comp.head(5)
# no_inf_comp["self",1]


Unnamed: 0_level_0,entity,entity,label,label,topic,topic
Unnamed: 0_level_1,self,other,self,other,self,other
0,http://edamontology.org/data_0582,http://edamontology.org/data_0582,Ontology,Ontology,http://edamontology.org/topic_0089,http://edamontology.org/topic_0089
1,http://edamontology.org/data_0860,http://edamontology.org/data_0860,Sequence signature data,Sequence signature data,http://edamontology.org/topic_0160,http://edamontology.org/topic_0160
2,http://edamontology.org/data_0863,http://edamontology.org/data_0863,Sequence alignment,Sequence alignment,http://edamontology.org/topic_0080,http://edamontology.org/topic_0080
3,http://edamontology.org/data_0872,http://edamontology.org/data_0872,Phylogenetic tree,Phylogenetic tree,http://edamontology.org/topic_0084,http://edamontology.org/topic_0084
4,http://edamontology.org/data_0880,http://edamontology.org/data_0880,RNA secondary structure,RNA secondary structure,http://edamontology.org/topic_0097,http://edamontology.org/topic_0097


In [103]:
print(len(after_inf.index) , len(remoteQuery(query=q, endpoint=ep_edam).index))
sort_after=after_inf.sort_values(by='entity').reset_index(drop=True)
sort_edam_graphdb=remoteQuery(query=q, endpoint=ep_edam).sort_values(by='entity').reset_index(drop=True)

print(sort_after.head(10))
print(sort_edam_graphdb.head(10))

if len(after_inf.index) == len(remoteQuery(query=q, endpoint=ep_edam).index):

    equal = True

    for i in range(0,len(sort_after.index)-1) : 
        for j in range(0,2):
            if str(sort_after.iat[i,j]) != sort_edam_graphdb.iat[i,j] :
                print(i,j,sort_after.iat[i,j],sort_edam_graphdb.iat[i,j])
                print(i,j,type(str(sort_after.iat[i,j])),type(sort_edam_graphdb.iat[i,j]))
                equal = False
    print(equal)

1208 1208
                              entity                       label                               topic
0  http://edamontology.org/data_0582                    Ontology  http://edamontology.org/topic_0089
1  http://edamontology.org/data_0849             Sequence record  http://edamontology.org/topic_0080
2  http://edamontology.org/data_0858  Sequence signature matches  http://edamontology.org/topic_0160
3  http://edamontology.org/data_0860     Sequence signature data  http://edamontology.org/topic_0160
4  http://edamontology.org/data_0863          Sequence alignment  http://edamontology.org/topic_0080
5  http://edamontology.org/data_0872           Phylogenetic tree  http://edamontology.org/topic_0084
6  http://edamontology.org/data_0880     RNA secondary structure  http://edamontology.org/topic_0081
7  http://edamontology.org/data_0880     RNA secondary structure  http://edamontology.org/topic_0097
8  http://edamontology.org/data_0883                   Structure  http://edamonto