## Set up
First, you need to install GraphDB locally on your machine.


In [None]:
import os

import pandas as pd
from rdflib import ConjunctiveGraph
from SPARQLWrapper import SPARQLWrapper, JSON

def remoteQuery(query, endpoint):
    """Run a SPARQL SELECT query on an endpoint and return the rows as a DataFrame.

    Parameters
    ----------
    query : str
        SPARQL query text, handed to ``endpoint.setQuery``.
    endpoint : object
        Object exposing ``setQuery`` and ``queryAndConvert``; the converted
        result must be indexable as ``result['results']['bindings']``
        (SPARQL JSON results layout).

    Returns
    -------
    pandas.DataFrame or None
        One row per solution and one column per variable; each cell holds the
        ``value`` entry of the corresponding binding. A variable that is not
        bound in a given solution yields a missing value (NaN) in that row.
        ``None`` is returned when the query or the conversion fails; the error
        is printed rather than raised.
    """
    endpoint.setQuery(query)
    try:
        result = endpoint.queryAndConvert()
        # NOTE: this changes the pandas display options globally (all rows, wide
        # columns), which is what lets the notebook show full result tables.
        pd.set_option("display.max_rows",None,"display.max_colwidth",5000,"display.width",5000,)
        bindings = result['results']['bindings']
        # When the response carries a variable list, use it so the columns
        # follow the SELECT order and exist even if no row is returned.
        columns = result.get('head', {}).get('vars')
        # Build plain {variable: value} records up front: a solution that lacks
        # a variable simply has no key, so pandas fills the cell with NaN
        # instead of the per-cell lookup failing on a missing binding.
        rows = [
            {var: term['value'] for var, term in binding.items()}
            for binding in bindings
        ]
        return pd.DataFrame(rows, columns=columns)
    except Exception as e:
        # Best-effort behaviour is kept: report the failure and return None.
        print(f"SPARQL query failed: {e}")
        return None



### With a GraphDB SPARQL endpoint

`ep_no_inference` points to a SPARQL endpoint of a GraphDB repository where automatic inference was deactivated.

In [None]:
# SPARQL endpoint of the GraphDB repository named "biotools".
# The URL can be overridden with the GRAPHDB_BIOTOOLS_ENDPOINT environment
# variable so the notebook is not tied to a single machine; the default is the
# URL originally hard-coded here.
ep_biotools = SPARQLWrapper(
    os.environ.get(
        "GRAPHDB_BIOTOOLS_ENDPOINT",
        "http://llamothe-HP-EliteBook-x360-1040-G8-Notebook-PC:7200/repositories/biotools",
    )
)
# Request JSON results: remoteQuery reads result['results']['bindings'].
ep_biotools.setReturnFormat(JSON)

## Query to get the top 5 operations represented in bio.tools

In [None]:
# SPARQL query: for every resource typed schema:SoftwareApplication that has a
# schema:name and a schema:featureList value (?operation) carrying an
# rdfs:label, count the matching solutions per (?operation, ?label) group and
# keep the 5 groups with the highest count.
# Note: ?name is part of the pattern, so a resource with several names
# contributes one solution per name to the count.
q= """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT  ?operation (COUNT(?operation) as ?count) ?label WHERE {
?x rdf:type <http://schema.org/SoftwareApplication> ;
   <http://schema.org/name> ?name ; 
   <http://schema.org/featureList> ?operation .
   ?operation rdfs:label ?label . 
} GROUP BY ?operation ?label
ORDER BY DESC(?count)
LIMIT 5
"""

In [None]:
# Run the query once and reuse the frame for both the row count and the display
# (the original issued the same request twice). remoteQuery returns None when
# the request fails, so the len() call is guarded.
top_operations = remoteQuery(query=q, endpoint=ep_biotools)
if top_operations is not None:
    # The label describes this query; the previous "has_topic" text was a copy-paste.
    print(f"nb of top operations returned: {len(top_operations)}")
top_operations

## Query to get a sample of 100 tool/feature rows with the associated EDAM definitions and synonyms

In [None]:
# SPARQL query: resources typed schema:SoftwareApplication with their name and
# each schema:featureList value (?feature), together with the oboInOwl
# definition and exact synonym of that feature. "limit 100" caps the number of
# returned rows (solutions), not the number of tools.
# The rdf: prefix is declared explicitly so the query is valid standard SPARQL
# and does not depend on the server supplying default namespaces.
q= """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
SELECT  * WHERE {
?biotools_id rdf:type <http://schema.org/SoftwareApplication> ;
   <http://schema.org/name> ?name ; 
   <http://schema.org/featureList> ?feature .

?feature  <http://www.geneontology.org/formats/oboInOwl#hasDefinition> ?def ; 
         <http://www.geneontology.org/formats/oboInOwl#hasExactSynonym> ?syn .
} limit 100
"""
# Run the query once and reuse the frame for the count and the display.
# remoteQuery returns None when the request fails, so the len() call is guarded.
tool_feature_rows = remoteQuery(query=q, endpoint=ep_biotools)
if tool_feature_rows is not None:
    # The label describes this query; the previous "has_topic" text was a copy-paste.
    print(f"nb tool/feature/definition/synonym rows returned: {len(tool_feature_rows)}")
tool_feature_rows

## *Has_topic* comparison

In [58]:
# SPARQL query: distinct (tool, name, operation, topic) rows for resources typed
# schema:SoftwareApplication, where ?operation comes from schema:featureList and
# ?topic from schema:applicationSubCategory of the same resource.
# The trailing "# limit 50" is a SPARQL comment inside the string, so no limit
# is applied.
q= """
PREFIX edam:<http://edamontology.org/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

SELECT DISTINCT ?biotools_id ?name ?operation ?topic WHERE {
?biotools_id rdf:type <http://schema.org/SoftwareApplication> ;
   <http://schema.org/name> ?name ; 
   <http://schema.org/featureList> ?operation ;
   <http://schema.org/applicationSubCategory> ?topic .


} 
# limit 50
"""


The above query returns the relations between a topic and an operation represented in bio.tools via the tool annotations.

In [59]:
# Number of rows returned by the tool/operation/topic query held in `q`.
annotation_row_count = len(remoteQuery(query=q, endpoint=ep_biotools))
print(f"nb data/operation with has_topic property: {annotation_row_count}")

nb data/operation with has_topic property: 227032


In [65]:
# SPARQL query: same tool/operation/topic rows as the unrestricted query, kept
# only when ?operation is a subclass of an owl:Restriction on edam:has_topic
# whose owl:someValuesFrom is that same ?topic.
# rdfs: and owl: are declared explicitly so the query is valid standard SPARQL
# and does not depend on the server supplying default namespaces.
# The "#" lines inside the string are SPARQL comments (alternative SELECT, limit).
q2= """
PREFIX edam:<http://edamontology.org/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>

SELECT DISTINCT  ?biotools_id ?name ?operation ?topic WHERE {
# SELECT DISTINCT ?operation ?topic WHERE {
?biotools_id rdf:type <http://schema.org/SoftwareApplication> ;
   <http://schema.org/name> ?name ; 
   <http://schema.org/featureList> ?operation ;
   <http://schema.org/applicationSubCategory> ?topic .


?operation rdfs:subClassOf ?restriction . 
?restriction rdf:type owl:Restriction ; 
   owl:onProperty  edam:has_topic ; 
   owl:someValuesFrom ?topic.

} 
# limit 50
"""


The above query returns the relations between a topic and an operation represented in bio.tools via the tool annotations that are validated in EDAM via the "has_topic" restriction.

In [66]:
# Number of rows returned by the restriction-matching query held in `q2`.
validated_row_count = len(remoteQuery(query=q2, endpoint=ep_biotools))
print(f"nb data/operation with has_topic property: {validated_row_count}")

nb data/operation with has_topic property: 749


In [None]:
# Display the rows returned by `q2` (the last expression of the cell is rendered).
validated_rows = remoteQuery(query=q2, endpoint=ep_biotools)
validated_rows

In [62]:
# SPARQL query: tool/operation/topic rows for which NO owl:Restriction on
# edam:has_topic with owl:someValuesFrom ?topic is a superclass of ?operation
# (the FILTER NOT EXISTS complement of the restriction-matching query).
# rdfs: and owl: are declared explicitly so the query is valid standard SPARQL
# and does not depend on the server supplying default namespaces.
# The "#" lines inside the string are SPARQL comments (alternative SELECT, limit).
q3= """
PREFIX edam:<http://edamontology.org/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>

SELECT DISTINCT  ?biotools_id ?name ?operation ?topic WHERE {
# SELECT DISTINCT ?operation ?topic WHERE {

?biotools_id rdf:type <http://schema.org/SoftwareApplication> ;
   <http://schema.org/name> ?name ; 
   <http://schema.org/featureList> ?operation ;
   <http://schema.org/applicationSubCategory> ?topic .

FILTER NOT EXISTS {    
?operation rdfs:subClassOf ?restriction . 
?restriction rdf:type owl:Restriction ; 
   owl:onProperty  edam:has_topic ; 
   owl:someValuesFrom ?topic.}

} 
# limit 50
"""

The above query returns the relations between a topic and an operation represented in bio.tools via the tool annotations that are NOT validated in EDAM via the "has_topic" restriction.

In [63]:
# Number of rows returned by the FILTER NOT EXISTS query held in `q3`.
unvalidated_row_count = len(remoteQuery(query=q3, endpoint=ep_biotools))
print(f"nb data/operation with has_topic property: {unvalidated_row_count}")

nb data/operation with has_topic property: 33136


In [64]:
# Display the rows returned by `q3` (the last expression of the cell is rendered).
unvalidated_rows = remoteQuery(query=q3, endpoint=ep_biotools)
unvalidated_rows

Unnamed: 0,operation,topic
0,http://edamontology.org/operation_3215,http://edamontology.org/topic_0749
1,http://edamontology.org/operation_3215,http://edamontology.org/topic_3169
2,http://edamontology.org/operation_3215,http://edamontology.org/topic_3295
3,http://edamontology.org/operation_3215,http://edamontology.org/topic_3474
4,http://edamontology.org/operation_3222,http://edamontology.org/topic_0749
5,http://edamontology.org/operation_3222,http://edamontology.org/topic_3169
6,http://edamontology.org/operation_3222,http://edamontology.org/topic_3295
7,http://edamontology.org/operation_3222,http://edamontology.org/topic_3474
8,http://edamontology.org/operation_3439,http://edamontology.org/topic_0749
9,http://edamontology.org/operation_3439,http://edamontology.org/topic_3169
