## Set up
First, you need to install GraphDB locally on your machine.


In [None]:
from rdflib import ConjunctiveGraph
from SPARQLWrapper import SPARQLWrapper, JSON
import matplotlib.pyplot as plt
from matplotlib_venn import venn2
import numpy as np
from IPython.display import display, HTML


import pandas as pd

def remoteQuery(query, endpoint):
    """Run a SPARQL SELECT query and return its solutions as a DataFrame.

    Parameters
    ----------
    query : str
        SPARQL query text, passed to ``endpoint.setQuery``.
    endpoint : object
        Query object exposing ``setQuery`` and ``queryAndConvert`` (in this
        notebook a ``SPARQLWrapper`` configured to return JSON).

    Returns
    -------
    pandas.DataFrame or None
        One row per solution and one column per variable, each cell holding
        the term's ``'value'`` string. Columns follow ``head.vars`` when the
        response provides it. Variables that are unbound in a given solution
        are left as NaN. ``None`` is returned (after printing the error) when
        the query or the conversion fails.

    Notes
    -----
    As a side effect, pandas display options are widened globally so that long
    IRIs are not truncated when the result is rendered.
    """
    endpoint.setQuery(query)
    try:
        result = endpoint.queryAndConvert()
        pd.set_option("display.max_rows", None, "display.max_colwidth", 6000, "display.width", 6000)
        bindings = result['results']['bindings']
        # Use the declared variable list so column order matches the SELECT
        # clause and an empty result still carries its column names.
        variables = result.get('head', {}).get('vars')
        # Extract the plain value per bound term. A variable missing from a
        # solution simply stays NaN instead of raising on `nan['value']`.
        rows = [
            {var: term['value'] for var, term in binding.items()}
            for binding in bindings
        ]
        return pd.DataFrame(rows, columns=variables)
    except Exception as e:
        # Best-effort behaviour kept from the original: report and return None.
        print(e)
        return None



### With a GraphDB SPARQL endpoint

The input for SPARQLWrapper is the URL of the GraphDB repository where you loaded the appropriate data sets. In this example, the dev version of EDAM (https://raw.githubusercontent.com/edamontology/edamontology/main/EDAM_dev.owl) and a bio.tools Bioschemas Turtle file (https://raw.githubusercontent.com/bio-tools/content/master/datasets/bioschemas-dump.ttl) were loaded into the GraphDB repository.


In [None]:
# URL of the local GraphDB repository queried throughout this notebook.
GRAPHDB_REPOSITORY_URL = "http://localhost:7200/repositories/Project25"

# Query object for that repository; results are requested as JSON, which is
# the structure remoteQuery() indexes into.
ep_biotools = SPARQLWrapper(GRAPHDB_REPOSITORY_URL)
ep_biotools.setReturnFormat(JSON)

## How many EDAM operations are used to annotate bio.tools?

In [None]:
# SPARQL: distinct (?operation, ?label) pairs where ?operation is the
# sc:featureList value of some sc:SoftwareApplication and has an rdfs:label.
# Operations without an rdfs:label in the repository are not returned.
q= """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX sc: <http://schema.org/>

SELECT DISTINCT ?operation ?label WHERE {

?x rdf:type sc:SoftwareApplication ;
   sc:featureList ?operation .
   ?operation rdfs:label ?label . 
} 
"""


In [None]:
# Run the "operations used in bio.tools" query; one row per distinct operation.
results1 = remoteQuery(q, ep_biotools)
print(f"nb of edam operation used in bio.tools: {len(results1)}")

# Show the full table inside a fixed-height, scrollable container.
results1_html = "".join((
    "<div style='height: 200px; overflow: auto; width: fit-content'>",
    results1.to_html(),
    "</div>",
))
display(HTML(results1_html))

## How many bio.tools entries are annotated with EDAM operations?

In [None]:
# SPARQL: distinct sc:SoftwareApplication resources (?entries) that have at
# least one sc:featureList value. The rdfs:label pattern is commented out
# inside the query, so operations are not required to carry a label here.
q5= """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX sc: <http://schema.org/>

SELECT DISTINCT ?entries WHERE {

?entries rdf:type sc:SoftwareApplication ;
   sc:featureList ?operation .
   #?operation rdfs:label ?label . 
} 
"""


In [None]:
# Run the "annotated entries" query; one row per distinct bio.tools entry.
results5 = remoteQuery(q5, ep_biotools)
print(f"nb of bio.tools entries annotated with edam operation: {len(results5)}")

# Show the full table inside a fixed-height, scrollable container.
results5_html = "".join((
    "<div style='height: 200px; overflow: auto; width: fit-content'>",
    results5.to_html(),
    "</div>",
))
display(HTML(results5_html))

## How many operations are there in EDAM (in total)?

In [None]:

# SPARQL: distinct (?operation, ?label) pairs for every class reachable from
# operation_0004 through one or more rdfs:subClassOf steps (the `+` path), so
# operation_0004 itself is only matched if it is its own ancestor.
# The GROUP BY repeats the projected variables and adds nothing beyond DISTINCT.
q2="""
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX sc: <http://schema.org/>

SELECT DISTINCT ?operation ?label WHERE {

?operation rdfs:subClassOf+ <http://edamontology.org/operation_0004> .
   ?operation rdfs:label ?label . 
} 
GROUP BY ?operation ?label
"""


In [None]:
# Run the "all EDAM operations" query; the row count is the size of the
# operation hierarchy as selected by q2.
results2 = remoteQuery(q2, ep_biotools)
print(f"nb of edam operation total: {len(results2)}")

## How many bio.tools entries are annotated with deprecated operations?

In [None]:
# SPARQL: distinct (?operation, ?label) pairs where ?operation is the
# sc:featureList value of some sc:SoftwareApplication, has an rdfs:label, and
# is a direct rdfs:subClassOf owl:DeprecatedClass.
# NOTE(review): this returns deprecated *operations*, not bio.tools entries;
# select ?x instead if the number of annotated entries is what is wanted.
q3="""
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX sc: <http://schema.org/>

SELECT DISTINCT ?operation ?label WHERE {
?x rdf:type sc:SoftwareApplication ;
   sc:featureList ?operation .
   ?operation rdfs:label ?label .
?operation rdfs:subClassOf <http://www.w3.org/2002/07/owl#DeprecatedClass> .   

} 
GROUP BY ?operation ?label
"""

In [None]:
results3=remoteQuery(query=q3, endpoint=ep_biotools)
# q3 selects distinct (?operation, ?label) rows, so the row count is a number
# of deprecated operations in use, not a number of bio.tools entries.
print(f"number of deprecated edam operations used in bio.tools: {len(results3)}")
display(HTML("<div style='height: 200px; overflow: auto; width: fit-content'>" +
             results3.to_html() +
             "</div>"))

## How many bio.tools entries are annotated with root operations?

In [None]:
# SPARQL: distinct sc:SoftwareApplication resources (?entries) whose
# sc:featureList is exactly operation_0004 (no subclasses are matched).
q6="""
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX sc: <http://schema.org/>

SELECT DISTINCT ?entries WHERE {
?entries rdf:type sc:SoftwareApplication ;
   sc:featureList  <http://edamontology.org/operation_0004>.   

} 
"""

In [None]:
results6=remoteQuery(query=q6, endpoint=ep_biotools)
# q6 matches entries annotated with operation_0004 itself, so the message
# must say "root operation" (the previous text was copied from the deprecated cell).
print(f"number of bio.tools entries annotated with root operation: {len(results6)}")
display(HTML("<div style='height: 200px; overflow: auto; width: fit-content'>" +
             results6.to_html() +
             "</div>"))