## Set up
First, you need to install GraphDB locally on your machine.


In [13]:
from rdflib import ConjunctiveGraph
from SPARQLWrapper import SPARQLWrapper, JSON
import matplotlib.pyplot as plt
from matplotlib_venn import venn2
import numpy as np
from IPython.display import display, HTML


import pandas as pd

def remoteQuery(query, endpoint):
    """Run a SPARQL SELECT query and return its bindings as a DataFrame.

    Parameters
    ----------
    query : str
        SPARQL query text, passed to ``endpoint.setQuery``.
    endpoint : object
        Object exposing ``setQuery(query)`` and ``queryAndConvert()``; the
        converted result must be a dict in the SPARQL JSON results layout
        (``result['results']['bindings']``).

    Returns
    -------
    pandas.DataFrame or None
        One row per solution and one column per variable, each cell holding
        the binding's ``'value'`` string. Variables left unbound in a row
        (e.g. from OPTIONAL) are NaN. An empty result yields an empty frame
        that still carries the projected column names when the endpoint
        reports them. ``None`` is returned if anything raised; the exception
        is printed, not re-raised.

    Side effects
    ------------
    Sets the global pandas display options ``display.max_rows``,
    ``display.max_colwidth`` and ``display.width``.
    """
    endpoint.setQuery(query)
    try:
        result = endpoint.queryAndConvert()
        pd.set_option("display.max_rows",None,"display.max_colwidth",6000,"display.width",6000,)
        bindings = result['results']['bindings']
        # Use the projected variable list when present so that column order
        # follows the SELECT clause and empty results keep their columns.
        variables = result.get('head', {}).get('vars')
        # Extract 'value' per binding while building the rows: a variable that
        # is unbound in a row is simply absent from that row's dict, so it
        # becomes NaN instead of breaking a cell-wise x['value'] lookup.
        rows = [
            {name: cell['value'] for name, cell in solution.items()}
            for solution in bindings
        ]
        return pd.DataFrame(rows, columns=variables)
    except Exception as e:
        # Best-effort by design: report the failure and hand back None so the
        # notebook keeps running; callers must check for None.
        print(e)
        return None



### With a GraphDB SPARQL endpoint

The input for SPARQLWrapper is the URL of the GraphDB repository into which you loaded the appropriate data sets. In this example, the dev version of EDAM (https://raw.githubusercontent.com/edamontology/edamontology/main/EDAM_dev.owl) and a bio.tools Bioschemas Turtle file (https://raw.githubusercontent.com/bio-tools/content/master/datasets/bioschemas-dump.ttl) were loaded into the GraphDB repository.


In [14]:
# URL of the local GraphDB repository queried by this notebook.
GRAPHDB_REPOSITORY_URL = "http://localhost:7200/repositories/Project25"

ep_biotools = SPARQLWrapper(GRAPHDB_REPOSITORY_URL)
# remoteQuery indexes the converted result like a dict, so request JSON.
ep_biotools.setReturnFormat(JSON)

## How many EDAM operations are used to annotate bio.tools?

In [40]:
# SPARQL: distinct (?operation, ?label) pairs such that some resource typed
# sc:SoftwareApplication has sc:featureList ?operation and ?operation has an
# rdfs:label. Operations without an rdfs:label in the loaded graph are not
# returned, because the label triple is required.
q= """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX sc: <http://schema.org/>

SELECT DISTINCT ?operation ?label WHERE {

?x rdf:type sc:SoftwareApplication ;
   sc:featureList ?operation .
   ?operation rdfs:label ?label . 
} 
"""


In [41]:
# Run the "operations used in bio.tools" query and report how many distinct
# (operation, label) rows came back.
results1=remoteQuery(query=q, endpoint=ep_biotools)
print(f"nb of edam operation used in bio.tools: {len(results1)}")


# Show the frame inside a fixed-height scrollable box. This must render
# results1 (the frame just computed); the bare name `results` is not defined
# anywhere in the notebook and only worked from stale kernel state.
display(HTML("<div style='height: 200px; overflow: auto; width: fit-content'>" +
             results1.to_html() +
             "</div>"))

nb of edam operation used in bio.tools: 546


Unnamed: 0,operation,label
0,http://edamontology.org/operation_3215,Peak detection
1,http://edamontology.org/operation_3222,Peak calling
2,http://edamontology.org/operation_3439,Pathway or network prediction
3,http://edamontology.org/operation_2422,Data retrieval
4,http://edamontology.org/operation_3198,Read mapping
5,http://edamontology.org/operation_3799,Quantification
6,http://edamontology.org/operation_0306,Text mining
7,http://edamontology.org/operation_3196,Genotyping
8,http://edamontology.org/operation_3202,Polymorphism detection
9,http://edamontology.org/operation_3431,Deposition


## How many bio.tools entries are annotated with EDAM operations?

In [47]:
# SPARQL: distinct ?entries typed sc:SoftwareApplication that have at least
# one sc:featureList value carrying an rdfs:label. ?operation and ?label are
# only used as filters and are not projected.
q5= """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX sc: <http://schema.org/>

SELECT DISTINCT ?entries WHERE {

?entries rdf:type sc:SoftwareApplication ;
   sc:featureList ?operation .
   ?operation rdfs:label ?label . 
} 
"""


In [48]:
# Fetch the bio.tools entries that carry a labelled sc:featureList value.
results5 = remoteQuery(query=q5, endpoint=ep_biotools)
annotated_entry_count = len(results5)
print(f"nb of bio.tools entries annotated with edam operation: {annotated_entry_count}")

# Wrap the HTML table in a fixed-height, scrollable container for display.
entries_table_html = "".join([
    "<div style='height: 200px; overflow: auto; width: fit-content'>",
    results5.to_html(),
    "</div>",
])
display(HTML(entries_table_html))

nb of bio.tools entries annotated with edam operation: 25647


Unnamed: 0,entries
0,https://bio.tools/-CNN
1,https://bio.tools/-TRIS
2,https://bio.tools/1000genomes
3,https://bio.tools/1000genomes_assembly_converter
4,https://bio.tools/1000genomes_data_slicer
5,https://bio.tools/1000genomes_id_history_converter
6,https://bio.tools/1000genomes_variation_pattern_finder
7,https://bio.tools/1000genomes_vcf2ped
8,https://bio.tools/13Check_RNA
9,https://bio.tools/1433pred


## How many operations are there in EDAM (in total)?

In [34]:

# SPARQL: distinct (?operation, ?label) pairs where ?operation is a direct or
# transitive rdfs:subClassOf (property path `+`) of
# <http://edamontology.org/operation_0004> and has an rdfs:label.
# NOTE(review): GROUP BY on the same variables as SELECT DISTINCT, with no
# aggregate, looks redundant - confirm before removing.
q2="""
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX sc: <http://schema.org/>

SELECT DISTINCT ?operation ?label WHERE {

?operation rdfs:subClassOf+ <http://edamontology.org/operation_0004> .
   ?operation rdfs:label ?label . 
} 
GROUP BY ?operation ?label
"""


In [35]:
# Count every labelled operation found below operation_0004 by query q2.
results2 = remoteQuery(query=q2, endpoint=ep_biotools)
edam_operation_total = len(results2)
print(f"nb of edam operation total: {edam_operation_total}")

nb of edam operation total: 537


## How many bio.tools entries are annotated with deprecated operations?

In [55]:
# SPARQL: distinct (?operation, ?label) pairs where ?operation is the
# sc:featureList value of some sc:SoftwareApplication, has an rdfs:label, and
# is a direct rdfs:subClassOf owl:DeprecatedClass.
# NOTE(review): this projects operations, not the annotated entries (?x), so
# the row count is the number of deprecated operations in use - confirm this
# matches the question asked in the section heading.
# NOTE(review): GROUP BY duplicates what SELECT DISTINCT already does here.
q3="""
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX sc: <http://schema.org/>

SELECT DISTINCT ?operation ?label WHERE {
?x rdf:type sc:SoftwareApplication ;
   sc:featureList ?operation .
   ?operation rdfs:label ?label .
?operation rdfs:subClassOf <http://www.w3.org/2002/07/owl#DeprecatedClass> .   

} 
GROUP BY ?operation ?label
"""

In [57]:
# q3 projects distinct (?operation, ?label) rows, so len(results3) is the
# number of deprecated operations that appear in bio.tools annotations - not
# the number of bio.tools entries. The message states what is actually counted.
results3=remoteQuery(query=q3, endpoint=ep_biotools)
print(f"number of deprecated operations used to annotate bio.tools entries: {len(results3)}")
# Render the frame in a fixed-height scrollable box.
display(HTML("<div style='height: 200px; overflow: auto; width: fit-content'>" +
             results3.to_html() +
             "</div>"))

number of bio.tools entries annotated with deprecated operation: 20


Unnamed: 0,operation,label
0,http://edamontology.org/operation_3439,Pathway or network prediction
1,http://edamontology.org/operation_3202,Polymorphism detection
2,http://edamontology.org/operation_2414,Protein function analysis
3,http://edamontology.org/operation_3083,Pathway or network visualisation
4,http://edamontology.org/operation_3562,Network simulation
5,http://edamontology.org/operation_0277,Pathway or network comparison
6,http://edamontology.org/operation_2497,Pathway or network analysis
7,http://edamontology.org/operation_0271,Structure prediction
8,http://edamontology.org/operation_2513,Sequence generation (nucleic acid)
9,http://edamontology.org/operation_3441,Plotting


## How many bio.tools entries are annotated with root operations?

In [62]:
# SPARQL: distinct ?entries typed sc:SoftwareApplication whose sc:featureList
# is exactly <http://edamontology.org/operation_0004> (the operation the
# section heading refers to as the root operation).
q6="""
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX sc: <http://schema.org/>

SELECT DISTINCT ?entries WHERE {
?entries rdf:type sc:SoftwareApplication ;
   sc:featureList  <http://edamontology.org/operation_0004>.   

} 
"""

In [63]:
# q6 selects entries annotated directly with operation_0004 (the root
# operation), so the message must say "root", not "deprecated" as in the
# cell this was copied from.
results6=remoteQuery(query=q6, endpoint=ep_biotools)
print(f"number of bio.tools entries annotated with root operation: {len(results6)}")
# Render the frame in a fixed-height scrollable box.
display(HTML("<div style='height: 200px; overflow: auto; width: fit-content'>" +
             results6.to_html() +
             "</div>"))

number of bio.tools entries annotated with deprecated operation: 9


Unnamed: 0,entries
0,https://bio.tools/LeishInDB
1,https://bio.tools/MDPBiome
2,https://bio.tools/MeroX
3,https://bio.tools/augustus
4,https://bio.tools/checkm
5,https://bio.tools/long-read-tools
6,https://bio.tools/pharmcat
7,https://bio.tools/phaseme
8,https://bio.tools/webaugustus
