## Set up
First, you need to install GraphDB locally on your machine. The cells below expect a repository named `Project25` served at `http://localhost:7200`.


In [None]:
from rdflib import ConjunctiveGraph
from SPARQLWrapper import SPARQLWrapper, JSON
import matplotlib.pyplot as plt
from matplotlib_venn import venn2
import numpy as np
from IPython.display import display, HTML


import pandas as pd

def remoteQuery(query, endpoint):
    """Run a SPARQL SELECT query and return its bindings as a DataFrame.

    Args:
        query: SPARQL query string.
        endpoint: object exposing ``setQuery`` and ``queryAndConvert`` whose
            converted result is the SPARQL JSON results structure
            (``result['results']['bindings']``).

    Returns:
        A ``pandas.DataFrame`` with one row per solution and one column per
        bound variable, each cell holding the binding's ``value``. A variable
        that is unbound in a given solution (e.g. from ``OPTIONAL``) is left
        as NaN in that row. Returns ``None`` when the query or the conversion
        fails; the error is printed.
    """
    endpoint.setQuery(query)
    try:
        result = endpoint.queryAndConvert()
        # Global display settings: show every row and do not truncate long IRIs.
        pd.set_option("display.max_rows", None)
        pd.set_option("display.max_colwidth", 6000)
        pd.set_option("display.width", 6000)
        # Extract the values per binding *before* building the frame: mapping
        # over the frame afterwards breaks on the NaN cells pandas inserts for
        # variables missing from some solutions.
        rows = [
            {var: term['value'] for var, term in binding.items()}
            for binding in result['results']['bindings']
        ]
        return pd.DataFrame(rows)
    except Exception as e:
        # Best-effort behaviour kept from the original: report and return None.
        print(e)
        return None


In [None]:
# SPARQL endpoint of the local GraphDB repository queried by this notebook.
ep_biotools = SPARQLWrapper(
    endpoint="http://localhost:7200/repositories/Project25",
)
# Ask for JSON so results can be read as result['results']['bindings'].
ep_biotools.setReturnFormat(JSON)

## List all EDAM entities with relations (transitive)

You can change the relation in the query's `VALUES ?relation { ... }` clause to any of: `edam:has_topic`, `edam:has_output`, `edam:has_input`, `edam:is_format_of`.

In [None]:
# SPARQL query: for every labelled entity that is a subclass of an OWL
# restriction "?relation some ?target" (with a labelled target), count the
# targets. Rows are sorted by descending count, then by entity IRI.
# The sort keys only use grouped/aggregated variables: ?target is not in
# scope once the solutions have been grouped, so it cannot order anything.
q = """
PREFIX biotools: <https://bio.tools/ontology/>
PREFIX bsc: <http://bioschemas.org/>
PREFIX bsct: <http://bioschemas.org/types/>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX edam: <http://edamontology.org/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX sc: <http://schema.org/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT
    ?entity ?label
    (COUNT(?target) as ?count)
WHERE
{
    VALUES ?relation { edam:has_topic }
    ?entity
        rdfs:subClassOf [owl:onProperty ?relation ; owl:someValuesFrom ?target] ;
        rdfs:label ?label .
    ?target rdfs:label ?target_label .
} GROUP BY ?entity ?label
ORDER BY DESC(?count) ASC(?entity)
"""


In [None]:
# Run the relation-count query against the local endpoint.
results1 = remoteQuery(query=q, endpoint=ep_biotools)
print(f"nb of concepts with relations: {len(results1)}")

# Show the full table inside a fixed-height, scrollable container.
display(
    HTML(
        f"<div style='height: 200px; overflow: auto; width: fit-content'>{results1.to_html()}</div>"
    )
)

## EDAM Relations consistent with bio.tools entries

### 1) has_topic relation between operation and topic 

In [None]:
# SPARQL query: for each EDAM operation (transitive subclass of
# edam:operation_0004) that carries an OWL restriction
# "has_topic some ?topic", count the schema:SoftwareApplication tools whose
# applicationSubCategory is that topic and whose featureList is that
# operation. Only operation/topic pairs used together by at least one tool
# are returned, sorted by descending tool count, then by operation IRI.
q2= """
PREFIX biotools: <https://bio.tools/ontology/>
PREFIX bsc: <http://bioschemas.org/>
PREFIX bsct: <http://bioschemas.org/types/>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX edam: <http://edamontology.org/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX sc: <http://schema.org/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT
?operation ?operation_label ?topic ?topic_label (COUNT(?tool) as ?tool_count)
WHERE
{
    ?operation rdfs:subClassOf+ edam:operation_0004;
               rdfs:label ?operation_label;
               rdfs:subClassOf [owl:onProperty edam:has_topic ; owl:someValuesFrom ?topic] .
    ?topic rdfs:label ?topic_label.

?tool rdf:type <http://schema.org/SoftwareApplication> ;
    <http://schema.org/applicationSubCategory> ?topic;
    <http://schema.org/featureList> ?operation.

}GROUP BY ?operation ?operation_label ?topic ?topic_label 
ORDER BY DESC(?tool_count) ASC(?operation)
"""


In [None]:
# Run the operation/topic consistency query against the local endpoint.
results2 = remoteQuery(query=q2, endpoint=ep_biotools)
# Each row of q2 is one (operation, topic) pair linked by has_topic in EDAM
# and used together by at least one tool, so the message reports those pairs
# (the previous text described formats / is_format_of, which q2 does not query).
print(f"nb of operation/topic has_topic relations used by bio.tools entries: {len(results2)}")

# Show the full table inside a fixed-height, scrollable container.
display(HTML("<div style='height: 200px; overflow: auto; width: fit-content'>" +
             results2.to_html() +
             "</div>"))

In [None]:
# SPARQL query: list the distinct schema:SoftwareApplication tools that are
# annotated with a topic (applicationSubCategory) and an operation
# (featureList) such that the operation, a transitive subclass of
# edam:operation_0004, has an OWL restriction "has_topic some <that topic>".
# DISTINCT already collapses duplicate tools, so no GROUP BY is needed
# (there is no aggregate to compute per group).
q3 = """
PREFIX biotools: <https://bio.tools/ontology/>
PREFIX bsc: <http://bioschemas.org/>
PREFIX bsct: <http://bioschemas.org/types/>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX edam: <http://edamontology.org/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX sc: <http://schema.org/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT DISTINCT
 ?tool
WHERE
{
?tool rdf:type <http://schema.org/SoftwareApplication> ;
    <http://schema.org/applicationSubCategory> ?topic;
    <http://schema.org/featureList> ?operation.

    ?operation rdfs:subClassOf+ edam:operation_0004;
               rdfs:label ?operation_label;
               rdfs:subClassOf [owl:onProperty edam:has_topic ; owl:someValuesFrom ?topic] .
    ?topic rdfs:label ?topic_label.

}
ORDER BY ASC(?tool)
"""

In [None]:
# Fetch the tools whose operation/topic annotation pair is backed by EDAM.
results3 = remoteQuery(query=q3, endpoint=ep_biotools)
print(f"nb tools that have at least one operation/topic relation confirmed in edam: {len(results3)}")

# Show the full table inside a fixed-height, scrollable container.
display(
    HTML(
        "".join(
            [
                "<div style='height: 200px; overflow: auto; width: fit-content'>",
                results3.to_html(),
                "</div>",
            ]
        )
    )
)

## a