## Set up
First, you need to install GraphDB locally on your machine. The queries below expect a repository named `Project25` served at `http://localhost:7200`.


In [5]:
from rdflib import ConjunctiveGraph
from SPARQLWrapper import SPARQLWrapper, JSON
import matplotlib.pyplot as plt
from matplotlib_venn import venn2
import numpy as np
from IPython.display import display, HTML


import pandas as pd

def remoteQuery(query, endpoint):
    """Run a SPARQL query on ``endpoint`` and return its bindings as a DataFrame.

    Parameters
    ----------
    query : str
        SPARQL query text, handed to ``endpoint.setQuery``.
    endpoint : object
        Object exposing ``setQuery`` and ``queryAndConvert`` (e.g. a
        ``SPARQLWrapper`` instance). The converted result must have the
        layout ``result['results']['bindings']``, where every binding maps a
        variable name to a dict carrying a ``'value'`` entry.

    Returns
    -------
    pandas.DataFrame or None
        One row per binding and one column per variable, each cell holding
        the binding's ``'value'``. A variable that is missing from a given
        row is left as NaN. If running or converting the query fails, the
        exception is printed and ``None`` is returned.
    """
    endpoint.setQuery(query)
    try:
        result = endpoint.queryAndConvert()
        # Notebook-wide display settings: show every row and do not truncate
        # long cell values when the returned frame is rendered.
        pd.set_option("display.max_rows", None)
        pd.set_option("display.max_colwidth", 6000)
        pd.set_option("display.width", 6000)
        # Unwrap each binding's 'value' while building the rows, so that rows
        # lacking a variable simply produce NaN instead of raising.
        rows = [
            {var: cell['value'] for var, cell in binding.items()}
            for binding in result['results']['bindings']
        ]
        return pd.DataFrame(rows)
    except Exception as e:
        # Best effort: report the problem and hand back None to the caller.
        print(e)
        return None


In [6]:
# SPARQL endpoint of the local repository queried by the cells below.
GRAPHDB_REPOSITORY_URL = "http://localhost:7200/repositories/Project25"

ep_biotools = SPARQLWrapper(GRAPHDB_REPOSITORY_URL)
# Ask for JSON so the result can be read as result['results']['bindings'].
ep_biotools.setReturnFormat(JSON)

## List all EDAM entities with relations (transitive)

You can change the relation in the query by editing the `VALUES ?relation { ... }` line. Possible values are: `edam:has_topic`, `edam:has_output`, `edam:has_input`, `edam:is_format_of`.

In [32]:
# For every class that has an owl:someValuesFrom restriction on the chosen
# relation, count the labelled targets of those restrictions.
# Rows are sorted by that count (largest first), then by entity IRI.
# ?target is neither grouped nor aggregated, so it cannot serve as a sort key
# after GROUP BY and is not listed in ORDER BY.
q = """
PREFIX biotools: <https://bio.tools/ontology/>
PREFIX bsc: <http://bioschemas.org/>
PREFIX bsct: <http://bioschemas.org/types/>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX edam: <http://edamontology.org/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX sc: <http://schema.org/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT
    ?entity ?label
    (COUNT(?target) as ?count)
WHERE
{
    VALUES ?relation { edam:has_topic }
    ?entity
        rdfs:subClassOf [owl:onProperty ?relation ; owl:someValuesFrom ?target] ;
        rdfs:label ?label .
    ?target rdfs:label ?target_label .
} GROUP BY ?entity ?label
ORDER BY DESC(?count) ASC(?entity)
"""


In [35]:
# Run the relation query and report how many rows it produced.
results1 = remoteQuery(endpoint=ep_biotools, query=q)
print(f"nb of concepts with relations: {len(results1)}")

# Render the full table inside a fixed-height, scrollable box.
results1_html = "".join([
    "<div style='height: 200px; overflow: auto; width: fit-content'>",
    results1.to_html(),
    "</div>",
])
display(HTML(results1_html))

nb of concepts with relations: 618


Unnamed: 0,entity,label,count
0,http://edamontology.org/operation_0269,Transmembrane protein prediction,8
1,http://edamontology.org/operation_0245,Structural motif discovery,7
2,http://edamontology.org/operation_0246,Protein domain recognition,7
3,http://edamontology.org/operation_0267,Protein secondary structure prediction,7
4,http://edamontology.org/operation_0268,Protein super-secondary structure prediction,7
5,http://edamontology.org/operation_0390,Protein peeling,7
6,http://edamontology.org/operation_0468,Protein secondary structure prediction (helices),7
7,http://edamontology.org/operation_0469,Protein secondary structure prediction (turns),7
8,http://edamontology.org/operation_0470,Protein secondary structure prediction (coils),7
9,http://edamontology.org/operation_2464,Protein-protein binding site prediction,7


## EDAM Relations consistent with bio.tools entries

### 1) has_topic relation between operation and topic 

In [36]:
# Operation/topic pairs that are linked by an edam:has_topic restriction AND
# are used together by at least one schema.org SoftwareApplication entry
# (topic via applicationSubCategory, operation via featureList).
# Operations are restricted to the subclasses of edam:operation_0004
# (presumably the root of the EDAM operation branch -- confirm).
# Each row carries the number of matching tools; rows are sorted by that
# number (largest first), then by operation IRI.
q2= """
PREFIX biotools: <https://bio.tools/ontology/>
PREFIX bsc: <http://bioschemas.org/>
PREFIX bsct: <http://bioschemas.org/types/>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX edam: <http://edamontology.org/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX sc: <http://schema.org/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT
?operation ?operation_label ?topic ?topic_label (COUNT(?tool) as ?tool_count)
WHERE
{
    ?operation rdfs:subClassOf+ edam:operation_0004;
               rdfs:label ?operation_label;
               rdfs:subClassOf [owl:onProperty edam:has_topic ; owl:someValuesFrom ?topic] .
    ?topic rdfs:label ?topic_label.

?tool rdf:type <http://schema.org/SoftwareApplication> ;
    <http://schema.org/applicationSubCategory> ?topic;
    <http://schema.org/featureList> ?operation.

}GROUP BY ?operation ?operation_label ?topic ?topic_label 
ORDER BY DESC(?tool_count) ASC(?operation)
"""


In [37]:
# Run the operation/topic query; each returned row is one (operation, topic)
# pair together with the number of tools annotated with both.
results2 = remoteQuery(query=q2, endpoint=ep_biotools)
# The label describes what q2 actually returns (has_topic pairs, not formats).
print(f"nb of operation/topic pairs with has_topic relation used together in bio.tools: {len(results2)}")

# Render the full table inside a fixed-height, scrollable box.
display(HTML("<div style='height: 200px; overflow: auto; width: fit-content'>" +
             results2.to_html() +
             "</div>"))

nb of edam format with is_format_of relation: 751


Unnamed: 0,operation,operation_label,topic,topic_label,tool_count
0,http://edamontology.org/operation_2495,Expression analysis,http://edamontology.org/topic_0203,Gene expression,542
1,http://edamontology.org/operation_3443,Image analysis,http://edamontology.org/topic_3382,Imaging,461
2,http://edamontology.org/operation_2492,Protein interaction prediction,http://edamontology.org/topic_0128,Protein interactions,450
3,http://edamontology.org/operation_0310,Sequence assembly,http://edamontology.org/topic_0196,Sequence assembly,403
4,http://edamontology.org/operation_3223,Differential gene expression profiling,http://edamontology.org/topic_0203,Gene expression,350
5,http://edamontology.org/operation_1781,Gene regulatory network analysis,http://edamontology.org/topic_0602,"Molecular interactions, pathways and networks",340
6,http://edamontology.org/operation_3660,Metabolic network modelling,http://edamontology.org/topic_0602,"Molecular interactions, pathways and networks",322
7,http://edamontology.org/operation_3198,Read mapping,http://edamontology.org/topic_0102,Mapping,316
8,http://edamontology.org/operation_0314,Gene expression profiling,http://edamontology.org/topic_0203,Gene expression,315
9,http://edamontology.org/operation_0308,PCR primer design,http://edamontology.org/topic_0632,Probes and primers,296


In [59]:
# Distinct tools (schema.org SoftwareApplication entries) that carry at least
# one topic/operation annotation pair which is also linked by an
# edam:has_topic restriction on the operation. Sorted by tool IRI.
# NOTE(review): DISTINCT and GROUP BY ?tool both deduplicate ?tool here, so one
# of them looks redundant -- confirm before simplifying.
q3= """
PREFIX biotools: <https://bio.tools/ontology/>
PREFIX bsc: <http://bioschemas.org/>
PREFIX bsct: <http://bioschemas.org/types/>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX edam: <http://edamontology.org/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX sc: <http://schema.org/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT DISTINCT
 ?tool 
WHERE
{
?tool rdf:type <http://schema.org/SoftwareApplication> ;
    <http://schema.org/applicationSubCategory> ?topic;
    <http://schema.org/featureList> ?operation.
    
    ?operation rdfs:subClassOf+ edam:operation_0004;
               rdfs:label ?operation_label;
               rdfs:subClassOf [owl:onProperty edam:has_topic ; owl:someValuesFrom ?topic] .
    ?topic rdfs:label ?topic_label.

} GROUP BY ?tool
ORDER BY ASC(?tool)


"""

In [62]:
# Fetch the tools whose topic/operation annotations match an EDAM has_topic link.
results3 = remoteQuery(endpoint=ep_biotools, query=q3)
print(f"nb tools that have at least one operation/topic relation confirmed in edam: {len(results3)}")

# Render the full table inside a fixed-height, scrollable box.
results3_html = "".join([
    "<div style='height: 200px; overflow: auto; width: fit-content'>",
    results3.to_html(),
    "</div>",
])
display(HTML(results3_html))

nb tools that have at least one operation/topic relation confirmed in edam: 10321


Unnamed: 0,tool
0,https://bio.tools/1000genomes_assembly_converter
1,https://bio.tools/1000genomes_data_slicer
2,https://bio.tools/16s_classifier
3,https://bio.tools/16spip
4,https://bio.tools/2-kupl
5,https://bio.tools/2d-image-cepstral-analysis
6,https://bio.tools/2d-mh
7,https://bio.tools/2dkd
8,https://bio.tools/2sigfinder
9,https://bio.tools/3DCNN


## a