## Set up
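First, you need to install GraphDB locally on your machine. The queries below are sent to the `Project25` repository of that local instance (`http://localhost:7200/repositories/Project25`).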


In [30]:
from rdflib import ConjunctiveGraph
from SPARQLWrapper import SPARQLWrapper, JSON
import matplotlib.pyplot as plt
from matplotlib_venn import venn2
import numpy as np
from IPython.display import display, HTML


import pandas as pd

def remoteQuery(query, endpoint):
    """Run a SPARQL query on ``endpoint`` and return the bindings as a DataFrame.

    Parameters
    ----------
    query : str
        SPARQL query text, passed to ``endpoint.setQuery``.
    endpoint : object
        Query wrapper exposing ``setQuery`` and ``queryAndConvert``. The
        converted result must follow the SPARQL JSON results layout, i.e.
        ``result['results']['bindings']`` is a list of
        ``{variable: {'value': ...}}`` dicts.

    Returns
    -------
    pandas.DataFrame or None
        One row per binding and one column per bound variable, holding the
        ``'value'`` of each term. Variables that are unbound in a given row
        are left as missing values. If the query or the conversion fails,
        the error is printed and ``None`` is returned.

    Notes
    -----
    As a side effect, the global pandas display options are widened so that
    long IRIs and labels are not truncated when the frame is rendered.
    """
    endpoint.setQuery(query)
    try:
        result = endpoint.queryAndConvert()

        # Global display settings (kept from the original helper): show every
        # row and do not truncate long cell contents.
        display_options = {
            "display.max_rows": None,
            "display.max_colwidth": 6000,
            "display.width": 6000,
        }
        for option_name, option_value in display_options.items():
            pd.set_option(option_name, option_value)

        # Extract the plain value of each bound term. A variable that is
        # unbound in a row is simply absent from that binding, so it never
        # gets indexed here and shows up as a missing cell in the frame
        # instead of aborting the whole conversion.
        rows = [
            {variable: term['value'] for variable, term in binding.items()}
            for binding in result['results']['bindings']
        ]
        return pd.DataFrame(rows)
    except Exception as e:
        # Best-effort helper: report the failure and hand back None so the
        # notebook cell itself does not raise here.
        print(e)
        return None


In [31]:
# Local GraphDB repository that the notebook's SPARQL queries are sent to.
GRAPHDB_REPOSITORY_URL = "http://localhost:7200/repositories/Project25"

# Results are requested as JSON; remoteQuery reads them as nested dicts.
ep_biotools = SPARQLWrapper(GRAPHDB_REPOSITORY_URL)
ep_biotools.setReturnFormat(JSON)

## List all EDAM formats with _is_format_of_ relations (transitive)

In [32]:
# SPARQL query: for each class that is a subclass of an OWL restriction
# "edam:is_format_of some <data>", count the matched (labelled) data concepts.
# Rows are ordered by descending count, then by the format IRI.
# The ORDER BY clause only uses the aggregate and a grouped variable: the
# per-row ?is_format_of_data variable is no longer in scope once the rows
# have been grouped, so it cannot act as a sort key.
q = """
PREFIX biotools: <https://bio.tools/ontology/>
PREFIX bsc: <http://bioschemas.org/>
PREFIX bsct: <http://bioschemas.org/types/>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX edam: <http://edamontology.org/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX sc: <http://schema.org/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT
    ?format ?format_label
    (COUNT(?is_format_of_data) as ?count)
    #?is_format_of_data ?is_format_of_data_label
WHERE
{
    ?format
        rdfs:subClassOf [owl:onProperty edam:is_format_of ; owl:someValuesFrom ?is_format_of_data] ;
        rdfs:label ?format_label .
    ?is_format_of_data rdfs:label ?is_format_of_data_label .
} GROUP BY ?format ?format_label
ORDER BY DESC(?count) ASC(?format)
"""


In [33]:
# Run the query currently held in `q` against the GraphDB endpoint.
results1 = remoteQuery(query=q, endpoint=ep_biotools)
print(f"nb of edam format with is_format_of relation: {len(results1)}")

# Render the full table inside a fixed-height, scrollable container so the
# long listing does not take over the notebook page.
table_html = results1.to_html()
scrollable_html = "".join([
    "<div style='height: 200px; overflow: auto; width: fit-content'>",
    table_html,
    "</div>",
])
display(HTML(scrollable_html))

nb of edam format with is_format_of relation: 533


Unnamed: 0,format,format_label,count
0,http://edamontology.org/format_2352,BioXSD (XML),9
1,http://edamontology.org/format_3772,BioJSON (BioXSD),9
2,http://edamontology.org/format_3773,BioYAML,9
3,http://edamontology.org/format_2572,BAM,4
4,http://edamontology.org/format_2573,SAM,4
5,http://edamontology.org/format_3007,PSL,4
6,http://edamontology.org/format_3771,UniProtKB RDF,4
7,http://edamontology.org/format_3774,BioJSON (Jalview),4
8,http://edamontology.org/format_3777,MCPD,4
9,http://edamontology.org/format_3826,proBAM,4


## List all EDAM concepts with _has_topic_ relations (transitive)

In [35]:
# SPARQL query: for each class that is a subclass of an OWL restriction
# "edam:has_topic some <topic>", count the matched (labelled) topics.
# Rows are ordered by descending count, then by the concept IRI.
# The projected names ?format / ?format_label are kept unchanged so the
# resulting DataFrame columns stay the same for any later cell, even though
# the matched concepts are not restricted to formats.
# The ORDER BY clause only uses the aggregate and a grouped variable: the
# per-row ?topic variable is not in scope once the rows have been grouped.
q = """
PREFIX biotools: <https://bio.tools/ontology/>
PREFIX bsc: <http://bioschemas.org/>
PREFIX bsct: <http://bioschemas.org/types/>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX edam: <http://edamontology.org/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX sc: <http://schema.org/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT
    ?format ?format_label
    (COUNT(?topic) as ?count)
    #?topic ?topic_label
WHERE
{
    ?format
        rdfs:subClassOf [owl:onProperty edam:has_topic ; owl:someValuesFrom ?topic] ;
        rdfs:label ?format_label .
    ?topic rdfs:label ?topic_label .
} GROUP BY ?format ?format_label
ORDER BY DESC(?count) ASC(?format)
"""


In [36]:
# Run the query currently held in `q` against the GraphDB endpoint.
results1 = remoteQuery(query=q, endpoint=ep_biotools)
# This cell reports on the has_topic query, so the message names that
# relation (it previously repeated the is_format_of wording).
print(f"nb of edam concepts with has_topic relation: {len(results1)}")

# Render the full table inside a fixed-height, scrollable container so the
# long listing does not take over the notebook page.
display(HTML("<div style='height: 200px; overflow: auto; width: fit-content'>" +
             results1.to_html() +
             "</div>"))

nb of edam format with is_format_of relation: 618


Unnamed: 0,format,format_label,count
0,http://edamontology.org/operation_0269,Transmembrane protein prediction,8
1,http://edamontology.org/operation_0245,Structural motif discovery,7
2,http://edamontology.org/operation_0246,Protein domain recognition,7
3,http://edamontology.org/operation_0267,Protein secondary structure prediction,7
4,http://edamontology.org/operation_0268,Protein super-secondary structure prediction,7
5,http://edamontology.org/operation_0390,Protein peeling,7
6,http://edamontology.org/operation_0468,Protein secondary structure prediction (helices),7
7,http://edamontology.org/operation_0469,Protein secondary structure prediction (turns),7
8,http://edamontology.org/operation_0470,Protein secondary structure prediction (coils),7
9,http://edamontology.org/operation_2464,Protein-protein binding site prediction,7
