## Set up

In [6]:
from rdflib import ConjunctiveGraph
from SPARQLWrapper import SPARQLWrapper, JSON
import matplotlib.pyplot as plt
from matplotlib_venn import venn2
import numpy as np


import pandas as pd

def remoteQuery(query, endpoint):
    endpoint.setQuery(query)
    try:
        result = endpoint.queryAndConvert()
        pd.set_option("display.max_rows",None,"display.max_colwidth",6000,"display.width",6000,)
        df = pd.DataFrame(result['results']['bindings'])
        df = df.applymap(lambda x: x['value'])
        return df
        #return (result['results']['bindings'])
    except Exception as e:
        print(e)


In [7]:
ep_biotools = SPARQLWrapper("http://localhost:7200/repositories/Project25")
ep_biotools.setReturnFormat(JSON)

## How many EDAM data are there in EDAM?

In [8]:
q= """
PREFIX edam:<http://edamontology.org/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX bsc: <http://bioschemas.org/>
PREFIX bsct: <http://bioschemas.org/types/>

PREFIX sc: <http://schema.org/>
SELECT (COUNT(DISTINCT ?data) as ?count) WHERE {
    ?data rdfs:subClassOf <http://edamontology.org/data_0006>
}
"""
print(f"Total number of EDAM data: {len(remoteQuery(query=q, endpoint=ep_biotools))}")
remoteQuery(query=q, endpoint=ep_biotools)

Total number of EDAM data: 1


Unnamed: 0,count
0,947


## How many EDAM data are used to annotate bio.tools?

In [9]:
q= """
PREFIX edam:<http://edamontology.org/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX bsc: <http://bioschemas.org/>
PREFIX bsct: <http://bioschemas.org/types/>

PREFIX sc: <http://schema.org/>
SELECT (COUNT(DISTINCT ?data) as ?count) WHERE {
    {
?biotools_id rdf:type <http://schema.org/SoftwareApplication> ;
   bsc:input [sc:additionalType ?data].        
    } UNION {
?biotools_id rdf:type <http://schema.org/SoftwareApplication> ;
   bsc:output [sc:additionalType ?data].        
    }
}
"""
print(f"Total number of EDAM data used to annotate bio.tools: {len(remoteQuery(query=q, endpoint=ep_biotools))}")
remoteQuery(query=q, endpoint=ep_biotools)

Total number of EDAM data used to annotate bio.tools: 1


Unnamed: 0,count
0,559


## How many bio.tools entries are annotated with EDAM data?

In [10]:
q= """
PREFIX edam:<http://edamontology.org/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX bsc: <http://bioschemas.org/>
PREFIX bsct: <http://bioschemas.org/types/>

PREFIX sc: <http://schema.org/>
SELECT (COUNT(DISTINCT ?biotools_id) as ?count) WHERE {
    {
?biotools_id rdf:type <http://schema.org/SoftwareApplication> ;
   bsc:input [sc:additionalType ?data].        
    } UNION {
?biotools_id rdf:type <http://schema.org/SoftwareApplication> ;
   bsc:output [sc:additionalType ?data].        
    }
}
"""
print(f"Total number of bio.tools entries used to annotate EDAM data: {len(remoteQuery(query=q, endpoint=ep_biotools))}")
remoteQuery(query=q, endpoint=ep_biotools)

Total number of bio.tools entries used to annotate EDAM data: 1


Unnamed: 0,count
0,3284


## Which EDAM data are used for bio.tools annotation?

In [13]:
q= """
PREFIX edam:<http://edamontology.org/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX bsc: <http://bioschemas.org/>
PREFIX bsct: <http://bioschemas.org/types/>

PREFIX sc: <http://schema.org/>
SELECT DISTINCT ?data WHERE {
    {
?biotools_id rdf:type <http://schema.org/SoftwareApplication> ;
   bsc:input [sc:additionalType ?data].        
    } UNION {
?biotools_id rdf:type <http://schema.org/SoftwareApplication> ;
   bsc:output [sc:additionalType ?data].        
    }
}

ORDER BY ASC(?data)
"""
print(f"EDAM data used to annotate bio.tools:")
remoteQuery(query=q, endpoint=ep_biotools)

EDAM data used to annotate bio.tools:


Unnamed: 0,data
0,http://edamontology.org/data_0006
1,http://edamontology.org/data_0581
2,http://edamontology.org/data_0582
3,http://edamontology.org/data_0842
4,http://edamontology.org/data_0844
5,http://edamontology.org/data_0846
6,http://edamontology.org/data_0848
7,http://edamontology.org/data_0849
8,http://edamontology.org/data_0850
9,http://edamontology.org/data_0857
