## Set up
First, you need to install GraphDB locally on your machine.


In [2]:
from rdflib import ConjunctiveGraph
from SPARQLWrapper import SPARQLWrapper, JSON

import pandas as pd

def remoteQuery(query, endpoint):
    """Run a SPARQL SELECT query on an endpoint and return the rows as a DataFrame.

    Parameters
    ----------
    query : str
        SPARQL query text, passed to ``endpoint.setQuery``.
    endpoint : object
        Endpoint wrapper exposing ``setQuery`` and ``queryAndConvert``
        (this notebook passes a ``SPARQLWrapper`` configured to return JSON).

    Returns
    -------
    pandas.DataFrame or None
        One row per solution and one column per variable; every cell holds the
        ``'value'`` entry of the corresponding binding. A variable that is not
        bound in a given solution is left as a missing value instead of
        aborting the whole conversion. When running or converting the query
        raises, the exception is printed and ``None`` is returned.

    Notes
    -----
    Each successful call also sets the pandas display options
    ``display.max_rows``, ``display.max_colwidth`` and ``display.width`` so
    that long URIs and definitions are shown in full.
    """
    endpoint.setQuery(query)
    try:
        result = endpoint.queryAndConvert()
        pd.set_option("display.max_rows",None,"display.max_colwidth",5000,"display.width",5000,)
        bindings = result['results']['bindings']
        # Use the variable list from the result header (when present) so the
        # columns follow the SELECT order and exist even if no row binds them.
        columns = result.get('head', {}).get('vars')
        # Flatten {'var': {'type': ..., 'value': ...}} to {'var': value} row by
        # row; variables absent from a row simply become missing values.
        rows = [
            {var: cell['value'] for var, cell in row.items()}
            for row in bindings
        ]
        return pd.DataFrame(rows, columns=columns)
    except Exception as e:
        # Best-effort helper: report the problem and hand back None.
        print(e)
        return None



### With a GraphDB SPARQL endpoint

`ep_biotools` points to the SPARQL endpoint of a GraphDB repository in which automatic inference was deactivated.

In [14]:
# SPARQL endpoint of the local GraphDB "biotools" repository.
# NOTE(review): the host name is specific to one machine; adjust it to your own GraphDB instance.
BIOTOOLS_REPOSITORY_URL = "http://llamothe-HP-EliteBook-x360-1040-G8-Notebook-PC:7200/repositories/biotools"

ep_biotools = SPARQLWrapper(BIOTOOLS_REPOSITORY_URL)
# Ask for JSON results, which remoteQuery converts into a DataFrame.
ep_biotools.setReturnFormat(JSON)

## Query to get the top 5 operations represented in bio.tools

In [15]:
# SPARQL query: for every schema.org SoftwareApplication that has a name and a
# featureList value (?operation) carrying an rdfs:label, group the matches by
# (?operation, ?label), count them per group, sort by that count in descending
# order and keep the first 5 groups.
q= """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT  ?operation (COUNT(?operation) as ?count) ?label WHERE {
?x rdf:type <http://schema.org/SoftwareApplication> ;
   <http://schema.org/name> ?name ; 
   <http://schema.org/featureList> ?operation .
   ?operation rdfs:label ?label . 
} GROUP BY ?operation ?label
ORDER BY DESC(?count)
LIMIT 5
"""

In [16]:
# Run the remote query once and reuse the result for both the row count and the
# displayed table (the previous version sent the same query to the endpoint twice).
top_operations = remoteQuery(query=q, endpoint=ep_biotools)
# NOTE(review): the label below mentions "has_topic", but the query above counts
# featureList operations; the text is kept unchanged so saved outputs still match.
print(f"nb data/operation with has_topic property: {len(top_operations)}")
top_operations

nb data/operation with has_topic property: 5


Unnamed: 0,operation,count,label
0,http://edamontology.org/operation_0337,2075,Visualisation
1,http://edamontology.org/operation_3435,1581,Standardisation and normalisation
2,http://edamontology.org/operation_3196,1351,Genotyping
3,http://edamontology.org/operation_2422,1340,Data retrieval
4,http://edamontology.org/operation_2495,1139,Expression analysis


## Query to get 100 result rows of tools with the definitions and exact synonyms of their associated EDAM operations

In [18]:
# SPARQL query: for every schema.org SoftwareApplication with a name and a
# featureList value (?feature), fetch the oboInOwl definition and exact synonym
# of that feature. Each (tool, feature, synonym) combination is its own row, and
# only 100 rows are requested (no ordering is specified).
# The rdf: prefix is declared explicitly so the query does not depend on the
# endpoint supplying default namespaces.
q= """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
SELECT  * WHERE {
?biotools_id rdf:type <http://schema.org/SoftwareApplication> ;
   <http://schema.org/name> ?name ; 
   <http://schema.org/featureList> ?feature .

?feature  <http://www.geneontology.org/formats/oboInOwl#hasDefinition> ?def ; 
         <http://www.geneontology.org/formats/oboInOwl#hasExactSynonym> ?syn .
} limit 100
"""
# Run the remote query once and reuse the result for both the row count and the
# displayed table (the previous version sent the same query to the endpoint twice).
tool_features = remoteQuery(query=q, endpoint=ep_biotools)
# NOTE(review): the label below mentions "has_topic", but this query is about
# featureList definitions and synonyms; the text is kept unchanged so saved outputs still match.
print(f"nb data/operation with has_topic property: {len(tool_features)}")
tool_features

nb data/operation with has_topic property: 100


Unnamed: 0,biotools_id,name,feature,def,syn
0,https://bio.tools/-CNN,-CNN,http://edamontology.org/operation_3215,"Identify peaks in a spectrum from a mass spectrometry, NMR, or some other spectrum-generating experiment.",Peak assignment
1,https://bio.tools/-CNN,-CNN,http://edamontology.org/operation_3215,"Identify peaks in a spectrum from a mass spectrometry, NMR, or some other spectrum-generating experiment.",Peak finding
2,https://bio.tools/-CNN,-CNN,http://edamontology.org/operation_3222,Identify putative protein-binding regions in a genome sequence from analysis of Chip-sequencing data or ChIP-on-chip data.,Protein binding peak detection
3,https://bio.tools/-TRIS,-TRIS,http://edamontology.org/operation_2422,"Retrieve an entry (or part of an entry) from a data resource that matches a supplied query. This might include some primary data and annotation. The query is a data identifier or other indexed term. For example, retrieve a sequence record with the specified accession number, or matching supplied keywords.",Data extraction
4,https://bio.tools/-TRIS,-TRIS,http://edamontology.org/operation_2422,"Retrieve an entry (or part of an entry) from a data resource that matches a supplied query. This might include some primary data and annotation. The query is a data identifier or other indexed term. For example, retrieve a sequence record with the specified accession number, or matching supplied keywords.",Retrieval
5,https://bio.tools/-TRIS,-TRIS,http://edamontology.org/operation_3198,Align short oligonucleotide sequences (reads) to a larger (genomic) sequence.,Oligonucleotide alignment
6,https://bio.tools/-TRIS,-TRIS,http://edamontology.org/operation_3198,Align short oligonucleotide sequences (reads) to a larger (genomic) sequence.,Oligonucleotide alignment construction
7,https://bio.tools/-TRIS,-TRIS,http://edamontology.org/operation_3198,Align short oligonucleotide sequences (reads) to a larger (genomic) sequence.,Oligonucleotide alignment generation
8,https://bio.tools/-TRIS,-TRIS,http://edamontology.org/operation_3198,Align short oligonucleotide sequences (reads) to a larger (genomic) sequence.,Oligonucleotide mapping
9,https://bio.tools/-TRIS,-TRIS,http://edamontology.org/operation_3198,Align short oligonucleotide sequences (reads) to a larger (genomic) sequence.,Read alignment
