## Set up
First, install GraphDB locally on your machine and create a repository into which you load the required data.


In [None]:
from rdflib import ConjunctiveGraph
from SPARQLWrapper import SPARQLWrapper, JSON

import pandas as pd

def remoteQuery(query, endpoint):
    """Run a SPARQL SELECT query and return its solutions as a DataFrame.

    Parameters
    ----------
    query : str
        SPARQL query text handed to ``endpoint.setQuery``.
    endpoint : object
        Endpoint wrapper; only ``setQuery`` and ``queryAndConvert`` are
        called on it. The converted result is indexed as
        ``result['results']['bindings']``, so the endpoint must be set up
        to return JSON.

    Returns
    -------
    pandas.DataFrame or None
        One row per solution and one column per variable found in the
        bindings; each cell holds the binding's ``'value'`` entry. A
        variable that is unbound in a given row yields a missing cell
        (``None``). If running or converting the query raises, the
        exception is printed and ``None`` is returned.

    Notes
    -----
    Calling this function also raises the global pandas display limits so
    the returned frame is rendered in full by the notebook.
    """
    endpoint.setQuery(query)
    try:
        result = endpoint.queryAndConvert()

        # Global display settings: show every row and do not truncate long IRIs.
        pd.set_option("display.max_rows", None)
        pd.set_option("display.max_colwidth", 5000)
        pd.set_option("display.width", 5000)

        bindings = pd.DataFrame(result['results']['bindings'])
        if bindings.empty:
            return bindings

        # Rows that do not bind a variable leave NaN (not a dict) in that
        # column, so guard the lookup instead of indexing blindly.
        return bindings.apply(
            lambda column: column.map(
                lambda cell: cell['value'] if isinstance(cell, dict) else None
            )
        )
    except Exception as e:
        # Best-effort: report the failure and fall through to return None.
        print(e)



### With a GraphDB SPARQL endpoint

`ep_no_inference` points to the SPARQL endpoint of a GraphDB repository in which automatic inference is deactivated.

The input to `SPARQLWrapper` is the URL of the GraphDB repository into which you loaded the appropriate data set; in this example, the dev version of EDAM was loaded into the GraphDB repository.

In [None]:
# Repository used for the "with inferences" runs in this notebook.
# NOTE(review): the repository is named "biotools" although the text above
# says the EDAM dev version was loaded — confirm this is the intended repo.
EDAM_REPOSITORY_URL = "http://localhost:7200/repositories/biotools"

ep_edam = SPARQLWrapper(EDAM_REPOSITORY_URL)
# remoteQuery indexes the converted result as a JSON-shaped dict.
ep_edam.setReturnFormat(JSON)

In [None]:
# Repository used for the "without inferences" runs in this notebook.
EDAM_NO_INFERENCE_REPOSITORY_URL = "http://localhost:7200/repositories/EDAM_no_inference"

ep_no_inference = SPARQLWrapper(EDAM_NO_INFERENCE_REPOSITORY_URL)
# remoteQuery indexes the converted result as a JSON-shaped dict.
ep_no_inference.setReturnFormat(JSON)

## query to get format without the *is_format_of* property

In [None]:
# SPARQL query: select every (transitive) subclass of edam:format_1915, with
# its label, that has NO rdfs:subClassOf link to an owl:Restriction on
# edam:is_format_of with an owl:someValuesFrom filler.
# ?property is fixed through VALUES so it is also returned as a column.
q = """
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>
PREFIX edam:<http://edamontology.org/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT ?entity ?label ?property WHERE
{
  
    ?entity rdfs:subClassOf+ edam:format_1915 .
    ?entity rdfs:label ?label .

     VALUES ?property { edam:is_format_of               
                        }
    FILTER NOT EXISTS {    
        ?entity rdfs:subClassOf ?restriction . 
        ?restriction rdf:type owl:Restriction ; 
                owl:onProperty ?property  ; 
                owl:someValuesFrom ?data.}

}ORDER BY ?entity
    
    
"""

- With inferences:

In [None]:
# Query the endpoint once and reuse the frame for both the count and the
# displayed table (the remote query was previously sent twice).
formats_without_is_format_of = remoteQuery(query=q, endpoint=ep_edam)
print(f"nb formats missing is_format_of property: {len(formats_without_is_format_of)}")
formats_without_is_format_of

- Without inferences:

In [None]:
# Query the endpoint once and reuse the frame for both the count and the
# displayed table (the remote query was previously sent twice).
formats_without_is_format_of_no_inference = remoteQuery(query=q, endpoint=ep_no_inference)
print(f"nb formats missing is_format_of property: {len(formats_without_is_format_of_no_inference)}")
formats_without_is_format_of_no_inference

## query to get data and operation without *has_topic* property

In [None]:
# SPARQL query: select every (transitive) subclass of edam:data_0006 or
# edam:operation_0004, with its label, that has NO rdfs:subClassOf link to an
# owl:Restriction on edam:has_topic with an owl:someValuesFrom filler.
# ?property is fixed through VALUES so it is also returned as a column.
q = """
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>
PREFIX edam:<http://edamontology.org/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT ?entity ?label ?property WHERE
{
  
    {?entity rdfs:subClassOf+ edam:data_0006 .}
    UNION
    {?entity rdfs:subClassOf+ edam:operation_0004 .}

    ?entity rdfs:label ?label .

     VALUES ?property { edam:has_topic               
                        }
    FILTER NOT EXISTS {    
        ?entity rdfs:subClassOf ?restriction . 
        ?restriction rdf:type owl:Restriction ; 
                owl:onProperty ?property  ; 
                owl:someValuesFrom ?topic.}

}ORDER BY ?entity
    
    
"""

In [None]:
# Query the endpoint once and reuse the frame for both the count and the
# displayed table. The message now matches the query above, which looks for
# data/operation concepts lacking a has_topic restriction (it previously
# repeated the "formats missing is_format_of" text from the earlier section).
data_operations_without_has_topic = remoteQuery(query=q, endpoint=ep_edam)
print(f"nb data/operation missing has_topic property: {len(data_operations_without_has_topic)}")
data_operations_without_has_topic

## query to get all *has_topic* property

In [None]:
# SPARQL query: list every distinct (entity, topic) pair, with both labels,
# where the entity has an rdfs:subClassOf link to an owl:Restriction on
# edam:has_topic whose owl:someValuesFrom filler is the topic.
q= """
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>
PREFIX edam:<http://edamontology.org/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT DISTINCT ?entity ?label ?topic ?tlabel
WHERE {
    ?entity rdfs:subClassOf ?restriction . 
    ?restriction rdf:type owl:Restriction ; 
            owl:onProperty edam:has_topic  ; 
            owl:someValuesFrom ?topic.
    ?entity rdfs:label ?label .
    ?topic rdfs:label ?tlabel
    }
"""

In [None]:
# Query the endpoint once and reuse the frame for both the count and the
# displayed table (the remote query was previously sent twice).
# Note: the query returns one row per (entity, topic) pair, so an entity with
# several topics contributes several rows to this count.
has_topic_pairs = remoteQuery(query=q, endpoint=ep_edam)
print(f"nb data/operation with has_topic property: {len(has_topic_pairs)}")
has_topic_pairs