## Set up
First, install GraphDB locally on your machine and create a repository into which you load the needed data.


In [1]:
import os

import pandas as pd
from rdflib import ConjunctiveGraph
from SPARQLWrapper import SPARQLWrapper, JSON

def remoteQuery(query, endpoint):
    """Run a SPARQL query on ``endpoint`` and return its bindings as a DataFrame.

    Args:
        query: SPARQL query text, handed to ``endpoint.setQuery``.
        endpoint: Object exposing ``setQuery`` and ``queryAndConvert``. The
            converted result is expected to follow the SPARQL JSON results
            layout, i.e. ``result['results']['bindings']`` is a list of
            ``{variable: {'value': ...}}`` mappings.

    Returns:
        A DataFrame with one row per binding and one column per variable,
        each cell holding the ``'value'`` of the bound term. A variable that
        is unbound in a given row is left as a missing value. If anything
        fails, the error is printed and ``None`` is returned.
    """
    endpoint.setQuery(query)
    try:
        result = endpoint.queryAndConvert()
        # NOTE: this changes the pandas display options globally, for the
        # whole session, so that result tables are shown untruncated.
        pd.set_option("display.max_rows", None, "display.max_colwidth", 5000, "display.width", 5000)
        bindings = result['results']['bindings']
        # Extract the plain values row by row. Unbound variables are simply
        # absent from a binding, so they end up as missing cells instead of
        # breaking the conversion.
        rows = [
            {variable: term['value'] for variable, term in binding.items()}
            for binding in bindings
        ]
        # Use the declared variable list (when present) so that column names
        # and their order survive even when the result set is empty.
        columns = result.get('head', {}).get('vars')
        return pd.DataFrame(rows, columns=columns)
    except Exception as e:
        # Best effort: report the problem and signal failure with None.
        print(e)
        return None



### With a GraphDB SPARQL endpoint

`ep_no_inference` points to the SPARQL endpoint of a GraphDB repository in which automatic inference has been deactivated.

The argument to SPARQLWrapper is the URL of the GraphDB repository into which you loaded the appropriate data set. In this example, the dev version of EDAM (https://raw.githubusercontent.com/edamontology/edamontology/main/EDAM_dev.owl) was loaded into the GraphDB repository.

You can find the URL of your repository in GraphDB by opening the "Setup" tab in the left menu and then clicking on "Repositories". You can get the link by clicking on the "link" icon.

In [2]:
# SPARQL endpoint of the EDAM repository used in the "With inferences" runs.
# The default URL is the original author's machine; set the GRAPHDB_EDAM_URL
# environment variable to the link of your own GraphDB repository
# (e.g. "http://link/to/your/local/graphdb/repo") instead of editing this cell.
ep_edam = SPARQLWrapper(
    os.environ.get(
        "GRAPHDB_EDAM_URL",
        "http://llamothe-HP-EliteBook-x360-1040-G8-Notebook-PC:7200/repositories/EDAM",
    )
)
# Ask for JSON so that queryAndConvert() yields the results/bindings structure.
ep_edam.setReturnFormat(JSON)

In [None]:
# SPARQL endpoint of the EDAM repository used in the "Without inferences" runs.
# The default URL is the original author's machine; set the
# GRAPHDB_EDAM_NO_INFERENCE_URL environment variable to the link of your own
# GraphDB repository instead of editing this cell.
ep_no_inference = SPARQLWrapper(
    os.environ.get(
        "GRAPHDB_EDAM_NO_INFERENCE_URL",
        "http://llamothe-HP-EliteBook-x360-1040-G8-Notebook-PC:7200/repositories/EDAM_no_inference",
    )
)
# Ask for JSON so that queryAndConvert() yields the results/bindings structure.
ep_no_inference.setReturnFormat(JSON)

## query to get format without the *is_format_of* property

In [3]:
# SPARQL query: every class that is a (transitive) subclass of edam:format_1915
# and has an rdfs:label, but has no direct rdfs:subClassOf owl:Restriction with
# owl:onProperty edam:is_format_of and an owl:someValuesFrom filler.
# ?property is bound through VALUES to that single IRI, so it shows up as a
# constant column in the result. Rows are ordered by ?entity.
# NOTE: the name `q` is reassigned by later query cells; run the cells in order.
q = """
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>
PREFIX edam:<http://edamontology.org/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT ?entity ?label ?property WHERE
{
  
    ?entity rdfs:subClassOf+ edam:format_1915 .
    ?entity rdfs:label ?label .

     VALUES ?property { edam:is_format_of               
                        }
    FILTER NOT EXISTS {    
        ?entity rdfs:subClassOf ?restriction . 
        ?restriction rdf:type owl:Restriction ; 
                owl:onProperty ?property  ; 
                owl:someValuesFrom ?data.}

}ORDER BY ?entity
    
    
"""

- With inferences:

In [4]:
# Run the query once and reuse the result for both the count and the table,
# instead of sending the same request to the endpoint twice.
formats_missing_with_inference = remoteQuery(query=q, endpoint=ep_edam)
print(f"nb formats missing is_format_of property: {len(formats_missing_with_inference)}")
formats_missing_with_inference

nb formats missing is_format_of property: 85


Unnamed: 0,entity,label,property
0,http://edamontology.org/format_1295,quicktandem,http://edamontology.org/is_format_of
1,http://edamontology.org/format_1296,Sanger inverted repeats,http://edamontology.org/is_format_of
2,http://edamontology.org/format_1297,EMBOSS repeat,http://edamontology.org/is_format_of
3,http://edamontology.org/format_1318,restrict format,http://edamontology.org/is_format_of
4,http://edamontology.org/format_1319,restover format,http://edamontology.org/is_format_of
5,http://edamontology.org/format_1320,REBASE restriction sites,http://edamontology.org/is_format_of
6,http://edamontology.org/format_1454,dssp,http://edamontology.org/is_format_of
7,http://edamontology.org/format_1455,hssp,http://edamontology.org/is_format_of
8,http://edamontology.org/format_1627,Primer3 primer,http://edamontology.org/is_format_of
9,http://edamontology.org/format_1665,Taverna workflow format,http://edamontology.org/is_format_of


- Without inferences:

In [None]:
# Run the query once and reuse the result for both the count and the table,
# instead of sending the same request to the endpoint twice.
formats_missing_no_inference = remoteQuery(query=q, endpoint=ep_no_inference)
print(f"nb formats missing is_format_of property: {len(formats_missing_no_inference)}")
formats_missing_no_inference

nb formats missing is_format_of property: 528


Unnamed: 0,entity,label,property
0,http://edamontology.org/format_1196,SMILES,http://edamontology.org/is_format_of
1,http://edamontology.org/format_1197,InChI,http://edamontology.org/is_format_of
2,http://edamontology.org/format_1198,mf,http://edamontology.org/is_format_of
3,http://edamontology.org/format_1199,InChIKey,http://edamontology.org/is_format_of
4,http://edamontology.org/format_1200,smarts,http://edamontology.org/is_format_of
5,http://edamontology.org/format_1206,unambiguous pure,http://edamontology.org/is_format_of
6,http://edamontology.org/format_1207,nucleotide,http://edamontology.org/is_format_of
7,http://edamontology.org/format_1208,protein,http://edamontology.org/is_format_of
8,http://edamontology.org/format_1209,consensus,http://edamontology.org/is_format_of
9,http://edamontology.org/format_1210,pure nucleotide,http://edamontology.org/is_format_of


## Query to get data and operations without the *has_topic* property

In [None]:
# SPARQL query: every class that is a (transitive) subclass of edam:data_0006
# or of edam:operation_0004 and has an rdfs:label, but has no direct
# rdfs:subClassOf owl:Restriction with owl:onProperty edam:has_topic and an
# owl:someValuesFrom filler.
# ?property is bound through VALUES to that single IRI, so it shows up as a
# constant column in the result. Rows are ordered by ?entity.
# NOTE: this rebinds the name `q` used by the previous query cell.
q = """
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>
PREFIX edam:<http://edamontology.org/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT ?entity ?label ?property WHERE
{
  
    {?entity rdfs:subClassOf+ edam:data_0006 .}
    UNION
    {?entity rdfs:subClassOf+ edam:operation_0004 .}

    ?entity rdfs:label ?label .

     VALUES ?property { edam:has_topic               
                        }
    FILTER NOT EXISTS {    
        ?entity rdfs:subClassOf ?restriction . 
        ?restriction rdf:type owl:Restriction ; 
                owl:onProperty ?property  ; 
                owl:someValuesFrom ?topic.}

}ORDER BY ?entity
    
    
"""

In [None]:
# Run the query once and reuse the result for both the count and the table.
# The message names what this query actually selects (data/operation classes
# lacking has_topic), not the formats/is_format_of wording of the earlier cell.
missing_has_topic = remoteQuery(query=q, endpoint=ep_edam)
print(f"nb data/operations missing has_topic property: {len(missing_has_topic)}")
missing_has_topic

nb formats missing is_format_of property: 866


Unnamed: 0,entity,label,property
0,http://edamontology.org/data_0842,Identifier,http://edamontology.org/has_topic
1,http://edamontology.org/data_0844,Molecular mass,http://edamontology.org/has_topic
2,http://edamontology.org/data_0845,Molecular charge,http://edamontology.org/has_topic
3,http://edamontology.org/data_0846,Chemical formula,http://edamontology.org/has_topic
4,http://edamontology.org/data_0847,QSAR descriptor,http://edamontology.org/has_topic
5,http://edamontology.org/data_0850,Sequence set,http://edamontology.org/has_topic
6,http://edamontology.org/data_0856,Sequence feature source,http://edamontology.org/has_topic
7,http://edamontology.org/data_0857,Sequence search results,http://edamontology.org/has_topic
8,http://edamontology.org/data_0862,Dotplot,http://edamontology.org/has_topic
9,http://edamontology.org/data_0865,Sequence similarity score,http://edamontology.org/has_topic


## query to get all *has_topic* property

In [None]:
# SPARQL query: all distinct (entity, label, topic, topic label) combinations
# where ?entity has a direct rdfs:subClassOf owl:Restriction with
# owl:onProperty edam:has_topic and owl:someValuesFrom ?topic, and both the
# entity and the topic have an rdfs:label.
# The query itself does not restrict ?entity to a particular EDAM branch.
# NOTE: this rebinds the name `q` used by the previous query cells.
q= """
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>
PREFIX edam:<http://edamontology.org/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT DISTINCT ?entity ?label ?topic ?tlabel
WHERE {
    ?entity rdfs:subClassOf ?restriction . 
    ?restriction rdf:type owl:Restriction ; 
            owl:onProperty edam:has_topic  ; 
            owl:someValuesFrom ?topic.
    ?entity rdfs:label ?label .
    ?topic rdfs:label ?tlabel
    }
"""

In [None]:
# Run the query once and reuse the result for both the count and the table.
# The count is the number of result rows, i.e. distinct
# (entity, label, topic, topic label) combinations returned by the query.
has_topic_pairs = remoteQuery(query=q, endpoint=ep_edam)
print(f"nb data/operation with has_topic property: {len(has_topic_pairs)}")
has_topic_pairs

nb data/operation with has_topic property: 1208


Unnamed: 0,entity,label,topic,tlabel
0,http://edamontology.org/data_0582,Ontology,http://edamontology.org/topic_0089,Ontology and terminology
1,http://edamontology.org/data_0858,Sequence signature matches,http://edamontology.org/topic_0160,"Sequence sites, features and motifs"
2,http://edamontology.org/data_0860,Sequence signature data,http://edamontology.org/topic_0160,"Sequence sites, features and motifs"
3,http://edamontology.org/data_1353,Sequence motif,http://edamontology.org/topic_0160,"Sequence sites, features and motifs"
4,http://edamontology.org/data_1354,Sequence profile,http://edamontology.org/topic_0160,"Sequence sites, features and motifs"
5,http://edamontology.org/data_1361,Position frequency matrix,http://edamontology.org/topic_0160,"Sequence sites, features and motifs"
6,http://edamontology.org/data_2854,Position-specific scoring matrix,http://edamontology.org/topic_0160,"Sequence sites, features and motifs"
7,http://edamontology.org/data_1362,Position weight matrix,http://edamontology.org/topic_0160,"Sequence sites, features and motifs"
8,http://edamontology.org/data_1363,Information content matrix,http://edamontology.org/topic_0160,"Sequence sites, features and motifs"
9,http://edamontology.org/data_1365,Fingerprint,http://edamontology.org/topic_0160,"Sequence sites, features and motifs"
