## Set up
First, you need to install GraphDB locally on your machine.

In [None]:
from rdflib import ConjunctiveGraph
from SPARQLWrapper import SPARQLWrapper, JSON
import matplotlib.pyplot as plt
from matplotlib_venn import venn2
import numpy as np
from IPython.display import display, HTML


import pandas as pd

def remoteQuery(query, endpoint):
    """Run a SPARQL SELECT query and return its solutions as a DataFrame.

    Parameters
    ----------
    query : str
        SPARQL query text handed to ``endpoint.setQuery``.
    endpoint : object
        Object exposing ``setQuery(query)`` and ``queryAndConvert()``. The
        latter must return a mapping shaped like the SPARQL JSON results
        format, i.e. ``result['results']['bindings']`` is a list of
        ``{variable: {'value': ...}}`` mappings.

    Returns
    -------
    pandas.DataFrame or None
        One row per solution and one column per variable, each cell holding
        the binding's ``'value'``. A variable that is unbound in a given row
        is left as a missing value instead of making the whole call fail.
        ``None`` is returned when the query or the conversion raises; the
        exception is printed.

    Notes
    -----
    As a side effect the global pandas display options ``display.max_rows``,
    ``display.max_colwidth`` and ``display.width`` are changed.
    """
    endpoint.setQuery(query)
    try:
        result = endpoint.queryAndConvert()
        # Show complete result tables; set one option per call for clarity.
        pd.set_option("display.max_rows", None)
        pd.set_option("display.max_colwidth", 6000)
        pd.set_option("display.width", 6000)
        # Extract the plain values before building the frame: this avoids the
        # deprecated DataFrame.applymap and tolerates rows in which some
        # variables are unbound (those cells simply stay missing).
        rows = [
            {var: cell['value'] for var, cell in binding.items()}
            for binding in result['results']['bindings']
        ]
        return pd.DataFrame(rows)
    except Exception as e:
        # Best-effort helper: report the problem and signal failure with None.
        print(e)
        return None


### With a GraphDB SPARQL endpoint

The input for SPARQLWrapper is the URL of the GraphDB repository into which you loaded the appropriate data sets. In this example, the dev version of EDAM (https://raw.githubusercontent.com/edamontology/edamontology/main/EDAM_dev.owl) and a bio.agents bioschemas turtle file (https://raw.githubusercontent.com/bio-agents/content/master/datasets/bioschemas-dump.ttl) were loaded into the GraphDB repository.


In [None]:
# SPARQL endpoint of the local GraphDB repository; results are requested as JSON
# so that remoteQuery can read result['results']['bindings'].
_repository_url = "http://localhost:7200/repositories/Project25"
ep_bioagents = SPARQLWrapper(_repository_url)
ep_bioagents.setReturnFormat(JSON)

## How many EDAM topics are used to annotate bio.agents?

In [None]:
# Distinct (topic, label) pairs: ?topic is the schema:applicationSubCategory of a
# schema:SoftwareApplication that has a schema:name, and ?topic has an rdfs:label.
q= """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT  DISTINCT?topic ?label WHERE {
?x rdf:type <http://schema.org/SoftwareApplication> ;
   <http://schema.org/name> ?name ; 
   <http://schema.org/applicationSubCategory> ?topic .
   ?topic rdfs:label ?label .
} GROUP BY ?topic ?label
"""

In [None]:
# Run the query once and reuse the frame for both the count and the table
# (calling remoteQuery twice would send the same request to the endpoint twice).
results = remoteQuery(query=q, endpoint=ep_bioagents)
print(f"nb of EDAM topics used in bio.agents: {len(results)}")
# Scrollable, fixed-height box so that a long table does not flood the notebook.
display(HTML("<div style='height: 200px; overflow: auto; width: fit-content'>" +
             results.to_html() +
             "</div>"))

## How many bio.agents entries are annotated with EDAM topics?

In [None]:
# Distinct schema:SoftwareApplication entries that have a schema:name and at
# least one schema:applicationSubCategory value.
q_2= """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT  DISTINCT?entries  WHERE {
?entries rdf:type <http://schema.org/SoftwareApplication> ;
   <http://schema.org/name> ?name ; 
   <http://schema.org/applicationSubCategory> ?topic . 
} GROUP BY ?entries
"""

In [None]:
# Run the query once and reuse the frame for both the count and the table
# (calling remoteQuery twice would send the same request to the endpoint twice).
results = remoteQuery(query=q_2, endpoint=ep_bioagents)
print(f"nb of bioagents entries annotated with EDAM topics: {len(results)}")
# Scrollable, fixed-height box so that a long table does not flood the notebook.
display(HTML("<div style='height: 200px; overflow: auto; width: fit-content'>" +
             results.to_html() +
             "</div>"))

## How many bio.agents entries are annotated with deprecated topics?

In [None]:
# Distinct schema:SoftwareApplication entries whose schema:applicationSubCategory
# value has an rdfs:label and is an rdfs:subClassOf owl:DeprecatedClass.
# NOTE: this rebinds `q`; run the cell right before the display cell that uses it.
q= """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT  DISTINCT?agent WHERE {
?agent rdf:type <http://schema.org/SoftwareApplication> ; 
   <http://schema.org/applicationSubCategory> ?topic.
   ?topic rdfs:label ?label.
?topic rdfs:subClassOf <http://www.w3.org/2002/07/owl#DeprecatedClass>.

} GROUP BY ?agent
"""

In [None]:
# Run the query once and reuse the frame for both the count and the table.
results = remoteQuery(query=q, endpoint=ep_bioagents)
# The rows are entries (?agent), so the label names entries rather than topics.
print(f"nb of bio.agents entries annotated with deprecated EDAM topics: {len(results)}")
display(HTML("<div style='height: 200px; overflow: auto; width: fit-content'>" +
             results.to_html() +
             "</div>"))

## How many bio.agents entries are annotated with the EDAM root topic concept?

In [None]:
# Distinct schema:SoftwareApplication entries that have a schema:name and whose
# schema:applicationSubCategory is exactly <http://edamontology.org/topic_0003>.
q= """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT  DISTINCT?agent  WHERE {
?agent rdf:type <http://schema.org/SoftwareApplication> ;
   <http://schema.org/name> ?name ; 
   <http://schema.org/applicationSubCategory> <http://edamontology.org/topic_0003> .
} GROUP BY ?agent
"""

In [None]:
# Run the query once and reuse the frame for both the count and the table.
results = remoteQuery(query=q, endpoint=ep_bioagents)
# The rows are entries (?agent), so the label names entries rather than topics.
print(f"nb of bio.agents entries annotated with the EDAM root topic: {len(results)}")
display(HTML("<div style='height: 200px; overflow: auto; width: fit-content'>" +
             results.to_html() +
             "</div>"))

## How many bio.agents entries have EDAM topics and operations?

In [None]:
# Distinct schema:SoftwareApplication entries that have both a
# schema:featureList value (?operation) and a schema:applicationSubCategory
# value (?topic).
q= """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT  DISTINCT?agent  WHERE {
?agent rdf:type <http://schema.org/SoftwareApplication> ;
    <http://schema.org/featureList> ?operation; 
    <http://schema.org/applicationSubCategory> ?topic  .
} GROUP BY ?agent
"""

In [None]:
# Run the query once and reuse the frame for both the count and the table.
results = remoteQuery(query=q, endpoint=ep_bioagents)
# The rows are entries (?agent), so the label names entries rather than topics.
print(f"nb of bio.agents entries with EDAM topics and operations: {len(results)}")
display(HTML("<div style='height: 200px; overflow: auto; width: fit-content'>" +
             results.to_html() +
             "</div>"))

## How many bio.agents entries have EDAM data (for input and/or output) and topics?

In [None]:
# Distinct schema:SoftwareApplication entries that have a
# schema:applicationSubCategory value and a schema:additionalType on a bsc:input
# OR on a bsc:output (the two cases are combined with UNION).
q= """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX bsc: <http://bioschemas.org/>

SELECT  DISTINCT?agent WHERE {
{
?agent rdf:type <http://schema.org/SoftwareApplication> ;
    bsc:input [<http://schema.org/additionalType> ?data];
    <http://schema.org/applicationSubCategory> ?topic . 
    } UNION {
?agent rdf:type <http://schema.org/SoftwareApplication> ;
   bsc:output [<http://schema.org/additionalType> ?data];
    <http://schema.org/applicationSubCategory> ?topic  .
}
} GROUP BY ?agent
"""

In [None]:
# Run the query once and reuse the frame for both the count and the table.
results = remoteQuery(query=q, endpoint=ep_bioagents)
# The rows are entries (?agent), so the label names entries rather than topics.
print(f"nb of bio.agents entries with EDAM data (input and/or output) and topics: {len(results)}")
display(HTML("<div style='height: 200px; overflow: auto; width: fit-content'>" +
             results.to_html() +
             "</div>"))

## How many bio.agents entries have EDAM data (for both input and output) and topics? 

In [None]:
# Distinct schema:SoftwareApplication entries that have a
# schema:applicationSubCategory value and a schema:additionalType on BOTH a
# bsc:input and a bsc:output. The two data patterns are joined (not UNIONed):
# a UNION would match "input OR output" and give the same rows as the
# "input and/or output" question.
q= """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX bsc: <http://bioschemas.org/>

SELECT  DISTINCT ?agent WHERE {
?agent rdf:type <http://schema.org/SoftwareApplication> ;
    bsc:input [<http://schema.org/additionalType> ?inputData] ;
    bsc:output [<http://schema.org/additionalType> ?outputData] ;
    <http://schema.org/applicationSubCategory> ?topic .
} GROUP BY ?agent
"""

In [None]:
# Run the query once and reuse the frame for both the count and the table.
results = remoteQuery(query=q, endpoint=ep_bioagents)
# The rows are entries (?agent), so the label names entries rather than topics.
print(f"nb of bio.agents entries with EDAM data (both input and output) and topics: {len(results)}")
display(HTML("<div style='height: 200px; overflow: auto; width: fit-content'>" +
             results.to_html() +
             "</div>"))

## How many bio.agents entries have EDAM operations and data?

In [None]:
# Distinct schema:SoftwareApplication entries that have a schema:featureList
# value (?operation) and a schema:additionalType on a bsc:input OR on a
# bsc:output (the two cases are combined with UNION).
q= """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX bsc: <http://bioschemas.org/>

SELECT  DISTINCT?agent WHERE {
{
?agent rdf:type <http://schema.org/SoftwareApplication> ;
    bsc:input [<http://schema.org/additionalType> ?data];
    <http://schema.org/featureList> ?operation . 
    } UNION {
?agent rdf:type <http://schema.org/SoftwareApplication> ;
   bsc:output [<http://schema.org/additionalType> ?data];
    <http://schema.org/featureList> ?operation  .
}
} GROUP BY ?agent
"""

In [None]:
# Run the query once and reuse the frame for both the count and the table.
results = remoteQuery(query=q, endpoint=ep_bioagents)
# The rows are entries (?agent), so the label names entries rather than topics.
print(f"nb of bio.agents entries with EDAM operations and data: {len(results)}")
display(HTML("<div style='height: 200px; overflow: auto; width: fit-content'>" +
             results.to_html() +
             "</div>"))

## Operation + data + format

In [None]:
# Distinct schema:SoftwareApplication entries that have a schema:featureList
# value, a schema:additionalType on an input or output, and a
# schema:encodingFormat on an input or output, but NO
# schema:applicationSubCategory (FILTER NOT EXISTS).
q= """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX bsc: <http://bioschemas.org/>
SELECT  DISTINCT?agent  WHERE {
?agent rdf:type <http://schema.org/SoftwareApplication> ;
    <http://schema.org/featureList> ?operation;
    {
?agent bsc:input [<http://schema.org/additionalType> ?data].
    } UNION {
?agent bsc:output [<http://schema.org/additionalType> ?data].
}
{
?agent bsc:input [<http://schema.org/encodingFormat> ?format].
    } UNION {
?agent bsc:output [<http://schema.org/encodingFormat> ?format].
}
    
FILTER NOT EXISTS {
 ?agent <http://schema.org/applicationSubCategory> ?topic .
    }
} GROUP BY ?agent
"""

In [None]:
# Run the query once and reuse the frame for both the count and the table.
results = remoteQuery(query=q, endpoint=ep_bioagents)
# The rows are entries (?agent), so the label names entries rather than topics.
print(f"nb of bio.agents entries with operation + data + format (no topic): {len(results)}")
display(HTML("<div style='height: 200px; overflow: auto; width: fit-content'>" +
             results.to_html() +
             "</div>"))

## Operation + data

In [None]:
# Distinct schema:SoftwareApplication entries that have a schema:featureList
# value and a schema:additionalType on an input or output, but NO
# schema:encodingFormat on any input/output and NO schema:applicationSubCategory.
q= """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX bsc: <http://bioschemas.org/>
SELECT  DISTINCT?agent  WHERE {
?agent rdf:type <http://schema.org/SoftwareApplication> ;
    <http://schema.org/featureList> ?operation;
    {
?agent bsc:input [<http://schema.org/additionalType> ?data].
    } UNION {
?agent bsc:output [<http://schema.org/additionalType> ?data].
}
    
FILTER NOT EXISTS {
{
?agent bsc:input [<http://schema.org/encodingFormat> ?format].
    } UNION {
?agent bsc:output [<http://schema.org/encodingFormat> ?format].
}
    }
FILTER NOT EXISTS {
 ?agent <http://schema.org/applicationSubCategory> ?topic }
} GROUP BY ?agent
"""

In [None]:
# Run the query once and reuse the frame for both the count and the table.
results = remoteQuery(query=q, endpoint=ep_bioagents)
# The rows are entries (?agent), so the label names entries rather than topics.
print(f"nb of bio.agents entries with operation + data (no format, no topic): {len(results)}")
display(HTML("<div style='height: 200px; overflow: auto; width: fit-content'>" +
             results.to_html() +
             "</div>"))

## Operation

In [None]:
# Distinct schema:SoftwareApplication entries that have a schema:featureList
# value, but NO schema:applicationSubCategory and NO schema:additionalType on
# any input or output.
q= """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX bsc: <http://bioschemas.org/>
SELECT  DISTINCT?agent  WHERE {
?agent rdf:type <http://schema.org/SoftwareApplication> ;
    <http://schema.org/featureList> ?operation
    
FILTER NOT EXISTS {
 ?agent <http://schema.org/applicationSubCategory> ?topic 
}
FILTER NOT EXISTS {        {
?agent bsc:input [<http://schema.org/additionalType> ?data].
    } UNION {
?agent bsc:output [<http://schema.org/additionalType> ?data].
}}
} GROUP BY ?agent
"""

In [None]:
# Run the query once and reuse the frame for both the count and the table.
results = remoteQuery(query=q, endpoint=ep_bioagents)
# The rows are entries (?agent), so the label names entries rather than topics.
print(f"nb of bio.agents entries with operation (no topic, no data): {len(results)}")
display(HTML("<div style='height: 200px; overflow: auto; width: fit-content'>" +
             results.to_html() +
             "</div>"))

## Operation + data + format + topic

In [None]:
# Distinct schema:SoftwareApplication entries that have a schema:featureList
# value, a schema:applicationSubCategory value, a schema:encodingFormat on an
# input or output, and a schema:additionalType on an input or output.
q= """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX bsc: <http://bioschemas.org/>
SELECT  DISTINCT?agent  WHERE {
?agent rdf:type <http://schema.org/SoftwareApplication> ;
    <http://schema.org/featureList> ?operation ;
    <http://schema.org/applicationSubCategory> ?topic
    {
?agent bsc:input [<http://schema.org/encodingFormat> ?format].
    } UNION {
?agent bsc:output [<http://schema.org/encodingFormat> ?format].
}      {
?agent bsc:input [<http://schema.org/additionalType> ?data].
    } UNION {
?agent bsc:output [<http://schema.org/additionalType> ?data].
}
    
} GROUP BY ?agent
"""

In [None]:
# Run the query once and reuse the frame for both the count and the table.
results = remoteQuery(query=q, endpoint=ep_bioagents)
# The rows are entries (?agent), so the label names entries rather than topics.
print(f"nb of bio.agents entries with operation + data + format + topic: {len(results)}")
display(HTML("<div style='height: 200px; overflow: auto; width: fit-content'>" +
             results.to_html() +
             "</div>"))

## Operation + data + topic

In [None]:
# Distinct schema:SoftwareApplication entries that have a schema:featureList
# value, a schema:applicationSubCategory value and a schema:additionalType on an
# input or output, but NO schema:encodingFormat on any input or output.
q= """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX bsc: <http://bioschemas.org/>
SELECT  DISTINCT?agent  WHERE {
?agent rdf:type <http://schema.org/SoftwareApplication> ;
    <http://schema.org/featureList> ?operation;
    <http://schema.org/applicationSubCategory> ?topic 
    {
?agent bsc:input [<http://schema.org/additionalType> ?data].
    } UNION {
?agent bsc:output [<http://schema.org/additionalType> ?data].
}

    
FILTER NOT EXISTS {
 {
?agent bsc:input [<http://schema.org/encodingFormat> ?format].
    } UNION {
?agent bsc:output [<http://schema.org/encodingFormat> ?format].
}
    }
} GROUP BY ?agent
"""

In [None]:
# Run the query once and reuse the frame for both the count and the table.
results = remoteQuery(query=q, endpoint=ep_bioagents)
# The rows are entries (?agent), so the label names entries rather than topics.
print(f"nb of bio.agents entries with operation + data + topic (no format): {len(results)}")
display(HTML("<div style='height: 200px; overflow: auto; width: fit-content'>" +
             results.to_html() +
             "</div>"))

## Operation + topic

In [None]:
# Distinct schema:SoftwareApplication entries that have a schema:featureList
# value and a schema:applicationSubCategory value, but NO schema:additionalType
# and NO schema:encodingFormat on any input or output.
q= """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX bsc: <http://bioschemas.org/>
SELECT  DISTINCT?agent  WHERE {
?agent rdf:type <http://schema.org/SoftwareApplication> ;
    <http://schema.org/featureList> ?operation;
    <http://schema.org/applicationSubCategory> ?topic 


    
FILTER NOT EXISTS {
    {
?agent bsc:input [<http://schema.org/additionalType> ?data].
    } UNION {
?agent bsc:output [<http://schema.org/additionalType> ?data].
}
    }
    
FILTER NOT EXISTS {
 {
?agent bsc:input [<http://schema.org/encodingFormat> ?format].
    } UNION {
?agent bsc:output [<http://schema.org/encodingFormat> ?format].
}
    }
} GROUP BY ?agent
"""

In [None]:
# Run the query once and reuse the frame for the count, the table and the export.
results = remoteQuery(query=q, endpoint=ep_bioagents)
# The rows are entries (?agent), so the label names entries rather than topics.
print(f"nb of bio.agents entries with operation + topic (no data, no format): {len(results)}")
display(HTML("<div style='height: 200px; overflow: auto; width: fit-content'>" +
             results.to_html() +
             "</div>"))
# Export relative to the working directory: the previous literal was a
# user-specific Windows path containing invalid escape sequences (\O, \D, \g).
results.to_csv("operation+topic.csv", index=False, header=False)

## Topic

In [None]:
# Distinct schema:SoftwareApplication entries that have a
# schema:applicationSubCategory value but NO schema:featureList value.
q= """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX bsc: <http://bioschemas.org/>
SELECT  DISTINCT?agent  WHERE {
?agent rdf:type <http://schema.org/SoftwareApplication> ;
    <http://schema.org/applicationSubCategory> ?topic
    
FILTER NOT EXISTS {
 ?agent <http://schema.org/featureList> ?operation}

} GROUP BY ?agent
"""

In [None]:
# Run the query once and reuse the frame for the count, the table and the export.
results = remoteQuery(query=q, endpoint=ep_bioagents)
# The rows are entries (?agent), so the label names entries rather than topics.
print(f"nb of bio.agents entries with topic (no operation): {len(results)}")
display(HTML("<div style='height: 200px; overflow: auto; width: fit-content'>" +
             results.to_html() +
             "</div>"))
# Export relative to the working directory: the previous literal was a
# user-specific Windows path containing invalid escape sequences (\O, \D, \g).
results.to_csv("topic.csv", index=False, header=False)

## None

In [None]:
# Distinct schema:SoftwareApplication entries that have NEITHER a
# schema:featureList value NOR a schema:applicationSubCategory value.
q= """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX bsc: <http://bioschemas.org/>
SELECT  DISTINCT?agent  WHERE {
?agent rdf:type <http://schema.org/SoftwareApplication>.
    
FILTER NOT EXISTS {
 ?agent <http://schema.org/featureList> ?operation}

FILTER NOT EXISTS { 
?agent <http://schema.org/applicationSubCategory> ?topic}
} GROUP BY ?agent
"""

In [None]:
# Run the query once and reuse the frame for the count, the table and the export.
results = remoteQuery(query=q, endpoint=ep_bioagents)
# The rows are entries (?agent), so the label names entries rather than topics.
print(f"nb of bio.agents entries with neither operation nor topic: {len(results)}")
display(HTML("<div style='height: 200px; overflow: auto; width: fit-content'>" +
             results.to_html() +
             "</div>"))
# Export relative to the working directory: the previous literal was a
# user-specific Windows path containing invalid escape sequences (\O, \D, \g).
results.to_csv("none.csv", index=False, header=False)