# Using a Jupyter Notebook to perform a nanopublication analysis of the CoDa Databank

Imran Asif ([ORCID:0000-0002-1144-6265](https://orcid.org/0000-0002-1144-6265))  
PhD Student, Computer Science

_Heriot-Watt University, Edinburgh, UK_

__Information:__ I have basic information about how many nanopublications are published on different servers. The following graph shows the total number of nanopublications across different datasets.

In [1]:
from SPARQLWrapper import SPARQLWrapper, JSON
import time

In [2]:
# Local SPARQL endpoint that queryCoDa() sends its queries to. Results are
# requested as JSON so they can be indexed as results["results"]["bindings"].
# NOTE(review): this cell was previously labelled as a "DisGeNet" endpoint, but
# the queries in this notebook use the codaNp: vocabulary -- confirm which
# dataset the local server actually hosts.
CODA_SPARQL_ENDPOINT = "http://localhost:8890/sparql/"
dbSparql = SPARQLWrapper(CODA_SPARQL_ENDPOINT)
dbSparql.setReturnFormat(JSON)
def queryCoDa(query):
    """
    Run a SPARQL query against the module-level ``dbSparql`` endpoint.

    Parameters
    ----------
    query : the SPARQL query text to execute.

    Returns
    -------
    Whatever ``dbSparql.queryAndConvert()`` produces for the query. The
    endpoint object is configured elsewhere; this function only sets the
    query on it and triggers execution.
    """
    dbSparql.setQuery(query)
    return dbSparql.queryAndConvert()

In [3]:
# Helper that pulls the "count" value out of a JSON SPARQL result set
def extract_count(results):
    """
    Return the ``count`` value of the first binding in a JSON result set.

    ``results`` is indexed as ``results["results"]["bindings"]``; only the
    first binding is inspected and its ``["count"]["value"]`` entry is
    returned unchanged (no type conversion is applied). When there are no
    bindings at all, the function returns ``None``.
    """
    rows = iter(results["results"]["bindings"])
    try:
        first_row = next(rows)
    except StopIteration:
        # Empty result set: nothing to report.
        return None
    return first_row["count"]["value"]
    
# Start with an empty dictionary that will hold the count for each dataset
counts = dict()

### Get All Conflicting nanopublications ###

In [4]:

# SPARQL query: find pairs of nanopublications whose assertions conflict.
#   * ?np1 / ?np2 are matched as np:Nanopublication resources, each linked to
#     its assertion graph (?a1 / ?a2) via np:hasAssertion.
#   * Inside each assertion graph a subject carries codaNp:claimsIV,
#     codaNp:claimsDV and codaNp:has-effect-value (presumably independent
#     variable, dependent variable and effect size -- confirm against the
#     CoDa vocabulary).
#   * The FILTER keeps pairs with equal IV and DV but different effect values.
# The two sides of the pattern are symmetric and nothing orders ?np1 relative
# to ?np2, so a conflicting pair can be returned in both orders (A,B) and (B,A).
query = """
prefix np: <http://www.nanopub.org/nschema#>
prefix codaNp: <http://www.example.org/>

select distinct ?np1 ?np2 ?s1 ?s2 ?dep1 ?dep2 ?iv1 ?iv2 ?effectsizeVal1 ?effectsizeVal2 where {
  graph ?h {
    ?np1 a np:Nanopublication ;
          np:hasAssertion ?a1 .
  }
  graph ?h2 {
    ?np2 a np:Nanopublication ;
          np:hasAssertion ?a2 .
  }
  graph ?a1 {
    		  ?s1 codaNp:claimsIV ?iv1 ;
           		 codaNp:claimsDV ?dep1 ;
              	 codaNp:has-effect-value ?effectsizeVal1 .
  		   }
  graph ?a2 {
  				?s2 codaNp:claimsIV ?iv2 ;
           		 codaNp:claimsDV ?dep2 ;
              	 codaNp:has-effect-value ?effectsizeVal2 .
  			}
  FILTER (?iv1=?iv2 && ?dep1=?dep2 && ?effectsizeVal1!=?effectsizeVal2)
}"""

# Run the query and report how many rows came back and how long it took.
# perf_counter() is a monotonic clock, so the measured interval cannot be
# distorted by system clock adjustments the way time.time() can.
start_time = time.perf_counter()
results = queryCoDa(query)
elapsed_seconds = time.perf_counter() - start_time
# NOTE(review): a row count of exactly 10000 may reflect a server-side result
# cap rather than the true total -- confirm against the endpoint configuration.
print("--- %d records in %s seconds ---" % (len(results["results"]["bindings"]), elapsed_seconds))

--- 10000 records in 6.788402795791626 seconds ---


### Get All Assertions with Provenance ###

In [6]:
# SPARQL query: fetch every nanopublication together with the contents of its
# assertion graph and its provenance graph.
#   * ?np is matched as an np:Nanopublication with np:hasAssertion ?a and
#     np:hasProvenance ?prov.
#   * graph ?a {?s ?p ?o} returns each triple of the assertion graph and
#     graph ?prov {?s1 ?p1 ?o1} each triple of the provenance graph.
# The two graph patterns share no variables, so each result row pairs one
# assertion triple with one provenance triple: a nanopublication contributes
# (assertion triples x provenance triples) rows.
query = """
prefix np: <http://www.nanopub.org/nschema#>
prefix codaNp: <http://www.example.org/>

select * where {
    graph ?G {
      ?np a np:Nanopublication ;
          np:hasAssertion ?a ;
          np:hasProvenance ?prov .
    }
  graph ?a {?s ?p ?o}
  graph ?prov {?s1 ?p1 ?o1}
}"""

# Run the query and report how many rows came back and how long it took.
# perf_counter() is a monotonic clock, so the measured interval cannot be
# distorted by system clock adjustments the way time.time() can.
start_time = time.perf_counter()
results = queryCoDa(query)
elapsed_seconds = time.perf_counter() - start_time
# NOTE(review): a row count of exactly 10000 may reflect a server-side result
# cap rather than the true total -- confirm against the endpoint configuration.
print("--- %d records in %s seconds ---" % (len(results["results"]["bindings"]), elapsed_seconds))

--- 10000 records in 5.525863170623779 seconds ---
