### SPARQL with Python
- https://sparqlwrapper.readthedocs.io/en/latest/
- https://github.com/fanavarro/sparql_uniprot/

In [1]:
import sys

In [19]:
from SPARQLWrapper import SPARQLWrapper, JSON, CSV

In [4]:
# Client for the UniProt SPARQL endpoint. HTTPS is used instead of plain
# HTTP so queries and results are not exchanged in clear text.
sparql = SPARQLWrapper('https://sparql.uniprot.org/sparql')

In [5]:
# SPARQL PREFIX declarations that get prepended to the query text.
# The query defined further down only uses rdfs:, up: and taxon:; the
# remaining prefixes are declared but unused there.
# NOTE(review): the list looks like the namespace table published by the
# UniProt SPARQL endpoint — confirm before pruning or editing entries.
prefixes = '''
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX wikibase: <http://wikiba.se/ontology#>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX vg: <http://biohackathon.org/resource/vg#>
PREFIX uniprotkb: <http://purl.uniprot.org/uniprot/>
PREFIX uberon: <http://purl.obolibrary.org/obo/uo#>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX sp: <http://spinrdf.org/sp#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX sio: <http://semanticscience.org/resource/>
PREFIX sh: <http://www.w3.org/ns/shacl#>
PREFIX schema: <http://schema.org/>
PREFIX sachem: <http://bioinfo.uochb.cas.cz/rdf/v1.0/sachem#>
PREFIX rh: <http://rdf.rhea-db.org/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX pubmed: <http://rdf.ncbi.nlm.nih.gov/pubmed/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
PREFIX pq: <http://www.wikidata.org/prop/qualifier/>
PREFIX patent: <http://data.epo.org/linked-data/def/patent/>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX orthodbGroup: <http://purl.orthodb.org/odbgroup/>
PREFIX orthodb: <http://purl.orthodb.org/>
PREFIX orth: <http://purl.org/net/orth#>
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX np: <http://nextprot.org/rdf#>
PREFIX nextprot: <http://nextprot.org/rdf/entry/>
PREFIX mnx: <https://rdf.metanetx.org/schema/>
PREFIX mnet: <https://rdf.metanetx.org/mnet/>
PREFIX mesh: <http://id.nlm.nih.gov/mesh/>
PREFIX lscr: <http://purl.org/lscr#>
PREFIX lipidmaps: <https://www.lipidmaps.org/rdf/>
PREFIX keywords: <http://purl.uniprot.org/keywords/>
PREFIX insdcschema: <http://ddbj.nig.ac.jp/ontologies/nucleotide/>
PREFIX insdc: <http://identifiers.org/insdc/>
PREFIX identifiers: <http://identifiers.org/>
PREFIX glyconnect: <https://purl.org/glyconnect/>
PREFIX glycan: <http://purl.jp/bio/12/glyco/glycan#>
PREFIX genex: <http://purl.org/genex#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX eunisSpecies: <http://eunis.eea.europa.eu/rdf/species-schema.rdf#>
PREFIX ensembltranscript: <http://rdf.ebi.ac.uk/resource/ensembl.transcript/>
PREFIX ensemblterms: <http://rdf.ebi.ac.uk/terms/ensembl/>
PREFIX ensemblprotein: <http://rdf.ebi.ac.uk/resource/ensembl.protein/>
PREFIX ensemblexon: <http://rdf.ebi.ac.uk/resource/ensembl.exon/>
PREFIX ensembl: <http://rdf.ebi.ac.uk/resource/ensembl/>
PREFIX ec: <http://purl.uniprot.org/enzyme/>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX dc: <http://purl.org/dc/terms/>
PREFIX chebislash: <http://purl.obolibrary.org/obo/chebi/>
PREFIX chebihash: <http://purl.obolibrary.org/obo/chebi#>
PREFIX cco: <http://rdf.ebi.ac.uk/terms/chembl#>
PREFIX busco: <http://busco.ezlab.org/schema#>
PREFIX bibo: <http://purl.org/ontology/bibo/>
PREFIX allie: <http://allie.dbcls.jp/>
PREFIX SWISSLIPID: <https://swisslipids.org/rdf/SLM_>
PREFIX GO: <http://purl.obolibrary.org/obo/GO_>
PREFIX ECO: <http://purl.obolibrary.org/obo/ECO_>
PREFIX CHEBI: <http://purl.obolibrary.org/obo/CHEBI_>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX faldo: <http://biohackathon.org/resource/faldo#>
'''



In [33]:

# SELECT block of the SPARQL query (prefixes and LIMIT are added elsewhere).
# It matches proteins that are typed up:Protein, flagged up:reviewed true and
# attached to organism taxon:9606, and that link via rdfs:seeAlso to a
# up:Transcript_Resource. For each such transcript it also returns the
# protein it is translated to and the gene it is transcribed from.
query = '''
SELECT ?protein ?transcript ?ensprotein ?gene
WHERE 
{
  ?protein rdfs:seeAlso ?transcript .
  ?protein a up:Protein .
  ?protein up:reviewed true .
  ?protein up:organism taxon:9606 .
  ?transcript a up:Transcript_Resource .
  ?transcript up:translatedTo ?ensprotein .
  ?transcript up:transcribedFrom ?gene .
 
}
'''

In [7]:
# Assemble the full query text: the prefix declarations, then the SELECT
# block, then a LIMIT clause capping the result at 10 rows. The empty
# strings at both ends of the list produce the leading and trailing newline.
q = "\n".join(["", prefixes, query, "LIMIT 10", ""])

In [8]:
# Create a SPARQLWrapper object, giving it the address of the service
# that receives SPARQL queries and answers them. HTTPS is used instead
# of plain HTTP so queries and results are not exchanged in clear text.
sparql = SPARQLWrapper('https://sparql.uniprot.org/sparql')

In [10]:
# Set the SPARQL query text that the client will send.
sparql.setQuery(q)

In [25]:
# Return format requested from the endpoint; CSV is the SPARQLWrapper
# constant imported at the top of the notebook (its value is 'csv').
output_format = CSV

In [26]:
# Tell the client which format the query results should come back in.
# It can be json, xml, rdf, turtle... These are simply different formats
# for representing the data in text files.
sparql.setReturnFormat(output_format)

In [24]:
# Show the value of the JSON format constant imported from SPARQLWrapper.
JSON

'json'

In [21]:
# Send the query to UniProt. The call returns a Python result object
# that still has to be processed.
print("Ejecutando query")
results = sparql.query()

# Convert the object returned by the service into the return format
# configured earlier (the value held in output_format).
print(f"Conviertiendo a {output_format}")
res = results.convert()
print(f"Fin conversion a {output_format}")

Ejecutando query
Conviertiendo a json
Fin conversion a json


In [27]:
# `results` also carries information (metadata) about what the UniProt
# server sent back; print it.
print(results.info())

{'date': 'Tue, 27 Jun 2023 03:03:06 GMT', 'server': 'Apache', 'access-control-allow-origin': '*', 'access-control-allow-headers': 'origin, x-requested-with, content-type, X-Release', 'access-control-expose-headers': 'X-Total-Results, X-Release', 'x-release': '2023_02', 'x-frame-options': 'SAMEORIGIN', 'x-content-type-options': 'nosniff', 'vary': 'Negotiate,Accept,Accept-Encoding', 'cache-control': 'public', 'etag': 'W/"2023_02"', 'content-disposition': 'attachment; filename="sparql-C6EF9FCF3BADB3E4B4D89FF8E8D025A5.csv"', 'content-type': 'text/csv;charset=ISO-8859-1', 'content-length': '2365', 'x-powered-by': 'sib.swiss', 'connection': 'close'}


In [None]:
import io

In [31]:
# pandas is not imported anywhere else in the visible notebook, so without
# this line the cell fails with a NameError on `pd` after Restart & Run All.
import pandas as pd

# `res` is the converted query result; it is wrapped in an in-memory binary
# buffer so read_csv can parse it as if it were a comma-separated file.
df = pd.read_csv(io.BytesIO(res), sep=",")

In [32]:
# Display the DataFrame parsed from the CSV response.
df

Unnamed: 0,protein,transcript,ensprotein,gene,rhea,equation,chebi
0,http://purl.uniprot.org/uniprot/Q96HR9,http://rdf.ebi.ac.uk/resource/ensembl.transcri...,http://rdf.ebi.ac.uk/resource/ensembl.protein/...,http://rdf.ebi.ac.uk/resource/ensembl/ENSG0000...,,,
1,http://purl.uniprot.org/uniprot/Q96HR9,http://rdf.ebi.ac.uk/resource/ensembl.transcri...,http://rdf.ebi.ac.uk/resource/ensembl.protein/...,http://rdf.ebi.ac.uk/resource/ensembl/ENSG0000...,,,
2,http://purl.uniprot.org/uniprot/Q96HR9,http://rdf.ebi.ac.uk/resource/ensembl.transcri...,http://rdf.ebi.ac.uk/resource/ensembl.protein/...,http://rdf.ebi.ac.uk/resource/ensembl/ENSG0000...,,,
3,http://purl.uniprot.org/uniprot/Q96HR9,http://rdf.ebi.ac.uk/resource/ensembl.transcri...,http://rdf.ebi.ac.uk/resource/ensembl.protein/...,http://rdf.ebi.ac.uk/resource/ensembl/ENSG0000...,,,
4,http://purl.uniprot.org/uniprot/Q96HR9,http://rdf.ebi.ac.uk/resource/ensembl.transcri...,http://rdf.ebi.ac.uk/resource/ensembl.protein/...,http://rdf.ebi.ac.uk/resource/ensembl/ENSG0000...,,,
5,http://purl.uniprot.org/uniprot/Q96HR9,http://rdf.ebi.ac.uk/resource/ensembl.transcri...,http://rdf.ebi.ac.uk/resource/ensembl.protein/...,http://rdf.ebi.ac.uk/resource/ensembl/ENSG0000...,,,
6,http://purl.uniprot.org/uniprot/Q96HR9,http://rdf.ebi.ac.uk/resource/ensembl.transcri...,http://rdf.ebi.ac.uk/resource/ensembl.protein/...,http://rdf.ebi.ac.uk/resource/ensembl/ENSG0000...,,,
7,http://purl.uniprot.org/uniprot/Q96HR9,http://rdf.ebi.ac.uk/resource/ensembl.transcri...,http://rdf.ebi.ac.uk/resource/ensembl.protein/...,http://rdf.ebi.ac.uk/resource/ensembl/ENSG0000...,,,
8,http://purl.uniprot.org/uniprot/Q9BZ11,http://rdf.ebi.ac.uk/resource/ensembl.transcri...,http://rdf.ebi.ac.uk/resource/ensembl.protein/...,http://rdf.ebi.ac.uk/resource/ensembl/ENSG0000...,,,
9,http://purl.uniprot.org/uniprot/Q9BZ11,http://rdf.ebi.ac.uk/resource/ensembl.transcri...,http://rdf.ebi.ac.uk/resource/ensembl.protein/...,http://rdf.ebi.ac.uk/resource/ensembl/ENSG0000...,,,
