# UniProt can also be accessed programmatically

    -directly, see: https://www.uniprot.org/help/programmatic_access 
    -via the python Bioservices package: https://bioservices.readthedocs.io/en/master/
    
Here below some examples using bioservices

In [2]:
from bioservices.uniprot import UniProt
u = UniProt(verbose=False)

## Retrieve

In [None]:
# retrieve information about an uniprot-id - the default format is xml
res = u.retrieve("P43403")
print(res)

In [None]:
# retrieve information about an uniprot-id in txt format
res = u.retrieve("P43403", frmt="txt")
print(res)

In [8]:
print(res.split())

<class 'str'>


In [14]:
# retrieve the sequence of an uniprot-id in fasta format
#res = u.retrieve("P43403", frmt="fasta")
print(res.split('NCBI_TaxID=')[1].split(';')[0])

9606


In [None]:
# retrieve a list of fasta formats
res = u.retrieve(["P43403"], frmt="fasta")
res

# Search

In [None]:
# search for the term zap70 on uniprot and return as a "list" of uniprot-ids
res = u.search("zap70", frmt="list")
type(res)

In [None]:
array = res.split("\n") # convert the "list" into an python list

In [None]:
len(array) # notice that the last position of the list is an empty string 
array

In [None]:
# another example of search returning a "list" of ids
res = u.search("zap70+AND+human", frmt="list")
print(res)

In [None]:
# search of ZAP70_HUMAN or CBL_HUMAN and return as a tabular format with the 
# columns "entry name", "length", "id" and "genes"
res = u.search("ZAP70_HUMAN+or+CBL_HUMAN", frmt="tab", columns="entry name, length, id, genes")
print(res)

In [None]:
# search of zap70 and organism 9606 output as a tabular format with the 
# columns "entry name", "length", "id" and "genes". Return only the first 25 results.
res = u.search("zap70 AND organism:\"Homo sapiens (Human) [9606]\"", frmt="tab", limit=25, 
columns="entry name,length,id, genes, organism")
print(res)
##  To do advanced searchs like that go to "http://www.uniprot.org" click on Advanced,
##  select the options of search you want. It will return a search string copy and paste that string.

## ID mapping

### ID mapping directly via UniProt: 

In [None]:
import urllib
from urllib.request import urlopen, Request

# check instructions on https://www.uniprot.org/help/api_idmapping

url = 'https://www.uniprot.org/uploadlists/'

params = {
'from':'GENENAME',
'to':'ACC',
'format':'tab',
'query':'C4A'
}

data = urllib.parse.urlencode(params).encode("utf-8")
request = Request(url, data)
request.add_header('User-Agent', 'Python %s')
response = urllib.request.urlopen(request)
page = response.read(200000)
print (page)

In [52]:
p1 = str(page).strip().split('_HUMAN')
print(p1[1])
#p2 = p1.split('C4A')
#print(p2[::-1])
#p3 = p2[::-1][1]
#print(p3.replace('\\t',''))

\tunreviewed\tComplement C4-A\tC4A\tHomo sapiens (Human)\t1744\nC4A\tA0A0G2JT52\tA0A0G2JT52_RAT\tunreviewed\tComplement C4A (Fragment)\tC4a\tRattus norvegicus (Rat)\t188\nC4A\tA0A0G2JV52\tA0A0G2JV52_RAT\tunreviewed\tComplement C4A (Fragment)\tC4a\tRattus norvegicus (Rat)\t1374\nC4A\tA0A0N8ESX5\tA0A0N8ESX5_HETGA\tunreviewed\tComplement C4-A isoform 1 preproprotein\tC4A\tHeterocephalus glaber (Naked mole rat)\t1745\nC4A\tA0A140TA32\tA0A140TA32


### ID mapping via bioservices:

NOTE: to know the valid identifiers to search in mapping (ie ACC, KGG_ID, NF50, etc) see http://www.uniprot.org/help/api_idmapping

In [None]:
from bioservices.uniprot import UniProt
u = UniProt(verbose=False)
# here we are going from accession (uniprot-id) to gene-name and the accession is "P43403"
u.search('C4A')
u.mapping("GENENAME", "ACC", query='C4A')

In [None]:
# what is the kegg-id of the protein whose uniprot-id is "P43403"
u.mapping(fr="ACC", to="KEGG_ID", query='P43403')

In [None]:
# what is the biogrid-id from the protein whose uniprot-id is "P43403"
u.mapping(fr="ACC", to="BIOGRID_ID", query='P43403')