# BridgeDb example: mapping Interleukin-6 identifiers across different databases

In [None]:
import requests
import pandas as pd

In [3]:
def to_df(response, batch=False):
    """Turn a tab-separated web-service response into a DataFrame.

    Parameters
    ----------
    response : object
        Anything exposing the response body as a ``text`` string
        (e.g. the object returned by ``requests.post``).
    batch : bool, default False
        If False, return the raw table: one row per line of ``response.text``
        and one column per tab-separated field.
        If True, treat the first three fields of every line as
        ``original``, ``source`` and a comma-separated list of mappings,
        and return one row per individual mapping.

    Returns
    -------
    pandas.DataFrame
        ``batch=False``: the raw table with default integer column labels.
        ``batch=True``: columns ``['original', 'source', 'mapping', 'target']``.
        A mapping written as ``code:identifier`` is split on the first colon
        into ``target=code`` and ``mapping=identifier``; a mapping without a
        colon is stored unchanged in both columns.
    """
    rows = [line.split('\t') for line in response.text.split('\n')]
    if not batch:
        return pd.DataFrame(rows)

    records = []
    for fields in rows:
        # Lines with fewer than three fields have no mapping column (for
        # instance the empty string that follows a trailing newline, or a
        # one-line error message). Skipping them here avoids an IndexError
        # and does not depend on how pandas represents missing cells.
        if len(fields) < 3:
            continue
        original, source, xrefs = fields[:3]
        for xref in xrefs.split(','):
            # Split on the first colon only: identifiers may contain colons
            # themselves (e.g. 'T:GO:0070102' -> 'T', 'GO:0070102').
            code, colon, identifier = xref.partition(':')
            if colon:
                records.append((original, source, identifier, code))
            else:
                records.append((original, source, code, code))
    return pd.DataFrame(records, columns=['original', 'source', 'mapping', 'target'])

In [9]:
# Root address of the BridgeDb web service (note the trailing slash).
url = "https://webservice.bridgedb.org/"
# URL template for batch look-ups, filled in later with str.format:
#   {org}    - organism name
#   {source} - system code of the identifiers being sent
#   {}       - positional slot appended at the very end (pass '' to leave it empty)
# Doubled braces keep the placeholders literal inside the f-string.
batch_request = f"{url}{{org}}/xrefsBatch/{{source}}{{}}"

## Map UniProt ID

In [64]:
# Input table for the batch request: a single column (labelled 0) with one
# identifier per row. Here it holds just the one UniProt accession to map.
df = pd.DataFrame(
    data={0: ['P05231']},
)
df

Unnamed: 0,0
0,P05231


In [74]:
# Request every cross-reference for the identifier(s) in `df`, sent as system
# code 'S'. The request body is the frame written as bare CSV: one identifier
# per line, no header, no index.
#
# The empty first argument leaves the trailing slot of the URL template blank.
# A query string such as '?dataSource=En' can be passed there instead
# (presumably to restrict the returned target systems - confirm against the
# BridgeDb web-service documentation).
query = batch_request.format('', org='Homo sapiens', source='S')
response = requests.post(query, data=df.to_csv(index=False, header=False))
# Stop on an HTTP error instead of parsing an error page as if it were mappings.
response.raise_for_status()
case1_df = to_df(response, batch=True)
# Show only the mappings whose target system code is 'T'.
case1_df.loc[case1_df['target'] == 'T']

Unnamed: 0,original,source,mapping,target
0,P05231,Uniprot-TrEMBL,GO:0070102,T
1,P05231,Uniprot-TrEMBL,GO:0097421,T
2,P05231,Uniprot-TrEMBL,GO:0045669,T
3,P05231,Uniprot-TrEMBL,GO:0045944,T
4,P05231,Uniprot-TrEMBL,GO:0010718,T
...,...,...,...,...
162,P05231,Uniprot-TrEMBL,GO:0150077,T
163,P05231,Uniprot-TrEMBL,GO:0070091,T
164,P05231,Uniprot-TrEMBL,GO:0007165,T
170,P05231,Uniprot-TrEMBL,GO:0032733,T


## Map Ensembl ID

In [102]:
# Input table for the batch request: a single column (labelled 0) holding the
# Ensembl identifier to map.
# NOTE(review): the 'ENSG' prefix suggests a gene-level Ensembl ID, so
# "transcript" in the variable name looks like a misnomer - confirm before renaming.
df_transcript = pd.DataFrame(
    data={0: ['ENSG00000136244']},
)
df_transcript

Unnamed: 0,0
0,ENSG00000136244


In [103]:
# Request every cross-reference for the identifier(s) in `df_transcript`, sent
# as system code 'En'. The request body is the frame written as bare CSV: one
# identifier per line, no header, no index.
#
# The empty first argument leaves the trailing slot of the URL template blank.
# A query string such as '?dataSource=En' can be passed there instead
# (presumably to restrict the returned target systems - confirm against the
# BridgeDb web-service documentation).
query = batch_request.format('', org='Homo sapiens', source='En')
response = requests.post(query, data=df_transcript.to_csv(index=False, header=False))
# Stop on an HTTP error instead of parsing an error page as if it were mappings.
response.raise_for_status()
case1_df = to_df(response, batch=True)
# Count how many mappings were returned for each target system code.
case1_df.target.value_counts()

T      99
X      24
Pd     12
Q      10
Uc      7
S       7
Ag      5
Il      2
En      1
Om      1
H       1
Hac     1
Wg      1
Wd      1
L       1
Name: target, dtype: int64

In [105]:
# Keep only the mappings that point to UniProt, i.e. rows whose target
# system code is 'S'.
case1_df[case1_df.target.eq('S')]

Unnamed: 0,original,source,mapping,target
22,ENSG00000136244,Ensembl,B4DNV3,S
24,ENSG00000136244,Ensembl,B5MC14,S
26,ENSG00000136244,Ensembl,Q75MH2,S
30,ENSG00000136244,Ensembl,B5MCZ3,S
35,ENSG00000136244,Ensembl,C9J5B0,S
124,ENSG00000136244,Ensembl,P05231,S
140,ENSG00000136244,Ensembl,B5MC21,S


In [None]:
# Look-up table: system code -> data source name.
# These are the short codes that appear in the 'target' column of the batch
# results and in the `source` part of the request URL (e.g. 'S', 'En').
# The table was previously pasted into this cell as bare tab-separated text,
# which is not valid Python; it is kept here in the same order and in the
# same two groups, as a dict so the cell can be executed.
SYSTEM_CODES = {
    # --- first group ---
    'Br': 'BRENDA',
    'I': 'InterPro',
    'Ip': 'IPI',
    'Ma': 'MACiE',
    'Np': 'NCBI Protein',
    'Pd': 'PDB',
    'Pf': 'Pfam',
    'Pp': 'PhosphoSite Protein',
    'Res': 'RESID',
    'Sma': 'SMART',
    'Spr': 'SPRINT',
    'Str': 'STRING',
    'Sdb': 'SubstrateDB',
    'Sf': 'SUPFAM',
    'Sw': 'SWISS-MODEL',
    'Tt': 'TTD Target',
    'S': 'Uniprot-TrEMBL',
    'Sp': 'Uniprot-SwissProt',

    # --- second group ---
    'Bc': 'BioCyc',
    'Cc': 'CCDS',
    'Sn': 'dbSNP',
    'Ec': 'EcoGene',
    'Em': 'EMBL',
    'En': 'Ensembl',
    'L': 'NCBI Gene',
    'E': 'Enzyme Nomenclature',
    'F': 'FlyBase',
    'G': 'GenBank',
    'Gw': 'Gene Wiki',
    'EnAt': 'Gramene Arabidopsis',
    'Gg': 'Gramene Genes DB',
    'Gl': 'Gramene Literature',
    'EnZm': 'Gramene Maize',
    'EnOj': 'Gramene Rice',
    'H': 'HGNC',
    'Hac': 'HGNC Accession number',
    'Hg': 'HomoloGene',
    'Ir': 'IRGSP Gene',
    'Kg': 'KEGG Genes',
    'Mg': 'MaizeGDB',
    'M': 'MGI',
    'Mbm': 'miRBase mature sequence',
    'Mb': 'miRBase Sequence',
    'N': 'NASC Gene',
    'Om': 'OMIM',
    'Ob': 'Oryzabase',
    'O': 'Other',
    'Pgg': 'PharmGKB Gene',
    'Pl': 'PlantGDB',
    'Q': 'RefSeq',
    'Rf': 'Rfam',
    'R': 'RGD',
    'Os': 'Rice Ensembl Gene',
    'D': 'SGD',
    'Bsu': 'SubtiWiki',
    'A': 'TAIR',
    'Ti': 'TIGR',
    'Tb': 'TubercuList',
    'Uc': 'UCSC Genome Browser',
    'U': 'UniGene',
    'Wn': 'Wheat gene names',
    'Wg': 'WikiGenes',
    'W': 'WormBase',
    'Z': 'ZFIN',
}