## BioServices introduction

**BioServices** is a Python package that provides access to many bioinformatics web services (e.g.,
UniProt) and a framework for easily implementing web service wrappers (based on
WSDL/SOAP or REST protocols).


In [1]:
from bioservices import *
import bioservices
%pylab inline

Populating the interactive namespace from numpy and matplotlib


`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"


### UniProt

In [2]:
# Create the UniProt client reused by the following cells.
u = UniProt()

In [3]:
# Fetch the FASTA record for accession P43403; the returned string
# (header line plus wrapped sequence) is shown as the cell output.
u.get_fasta("P43403")

'>sp|P43403|ZAP70_HUMAN Tyrosine-protein kinase ZAP-70 OS=Homo sapiens OX=9606 GN=ZAP70 PE=1 SV=1\nMPDPAAHLPFFYGSISRAEAEEHLKLAGMADGLFLLRQCLRSLGGYVLSLVHDVRFHHFP\nIERQLNGTYAIAGGKAHCGPAELCEFYSRDPDGLPCNLRKPCNRPSGLEPQPGVFDCLRD\nAMVRDYVRQTWKLEGEALEQAIISQAPQVEKLIATTAHERMPWYHSSLTREEAERKLYSG\nAQTDGKFLLRPRKEQGTYALSLIYGKTVYHYLISQDKAGKYCIPEGTKFDTLWQLVEYLK\nLKADGLIYCLKEACPNSSASNASGAAAPTLPAHPSTLTHPQRRIDTLNSDGYTPEPARIT\nSPDKPRPMPMDTSVYESPYSDPEELKDKKLFLKRDNLLIADIELGCGNFGSVRQGVYRMR\nKKQIDVAIKVLKQGTEKADTEEMMREAQIMHQLDNPYIVRLIGVCQAEALMLVMEMAGGG\nPLHKFLVGKREEIPVSNVAELLHQVSMGMKYLEEKNFVHRDLAARNVLLVNRHYAKISDF\nGLSKALGADDSYYTARSAGKWPLKWYAPECINFRKFSSRSDVWSYGVTMWEALSYGQKPY\nKKMKGPEVMAFIEQGKRMECPPECPPELYALMSDCWIYKWEDRPDFLTVEQRMRACYYSL\nASKVEGPPGSTQKAEAACA\n'

In [8]:
# Query UniProt for "ZAP70" restricted to taxonomy_id 9606, keeping at most
# 3 hits. The result is printed because it is tab-separated text.
print(u.search("ZAP70+AND+taxonomy_id:9606", limit=3))

Entry	Entry Name	Reviewed	Protein names	Gene Names	Organism	Length
P22681	CBL_HUMAN	reviewed	E3 ubiquitin-protein ligase CBL, EC 2.3.2.27 (Casitas B-lineage lymphoma proto-oncogene) (Proto-oncogene c-Cbl) (RING finger protein 55) (RING-type E3 ubiquitin transferase CBL) (Signal transduction protein CBL)	CBL CBL2 RNF55	Homo sapiens (Human)	906
P20963	CD3Z_HUMAN	reviewed	T-cell surface glycoprotein CD3 zeta chain (T-cell receptor T3 zeta chain) (CD antigen CD247)	CD247 CD3Z T3Z TCRZ	Homo sapiens (Human)	164
Q96P31	FCRL3_HUMAN	reviewed	Fc receptor-like protein 3, FcR-like protein 3, FcRL3 (Fc receptor homolog 3, FcRH3) (IFGP family protein 3, hIFGP3) (Immune receptor translocation-associated protein 3) (SH2 domain-containing phosphatase anchor protein 2) (CD antigen CD307c)	FCRL3 FCRH3 IFGP3 IRTA3 SPAP2	Homo sapiens (Human)	734



In [9]:
# Retrieve the annotations for P43403 as a table (a one-row DataFrame in the
# recorded output of the next cell).
df = u.get_df("P43403")



In [11]:
# Bare expression: let the notebook render the table.
df

Unnamed: 0,Entry,Entry Name,Gene Names,Gene Names (primary),Gene Names (synonym),Gene Names (ordered locus),Gene Names (ORF),Organism,Organism (ID),Protein names,...,Glycosylation,Initiator methionine,Lipidation,Modified residue,Peptide,Post-translational modification,Propeptide,Signal peptide,Transit peptide,PDB
0,P43403,ZAP70_HUMAN,ZAP70 SRK,ZAP70,SRK,,,Homo sapiens (Human),9606,"Tyrosine-protein kinase ZAP-70, EC 2.7.10.2 (7...",...,,,,"MOD_RES 248; /note=""Phosphotyrosine""; /evidenc...",,PTM: Phosphorylated on tyrosine residues upon ...,,,,1FBV;1M61;1U59;2CBL;2OQ1;2OZO;2Y1N;3ZNI;4A4B;4...


### Ensembl

In [12]:
# Create the Ensembl client. Note that `s` is reused for other services later.
s = Ensembl()



In [13]:
# Look up stable ID ENSG00000157764 with expand=True, then list the
# top-level keys of the returned mapping to see which fields are available.
res = s.get_lookup_by_id('ENSG00000157764', expand=True)
res.keys()


dict_keys(['logic_name', 'id', 'description', 'start', 'source', 'strand', 'end', 'Transcript', 'species', 'db_type', 'version', 'canonical_transcript', 'seq_region_name', 'display_name', 'biotype', 'assembly_name', 'object_type'])

### WikiPathways

In [14]:
# Rebind `s` (previously the Ensembl client) to a WikiPathways client.
# The recorded log shows it initialising a REST service and fetching organisms.
s = WikiPathways()

[32mINFO    [bioservices.WikiPathways:363]: [0m [32mInitialising WikiPathways service (REST)[0m
[32mINFO    [bioservices.WikiPathways:78]: [0m [32mFetching organisms...[0m


In [15]:
# Request the coloured rendering of pathway WP1471.
# NOTE(review): the recorded output further down is a dict saying the
# getColoredPathway service is currently not available.
im = s.getColoredPathway("WP1471")

In [16]:
from IPython.display import SVG

In [18]:
# Render the pathway image as SVG.
# NOTE(review): in the recorded run the output below is a dict reporting that
# getColoredPathway is unavailable, so no image was produced.
SVG(im)

{'data': 'getColoredPathway service is currently not available'}

### ChEMBL

In [19]:
# Rebind `s` to a ChEMBL client for the cells in this section.
s = ChEMBL()



In [20]:
# Request compounds for the IDs CHEMBL0 .. CHEMBL999 in one call.
# NOTE(review): in the recorded run this raised
# AttributeError: 'ChEMBL' object has no attribute 'get_compounds_by_chemblId',
# so `res` is never assigned here and the ChEMBL cells below cannot run as-is.
# TODO: port this call, and the field names used below, to the ChEMBL API
# of the installed bioservices version.
res = s.get_compounds_by_chemblId(['CHEMBL%s' % i for i in range(0,1000)])

AttributeError: 'ChEMBL' object has no attribute 'get_compounds_by_chemblId'

In [None]:
# Peek at one entry of the response (index 1) to see its structure.
res[1]

In [None]:
# Entries of `res` equal to 404 (failed lookups, presumably) are dropped.
# The remaining compound records become the rows of a DataFrame indexed by
# ChEMBL ID, which makes the properties easy to plot.
import pandas as pd

found = [entry for entry in res if entry != 404]
compounds = [entry['compound'] for entry in found]
df = pd.DataFrame(
    [dict(compound) for compound in compounds],
    index=[compound['chemblId'] for compound in compounds],
)

# Scatter plot of alogp against molecular weight.
df.plot(kind='scatter', x='molecularWeight', y='alogp', marker='o',
        fontsize=20)

### KEGG

In [None]:
# Create the KEGG client with verbose=True.
k = KEGG(verbose=True)

In [None]:
# Look for pathways matching "B cell" (presumably matched against pathway
# names; this cell has no recorded output to confirm it).
k.lookfor_pathway("B cell")

In [None]:
# Show pathway path:hsa04662 (not executed in this recording, so how it is
# displayed is unverified).
k.show_pathway("path:hsa04662")