In [1]:
from Bio._py3k import urlopen as _urlopen
from Bio._py3k import _binary_to_string_handle


def _q(op, arg1, arg2=None, arg3=None):
    """Send one request to the KEGG REST service and return the response.

    The request path is ``op/arg1``; ``arg2`` is appended when it is truthy,
    and ``arg3`` is appended only when ``arg2`` is truthy as well.

    Returns the raw response object when ``arg2`` is ``"image"``; otherwise
    the response passed through ``_binary_to_string_handle``.
    """
    URL = "http://rest.kegg.jp/%s"

    # Collect the path components first, then join them in one go.
    pieces = [op, arg1]
    if arg2:
        pieces.append(arg2)
        if arg3:
            pieces.append(arg3)
    args = "/".join(str(piece) for piece in pieces)

    resp = _urlopen(URL % (args))

    # Image payloads are binary and must not be wrapped as text.
    return resp if arg2 == "image" else _binary_to_string_handle(resp)

def kegg_conv(target_db, source_db, option=None):
    """Request a KEGG ``conv`` (identifier conversion) listing.

    Args:
        target_db: Target database name or organism code (e.g. ``"tcr"``).
        source_db: Source database name, or a list of entries; a list is
            joined with ``+`` into a single request argument.
        option: Optional output format, ``"turtle"`` or ``"n-triple"``.

    Returns:
        Whatever ``_q`` returns for the ``conv`` request.

    Raises:
        ValueError: If ``option`` is not one of the supported formats, or if
            the ``target_db`` / ``source_db`` combination is not accepted.
            ``ValueError`` is an ``Exception`` subclass, so existing
            ``except Exception`` handlers keep working.
    """
    # Database groups used by the validity check below (each was previously
    # spelled out twice inline).
    gene_dbs = ("ncbi-gi", "ncbi-geneid", "ncbi-proteinid", "uniprot", "genes")
    kegg_chem_dbs = ("drug", "compound", "glycan")
    outside_chem_dbs = ("pubchem", "glycan")

    if option and option not in ("turtle", "n-triple"):
        raise ValueError("Invalid option arg for kegg conv request.")

    if isinstance(source_db, list):
        source_db = "+".join(source_db)

    is_valid_pair = (
        target_db in gene_dbs
        or source_db in gene_dbs
        or (target_db in kegg_chem_dbs and source_db in outside_chem_dbs)
        or (target_db in outside_chem_dbs and source_db in kegg_chem_dbs)
    )
    if not is_valid_pair:
        raise ValueError("Bad argument target_db or source_db for kegg conv request.")

    # _q ignores a falsy third argument, so no separate branch is needed for
    # the "no option" case.
    return _q("conv", target_db, source_db, option)

In [2]:
from Bio.KEGG import REST

In [3]:
# Request the KEGG "pathway" listing and read the whole response into `paths`.
paths = REST.kegg_list("pathway").read()

In [4]:
# Print the pathway listing held in `paths`.
print(paths)

path:map00010	Glycolysis / Gluconeogenesis
path:map00020	Citrate cycle (TCA cycle)
path:map00030	Pentose phosphate pathway
path:map00040	Pentose and glucuronate interconversions
path:map00051	Fructose and mannose metabolism
path:map00052	Galactose metabolism
path:map00053	Ascorbate and aldarate metabolism
path:map00061	Fatty acid biosynthesis
path:map00062	Fatty acid elongation
path:map00071	Fatty acid degradation
path:map00072	Synthesis and degradation of ketone bodies
path:map00073	Cutin, suberine and wax biosynthesis
path:map00100	Steroid biosynthesis
path:map00120	Primary bile acid biosynthesis
path:map00121	Secondary bile acid biosynthesis
path:map00130	Ubiquinone and other terpenoid-quinone biosynthesis
path:map00140	Steroid hormone biosynthesis
path:map00190	Oxidative phosphorylation
path:map00195	Photosynthesis
path:map00196	Photosynthesis - antenna proteins
path:map00220	Arginine biosynthesis
path:map00230	Purine metabolism
path:map00232	Caffeine metabolism
path:map00240	Pyrim

In [5]:
# Materialise the KEGG "organism" listing as a list (one item per response
# line) so it can be iterated more than once.
orgs = list(REST.kegg_list("organism"))

In [6]:
# Run a `conv` request with target 'tcr' and source "ncbi-proteinid" through
# the notebook's own kegg_conv and read the whole response into `keggConv`.
keggConv = kegg_conv('tcr', "ncbi-proteinid").read()

In [7]:
# Print the conversion listing held in `keggConv`.
print(keggConv)

ncbi-proteinid:XP_802622	tcr:397923.10
ncbi-proteinid:XP_802481	tcr:397937.10
ncbi-proteinid:XP_802482	tcr:397937.5
ncbi-proteinid:XP_802144	tcr:398235.10
ncbi-proteinid:XP_802145	tcr:398235.20
ncbi-proteinid:XP_802488	tcr:398265.9
ncbi-proteinid:XP_802702	tcr:398343.9
ncbi-proteinid:XP_802294	tcr:398345.10
ncbi-proteinid:XP_804171	tcr:398477.10
ncbi-proteinid:XP_802142	tcr:398751.10
ncbi-proteinid:XP_802855	tcr:399033.10
ncbi-proteinid:XP_802856	tcr:399033.19
ncbi-proteinid:XP_802153	tcr:399245.9
ncbi-proteinid:XP_804452	tcr:399373.20
ncbi-proteinid:XP_804453	tcr:399373.9
ncbi-proteinid:XP_805501	tcr:399389.10
ncbi-proteinid:XP_802345	tcr:399997.10
ncbi-proteinid:XP_802312	tcr:400739.10
ncbi-proteinid:XP_802713	tcr:401469.10
ncbi-proteinid:XP_802714	tcr:401469.20
ncbi-proteinid:XP_802241	tcr:401473.9
ncbi-proteinid:XP_802427	tcr:401569.10
ncbi-proteinid:XP_802286	tcr:401961.10
ncbi-proteinid:XP_802215	tcr:402647.9
ncbi-proteinid:XP_804071	tcr:402857.10
ncbi-proteinid:XP_804072	tcr:402

In [8]:
from Bio.KEGG import Gene
from Bio.KEGG import REST

# A KEGG gene identifier is built here as "<organism code>:<gene name>".
orgCode = 'sce'
geneName = 'YOL097C'
gene_id = ':'.join([orgCode, geneName])

print('GENE ID: ', gene_id, '\n')

GENE ID:  sce:YOL097C 



In [9]:
# Fetch the entry for `gene_id`, parse it with Bio.KEGG.Gene and print a fixed
# selection of fields for every parsed record.
request = REST.kegg_get(gene_id)
records = Gene.parse(request)
gene_fields = ('entry', 'name', 'definition', 'orthology',
               'organism', 'position', 'motif', 'dblinks')
for record in records:
    # Keep the attribute dict of the (last) record for inspection afterwards.
    m = vars(record)
    for field in gene_fields:
        print(field + ' ', getattr(record, field))

entry  YOL097C
name  ['WRS1, HRE342']
definition  (RefSeq) tryptophan--tRNA ligase WRS1
orthology  [('K01867', 'tryptophanyl-tRNA synthetase [EC:6.1.1.2]')]
organism  ('sce', 'Saccharomyces cerevisiae (budding yeast)')
position  XV:complement(136527..137825)
motif  [('Pfam', ['tRNA-synt_1b'])]
dblinks  [('NCBI-GeneID', ['854056']), ('NCBI-ProteinID', ['NP_014544']), ('SGD', ['S000005457']), ('UniProt', ['Q12109'])]


In [10]:
# `m` is the attribute dict (`record.__dict__`) of the last parsed gene record.
print(m)

{'entry': 'YOL097C', 'name': ['WRS1, HRE342'], 'definition': '(RefSeq) tryptophan--tRNA ligase WRS1', 'orthology': [('K01867', 'tryptophanyl-tRNA synthetase [EC:6.1.1.2]')], 'organism': ('sce', 'Saccharomyces cerevisiae (budding yeast)'), 'position': 'XV:complement(136527..137825)', 'motif': [('Pfam', ['tRNA-synt_1b'])], 'dblinks': [('NCBI-GeneID', ['854056']), ('NCBI-ProteinID', ['NP_014544']), ('SGD', ['S000005457']), ('UniProt', ['Q12109'])]}


In [11]:
# Extract the nucleotide sequence of the gene from its raw KEGG entry text.
request = REST.kegg_get(gene_id).read()

# The NTSEQ section starts with a header line ("NTSEQ" plus the sequence
# length) and is followed by the sequence itself. Reading it line by line
# avoids hard-coding this particular gene's length ('1299') as a split token.
# Assumes sequence lines are indented while section keywords and the '///'
# terminator start at column 0 (KEGG flat-file layout) - TODO confirm.
entry_lines = request.splitlines()
ntseq_start = next(
    (pos for pos, line in enumerate(entry_lines) if line.startswith('NTSEQ')),
    None,
)
if ntseq_start is None:
    raise ValueError('No NTSEQ section found in KEGG entry ' + gene_id)

seq_chunks = []
for line in entry_lines[ntseq_start + 1:]:
    if not line.startswith(' '):
        # Next section keyword or the '///' end-of-entry marker.
        break
    seq_chunks.append(''.join(line.split()))
sequence = ''.join(seq_chunks)
len(sequence)

1299

In [12]:
from Bio.KEGG import Enzyme
# Look up one EC number in KEGG ENZYME and print a fixed selection of fields.
ec_id = 'ec:6.1.1.2'
print('EC NUMBER: ', ec_id, '\n')

request = REST.kegg_get(ec_id)
records = Enzyme.parse(request)

# (printed label, record attribute) pairs, in output order.
enzyme_fields = [
    ('ENTRY: ', 'entry'),
    ('NAME: ', 'name'),
    ('CLASSNAME: ', 'classname'),
    ('SYSNAME: ', 'sysname'),
    ('REACTION: ', 'reaction'),
    ('PATHWAYS: ', 'pathway'),
    ('GENES: ', 'genes'),
    ('DBLINKS: ', 'dblinks'),
]
for record in records:
    # Keep the attribute dict of the (last) record for inspection afterwards.
    record_dict = vars(record)
    for label, attribute in enzyme_fields:
        print(label, getattr(record, attribute))

EC NUMBER:  ec:6.1.1.2 

ENTRY:  6.1.1.2
NAME:  ['tryptophan---tRNA ligase', 'tryptophanyl-tRNA synthetase', 'L-tryptophan-tRNATrp ligase (AMP-forming)', 'tryptophanyl-transfer ribonucleate synthetase', 'tryptophanyl-transfer ribonucleic acid synthetase', 'tryptophanyl-transfer RNA synthetase', 'tryptophanyl ribonucleic synthetase', 'tryptophanyl-transfer ribonucleic synthetase', 'tryptophanyl-tRNA synthase', 'tryptophan translase', 'TrpRS']
CLASSNAME:  ['Ligases;', 'Forming carbon-oxygen bonds;', 'Ligases forming aminoacyl-tRNA and related compounds']
SYSNAME:  ['L-tryptophan:tRNATrp ligase (AMP-forming)']
REACTION:  ['ATP + L-tryptophan + tRNATrp = AMP + diphosphate + L-tryptophyl-tRNATrp [RN:R03664]']
PATHWAYS:  [('PATH', 'ec00970', 'Aminoacyl-tRNA biosynthesis')]
GENES:  [('HSA', ['10352', '7453']), ('PTR', ['453161', '457162']), ('PPS', ['100979631', '100981300']), ('GGO', ['101137502', '101144542']), ('PON', ['100171952', '100456187']), ('NLE', ['100604458', '100607533']), ('MCC'

In [13]:
# List the attribute names available on the last parsed enzyme record.
record_dict.keys()

dict_keys(['entry', 'name', 'classname', 'sysname', 'reaction', 'substrate', 'product', 'inhibitor', 'cofactor', 'effector', 'comment', 'pathway', 'genes', 'disease', 'structures', 'dblinks'])

In [14]:
from Bio.KEGG import Compound

# Look up one KEGG COMPOUND entry and print a fixed selection of fields.
comp_id = 'cpd:C00002'
print('Compound Id: ', comp_id, '\n')

request = REST.kegg_get(comp_id)
records = Compound.parse(request)

compound_fields = ('entry', 'name', 'formula', 'mass',
                   'enzyme', 'structures', 'dblinks')
for record in records:
    for field in compound_fields:
        print(field + ' ', getattr(record, field))

Compound Id:  cpd:C00002 

entry  C00002
name  ['ATP', "Adenosine 5'-triphosphate"]
formula  C10H16N5O13P3
mass  
enzyme  ['1.1.98.6', '1.2.1.30', '1.2.1.95', '1.2.1.101', '1.3.7.7', '1.3.7.8', '1.3.7.14', '1.3.7.15', '1.13.12.7', '1.17.4.2', '1.18.6.1', '1.18.6.2', '1.19.6.1', '1.-.-.-', '2.1.2.-', '2.3.3.8', '2.4.2.17', '2.4.2.52', '2.5.1.6', '2.5.1.17', '2.5.1.112', '2.7.1.1', '2.7.1.2', '2.7.1.3', '2.7.1.4', '2.7.1.5', '2.7.1.6', '2.7.1.7', '2.7.1.8', '2.7.1.10', '2.7.1.11', '2.7.1.12', '2.7.1.13', '2.7.1.14', '2.7.1.15', '2.7.1.16', '2.7.1.17', '2.7.1.18', '2.7.1.19', '2.7.1.20', '2.7.1.21', '2.7.1.22', '2.7.1.23', '2.7.1.24', '2.7.1.25', '2.7.1.26', '2.7.1.27', '2.7.1.28', '2.7.1.29', '2.7.1.30', '2.7.1.31', '2.7.1.32', '2.7.1.33', '2.7.1.34', '2.7.1.35', '2.7.1.36', '2.7.1.39', '2.7.1.40', '2.7.1.43', '2.7.1.44', '2.7.1.45', '2.7.1.46', '2.7.1.47', '2.7.1.48', '2.7.1.49', '2.7.1.50', '2.7.1.51', '2.7.1.52', '2.7.1.53', '2.7.1.54', '2.7.1.55', '2.7.1.56', '2.7.1.58', '2.7.1.59', 

In [15]:
# Fetch the raw entry text for reaction 'rn:R00193' and print it as-is
# (no Bio.KEGG parser is applied to this entry).
request = REST.kegg_get('rn:R00193').read()
print(request)

# Identifiers starting with "C" in the printed entry (e.g. C00021 in the
# EQUATION line) refer to KEGG COMPOUND entries.

ENTRY       R00193                      Reaction
NAME        S-Adenosyl-L-homocysteine aminohydrolase
DEFINITION  S-Adenosyl-L-homocysteine + H2O <=> S-Inosyl-L-homocysteine + Ammonia
EQUATION    C00021 + C00001 <=> C03431 + C00014
RCLASS      RC01168  C00021_C03431
ENZYME      3.5.4.28
DBLINKS     RHEA: 20719
///



In [16]:
import pandas as pd
# Load the BLAST results table from an Excel workbook; the path is relative to
# the notebook's working directory.
blastResultsdf = pd.read_excel("./data/dfAllBlastResults_DRR076693.3.xlsx")

In [17]:
def getGeneinfo(gene_id):
    """Fetch the KEGG entry for ``gene_id`` and return ``Gene.parse`` of it.

    The KEGG request is issued immediately; the returned object is whatever
    ``Gene.parse`` yields for the response handle.
    """
    return Gene.parse(REST.kegg_get(gene_id))

def getEnzymeinfo(enzyme_id):
    """Fetch the KEGG entry for ``enzyme_id`` and return ``Enzyme.parse`` of it.

    The KEGG request is issued immediately; the returned object is whatever
    ``Enzyme.parse`` yields for the response handle.
    """
    return Enzyme.parse(REST.kegg_get(enzyme_id))

In [18]:
# Display the loaded BLAST results table.
blastResultsdf

Unnamed: 0.1,Unnamed: 0,QueryName,Description,E-value,NCBI ID,KEGG ID,Organism,Length
0,0,DRR076693.3 3 length=522,tryptophan--tRNA ligase WRS1 [Saccharomyces ce...,[5.36634e-20],NP_014544,sce:YOL097C,Saccharomyces cerevisiae S288C,432
1,1,DRR076693.3 3 length=522,hypothetical protein Kpol_495p25 [Vanderwaltoz...,[2.47237e-19],XP_001643885,vpo:Kpol_495p25,Vanderwaltozyma polyspora DSM 70294,432
2,2,DRR076693.3 3 length=522,WRS1-like protein [Saccharomyces eubayanus],[3.42091e-19],XP_018221696,,Saccharomyces eubayanus,433
3,3,DRR076693.3 3 length=522,HBL382Wp [Eremothecium sinecaudum],[1.09368e-18],XP_017985516,,Eremothecium sinecaudum,429
4,4,DRR076693.3 3 length=522,hypothetical protein NCAS_0C04960 [Naumovozyma...,[1.47747e-18],XP_003675850,ncs:NCAS_0C04960,Naumovozyma castellii CBS 4309,435
5,5,DRR076693.3 3 length=522,KLTH0C09768p [Lachancea thermotolerans CBS 6340],[3.02664e-18],XP_002552643,lth:KLTH0C09768g,Lachancea thermotolerans CBS 6340,426
6,6,DRR076693.3 3 length=522,uncharacterized protein KLLA0_B07733g [Kluyver...,[3.03265e-18],XP_451874,kla:KLLA0B07733g,Kluyveromyces lactis,432
7,7,DRR076693.3 3 length=522,hypothetical protein TPHA_0P01010 [Tetrapisisp...,[4.16475e-18],XP_003688693,tpf:TPHA_0P01010,Tetrapisispora phaffii CBS 4417,430
8,8,DRR076693.3 3 length=522,tryptophanyl-tRNA synthetase [Ascoidea rubesce...,[4.73629e-18],XP_020045051,,Ascoidea rubescens DSM 1968,428
9,9,DRR076693.3 3 length=522,uncharacterized protein LALA0_S08e02520g [Lach...,[4.96565e-18],XP_022629658,,Lachancea lanzarotensis,422


In [19]:
# Build one row of KEGG gene information per BLAST hit that has a KEGG ID.
HIT_COLUMNS = ['ENTRY', 'NAME', 'DEFINITION', 'MOTIF', 'ORTHOLOGY', 'DBLINKS', 'ORGANISM']

# Rows are collected in a list and turned into a DataFrame once at the end:
# growing a frame with DataFrame.append inside a loop is quadratic, and the
# method no longer exists in pandas 2.x.
hit_rows = []

for kegg_id in blastResultsdf['KEGG ID']:
    if pd.isna(kegg_id):
        # Hit without a KEGG ID (empty spreadsheet cell): nothing to look up.
        continue

    print('Gene Name: ', kegg_id)

    # Start from an all-blank placeholder so that a failed or empty lookup
    # never re-uses the values of the previously processed gene.
    hit_row = dict.fromkeys(HIT_COLUMNS, '')
    try:
        for record in getGeneinfo(kegg_id):
            # If several records are returned, the last one wins.
            hit_row = {'ENTRY': record.entry, 'NAME': record.name,
                       'DEFINITION': record.definition, 'MOTIF': record.motif,
                       'ORTHOLOGY': record.orthology, 'DBLINKS': record.dblinks,
                       'ORGANISM': record.organism}
    except Exception as exc:
        # Best effort: keep a blank placeholder row for this hit, but report
        # the failure instead of swallowing it silently.
        print('  lookup failed for', kegg_id, '->', repr(exc))
        hit_row = dict.fromkeys(HIT_COLUMNS, '')

    hit_rows.append(hit_row)

dfHits = pd.DataFrame(hit_rows, columns=HIT_COLUMNS)

Gene Name:  sce:YOL097C
Gene Name:  vpo:Kpol_495p25
Gene Name:  ncs:NCAS_0C04960
Gene Name:  lth:KLTH0C09768g
Gene Name:  kla:KLLA0B07733g
Gene Name:  tpf:TPHA_0P01010
Gene Name:  tbl:TBLA_0B02320


In [20]:
# Print the organism listing line(s) whose second whitespace-separated field
# (the organism code) is 'kla'.
for org in orgs:
    org_code = org.split(None, 2)[1]
    if org_code != 'kla':
        continue
    print(org)

T01025	kla	Kluyveromyces lactis	Eukaryotes;Fungi;Ascomycetes;Saccharomycetes



In [21]:
# Display the per-hit KEGG gene information collected in `dfHits`.
dfHits

Unnamed: 0,ENTRY,NAME,DEFINITION,MOTIF,ORTHOLOGY,DBLINKS,ORGANISM
0,YOL097C,"[WRS1, HRE342]",(RefSeq) tryptophan--tRNA ligase WRS1,"[(Pfam, [tRNA-synt_1b])]","[(K01867, tryptophanyl-tRNA synthetase [EC:6.1...","[(NCBI-GeneID, [854056]), (NCBI-ProteinID, [NP...","(sce, Saccharomyces cerevisiae (budding yeast))"
1,Kpol_495p25,[],(RefSeq) hypothetical protein,"[(Pfam, [tRNA-synt_1b])]","[(K01867, tryptophanyl-tRNA synthetase [EC:6.1...","[(NCBI-GeneID, [5544124]), (NCBI-ProteinID, [X...","(vpo, Vanderwaltozyma polyspora)"
2,NCAS_0C04960,[NCAS0C04960],(RefSeq) hypothetical protein,"[(Pfam, [tRNA-synt_1b, Sec7])]","[(K01867, tryptophanyl-tRNA synthetase [EC:6.1...","[(NCBI-GeneID, [11528192]), (NCBI-ProteinID, [...","(ncs, Naumovozyma castellii)"
3,KLTH0C09768g,[],(RefSeq) KLTH0C09768p,"[(Pfam, [tRNA-synt_1b])]","[(K01867, tryptophanyl-tRNA synthetase [EC:6.1...","[(NCBI-GeneID, [8291523]), (NCBI-ProteinID, [X...","(lth, Lachancea thermotolerans)"
4,,,,,,,
5,TPHA_0P01010,[TPHA0P01010],(RefSeq) hypothetical protein,"[(Pfam, [tRNA-synt_1b])]","[(K01867, tryptophanyl-tRNA synthetase [EC:6.1...","[(NCBI-GeneID, [11530878]), (NCBI-ProteinID, [...","(tpf, Tetrapisispora phaffii)"
6,TBLA_0B02320,[TBLA0B02320],(RefSeq) hypothetical protein,"[(Pfam, [tRNA-synt_1b])]","[(K01867, tryptophanyl-tRNA synthetase [EC:6.1...","[(NCBI-GeneID, [14494251]), (NCBI-ProteinID, [...","(tbl, Tetrapisispora blattae)"


In [23]:
from Bio.KEGG import Enzyme

# For every row of dfHits, look up the enzyme named in its first orthology
# entry and collect reaction / substrate / product / pathway information.
ENZ_COLUMNS = ['ENTRY_enz', 'NAME_enz', 'REACTION', 'SUBSTRATE', 'PRODUCT', 'PATHWAY']

# Rows are collected in a list and converted once (DataFrame.append in a loop
# is quadratic and no longer exists in pandas 2.x).
enz_rows = []
# EC id -> parsed records, so each distinct enzyme is downloaded only once.
enz_records_by_ec = {}

for ort in dfHits['ORTHOLOGY']:
    # Blank placeholder; stays blank when the hit row is blank, when no EC
    # number can be found, or when the lookup returns no record.
    enz_row = dict.fromkeys(ENZ_COLUMNS, '')

    if ort:
        print(ort)
        ortInfo = str(ort[0][1])
        print('Orthology information: ', ortInfo)

        # The EC number(s) sit in a trailing "[EC:...]" bracket; when several
        # are listed, only the first one is looked up.
        start = ortInfo.find('[EC:')
        end = ortInfo.find(']', start)
        ec_tokens = ortInfo[start + 1:end].lower().split() if start != -1 and end != -1 else []

        if ec_tokens:
            ecN = ec_tokens[0]
            print('EC number: ', ecN)
            if ecN not in enz_records_by_ec:
                enz_records_by_ec[ecN] = list(getEnzymeinfo(ecN))
            for record in enz_records_by_ec[ecN]:
                # If several records are returned, the last one wins.
                enz_row = {'ENTRY_enz': record.entry, 'NAME_enz': record.name,
                           'REACTION': record.reaction, 'SUBSTRATE': record.substrate,
                           'PRODUCT': record.product, 'PATHWAY': record.pathway}
        else:
            print('EC number: ', '(none found)')

    enz_rows.append(enz_row)
    print('\n')

# Re-use dfHits' index so the column-wise concat pairs each hit with its own
# enzyme row; concat keeps the column names of both frames.
dfEnzs = pd.DataFrame(enz_rows, columns=ENZ_COLUMNS, index=dfHits.index)

dfAllInfo = pd.concat([dfHits, dfEnzs], axis=1)

[('K01867', 'tryptophanyl-tRNA synthetase [EC:6.1.1.2]')]
Orthology information:  tryptophanyl-tRNA synthetase [EC:6.1.1.2]
EC number:  ec:6.1.1.2


[('K01867', 'tryptophanyl-tRNA synthetase [EC:6.1.1.2]')]
Orthology information:  tryptophanyl-tRNA synthetase [EC:6.1.1.2]
EC number:  ec:6.1.1.2


[('K01867', 'tryptophanyl-tRNA synthetase [EC:6.1.1.2]')]
Orthology information:  tryptophanyl-tRNA synthetase [EC:6.1.1.2]
EC number:  ec:6.1.1.2


[('K01867', 'tryptophanyl-tRNA synthetase [EC:6.1.1.2]')]
Orthology information:  tryptophanyl-tRNA synthetase [EC:6.1.1.2]
EC number:  ec:6.1.1.2




[('K01867', 'tryptophanyl-tRNA synthetase [EC:6.1.1.2]')]
Orthology information:  tryptophanyl-tRNA synthetase [EC:6.1.1.2]
EC number:  ec:6.1.1.2


[('K01867', 'tryptophanyl-tRNA synthetase [EC:6.1.1.2]')]
Orthology information:  tryptophanyl-tRNA synthetase [EC:6.1.1.2]
EC number:  ec:6.1.1.2




In [24]:
# Display the first rows of the enzyme information table.
dfEnzs.head()

Unnamed: 0,ENTRY_enz,NAME_enz,REACTION,SUBSTRATE,PRODUCT,PATHWAY
0,6.1.1.2,"[tryptophan---tRNA ligase, tryptophanyl-tRNA s...",[ATP + L-tryptophan + tRNATrp = AMP + diphosph...,"[ATP [CPD:C00002], L-tryptophan [CPD:C00078], ...","[AMP [CPD:C00020], diphosphate [CPD:C00013], L...","[(PATH, ec00970, Aminoacyl-tRNA biosynthesis)]"
1,6.1.1.2,"[tryptophan---tRNA ligase, tryptophanyl-tRNA s...",[ATP + L-tryptophan + tRNATrp = AMP + diphosph...,"[ATP [CPD:C00002], L-tryptophan [CPD:C00078], ...","[AMP [CPD:C00020], diphosphate [CPD:C00013], L...","[(PATH, ec00970, Aminoacyl-tRNA biosynthesis)]"
2,6.1.1.2,"[tryptophan---tRNA ligase, tryptophanyl-tRNA s...",[ATP + L-tryptophan + tRNATrp = AMP + diphosph...,"[ATP [CPD:C00002], L-tryptophan [CPD:C00078], ...","[AMP [CPD:C00020], diphosphate [CPD:C00013], L...","[(PATH, ec00970, Aminoacyl-tRNA biosynthesis)]"
3,6.1.1.2,"[tryptophan---tRNA ligase, tryptophanyl-tRNA s...",[ATP + L-tryptophan + tRNATrp = AMP + diphosph...,"[ATP [CPD:C00002], L-tryptophan [CPD:C00078], ...","[AMP [CPD:C00020], diphosphate [CPD:C00013], L...","[(PATH, ec00970, Aminoacyl-tRNA biosynthesis)]"
4,,,,,,


In [25]:
# Print the first pathway (ID and name) of every row that has pathway data;
# blank placeholder rows hold '' and are skipped.
for p in dfAllInfo['PATHWAY']:
    if p == '':
        continue
    print(p)
    first_pathway = p[0]
    print('Pathways ID: ', first_pathway[1])
    print('Pathways name: ', first_pathway[2], '\n')

[('PATH', 'ec00970', 'Aminoacyl-tRNA biosynthesis')]
Pathways ID:  ec00970
Pathways name:  Aminoacyl-tRNA biosynthesis 

[('PATH', 'ec00970', 'Aminoacyl-tRNA biosynthesis')]
Pathways ID:  ec00970
Pathways name:  Aminoacyl-tRNA biosynthesis 

[('PATH', 'ec00970', 'Aminoacyl-tRNA biosynthesis')]
Pathways ID:  ec00970
Pathways name:  Aminoacyl-tRNA biosynthesis 

[('PATH', 'ec00970', 'Aminoacyl-tRNA biosynthesis')]
Pathways ID:  ec00970
Pathways name:  Aminoacyl-tRNA biosynthesis 

[('PATH', 'ec00970', 'Aminoacyl-tRNA biosynthesis')]
Pathways ID:  ec00970
Pathways name:  Aminoacyl-tRNA biosynthesis 

[('PATH', 'ec00970', 'Aminoacyl-tRNA biosynthesis')]
Pathways ID:  ec00970
Pathways name:  Aminoacyl-tRNA biosynthesis 



In [27]:
import os
# Print the current working directory; relative paths used in this notebook
# (e.g. './data/...' and './') are resolved against it.
print(os.getcwd())

/Users/villons/Desktop/biosciences/Various assignment


In [38]:
from Bio.KEGG.KGML import KGML_parser
from Bio.KEGG.REST import *

from Bio.KEGG.KGML import KGML_pathway

def downloadKGMLfile(pathId, outputDir):
    """Download the KGML of a KEGG pathway map and save it to disk.

    Args:
        pathId: KEGG pathway map identifier passed to ``kegg_get``
            (e.g. ``'sce00970'``).
        outputDir: Directory in which ``<pathId>.kgml`` is written.
    """
    kgmlMap = kegg_get(pathId, "kgml").read()
    with open(os.path.join(outputDir, pathId + '.kgml'), 'w') as f:
        # Write the document in one call; iterating over the string would
        # write it one character at a time.
        f.write(kgmlMap)
            
# KGMLCanvas is otherwise only imported in a later cell of this notebook;
# importing it here lets this cell run on a fresh kernel as well.
from Bio.Graphics.KGML_vis import KGMLCanvas

lsPathways = []
# mapID -> parsed pathway, so every map is downloaded/parsed once and genes
# from several rows can be highlighted on the same map.
parsedMaps = {}

for _, row in dfAllInfo.iterrows():
    pathways = row['PATHWAY']
    # Blank placeholder rows (failed lookups) carry '' instead of real data.
    if not row['ENTRY'] or not isinstance(pathways, (list, tuple)) or not pathways:
        continue

    # Only the first pathway listed for the enzyme is used.
    pathID = pathways[0][1]
    print('pathID ', pathID)

    # Numeric part of the reference pathway ID, e.g. 'ec00970' -> '00970'.
    path = ''.join(ch for ch in pathID if ch.isdigit())
    print('path ', path)

    organism = row['ORGANISM'][0]
    print('organism ', organism)

    gene = row['ENTRY']
    print('gene ', gene)

    # Organism-specific map ID, e.g. 'sce' + '00970'.
    mapID = organism + path
    print('mapID ', mapID)

    geneID = organism + ':' + gene
    print('geneID ', geneID)

    if mapID not in parsedMaps:
        lsPathways.append(mapID)
        downloadKGMLfile(mapID, './')
        parsedMaps[mapID] = KGML_parser.read(kegg_get(mapID, "kgml"))
    pMap = parsedMaps[mapID]

    # Colour every map entry whose name lists this gene.
    for g in pMap.genes:
        lsName = g.name.split()
        if geneID in lsName:
            print(lsName, '\t', geneID, '\n')
            for gg in g.graphics:
                gg.fgcolor = '#ff0000'
                gg.bgcolor = '#ff0000'

    kgml_map = KGMLCanvas(pMap, show_maps=True, import_imagemap=True)
    # NOTE(review): every map is drawn to the same file, so only the map of
    # the last processed row remains on disk - confirm whether one PDF per
    # mapID is wanted.
    kgml_map.draw("./DRR076693.3.pdf")

print('List of pathways: ', lsPathways)


pathID  ec00970
path  00970
organism  sce
gene  YOL097C
mapID  sce00970
geneID  sce:YOL097C
['sce:YDR268W', 'sce:YOL097C'] 	 sce:YOL097C 

pathID  ec00970
path  00970
organism  vpo
gene  Kpol_495p25
mapID  vpo00970
geneID  vpo:Kpol_495p25
['vpo:Kpol_495p25', 'vpo:Kpol_526p39'] 	 vpo:Kpol_495p25 

pathID  ec00970
path  00970
organism  ncs
gene  NCAS_0C04960
mapID  ncs00970
geneID  ncs:NCAS_0C04960
['ncs:NCAS_0C04960', 'ncs:NCAS_0C05320'] 	 ncs:NCAS_0C04960 

pathID  ec00970
path  00970
organism  lth
gene  KLTH0C09768g
mapID  lth00970
geneID  lth:KLTH0C09768g
['lth:KLTH0C09768g', 'lth:KLTH0F16808g'] 	 lth:KLTH0C09768g 

pathID  ec00970
path  00970
organism  tpf
gene  TPHA_0P01010
mapID  tpf00970
geneID  tpf:TPHA_0P01010
['tpf:TPHA_0C00300', 'tpf:TPHA_0P01010'] 	 tpf:TPHA_0P01010 

pathID  ec00970
path  00970
organism  tbl
gene  TBLA_0B02320
mapID  tbl00970
geneID  tbl:TBLA_0B02320
['tbl:TBLA_0A09590', 'tbl:TBLA_0B02320'] 	 tbl:TBLA_0B02320 

List of pathways:  ['sce00970', 'vpo00970', 'n

In [36]:
from Bio.Graphics.KGML_vis import KGMLCanvas

You should consider upgrading via the '/usr/local/opt/python/bin/python3.7 -m pip install --upgrade pip' command.[0m
