In [1]:
from hypergraphs import *
from kegg import *

In [2]:
from urllib.request import urlopen
from urllib.parse import quote_plus
from bs4 import BeautifulSoup
import requests
from Levenshtein import ratio
from chemspipy import ChemSpider
import logging
import pathlib
from Bio.KEGG.KGML.KGML_parser import read

In [3]:
superpathwayDict = {
    "Carbohydrate metabolism":
[
	("00010", "Glycolysis / Gluconeogenesis"),
	("00020", "Citrate cycle (TCA cycle)"),
	("00030", "Pentose phosphate pathway"),
	("00040", "Pentose and glucuronate interconversions"),
	("00051", "Fructose and mannose metabolism"),
	("00052", "Galactose metabolism"),
	("00053", "Ascorbate and aldarate metabolism"),
	("00500", "Starch and sucrose metabolism"),
	("00520", "Amino sugar and nucleotide sugar metabolism"),
	("00620", "Pyruvate metabolism"),
	("00630", "Glyoxylate and dicarboxylate metaboliscm"),
	("00640", "Propanoate metabolism"),
	("00650", "Butanoate metabolism"),
	("00562", "Inositol phosphate metabolism")
],
"Energy metabolism":
[
	("00190", "Oxidative phosphorylation"),
	("00910", "Nitrogen metabolism"),
	("00920", "Sulfur metabolism")
],
"Lipid metabolism":
[	("00061", "Fatty acid biosynthesis"),
	("00062", "Fatty acid elongation"),	
	("00071", "Fatty acid degradation"),
	("00100", "Steroid biosynthesis"),	
	("00120", "Primary bile acid biosynthesis"),
	("00140", "Steroid hormone biosynthesis"),
	("00561", "Glycerolipid metabolism"),
	("00564", "Glycerophospholipid metabolism"),
	("00565", "Ether lipid metabolism"),
	("00600", "Sphingolipid metabolism"),
	("00590", "Arachidonic acid metabolism"),
	("00591", "Linoleic acid metabolism"),
	("00592", "alpha-Linolenic acid metabolism"),
	("01040", "Biosynthesis of unsaturated fatty acids")
],
"Nucleotide metabolism":
[
	("00230", "Purine metabolism"),
	("00240", "Pyrimidine metabolism")
],
"Amino acid metabolism":
[
	("00250", "Alanine, aspartate and glutamate metabolism"),
	("00260", "Glycine, serine and threonine metabolism"),
	("00270", "Cysteine and methionine metabolism"),
	("00280", "Valine, leucine and isoleucine degradation"),
	("00290", "Valine, leucine and isoleucine biosynthesis"),
	("00310", "Lysine degradation"),
	("00220", "Arginine biosynthesis"),
	("00330", "Arginine and proline metabolism"),
	("00340", "Histidine metabolism"),
	("00350", "Tyrosine metabolism"),
	("00360", "Phenylalanine metabolism"),
	("00380", "Tryptophan metabolism"),
	("00400", "Phenylalanine, tyrosine and tryptophan biosynthesis")
],
"Metabolism of other amino acids":
[
	("00410", "beta-Alanine metabolism"),
	("00430", "Taurine and hypotaurine metabolism"),
	("00440", "Phosphonate and phosphinate metabolism"),
	("00450", "Selenocompound metabolism"),
	("00470", "D-Amino acid metabolism"),
	("00480", "Glutathione metabolism")
],
"Glycan biosynthesis and metabolism":
[
	("00510", "N-Glycan biosynthesis"),
	("00513", "Various types of N-glycan biosynthesis"),
	("00512", "Mucin type O-glycan biosynthesis"),
	("00515", "Mannose type O-glycan biosynthesis"),
	("00514", "Other types of O-glycan biosynthesis"),
	("00532", "Glycosaminoglycan biosynthesis - chondroitin sulfate / dermatan sulfate"),
	("00534", "Glycosaminoglycan biosynthesis - heparan sulfate / heparin"),
	("00533", "Glycosaminoglycan biosynthesis - keratan sulfate"),
	("00531", "Glycosaminoglycan degradation"),
	("00563", "Glycosylphosphatidylinositol (GPI)-anchor biosynthesis"),
	("00601", "Glycosphingolipid biosynthesis - lacto and neolacto series"),
	("00603", "Glycosphingolipid biosynthesis - globo and isoglobo series"),
	("00604", "Glycosphingolipid biosynthesis - ganglio series"),
	("00511", "Other glycan degradation")
],
"Metabolism of cofactors and vitamins":
[
	("00730", "Thiamine metabolism"),
	("00740", "Riboflavin metabolism"),
	("00750", "Vitamin B6 metabolism"),
	("00760", "Nicotinate and nicotinamide metabolism"),
	("00770", "Pantothenate and CoA biosynthesis"),
	("00780", "Biotin metabolism"),
	("00785", "Lipoic acid metabolism"),
	("00790", "Folate biosynthesis"),
	("00670", "One carbon pool by folate"),
	("00830", "Retinol metabolism"),
	("00860", "Porphyrin metabolism"),
	("00130", "Ubiquinone and other terpenoid-quinone biosynthesis")
],
"Metabolism of terpenoids and polyketides":
[
	("00900", "Terpenoid backbone biosynthesis")
],
"Biosynthesis of other secondary metabolites":
[
	("00232", "Caffeine metabolism"),
	("00524", "Neomycin, kanamycin and gentamicin biosynthesis")
],
"Xenobiotics biodegradation and metabolism":
[
	("00980", "Metabolism of xenobiotics by cytochrome P450"),
	("00982", "Drug metabolism - cytochrome P450"),
	("00983", "Drug metabolism - other enzymes")
]
}

In [4]:
organismDict = {"hsa": "Homo sapiens (human)"}

In [5]:
outputDirectory = "/Users/boldi/Desktop/pw/"
dataDirectory = "../../LaTeX/Data/KEGG-Pathways/"

In [6]:
def lratio(needle, haystack):
    """
        Given a string needle and a list of strings haystack, returns the maximum 
        Levenshtein ratio between needle and the elements of haystack (case insensitive match). This value ranges
        from 1 (the needle is present in the haystack) to 0.
    """
    return max([ratio(needle.lower(), hay.lower()) for hay in haystack])

In [7]:
def search_compound_KEGG(compound_name, compound_formula=None):
    """
        Search for compound on KEGG. If compound_formula is provided, the formula is used instead.
        
        Returns ID, list of names as from KEGG, best Levenshtein ratio (w.r.t. the names).
        If nothing is found, IDs and list are both None, and ratio is 0.0.
    """
    if compound_formula is None:
        url1 = "https://www.kegg.jp/kegg-bin/search_ligand?query=" + quote_plus(compound_name) + "&column=entry%2Bname%2Bformula&DATABASE=compound&STEP=1000"
    else:
        url1 = "https://www.kegg.jp/kegg-bin/search_ligand?query=" + quote_plus(compound_formula) + "&column=entry%2Bname%2Bformula&DATABASE=compound&STEP=1000"
    response1 = requests.get(url1, allow_redirects=False)    
    url2 = "https://www.kegg.jp/kegg-bin/" + response1.headers["Location"]
    response2 = requests.get(url2)
    soup = BeautifulSoup(response2.text, "html.parser")
    tds = [tds for tds in soup.find_all("td", {"class": "data1"})]
    cd = []
    d = {}
    for i in range(len(tds)//5):
        compound_id = tds[5*i+1].getText()
        names = tds[5*i+3].getText(strip=True, separator="|").split("|")
        cd += [compound_id]
        d[compound_id]=names
    if cd:
        i = max(range(len(cd)), key = lambda i: lratio(compound_name, d[cd[i]])) # Best match
        return cd[i], d[cd[i]], lratio(compound_name, d[cd[i]])
    else:
        return None, None, 0.0

In [8]:
def search_compound_chemspider(compound_name, chemspider_api_key="ZOoKYwWPa9AIpeSu3f90LWZ9NHn5TY2A"):
    """
        Search for compound on ChemSpider, using a valid API key. 
        
        Returns common name, molecular formula, best Levenshtein ratio (w.r.t. the names).
        If nothing is found, the first two entries are both None, and ratio is 0.0.
    """

    if chemspider_api_key is None:
        return None, None, 0.0
    cs = ChemSpider(chemspider_api_key)
    results = cs.search(compound_name)
    results.wait()
    if results.status != "Complete":
        logging.info(f"Failed search from ChemSpider, status: {results.status}")
        logging.info(f"Message: {results.message}")
        return None, None, 0.0
    if len(results) == 0:
        return None, None, 0.0
    i = max(range(len(results)), key = lambda i: lratio(compound_name, results[i].common_name))
    return results[i].common_name, results[i].molecular_formula, lratio(compound_name, results[i].common_name)

In [9]:
def search_compound(compound_name):
    """
        Search for compound on KEGG and (as a last resort, if no or insufficient match is found) on ChemSpider and
        then again on KEGG through molecular formula. 
        
        Returns ID, list of names as from KEGG, best Levenshtein ratio (w.r.t. the names).
        If nothing is found, IDs and list are both None, and ratio is 0.0.
    """    
    logging.info(f"Looking for {compound_name} on KEGG")
    cid, cnames, keggratio = search_compound_KEGG(compound_name)
    logging.info(f"Best match {cnames} with ratio {keggratio}")
    if keggratio < 0.5: #Bad or no match
        logging.info(f"Insufficient! Trying ChemSpider")
        spname, spformula, spratio = search_compound_chemspider(compound_name)
        logging.info(f"ChemSpider best match {spname} with ratio {spratio} and formula {spformula}")
        if spformula is None:
            return cid, cnames, keggratio
        compound_formula = spformula.translate(str.maketrans("", "", "_{}"))   # Remove special characters
        logging.info(f"Looking for {compound_formula} on KEGG")
        sid, snames, skeggratio = search_compound_KEGG(compound_name, compound_formula)
        logging.info(f"Best match {snames} with ratio {skeggratio}")
        if skeggratio > keggratio:
            return sid, snames, skeggratio
    return cid, cnames, keggratio

In [10]:
def convert_list_to_csv(input_filename, output_filename):
    basename = input_filename
    with open(input_filename) as f:
        compounds = f.read().splitlines()
    with open(output_filename, "w") as f:
        for compound in compounds:
            if len(compound.strip()) == 0:
                continue
            cid, cnames, lr = search_compound(compound)
            if cid is not None:
                jnames = " | ".join(cnames)
                f.write(f"{cid},\"{jnames}\",\"{compound}\",{lr}\n")
            else:
                f.write(f"?,?,\"{compound}\",{lr}\n")
            f.flush()

In [11]:
def read_compound_csv(filename, min_levenshtein=0.9):
    res = []
    with open(filename, "r") as f:
        csvreader = csv.reader(f)
        for row in csvreader:
            if float(row[3]) < min_levenshtein:
                continue
            res += [(row[0], row[2])]
    return res

In [12]:
valA = "#FF0000"
valB = "#0000FF"

def writeKEGGpdfcolors(outputDirectory, organism, superpathway, superpathwayDict, compound2color):
    """
    """
    index = []
    safeSuperpathway = makesafe(superpathway)
    for pw in superpathwayDict[superpathway]:
        countColor = {}
        for v in set(compound2color.values()):
            countColor[v] = set([])
        try:
            pathway = KGML_parser.read(kegg_get(organism + pw[0], "kgml"))
        except:
            print("Pathway", pw, "could not be downloaded: ignoring")
            continue
        canvas = KGMLCanvas(pathway)
        for k in pathway.entries:
            t = pathway.entries[k].type
            pathway.entries[k].graphics[0].bgcolor = "#FFFFFF"
            if t == "compound":
                compound = pathway.entries[k].name[4:]
                if compound in compound2color:
                    pathway.entries[k].graphics[0].bgcolor = compound2color[compound]
                    pathway.entries[k].graphics[0].fgcolor = compound2color[compound]
                    countColor[compound2color[compound]] |= set([compound])
        canvas.import_imagemap = True
        pdfName = organism + pw[0] + ".pdf"
        pathlib.Path(os.path.join(outputDirectory, organism, safeSuperpathway)).mkdir(parents=True, exist_ok=True)
        canvas.draw(os.path.join(outputDirectory, organism, safeSuperpathway, pdfName))
        index += [(pdfName, pw[1], len(countColor[valA]), len(countColor[valB]))]
    return index

In [13]:
def writeIndex(directory, index, title):
    with open(os.path.join(directory, "index.html"), "w") as file:
        file.write("<!DOCTYPE html>\n<html>\n")
        file.write(f"\t<h1>{title}</h1>\n")
        file.write("\t<table border=1>\n")
        file.write("\t\t<thead><tr><th>Pathway<th>ListA<th>ListB</thead>\n")
        file.write("\t\t<tbody>\n")
        for link, anchor, cA, cB in index:
            file.write("\t\t<tr><td><a href=\"{}\">{}</a><td>{}<td>{}\n".format(link, anchor, cA, cB))
        file.write("\t</tbody>\n")
        file.write("\t</table>\n")
        file.write("</html>\n")


In [14]:
tA = read_compound_csv("/Users/boldi/Desktop/listA-complete.csv")
tB = read_compound_csv("/Users/boldi/Desktop/listB-complete.csv")

compound2color = {}
for x in tA:
    compound2color[x[0]] = valA
for x in tB:
    compound2color[x[0]] = valB

In [15]:
### PRODUCE all KEGG pdfs and index.html

for organism in []: #organismDict.keys():
    superIndex = []
    for superpathway in superpathwayDict.keys():
        safeSuperpathway = makesafe(superpathway)
        index = writeKEGGpdfcolors(outputDirectory, organism, superpathway, superpathwayDict, compound2color)
        sumA = sum([x[2] for x in index])
        sumB = sum([x[3] for x in index])
        writeIndex(os.path.join(outputDirectory, organism, safeSuperpathway), index, 
               f"{organismDict[organism]}, {superpathway} (A: {sumA}, B: {sumB})")
        superIndex += [(safeSuperpathway, superpathway, sumA, sumB)]
    sumA = sum([x[2] for x in superIndex])
    sumB = sum([x[3] for x in superIndex])
    writeIndex(os.path.join(outputDirectory, organism, "index.html"), superIndex, 
               f"{organismDict[organism]} (A: {sumA}, B: {sumB})")

In [16]:
### PRODUCE a dictionary compounds with the set of compounds appearing in the whole organism, in each
### superpathway and in each pathway
compounds = {}
for organism in organismDict.keys():
    compounds[organism] = set([])
    for superpathway in superpathwayDict.keys():
        compounds[superpathway] = set([])
        for pw in superpathwayDict[superpathway]:
            pathway = read(open(os.path.join(dataDirectory, organism, f"{organism}{pw[0]}.xml"), "r"))
            compounds[pw[0]] = set([x.name[4:] for x in pathway.compounds])
            compounds[superpathway] |= compounds[pw[0]]
            compounds[organism] |= compounds[pw[0]]

In [17]:

def printLine(key, compoundDict, listsOfInterest):
    """
        Interrogate compoundDict with key, and obtain a set.
        Print the key, the size of the set, and for each of the (key,value) pairs in the
        dictionary, the size of the intersection between the set and the value
    """
    whole = compoundDict[key]
    nwhole = len(whole)
    intersections = ",".join([str(len(whole & value)) for key, value in listsOfInterest.items()])
    print(f"{key},{nwhole},{intersections}")
            
tA = read_compound_csv("/Users/boldi/Desktop/listA-complete.csv")
tB = read_compound_csv("/Users/boldi/Desktop/listB-complete.csv")
setA = set([x[0] for x in tA])
setB = set([x[0] for x in tB])


        
for organism in organismDict.keys():
    printLine(organism, compounds, {'A': setA, 'B': setB})
    for superpathway in superpathwayDict.keys():
        printLine(superpathway, compounds, {'A': setA, 'B': setB})
        for pw in superpathwayDict[superpathway]:
            printLine(pw[0], compounds, {'A': setA, 'B': setB})

hsa,3023,58,15
Carbohydrate metabolism,503,21,6
00010,31,3,0
00020,20,0,0
00030,36,5,0
00040,58,4,2
00051,54,2,0
00052,46,1,0
00053,57,1,1
00500,37,0,2
00520,118,4,1
00620,32,1,0
00630,64,3,1
00640,41,3,1
00650,47,4,1
00562,47,2,0
Energy metabolism,67,5,3
00190,16,1,3
00910,19,2,0
00920,33,2,0
Lipid metabolism,659,8,2
00061,58,1,0
00062,40,0,0
00071,50,1,0
00100,57,1,0
00120,48,1,0
00140,99,0,0
00561,38,2,2
00564,56,3,2
00565,25,0,0
00600,27,0,0
00590,79,0,0
00591,28,0,0
00592,44,0,0
01040,74,0,0
Nucleotide metabolism,159,9,2
00230,101,5,1
00240,64,4,1
Amino acid metabolism,550,17,3
00250,28,0,1
00260,48,3,0
00270,66,4,0
00280,41,4,1
00290,23,2,0
00310,51,2,0
00220,23,1,0
00330,69,3,2
00340,47,1,0
00350,78,1,0
00360,49,1,0
00380,83,0,0
00400,34,1,0
Metabolism of other amino acids,220,8,2
00410,32,1,2
00430,24,0,0
00440,53,1,0
00450,27,2,0
00470,69,3,0
00480,38,4,1
Glycan biosynthesis and metabolism,270,2,0
00510,38,0,0
00513,31,0,0
00512,17,0,0
00515,27,0,0
00514,0,0,0
00532,12,1,0
005

In [18]:
H, sr2r, r2ss, rid2rname, mid2mname = read_hg_from_SBML("../../LaTeX/Data/KEGG-Pathways/hsa/hsa00040-sbml.xml")
G, dd = hg2g(H)
cc = nx.closeness_centrality(G)

ccA = []
for x in setA:
    for node in H.nodes():
        if str(node)==x:
            ccA += [cc[x]]

ccB = []
for x in setB:
    for node in H.nodes():
        if str(node)==x:
            ccB += [cc[x]]


In [19]:
ccA, ccB

([0.0878048780487805], [0.0821566110397946])

In [20]:
compounds['00040']

{'C00022',
 'C00026',
 'C00029',
 'C00103',
 'C00111',
 'C00116',
 'C00167',
 'C00181',
 'C00191',
 'C00199',
 'C00204',
 'C00216',
 'C00231',
 'C00259',
 'C00266',
 'C00309',
 'C00310',
 'C00312',
 'C00333',
 'C00379',
 'C00433',
 'C00470',
 'C00474',
 'C00476',
 'C00502',
 'C00508',
 'C00514',
 'C00532',
 'C00558',
 'C00618',
 'C00714',
 'C00789',
 'C00800',
 'C00817',
 'C00905',
 'C01068',
 'C01101',
 'C01508',
 'C01904',
 'C02266',
 'C02273',
 'C02426',
 'C02753',
 'C03033',
 'C03291',
 'C03826',
 'C04053',
 'C04349',
 'C04575',
 'C05385',
 'C05411',
 'C05412',
 'C06118',
 'C06441',
 'C14899',
 'C15930',
 'C20680',
 'C22337'}

In [21]:
for x in H.nodes():
    print(x, str(x) in compounds['00040'])

C03033 True
C00191 True
C00309 True
C00199 True
C00231 True
C01068 True
C00789 True
C00310 True
C00379 True
C00312 True
C00181 True
C00800 True
C00618 True
C00103 True
C00029 True
C00167 True
C02266 True


In [22]:
document = libsbml.readSBMLFromFile("../../LaTeX/Data/KEGG-Pathways/hsa/hsa00040-sbml.xml")
model = document.getModel()


In [23]:
species = model.getListOfSpecies()
for specie in species:
    if specie.getId() == "C00022":
        print(specie.getId(), specie.getAnnotationString())

C00022 <annotation>
  <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/">
    <rdf:Description rdf:about="#meta_C00022">
      <bqbiol:is>
        <rdf:Bag>
          <rdf:li rdf:resource="http://identifiers.org/kegg.compound/C00022"/>
        </rdf:Bag>
      </bqbiol:is>
      <bqbiol:isDescribedBy>
        <rdf:Bag>
          <rdf:li rdf:resource="http://identifiers.org/obo.eco/ECO:0000313"/>
        </rdf:Bag>
      </bqbiol:isDescribedBy>
    </rdf:Description>
  </rdf:RDF>
</annotation>


In [24]:
species[0]

<Species K01184 "K01184">

In [77]:
import matplotlib
def gnagna(outputDirectory, organism, superpathway, superpathwayDict, compound2color):
    """
    """
    index = []
    safeSuperpathway = makesafe(superpathway)
    for pw in superpathwayDict[superpathway]:
        countColor = {}
        for v in set(compound2color.values()):
            countColor[v] = set([])
        try:
            pathway = KGML_parser.read(kegg_get("hsa00040", "kgml"))
        except:
            print("Pathway", pw, "could not be downloaded: ignoring")
            continue
        canvas = KGMLCanvas(pathway)
        for k in pathway.entries:
            t = pathway.entries[k].type
            print(k, pathway.entries[k].name, t)
            pathway.entries[k].graphics[0].bgcolor = compound2color[t]
            pathway.entries[k].graphics[0].fgcolor = compound2color[t]
        for k in range(len(pathway.reaction_entries)):
            pathway.reaction_entries[k].graphics[0].bgcolor = compound2color["reaction"]
            pathway.reaction_entries[k].graphics[0].fgcolor = compound2color["reaction"]
        canvas.import_imagemap = True
        pdfName = organism + pw[0] + ".pdf"
        pathlib.Path(os.path.join(outputDirectory, organism, safeSuperpathway)).mkdir(parents=True, exist_ok=True)
        canvas.draw(os.path.join(outputDirectory, organism, safeSuperpathway, pdfName))
    return index


gnagna("/tmp", "hsa", "Carbohydrate metabolism", {"Carbohydrate metabolism": ("00040", "Pentose and glucuronate interconversions")},
{
    "ortholog": matplotlib.colors.cnames["blue"], 
    "compound": matplotlib.colors.cnames["red"],
    "map": matplotlib.colors.cnames["yellow"],
    "gene": matplotlib.colors.cnames["brown"],
    "reaction": matplotlib.colors.cnames["green"]
}
)


66 ko:K01184 ortholog
67 cpd:C14899 compound
68 ko:K03078 ortholog
69 ko:K00880 ortholog
70 cpd:C03033 compound
71 hsa:2990 hsa:9365 gene
72 ko:K01804 ortholog
74 ko:K00853 ko:K24707 ortholog
75 ko:K00039 ortholog
76 hsa:55277 gene
77 ko:K00853 ko:K24707 ortholog
78 hsa:6120 hsa:729020 gene
79 ko:K03077 ortholog
80 ko:K21680 ortholog
81 hsa:729920 gene
82 ko:K03079 ortholog
83 ko:K00007 ortholog
84 hsa:9942 gene
85 ko:K09988 ortholog
86 ko:K22397 ortholog
87 ko:K14275 ko:K22186 ko:K22396 ortholog
88 ko:K00880 ortholog
89 ko:K01820 ortholog
90 ko:K05351 ortholog
91 ko:K01805 ortholog
92 ko:K14274 ortholog
93 hsa:231 hsa:57016 gene
94 ko:K25880 ortholog
95 hsa:51181 gene
96 hsa:231 hsa:57016 gene
97 ko:K22185 ortholog
99 hsa:51084 gene
100 hsa:7360 gene
101 hsa:10720 hsa:10941 hsa:54490 hsa:54575 hsa:54576 hsa:54577 hsa:54578 hsa:54579 hsa:54600 hsa:54657 hsa:54658 hsa:54659 hsa:574537 hsa:7363 hsa:7364 hsa:7365 hsa:7366 hsa:7367 hsa:79799 gene
102 hsa:7358 gene
104 ko:K16190 ortholog
10

[]

In [64]:
pathway = KGML_parser.read(kegg_get("hsa00040", "kgml"))
print("Entries: ",len(pathway.entries))
print("Orthologs: ", len(pathway.orthologs), "Compounds: ", len(pathway.compounds), 
      "Maps: ", len(pathway.maps), "Genes: ", len(pathway.genes))
print()
print(pathway.entries[86])

Entries:  149
Orthologs:  62 Compounds:  60 Maps:  12 Genes:  15

Entry node ID: 86
Names: ko:K22397
Type: ortholog
Components: set()
Reactions: rn:R01782
Graphics elements: 1 [<Bio.KEGG.KGML.KGML_pathway.Graphics object at 0x13b9ebaf0>]



In [32]:
print(pathway.orthologs[12])

Entry node ID: 86
Names: ko:K22397
Type: ortholog
Components: set()
Reactions: rn:R01782
Graphics elements: 1 [<Bio.KEGG.KGML.KGML_pathway.Graphics object at 0x13b31b280>]



In [60]:
for i in range(len(pathway.reaction_entries)):
    print(i, pathway.reaction_entries[i].reaction)
    
print()
lpr=list(pathway.reactions)
for i in range(len(lpr)):
    print(i, lpr[i].name)


0 rn:R01478
1 rn:R01526
2 rn:R01529
3 rn:R02921
4 rn:R01639
5 rn:R01758 rn:R01759
6 rn:R01904
7 rn:R01431
8 rn:R02640
9 rn:R00289
10 rn:R01383
11 rn:R00286
12 rn:R01430
13 rn:R01896
14 rn:R01481

0 rn:R01478
1 rn:R01526
2 rn:R01529
3 rn:R02921
4 rn:R01639
5 rn:R01758 rn:R01759
6 rn:R01904
7 rn:R01431
8 rn:R02640
9 rn:R00289
10 rn:R01383
11 rn:R00286
12 rn:R01430
13 rn:R01896
14 rn:R01481


In [63]:
print(pathway.entries[0].name)

KeyError: 0

In [71]:
for k,v in pathway.entries.items():
    print(k,v.name)

66 ko:K01184
67 cpd:C14899
68 ko:K03078
69 ko:K00880
70 cpd:C03033
71 hsa:2990 hsa:9365
72 ko:K01804
74 ko:K00853 ko:K24707
75 ko:K00039
76 hsa:55277
77 ko:K00853 ko:K24707
78 hsa:6120 hsa:729020
79 ko:K03077
80 ko:K21680
81 hsa:729920
82 ko:K03079
83 ko:K00007
84 hsa:9942
85 ko:K09988
86 ko:K22397
87 ko:K14275 ko:K22186 ko:K22396
88 ko:K00880
89 ko:K01820
90 ko:K05351
91 ko:K01805
92 ko:K14274
93 hsa:231 hsa:57016
94 ko:K25880
95 hsa:51181
96 hsa:231 hsa:57016
97 ko:K22185
99 hsa:51084
100 hsa:7360
101 hsa:10720 hsa:10941 hsa:54490 hsa:54575 hsa:54576 hsa:54577 hsa:54578 hsa:54579 hsa:54600 hsa:54657 hsa:54658 hsa:54659 hsa:574537 hsa:7363 hsa:7364 hsa:7365 hsa:7366 hsa:7367 hsa:79799
102 hsa:7358
104 ko:K16190
105 ko:K12447
106 ko:K01812
107 ko:K00040
108 ko:K01686 ko:K08323
109 ko:K01685 ko:K16849 ko:K16850
110 ko:K01812
111 ko:K00041
112 path:hsa00010
113 path:hsa00562
114 path:hsa00053
115 path:hsa00040
116 path:hsa00030
117 path:hsa00052
118 path:hsa00520
119 path:hsa00500
120 pa

In [72]:
pathway.reaction_entries[0].graphics[]

<Bio.KEGG.KGML.KGML_pathway.Entry at 0x1395f05e0>