# Mapping Path2Models whole genome metabolism model to KEGG pathway

## Software Requirements

* pandas
* python-libsbml

In [1]:
import libsbml
import pandas as pd
import re

ImportError: No module named libsbml

## Retrieving the whole-genome metabolic model from path2models

Model: Whole Genome Metabolism of Escherichia coli str. K-12 substr. MG1655 (BMID000000140222), available at http://www.ebi.ac.uk/biomodels-main/download?mid=BMID000000140222
The file is large (about 9 MB), so download it once and reuse the local copy instead of re-running the cell below.

In [2]:
# The URL is quoted because it contains '?', which some shells (e.g. zsh) treat as a glob character.
!curl -o BMID000000140222.xml "http://www.ebi.ac.uk/biomodels-main/download?mid=BMID000000140222"

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 9112k    0 9112k    0     0  1431k      0 --:--:--  0:00:06 --:--:-- 1826k


### reading path2models SBML

In [9]:
# readSBML reports problems (missing file, malformed XML) on the returned
# document instead of raising, and getModel() then returns None. Check here so
# the failure is reported at the load step rather than as an AttributeError in
# a later cell.
document = libsbml.readSBML('BMID000000140222.xml')
model = document.getModel()
if model is None:
    raise RuntimeError(
        "No model could be read from BMID000000140222.xml "
        "(%d libSBML error(s) reported)" % document.getNumErrors()
    )

#### construct regex patterns

In [10]:
# One pattern per cross-reference label expected in a reaction's notes, each of
# the form "<LABEL>: <value></p>".
# The value is matched lazily (.*?) so the match ends at the FIRST closing
# </p>. A greedy .* would extend to the last </p> on the same line and swallow
# neighbouring entries whenever several <p> elements share one line.
bigg = re.compile(r'BIGG:.*?</p>')
brenda = re.compile(r'BRENDA:.*?</p>')
gene_association = re.compile(r'GENE_ASSOCIATION:.*?</p>')
kegg = re.compile(r'KEGG:.*?</p>')
metacyc = re.compile(r'METACYC:.*?</p>')
reactome = re.compile(r'REACTOME:.*?</p>')
rhea = re.compile(r'RHEA:.*?</p>')
seed = re.compile(r'SEED:.*?</p>')
upa = re.compile(r'UPA:.*?</p>')

#### create pandas dataframe for reactions

In [13]:
def _note_value(pattern, notes, prefix_len):
    """Return the value of one annotation entry in ``notes``, or None.

    pattern    -- compiled regex matching "<LABEL>: <value></p>"
    notes      -- notes string of a reaction
    prefix_len -- length of the leading "<LABEL>: " text (label, colon, space)

    The matched text is trimmed by ``prefix_len`` characters at the front and
    by the 4 characters of the closing "</p>" at the end.
    """
    match = pattern.search(notes)
    if match is None:
        return None
    return match.group()[prefix_len:-4]


# One list per output column; every list receives exactly one entry per
# reaction (None when the annotation is absent) so the columns stay aligned.
id_ary = []
bigg_ary = []
brenda_ary = []
gene_association_ary = []
kegg_ary = []
metacyc_ary = []

for i in range(model.getNumReactions()):
    r = model.getReaction(i)
    notes = r.getNotesString()

    id_ary.append(r.id)
    # The offsets are the lengths of "BIGG: ", "BRENDA: ", "GENE_ASSOCIATION: ",
    # "KEGG: " and "METACYC: " respectively.
    bigg_ary.append(_note_value(bigg, notes, 6))
    brenda_ary.append(_note_value(brenda, notes, 8))
    gene_association_ary.append(_note_value(gene_association, notes, 18))
    kegg_ary.append(_note_value(kegg, notes, 6))
    metacyc_ary.append(_note_value(metacyc, notes, 9))


In [14]:
# Wrap each collected list in a named Series; the name becomes the column label.
id_se = pd.Series(data=id_ary, name='ID')
bigg_se = pd.Series(data=bigg_ary, name='BIGG')
brenda_se = pd.Series(data=brenda_ary, name='BRENDA')
gene_association_se = pd.Series(data=gene_association_ary, name='GENE_ASSOCIATION')
kegg_se = pd.Series(data=kegg_ary, name='KEGG')
metacyc_se = pd.Series(data=metacyc_ary, name='METACYC')

# Put the Series side by side (axis=1) to form the reaction table.
df = pd.concat(
    [
        id_se,
        bigg_se,
        brenda_se,
        gene_association_se,
        kegg_se,
        metacyc_se,
    ],
    axis=1
)

In [2]:
# Preview the first rows of the reaction table as a quick sanity check.
df.head()

NameError: name 'df' is not defined

## Integrating path2models dataframe with KEGG pathway

### Importing the E. coli glycolysis (eco00010) pathway

In [18]:
import requests
import json

# Connection settings for the local REST service used by the cells below
# (presumably Cytoscape's cyREST, given port 1234 and the /v1/ prefix -- TODO confirm).
PORT_NUMBER = 1234
BASE_URL = "".join(("http://localhost:", str(PORT_NUMBER), "/v1/"))

# Declares request bodies as JSON when posting data to the server.
HEADERS = {
    'Content-Type': 'application/json',
}

# Query the API root; the response is shown as the cell output.
requests.get(BASE_URL)

<Response [200]>

In [19]:
# Ask the REST server to load the eco00010 KGML file directly from the KEGG
# REST API; the request body is a JSON list of source URLs.
pathway_location = "http://rest.kegg.jp/get/eco00010/kgml"
res1 = requests.post(
    BASE_URL + "networks?source=url",
    data=json.dumps([pathway_location]),
    headers=HEADERS
)
# Stop here with the HTTP status if the server rejected the request, rather
# than failing below with an opaque JSON-decoding or indexing error.
res1.raise_for_status()
result = res1.json()
# Take the first SUID reported for the first (and only) submitted URL.
pathway_suid = result[0]["networkSUID"][0]
print("Pathway SUID = " + str(pathway_suid))

Pathway SUID = 114311
