# Enzymatic Constraints Enhancement of AGORA models

### Context



### Goals:

- 

In [22]:
from tqdm import tqdm
from reframed.io.sbml import load_cbmodel
from cobra.io import read_sbml_model, write_sbml_model
from mewpy.cobra.util import add_enzyme_constraints
from mewpy.simulation import get_simulator
from mewpy.simulation.environment import Environment as Environment
from mewpy.util.request import retreive_gene,retreive_protein,get_smiles
import pandas as pd
import numpy as np
from urllib.request import urlopen
from functools import reduce
import json

In [2]:
model = read_sbml_model("../models/non-ec/Escherichia_coli_ED1a.xml")

Set parameter Username
Academic license - for non-commercial use only - expires 2024-03-01


In [3]:
sim = get_simulator(model)
sim.set_objective("biomass")

In [4]:
sim.simulate()

objective: 186.53129283071723
Status: OPTIMAL
Constraints: OrderedDict()
Method:SimulationMethod.FBA

In [5]:
sim.reactions

['15DAPtex',
 '15DAPtpp',
 '1HIBUPGLU_St2pp',
 '1HIBUPGLU_Stex',
 '1HIBUP_S_GLCAASE',
 '1HIBUP_Stepp',
 '1HIBUP_Stex',
 '1HMDGLUC_GLCAASE',
 '1HMDGLUCt2pp',
 '1HMDGLUCtex',
 '1OHMDZtepp',
 '1OHMDZtex',
 '1P4H2CBXLAH',
 '23DHMPO',
 '23PDE2',
 '23PDE4',
 '23PDE7',
 '23PDE9',
 '2AHBUTI',
 '2DDGLCNtex',
 '2DDPENGAL',
 '2DDPENTHL',
 '2DH3DGLCA',
 '2DHPL',
 '2DMMQ8abcpp',
 '2DMMQ8tex',
 '2DOHMC5DPT',
 '2H3OGL',
 '2HAGH',
 '2HATVACIDGLUC_GLCAASE',
 '2HATVACIDGLUCt2pp',
 '2HATVACIDGLUCtex',
 '2HATVACIDtepp',
 '2HATVACIDtex',
 '2HATVLACGLUC_GLCAASE',
 '2HATVLACGLUCt2pp',
 '2HATVLACGLUCtex',
 '2HATVLACtepp',
 '2HATVLACtex',
 '2HBO',
 '2HIBUPGLU_St2pp',
 '2HIBUPGLU_Stex',
 '2HIBUP_S_GLCAASE',
 '2HIBUP_Stepp',
 '2HIBUP_Stex',
 '2HYOXPLACt2rpp',
 '2HYOXPLACtex',
 '2IMZS',
 '2INSD',
 '2IPDPIPT',
 '2MBCOATA',
 '2OBUTt2rpp',
 '2OBUTtex',
 '2OH_CBZ_GLC_GLCAASE',
 '2OH_CBZ_GLCt2pp',
 '2OH_CBZ_GLCtex',
 '2OH_CBZtepp',
 '2OH_CBZtex',
 '2OH_MTZ_GLC_GLCAASE',
 '2OH_MTZ_GLCt2pp',
 '2OH_MTZ_GLCtex',
 '2OH_MTZ

## Annotation scraping

In [6]:
sim.get_metabolite('15dap[e]')

                          value
attribute                      
id                     15dap[e]
name         1,5-Diaminopentane
compartment                   e
formula                 C5H16N2

In [7]:
ls_rxn = []

for rxn in sim.reactions:
    anno = sim.get_reaction(rxn).annotations
    seed_id = anno.get('seed.reactions')
    ecnumber = anno.get('ec-code')
    metanetx = anno.get('metanetx.reaction')
    kegg = anno.get('kegg.reaction')
    res = [rxn,seed_id,metanetx,kegg,ecnumber]
    ls_rxn.append(res)
        
df_rxn = pd.DataFrame(ls_rxn,columns=[['Reaction', 'ModelSEED_id','MetaNetX','KEGG_id','ecNumber']])

df_rxn

Unnamed: 0,Reaction,ModelSEED_id,MetaNetX,KEGG_id,ecNumber
0,15DAPtex,rxn08305,,,
1,15DAPtpp,,,,
2,1HIBUPGLU_St2pp,,,,
3,1HIBUPGLU_Stex,,,,
4,1HIBUP_S_GLCAASE,,,,
...,...,...,...,...,...
3581,rtranscription,rxn13784,,,
3582,sink_dmbzid,,,,
3583,sink_s,,,,
3584,sink_thissh[c],,,,


In [8]:
mask = df_rxn['Reaction'].values=='23PDE9'
df_rxn[mask]

Unnamed: 0,Reaction,ModelSEED_id,MetaNetX,KEGG_id,ecNumber
17,23PDE9,rxn03483,MNXR37,,3.1.4.16


In [9]:
seed_id = df_rxn['ModelSEED_id'].values.tolist()

seed_id = [reduce(lambda x: x, inner_list) for inner_list in seed_id]

metanetx_id = df_rxn['MetaNetX'].values.tolist()

metanetx_id = [reduce(lambda x: x, inner_list) for inner_list in metanetx_id]

kegg_id = df_rxn['KEGG_id'].values.tolist()

kegg_id = [reduce(lambda x: x, inner_list) for inner_list in kegg_id]

In [10]:
print(seed_id)

['rxn08305', None, None, None, None, None, None, None, None, None, None, None, 'rxn01635', 'rxn03435', 'rxn02522', 'rxn02762', 'rxn02521', 'rxn03483', 'rxn03436', 'rxn08320', 'rxn01307', 'rxn01633', 'rxn02346', 'rxn00905', None, None, 'rxn00107', 'rxn00332', 'rxn06584', None, None, None, None, None, None, None, None, None, None, 'rxn00741', None, None, None, None, None, None, None, 'rxn42709', 'rxn02007', 'rxn07841', 'rxn05358', None, None, None, None, None, None, None, None, None, None, None, None, 'rxn04139', 'rxn12224', 'rxn04675', 'rxn03333', 'rxn01827', 'rxn11349', 'rxn02376', 'rxn01912', 'rxn07899', 'rxn03860', None, None, 'rxn05373', 'rxn05398', 'rxn05377', 'rxn05402', 'rxn05381', 'rxn05406', 'rxn05361', 'rxn05386', 'rxn05365', 'rxn05390', 'rxn05369', 'rxn05394', 'rxn02345', 'rxn04602', None, None, None, None, None, 'rxn02181', 'rxn01174', 'rxn04601', None, None, 'rxn00902', None, None, 'rxn05338', 'rxn05422', 'rxn05372', 'rxn05397', 'rxn05340', 'rxn05426', 'rxn05376', 'rxn05401

In [11]:
print(metanetx_id)

[None, None, None, None, None, None, None, None, None, None, None, None, 'MNXR93752', 'MNXR83171', 'MNXR34', 'MNXR35', 'MNXR36', 'MNXR37', 'MNXR76597', None, None, 'MNXR7131', 'MNXR6097', 'MNXR7147', None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'MNXR94796', None, None, None, None, None, None, None, None, 'MNXR73960', None, 'MNXR53357', None, None, None, None, None, None, None, None, None, None, None, None, 'MNXR60025', 'MNXR80794', 'MNXR27537', 'MNXR94833', 'MNXR94843', None, 'MNXR79613', 'MNXR5409', None, None, None, None, 'MNXR27647', 'MNXR27672', 'MNXR27651', 'MNXR27676', 'MNXR27655', 'MNXR27680', 'MNXR27635', 'MNXR27660', 'MNXR27639', 'MNXR27664', 'MNXR27643', 'MNXR27668', 'MNXR6494', 'MNXR156', None, None, None, None, None, None, 'MNXR161', 'MNXR162', None, None, 'MNXR3135', None, None, 'MNXR68183', 'MNXR27696', 'MNXR27646', 'MNXR27671', 'MNXR68184', 'MNXR27700', 'MNXR27650', 'MNXR27675', 'MNXR68185', 'MNXR27704', 'MNXR27654', 'MNXR2767

In [12]:
print(kegg_id)

[None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'R01000', None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'R04880', 'R02521', None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, Non

 ## ModelSEED query

In [13]:
SOLR_URL='https://modelseed.org'

connection = urlopen(SOLR_URL+f'/solr/reactions/select?wt=json&q=id:rxn08305&fl=name,id,formula,charge,aliases')
response = json.load(connection)
for document in response['response']['docs']:
    print(document.get('name'),document.get('id'),document.get('formula'),document.get('charge'),document.get('aliases'))  
    ms_name = document.get('name')
    ls_alias = document.get('aliases')
    ms_bigg = list(filter(lambda a: 'BiGG:' in a, document.get('aliases')))
    if len(ms_bigg)== 0:
        ms_bigg = None
    else:
        ms_bigg = list(ms_bigg)[0]
        ms_bigg = ms_bigg.replace('BiGG: ','')
    ms_kegg = list(filter(lambda a: 'KEGG:' in a, document.get('aliases')))
    if len(ms_kegg)== 0:
        ms_kegg = None
    else:
        ms_kegg = list(ms_kegg)[0]
        ms_kegg = ms_kegg.replace('KEGG: ','')
        
print(ms_bigg,ms_kegg)

1,5-Diaminopentane transport via diffusion (extracellular to periplasm) rxn08305 None None ['BiGG: DAPtex', 'iAF1260: DAPtex', 'iMA945: DAPtex', 'Name: 1,5-Diaminopentane transport via diffusion']
DAPtex None


In [None]:
'''
SOLR_URL='https://modelseed.org'
ls_name = []
ls_kegg = []
ls_bigg = []

for mseed_id in tqdm(seed_id):
    connection = urlopen(SOLR_URL+f'/solr/reactions/select?wt=json&q=id:{mseed_id}&fl=name,id,formula,charge,aliases')
    response = json.load(connection)
    for document in response['response']['docs']:
        ms_name = document.get('name')
        ms_alias = document.get('aliases')
    ms_bigg = list(filter(lambda a: 'BiGG:' in a, ms_alias))
    if len(ms_bigg)== 0:
        ms_bigg = None
    else:
        ms_bigg = list(ms_bigg)[0]
        ms_bigg = ms_bigg.replace('BiGG: ','')
    ms_kegg = list(filter(lambda a: 'KEGG:' in a, ms_alias))
    if len(ms_kegg)== 0:
        ms_kegg = None
    else:
        ms_kegg = list(ms_kegg)[0]
        ms_kegg = ms_kegg.replace('KEGG: ','') 
        
    ls_name.append(ms_name)
    ls_bigg.append(ms_bigg)
    ls_kegg.append(ms_kegg)
'''

In [None]:
print(ls_name)

In [None]:
print(ls_bigg)

In [None]:
print(ls_kegg)

In [None]:
#df_rxn['Name'] = ls_name 

## MetaNetX query

## BiGG query

## KEGG query

## Substrates

In [14]:
ls_sub = []

for rxn in sim.reactions:
    sub = list(sim.get_substrates(rxn).keys())
    ls_sub.append(sub)
    
df_rxn["Substrates"] = ls_sub   

df_rxn

Unnamed: 0,Reaction,ModelSEED_id,MetaNetX,KEGG_id,ecNumber,Substrates
0,15DAPtex,rxn08305,,,,[15dap[e]]
1,15DAPtpp,,,,,[15dap[c]]
2,1HIBUPGLU_St2pp,,,,,"[1hibupglu_S[p], h[p]]"
3,1HIBUPGLU_Stex,,,,,[1hibupglu_S[e]]
4,1HIBUP_S_GLCAASE,,,,,"[1hibupglu_S[c], h2o[c]]"
...,...,...,...,...,...,...
3581,rtranscription,rxn13784,,,,[]
3582,sink_dmbzid,,,,,[dmbzid[c]]
3583,sink_s,,,,,[s[c]]
3584,sink_thissh[c],,,,,[thissh[c]]


In [15]:
sub_na = df_rxn['Substrates'].values.tolist()

ls_sub = []
ls_smile =[]


#print(sub_na)

for sub_l in tqdm(sub_na):
    sub_ls_sub = []
    sub_ls_smile = []
    for sub_s in sub_l:
        for sub in sub_s:
            sub_name = sim.get_metabolite(sub).get('name')
            smile = get_smiles(sub_name)
            sub_ls_smile.append(smile)
            sub_ls_sub.append(sub_name)
            #print(sub_ls_sub)
            #print(sub_ls_smile)
        ls_sub.append(sub_ls_sub)
        ls_smile.append(sub_ls_smile)

df_rxn['Substrate Name'] = ls_sub
df_rxn['Substrate SMILE'] = ls_smile

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3586/3586 [1:24:04<00:00,  1.41s/it]


In [27]:
df_rxn.to_csv('../data/rxn_data.csv')
df_rxn

Unnamed: 0,Reaction,ModelSEED_id,MetaNetX,KEGG_id,ecNumber,Substrates,Substrate Name,Substrate SMILE
0,15DAPtex,rxn08305,,,,[15dap[e]],"[1,5-Diaminopentane]",[C(CCN)CCN]
1,15DAPtpp,,,,,[15dap[c]],"[1,5-Diaminopentane]",[C(CCN)CCN]
2,1HIBUPGLU_St2pp,,,,,"[1hibupglu_S[p], h[p]]","[1-hydroxy S-ibuprofen-glucuronide, proton]","[None, [H+]]"
3,1HIBUPGLU_Stex,,,,,[1hibupglu_S[e]],[1-hydroxy S-ibuprofen-glucuronide],[None]
4,1HIBUP_S_GLCAASE,,,,,"[1hibupglu_S[c], h2o[c]]","[1-hydroxy S-ibuprofen-glucuronide, Water]","[None, O]"
...,...,...,...,...,...,...,...,...
3581,rtranscription,rxn13784,,,,[],[],[]
3582,sink_dmbzid,,,,,[dmbzid[c]],"[5,6-Dimethylbenzimidazole]",[CC1=CC2=C(C=C1C)N=CN2]
3583,sink_s,,,,,[s[c]],[Sulfur],[[S]]
3584,sink_thissh[c],,,,,[thissh[c]],[Thiocarboxy-[ThiS protein]],[None]


## BRENDA query

In [33]:
kcat_ls = []

for ec in df_rxn['ecNumber'].values.tolist():
    sub_kcat_ls = []
    for i in range(len(ec)):
        ec_n = ec[i]
        kcat = kcat = brenda_query(user = 'pg45962@uminho.pt',password='Mentafrio+15',ecNumber=ec_n,organism = 'Escherichia coli')
        sub_kcat_ls.append(kcat)
    kcat_ls.append(sub_kcat_ls)        

None
None
None
None
None
None
None
None
None
None
None
None
3.5.4.22
1.1.1.86
3.1.4.16
3.1.4.16
3.1.4.16
3.1.4.16
1.1.1.86, 5.4.99.3
None
None
4.2.1.43
4.1.2.20
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
1.1.1.27
None
None
None
None
None
None
None
None
4.2.1.44
None
2.3.1.180
None
None
None
None
None
None
None
None
None
None
None
None
1.14.13.-
1.14.13.-
4.2.99.20,2.2.1.9
1.1.1.1
1.13.11.27
None
1.2.4.2
1.1.1.130
None
None
None
None
4.2.1.59
4.2.1.59
4.2.1.59
4.2.1.59
4.2.1.59
4.2.1.59
4.2.1.59
4.2.1.59
4.2.1.59
4.2.1.59
4.2.1.59
4.2.1.59
5.1.2.3
1.14.13.-
None
None
None
None
None
None
1.1.1.59
1.14.13.-
None
None
2.3.3.13,4.1.3.12
None
None
1.1.1.100
1.1.1.100
1.1.1.100
1.1.1.100
1.1.1.100
1.1.1.100
1.1.1.100
1.1.1.100
1.1.1.100
1.1.1.100
1.1.1.100
1.1.1.100
1.1.1.100
1.1.1.100
1.1.1.100
1.1.1.100
1.1.1.100
1.1.1.100
1.1.1.100
1.1.1.100
1.1.1.100
1.1.1.100
1.1.1.100
1.1.1.100
1.1.1.100
1.1.1.100
2.3.1.41
2.3.1.41
2.3.1.41
2.3.1.41
2.3.1.41
2.3.1.41

## DLKcat - Kcat prediction