In [1]:
from Gapfilling import *
import re
import gurobipy
import cobra
from cobra.flux_analysis.gapfilling import GapFiller
from cobra.io import read_sbml_model
from cobra import exceptions
from contextlib import contextmanager
import sys, os
import pandas as pd

In [2]:
# Function for avoiding stdout
@contextmanager
def suppress_stdout():
    """Temporarily discard everything written to ``sys.stdout``.

    While the ``with`` body runs, ``sys.stdout`` is replaced by a write
    handle on ``os.devnull``. On exit (normal or through an exception) the
    previous ``sys.stdout`` object is put back and the handle is closed.
    """
    sink = open(os.devnull, "w")
    try:
        saved_stream = sys.stdout
        sys.stdout = sink
        try:
            yield
        finally:
            # Restore the caller's stream before the sink is closed.
            sys.stdout = saved_stream
    finally:
        sink.close()

This notebook tests the homology_gapfilling() function as well as the following options:
- One or more templates
- Changing model/template objective
- Using all templates
- Changing the integer threshold
- Adding exchange reactions
- Adding transport reactions (3 different ways)

### Models


#### *E. coli*

In [3]:
# E. coli core
# bigg.ucsd.edu/models/e_coli_core
ECC = read_sbml_model("BiGG_files/e_coli_core.xml")
ECC

Using license file /home/fco/gurobi.lic
Academic license - for non-commercial use only - expires 2021-06-01


0,1
Name,e_coli_core
Memory address,0x07f383c3d6fa0
Number of metabolites,72
Number of reactions,95
Number of groups,0
Objective expression,1.0*BIOMASS_Ecoli_core_w_GAM - 1.0*BIOMASS_Ecoli_core_w_GAM_reverse_712e5
Compartments,"extracellular space, cytosol"


In [4]:
# E. coli str. C
# bigg.ucsd.edu/models/iEC1344_C
iEC1344_C = read_sbml_model("BiGG_files/iEC1344_C.xml")
iEC1344_C

0,1
Name,iEC1344_C
Memory address,0x07f3819d45fd0
Number of metabolites,1934
Number of reactions,2726
Number of groups,0
Objective expression,1.0*BIOMASS_Ec_iJO1366_core_53p95M - 1.0*BIOMASS_Ec_iJO1366_core_53p95M_reverse_5c8b1
Compartments,"cytosol, periplasm, extracellular space"


In [5]:
# E. coli str. W
# bigg.ucsd.edu/models/iEC1364_W
iEC1364_W = read_sbml_model("BiGG_files/iEC1364_W.xml")
iEC1364_W

0,1
Name,iEC1364_W
Memory address,0x07f383c3d69a0
Number of metabolites,1927
Number of reactions,2764
Number of groups,0
Objective expression,1.0*BIOMASS_Ec_iJO1366_core_53p95M - 1.0*BIOMASS_Ec_iJO1366_core_53p95M_reverse_5c8b1
Compartments,"cytosol, extracellular space, periplasm"


#### *P. putida*

In [6]:
# P. putida str. KT2440 
# bigg.ucsd.edu/models/iJN746
iJN746 = read_sbml_model("BiGG_files/iJN746.xml")
iJN746

0,1
Name,iJN746
Memory address,0x07f37b83776d0
Number of metabolites,907
Number of reactions,1054
Number of groups,0
Objective expression,1.0*BIOMASS_KT_TEMP - 1.0*BIOMASS_KT_TEMP_reverse_d18f7
Compartments,"extracellular space, cytosol, periplasm"


In [7]:
# P. putida str. KT2440 (full)
# bigg.ucsd.edu/models/iJN1463
iJN1463 = read_sbml_model("BiGG_files/iJN1463.xml")
iJN1463

0,1
Name,iJN1463
Memory address,0x07f3819d45bb0
Number of metabolites,2153
Number of reactions,2927
Number of groups,0
Objective expression,1.0*BIOMASS_KT2440_WT3 - 1.0*BIOMASS_KT2440_WT3_reverse_d86d5
Compartments,"cytosol, extracellular space, periplasm"


#### *C. reinhardtii*

In [8]:
# C. reinhardtii
# bigg.ucsd.edu/models/iRC1080
iRC1080 = read_sbml_model("BiGG_files/iRC1080.xml")
iRC1080

No objective coefficients in model. Unclear what should be optimized


0,1
Name,iRC1080
Memory address,0x07f37b8377d00
Number of metabolites,1706
Number of reactions,2191
Number of groups,0
Objective expression,0
Compartments,"cytosol, mitochondria, chloroplast, flagellum, peroxisome/glyoxysome, nucleus, golgi apparatus, extracellular space, eyespot, thylakoid"


#### *B. subtilis*

In [9]:
# B. subtilis subsp. subtilis str. 168
# bigg.ucsd.edu/models/iYO844
# NOTE(review): the variable below is spelled with a zero ("iY0844") while the
# file name and model id use the letter O ("iYO844"). Later cells reference the
# zero spelling, so any rename must be applied to all of them at once.
iY0844 = read_sbml_model("BiGG_files/iYO844.xml")
iY0844

0,1
Name,iYO844
Memory address,0x07f3819d45f10
Number of metabolites,990
Number of reactions,1250
Number of groups,0
Objective expression,1.0*BIOMASS_BS_10 - 1.0*BIOMASS_BS_10_reverse_8788b
Compartments,"cytosol, extracellular space"


#### *S. cerevisiae*

In [10]:
# S. cerevisiae S288C
# bigg.ucsd.edu/models/iMM904
iMM904 = read_sbml_model("BiGG_files/iMM904.xml")
iMM904

0,1
Name,iMM904
Memory address,0x07f37b3131250
Number of metabolites,1226
Number of reactions,1577
Number of groups,0
Objective expression,1.0*BIOMASS_SC5_notrace - 1.0*BIOMASS_SC5_notrace_reverse_93090
Compartments,"cytosol, extracellular space, mitochondria, peroxisome/glyoxysome, endoplasmic reticulum, vacuole, golgi apparatus, nucleus"


In [11]:
# S. cerevisiae S288C (another version)
# bigg.ucsd.edu/models/iND750
iND750 = read_sbml_model("BiGG_files/iND750.xml")
iND750

0,1
Name,iND750
Memory address,0x07f37b427f490
Number of metabolites,1059
Number of reactions,1266
Number of groups,0
Objective expression,1.0*BIOMASS_SC4_bal - 1.0*BIOMASS_SC4_bal_reverse_bb385
Compartments,"extracellular space, cytosol, mitochondria, peroxisome/glyoxysome, nucleus, golgi apparatus, vacuole, endoplasmic reticulum"


#### *M. musculus*

In [12]:
# M. musculus
# bigg.ucsd.edu/models/iMM1415
iMM1415 = read_sbml_model("BiGG_files/iMM1415.xml")
iMM1415

No objective coefficients in model. Unclear what should be optimized


0,1
Name,iMM1415
Memory address,0x07f37b2a24940
Number of metabolites,2775
Number of reactions,3726
Number of groups,0
Objective expression,0
Compartments,"cytosol, extracellular space, golgi apparatus, lysosome, mitochondria, nucleus, endoplasmic reticulum, peroxisome/glyoxysome"


### Gap filling with default settings
- integer_threshold = 1e-06
- add_transport = False
- add_exchange = False

#### Filling a simple model with itself
E. coli core model will be used as it contains just 95 reactions.

First, we need to remove some reactions from the model. At the same time we will create another model with those reactions.

In [80]:
with suppress_stdout():
    x = read_sbml_model("BiGG_files/e_coli_core.xml")
x

0,1
Name,e_coli_core
Memory address,0x07f3768890940
Number of metabolites,72
Number of reactions,95
Number of groups,0
Objective expression,1.0*BIOMASS_Ecoli_core_w_GAM - 1.0*BIOMASS_Ecoli_core_w_GAM_reverse_712e5
Compartments,"extracellular space, cytosol"


In [81]:
x.optimize().objective_value

0.8739215069684302

In [82]:
# Removing all reactions involving essential metabolite glc__D_e from `x`
# and collecting copies of them in a new template model `y`.
y = cobra.Model("glc__D_e reactions")
# Snapshot the reactions first so the metabolite's reaction collection is not
# iterated while reactions are being removed from the model.
glc_reactions = list(x.metabolites.glc__D_e.reactions)
# Add copies (not the originals) so `y` does not share reaction objects with `x`.
y.add_reactions([reaction.copy() for reaction in glc_reactions])
x.remove_reactions(glc_reactions)
y

0,1
Name,glc__D_e reactions
Memory address,0x07f378954a1f0
Number of metabolites,4
Number of reactions,2
Number of groups,0
Objective expression,0
Compartments,"e, c"


Calculating flux of the model:

In [83]:
print(x.optimize().objective_value)

None




Optimization is infeasible because we have deleted essential reactions. Carrying out gap filling:

In [84]:
with suppress_stdout():
    x_gp, added_reactions = homology_gapfilling(x, [y])

In [85]:
x_gp.optimize().objective_value

0.8739215069684302

Let's see the added reactions and some information about them.

In [86]:
pd.DataFrame(added_reactions)

Unnamed: 0,glc__D_e reactions
0,"(GLCpts, [b2416, b1101, b2417, b2415, b1819, b..."
1,"(EX_glc__D_e, Exchange reaction)"


- The title of the table shows the template name
- Each line represents an added reaction
- Each reaction is represented as a tuple with reaction name (left) and involved genes (right)

Now we will create two templates with these reactions.

In [87]:
with suppress_stdout():
    x = read_sbml_model("BiGG_files/e_coli_core.xml")

In [88]:
# Template `y` contains a copy of the single reaction GLCpts, which is then
# removed from the query model `x`.
y = cobra.Model("GLCpts")
y.add_reactions([x.reactions.GLCpts.copy()])
# remove_reactions expects a list; passing a bare Reaction emits the
# "need to pass in a list" warning.
x.remove_reactions([x.reactions.GLCpts])

  warn("need to pass in a list")


In [89]:
# Template `z` contains a copy of the single reaction EX_glc__D_e, which is
# then removed from the query model `x`.
z = cobra.Model("EX_glc__D_e")
z.add_reactions([x.reactions.EX_glc__D_e.copy()])
# remove_reactions expects a list; passing a bare Reaction emits the
# "need to pass in a list" warning.
x.remove_reactions([x.reactions.EX_glc__D_e])

In [90]:
print(y.reactions, z.reactions)

[<Reaction GLCpts at 0x7f37736c9a00>] [<Reaction EX_glc__D_e at 0x7f37718545e0>]


In [91]:
x.optimize().objective_value



In [92]:
with suppress_stdout():
    X, added_reactions = homology_gapfilling(x, [y,z])

In [93]:
X

0,1
Name,e_coli_core
Memory address,0x07f3771c57910
Number of metabolites,72
Number of reactions,93
Number of groups,0
Objective expression,1.0*BIOMASS_Ecoli_core_w_GAM - 1.0*BIOMASS_Ecoli_core_w_GAM_reverse_712e5
Compartments,"extracellular space, cytosol"


It appears that if several essential reactions (required for the model to work) are spread across different templates, they won't be added. That is reasonable considering how the algorithm works.

#### Filling a more complex model with itself
A *S. cerevisiae* model with 1266 reactions will be used.

In [94]:
with suppress_stdout():
    x = read_sbml_model("BiGG_files/iND750.xml")

In [95]:
x

0,1
Name,iND750
Memory address,0x07f37f6953dc0
Number of metabolites,1059
Number of reactions,1266
Number of groups,0
Objective expression,1.0*BIOMASS_SC4_bal - 1.0*BIOMASS_SC4_bal_reverse_bb385
Compartments,"extracellular space, cytosol, mitochondria, peroxisome/glyoxysome, nucleus, golgi apparatus, vacuole, endoplasmic reticulum"


Let's take a look at the objective reaction (biomass) so that we don't remove it accidentally.

In [96]:
x.reactions.BIOMASS_SC4_bal.metabolites

{<Metabolite 13BDglcn_c at 0x7f37709bcfa0>: -1.1348,
 <Metabolite ala__L_c at 0x7f378a1c2c40>: -0.4588,
 <Metabolite amp_c at 0x7f379053fdf0>: -0.046,
 <Metabolite arg__L_c at 0x7f37737967f0>: -0.1607,
 <Metabolite asn__L_c at 0x7f376e5a7a00>: -0.1017,
 <Metabolite asp__L_c at 0x7f3788c49dc0>: -0.2975,
 <Metabolite atp_c at 0x7f379053f4c0>: -59.276,
 <Metabolite cmp_c at 0x7f3788aacdf0>: -0.0447,
 <Metabolite cys__L_c at 0x7f3772822340>: -0.0066,
 <Metabolite damp_c at 0x7f37dda2f5b0>: -0.0036,
 <Metabolite dcmp_c at 0x7f37dda2fca0>: -0.0024,
 <Metabolite dgmp_c at 0x7f37710a7880>: -0.0024,
 <Metabolite dtmp_c at 0x7f37738efd00>: -0.0036,
 <Metabolite ergst_c at 0x7f377546e280>: -0.0007,
 <Metabolite gln__L_c at 0x7f376f6a4c40>: -0.1054,
 <Metabolite glu__L_c at 0x7f3776a0f670>: -0.3018,
 <Metabolite gly_c at 0x7f378a1c2190>: -0.2904,
 <Metabolite glycogen_c at 0x7f377546e3d0>: -0.5185,
 <Metabolite gmp_c at 0x7f378aae4070>: -0.046,
 <Metabolite h2o_c at 0x7f3773796fd0>: -59.276,
 <Met

In [97]:
x.optimize().objective_value

0.09732337590376772

In [98]:
# Move every reaction involving metabolite co2_c out of `x` and collect
# copies of them in a new template model `y`.
y = cobra.Model("co2_c reactions")
# Snapshot the reactions first so the metabolite's reaction collection is not
# iterated while reactions are being removed from the model.
co2_reactions = list(x.metabolites.co2_c.reactions)
# Add copies (not the originals) so `y` does not share reaction objects with `x`.
y.add_reactions([reaction.copy() for reaction in co2_reactions])
x.remove_reactions(co2_reactions)
y

0,1
Name,co2_c reactions
Memory address,0x07f37765c66d0
Number of metabolites,102
Number of reactions,55
Number of groups,0
Objective expression,0
Compartments,"c, x, n, e, m, v, g"


55 reactions have been taken from the model.

In [99]:
x.optimize().objective_value

0.0

In [100]:
len(x.reactions)

1211

In [101]:
with suppress_stdout():
    X, added_reactions = homology_gapfilling(x, [y])

In [102]:
X.optimize().objective_value

0.0

In [103]:
len(X.reactions)

1211

Adding exchange and transport reactions (we will get into these options later):

In [104]:
with suppress_stdout():
    X, added_reactions = homology_gapfilling(x, [y], force_transport=True, force_exchange=True)

In [105]:
X.optimize().objective_value

0.0

In [106]:
len(X.reactions)

1217

Lowering integrality threshold:

In [109]:
X, added_reactions = homology_gapfilling(x, [y], force_transport=True, force_exchange=True, integer_threshold=1e-9)

Read LP format model from file /tmp/tmpf7q31fvh.lp
Reading time = 0.03 seconds
: 1059 rows, 2434 columns, 9984 nonzeros
Read LP format model from file /tmp/tmptv43ia4f.lp
Reading time = 0.01 seconds
: 1059 rows, 2434 columns, 9984 nonzeros
Read LP format model from file /tmp/tmp55avv5k6.lp
Reading time = 0.00 seconds
: 102 rows, 110 columns, 640 nonzeros
Read LP format model from file /tmp/tmpzn90asba.lp
Reading time = 0.00 seconds
: 102 rows, 110 columns, 640 nonzeros

co2_c reactions: failed to validate gapfilled model, try lowering the integer_threshold


In [110]:
X.optimize().objective_value

0.0

In [111]:
len(X.reactions)

1217

In this case, none of the options are enough for the gap filling to be effective. The algorithm also asks for a lower integer threshold, but it cannot be lowered any further. This situation is simply too complex for this algorithm.

#### Filling a model with another
We will try to improve the biomass production of *E. coli* core model using other templates.

In [112]:
with suppress_stdout():
    x = read_sbml_model("BiGG_files/e_coli_core.xml")

In [113]:
print(x.objective)

Maximize
1.0*BIOMASS_Ecoli_core_w_GAM - 1.0*BIOMASS_Ecoli_core_w_GAM_reverse_712e5


In [114]:
x.optimize().objective_value

0.8739215069684302

In [115]:
with suppress_stdout():
    X, added_reactions = homology_gapfilling(x, [iEC1344_C, iEC1364_W])

In [116]:
pd.DataFrame(added_reactions)

Unnamed: 0,iEC1344_C


No reaction has been added through gap filling. The *E. coli* core model is probably already optimized for the best biomass production. The process stopped after the first template because templates are supposed to be sorted by homology/identity with the model, so if the first template didn't work the second one isn't expected to work either. Let's force the use of both templates just in case.

In [117]:
with suppress_stdout():
    X, added_reactions = homology_gapfilling(x, [iEC1344_C, iEC1364_W], use_all_templates = True)

In [49]:
pd.DataFrame(added_reactions)

Unnamed: 0,iEC1344_C,iEC1364_W


It didn't add any reactions either. Now we will use all available templates.

In [118]:
with suppress_stdout():
    x = read_sbml_model("BiGG_files/e_coli_core.xml")

In [119]:
T = load_template_models([iEC1344_C, iEC1364_W, iND750, iJN1463, iJN746, iMM904, iMM1415, iY0844])

In [120]:
T

[<Model iEC1344_C at 0x7f3819d45fd0>,
 <Model iEC1364_W at 0x7f383c3d69a0>,
 <Model iND750 at 0x7f37b427f490>,
 <Model iJN1463 at 0x7f3819d45bb0>,
 <Model iJN746 at 0x7f37b83776d0>,
 <Model iMM904 at 0x7f37b3131250>,
 <Model iMM1415 at 0x7f37b2a24940>,
 <Model iYO844 at 0x7f3819d45f10>]

In [121]:
with suppress_stdout():
    X, added_reactions = homology_gapfilling(x, T, use_all_templates=True)

In [122]:
pd.DataFrame(added_reactions)

Unnamed: 0,iEC1344_C,iEC1364_W,iND750,iJN1463,iMM904,iMM1415,iYO844


The only template that could not be used is iJN746.
Changing model's objective:

In [123]:
with suppress_stdout():
    X, added_reactions = homology_gapfilling(x, T, use_all_templates=True, model_obj="CO2t")

In [124]:
X.optimize().objective_value

11.104242424242424

In [125]:
pd.DataFrame(added_reactions)

Unnamed: 0,iEC1344_C,iEC1364_W,iND750,iJN1463,iMM904,iMM1415,iYO844


The objective has been successfully changed but no reaction has been added.

Next we will try the same but using a complex model as query.

In [150]:
with suppress_stdout():
    x = read_sbml_model("BiGG_files/iEC1364_W.xml")

In [151]:
print(x.objective, x.optimize().objective_value)

Maximize
1.0*BIOMASS_Ec_iJO1366_core_53p95M - 1.0*BIOMASS_Ec_iJO1366_core_53p95M_reverse_5c8b1 0.985364865731541


In [152]:
T = [iEC1344_C, iJN746, iMM904, iY0844]

In [153]:
# Gap-fill the complex query model with the templates in `T`, stopping after
# the first template (use_all_templates=False).
# The function name is lower-case, matching every other call in this notebook.
with suppress_stdout():
    X, added_reactions = homology_gapfilling(x, T, use_all_templates=False)

In [154]:
X.optimize().objective_value

0.985364865731541

In [155]:
pd.DataFrame(added_reactions)

Unnamed: 0,iEC1344_C


In [156]:
# Forcing the use of all templates.
# The function name is lower-case, matching every other call in this notebook.
with suppress_stdout():
    X, added_reactions = homology_gapfilling(x, T, use_all_templates=True)

Ignoring reaction 'BIOMASS_KT_TEMP' since it already exists.
Ignoring reaction 'BIOMASS_KT_TEMP' since it already exists.
Ignoring reaction 'BIOMASS_KT_TEMP' since it already exists.
Ignoring reaction 'BIOMASS_KT_TEMP' since it already exists.


In [157]:
X.optimize().objective_value

0.18761640097381724

In [160]:
added_reactions

{'iEC1344_C': [],
 'iJN746': [('BIOMASS_KT_TEMP', [])],
 'iMM904': [('BIOMASS_KT_TEMP', [])],
 'iYO844': [('BIOMASS_KT_TEMP', [])]}

This is strange behavior. The algorithm has filled the model with a reaction that lowers the objective value. Moreover, it seems that the templates coming after iJN746 have been turned into that one. It may be an error caused by the "use_all_templates = True" option. Let's see.

In [162]:
with suppress_stdout():
    x = read_sbml_model("BiGG_files/iEC1364_W.xml")
    T = [iJN746, iEC1344_C]
    X, added_reactions = homology_gapfilling(x, T)

Ignoring reaction 'BIOMASS_KT_TEMP' since it already exists.
Ignoring reaction 'BIOMASS_KT_TEMP' since it already exists.


In [163]:
added_reactions

{'iJN746': [('BIOMASS_KT_TEMP', [])], 'iEC1344_C': [('BIOMASS_KT_TEMP', [])]}