In [1]:
import pandas as pd
import os
import cobra
import cobra.test
from cobra.io import read_sbml_model
import re
import copy as cp
from cobra.flux_analysis import gapfill
import gurobipy
import warnings

In [2]:
# Just for testing
def load_query_model_file(file, obj = None):
    """
    Reads a XML SBML file, returning a model object. 
    Needs cobra.io.
    If obj parameter is specified, changes the objective of the model.
    """
    model = read_sbml_model(file)
    if obj != None:
        model.objective = obj
        return model
    return model
    

In [3]:
cobra_config = cobra.Configuration()

In [44]:
# solvers: glpk, glpk-exact, scipy, gurobi
cobra_config.solver = 'glpk'

In [51]:
# Test
x = load_query_model_file("iEC1344_C.xml")

In [6]:
def load_query_model(model, obj = None):
    """
    Loads a model object. Changes objective if specified.
    """
    # Error with deepcopy when gurobi solver set
    model = cp.deepcopy(model)
    if obj != None:
        model.objective = obj
        return model
    return model

In [52]:
# Tests
y = load_query_model(x)
y

0,1
Name,iEC1344_C
Memory address,0x07f03546999d0
Number of metabolites,1934
Number of reactions,2726
Number of groups,0
Objective expression,1.0*BIOMASS_Ec_iJO1366_core_53p95M - 1.0*BIOMASS_Ec_iJO1366_core_53p95M_reverse_5c8b1
Compartments,"cytosol, periplasm, extracellular space"


In [53]:
z = load_query_model(x, obj = "2MAHMP")
z

0,1
Name,iEC1344_C
Memory address,0x07f035e785700
Number of metabolites,1934
Number of reactions,2726
Number of groups,0
Objective expression,1.0*2MAHMP - 1.0*2MAHMP_reverse_e4fd2
Compartments,"cytosol, periplasm, extracellular space"


In [9]:
# If deepcopy works, y and z objectives should differ
y

0,1
Name,iEC1344_C
Memory address,0x07f041010e160
Number of metabolites,1934
Number of reactions,2726
Number of groups,0
Objective expression,1.0*BIOMASS_Ec_iJO1366_core_53p95M - 1.0*BIOMASS_Ec_iJO1366_core_53p95M_reverse_5c8b1
Compartments,"cytosol, periplasm, extracellular space"


In [10]:
def load_template_models(template_list, obj = None):
    """
    Takes a list of template models and changes objective if specified.
    Objective can be either biomass or a specific reaction.
    It also returns a list with the models which objective couldn't be changed.
    """
    # deepcopy doesn't work when solver = gurobi
    templates = cp.deepcopy(template_list)
    templates = template_list
    failures = []
    if obj == None:
        return templates, failures
    if obj == "biomass":
        b = re.compile("biomass", re.IGNORECASE)
        bc = re.compile("(biomass){1}.*(core){1}", re.IGNORECASE) 
        for model in templates:
            reactions = [reaction.id for reaction in model.reactions]
            # Searching for a biomass_core reaction
            core = list(filter(bc.match, reactions))
            if core:
                model.objective = core[0]
                continue
            # Searching for a non core biomass reaction
            biomass = list(filter(b.match, reactions))
            if biomass:
                model.objective = biomass[0]
            # If biomass reactions are not found, the model name is stored into "failures" list.
            # The model won't change its objective, but it will be used anyway for gap filling
            else:
                failures.append(model.name)
        return templates, failures
    if obj != None and obj != "biomass":
        for model in templates:
            try:
                model.objective = obj
            except ValueError:
                failures.append(model.name)
        return templates, failures

In [54]:
# Function testing
templ = [x, y, z]
# The objective for x and y is biomass, as for z is the reaction 2MAHMP
A, failures = load_template_models(templ)
A[0]

0,1
Name,iEC1344_C
Memory address,0x07f037e436040
Number of metabolites,1934
Number of reactions,2726
Number of groups,0
Objective expression,1.0*BIOMASS_Ec_iJO1366_core_53p95M - 1.0*BIOMASS_Ec_iJO1366_core_53p95M_reverse_5c8b1
Compartments,"cytosol, periplasm, extracellular space"


In [12]:
A[2]

0,1
Name,iEC1344_C
Memory address,0x07f03f2317250
Number of metabolites,1934
Number of reactions,2726
Number of groups,0
Objective expression,1.0*2MAHMP - 1.0*2MAHMP_reverse_e4fd2
Compartments,"cytosol, periplasm, extracellular space"


In [13]:
failures

[]

In [14]:
B, failures = load_template_models(templ, obj = "biomass")
B[2]

0,1
Name,iEC1344_C
Memory address,0x07f03f2317250
Number of metabolites,1934
Number of reactions,2726
Number of groups,0
Objective expression,1.0*BIOMASS_Ec_iJO1366_core_53p95M - 1.0*BIOMASS_Ec_iJO1366_core_53p95M_reverse_5c8b1
Compartments,"cytosol, periplasm, extracellular space"


In [15]:
C, failures = load_template_models(templ, obj = "2MAHMP")
C[0]

0,1
Name,iEC1344_C
Memory address,0x07f04101718b0
Number of metabolites,1934
Number of reactions,2726
Number of groups,0
Objective expression,1.0*2MAHMP - 1.0*2MAHMP_reverse_e4fd2
Compartments,"cytosol, periplasm, extracellular space"


In [16]:
C[2]

0,1
Name,iEC1344_C
Memory address,0x07f03f2317250
Number of metabolites,1934
Number of reactions,2726
Number of groups,0
Objective expression,1.0*2MAHMP - 1.0*2MAHMP_reverse_e4fd2
Compartments,"cytosol, periplasm, extracellular space"


In [17]:
D, failures = load_template_models(templ, obj = "qwerty")
D[0]

0,1
Name,iEC1344_C
Memory address,0x07f04101718b0
Number of metabolites,1934
Number of reactions,2726
Number of groups,0
Objective expression,1.0*2MAHMP - 1.0*2MAHMP_reverse_e4fd2
Compartments,"cytosol, periplasm, extracellular space"


In [18]:
failures
# model.name doesn't work

['', '', '']

In [19]:
x.optimize().objective_value

231.85

In [20]:
# Gapfilling test

In [20]:
cobra_config.solver = 'gurobi'

In [55]:
model = load_query_model_file("iJN746.xml")

In [32]:
model.optimize().objective_value

1.4

In [22]:
model.summary()

Metabolite,Reaction,Flux,C-Number,C-Flux
fe2_e,EX_fe2_e,0.0014,0,0.00%
glc__D_e,EX_glc__D_e,8.262,6,87.72%
nh4_e,EX_nh4_e,14.44,0,0.00%
o2_e,EX_o2_e,4.065,0,0.00%
pi_e,EX_pi_e,1.072,0,0.00%
so4_e,EX_so4_e,0.3263,0,0.00%
mhpglu_c,SK_5mthglu_c,0.2744,25,12.14%
dna5mtc_c,SK_dna5mtc_c,0.007,11,0.14%

Metabolite,Reaction,Flux,C-Number,C-Flux
co2_e,EX_co2_e,-2.919,1,28.15%
glyclt_e,EX_glyclt_e,-0.07,2,1.35%
h2o_e,EX_h2o_e,-30.52,0,0.00%
h_e,EX_h_e,-13.31,0,0.00%
hco3_e,EX_hco3_e,-0.6526,1,6.29%
dna_c,SK_dna_c,-0.007,10,0.68%
hpglu_c,SK_thglu_c,-0.2744,24,63.52%


In [23]:
universal = cobra.Model("universal_reactions")
for i in [i.id for i in model.metabolites.glc__D_e.reactions]:
    reaction = model.reactions.get_by_id(i)
    universal.add_reaction(reaction.copy())
    model.remove_reactions([reaction])


In [24]:
value = model.optimize().objective_value



In [25]:
print(value)

None


In [26]:
solution = gapfill(model, universal, demand_reactions=False)

Read LP format model from file /tmp/tmpuiq93n_r.lp
Reading time = 0.01 seconds
: 907 rows, 2104 columns, 8796 nonzeros
Read LP format model from file /tmp/tmp25xnroie.lp
Reading time = 0.01 seconds
: 907 rows, 2104 columns, 8796 nonzeros
Read LP format model from file /tmp/tmp9r582osl.lp
Reading time = 0.00 seconds
: 2 rows, 4 columns, 6 nonzeros
Read LP format model from file /tmp/tmpxedpufbb.lp
Reading time = 0.00 seconds
: 2 rows, 4 columns, 6 nonzeros


In [27]:
for reaction in solution[0]:
    print(reaction.id)

GLCtex
EX_glc__D_e


In [28]:
# solution is a list with the reactions needed to make the model work
type(solution)

list

In [29]:
for reaction in solution[0]:
    model.add_reaction(reaction.copy())

In [30]:
model.optimize().objective_value

1.4

In [38]:
model.summary()

Metabolite,Reaction,Flux,C-Number,C-Flux
fe2_e,EX_fe2_e,0.0014,0,0.00%
glc__D_e,EX_glc__D_e,8.262,6,87.72%
nh4_e,EX_nh4_e,14.44,0,0.00%
o2_e,EX_o2_e,4.065,0,0.00%
pi_e,EX_pi_e,1.072,0,0.00%
so4_e,EX_so4_e,0.3263,0,0.00%
mhpglu_c,SK_5mthglu_c,0.2744,25,12.14%
dna5mtc_c,SK_dna5mtc_c,0.007,11,0.14%

Metabolite,Reaction,Flux,C-Number,C-Flux
co2_e,EX_co2_e,-2.919,1,28.15%
glyclt_e,EX_glyclt_e,-0.07,2,1.35%
h2o_e,EX_h2o_e,-30.52,0,0.00%
h_e,EX_h_e,-13.31,0,0.00%
hco3_e,EX_hco3_e,-0.6526,1,6.29%
dna_c,SK_dna_c,-0.007,10,0.68%
hpglu_c,SK_thglu_c,-0.2744,24,63.52%


In [41]:
# first draft
def homology_gapfilling(model, templates, model_obj = None, template_obj = None):
    """
    Performs gap filling on a model using homology models as templates
    """
    model = load_query_model(model, obj = model_obj)
    model.solver = 'gurobi'
    templates, template_failures = load_template_models(templates, obj = template_obj)
    # This dict will store used models and reactions
    added_reactions = {}
    # Initial flux value
    value = model.optimize().objective_value
    if value == None:
        value = 0.0
    for template in templates:
        template.solver = 'gurobi'
        # result will store the reactions ids
        result = gapfill(model, template, demand_reactions=False)
        # dict
        reactions = [reaction.id for reaction in result[0]]
        # template.name does not work. Must find a solution to store the name of the used templates
        added_reactions[template.name] = reactions
        # Adding reactions to the model
        [model.add_reaction(reaction.copy()) for reaction in result[0]]
        # Flux will be evaluated here
        new_value = model.optimize().objective_value
        if new_value != None and new_value > value:
            value = new_value
        elif new_value == None:
            continue
        elif new_value != None and new_value == value:
            break
    return model, added_reactions

In [56]:
# model and templ default solver should be glpk in order to avoid deepcopy error
X = homology_gapfilling(model, templ)

Read LP format model from file /tmp/tmpzlrb3pc8.lp
Reading time = 0.00 seconds
: 907 rows, 2108 columns, 8802 nonzeros
Read LP format model from file /tmp/tmp0o9n402z.lp
Reading time = 0.02 seconds
: 907 rows, 2108 columns, 8802 nonzeros
Read LP format model from file /tmp/tmpuoqos0cz.lp
Reading time = 0.01 seconds
: 1934 rows, 5452 columns, 21416 nonzeros
Read LP format model from file /tmp/tmpi7_7okpf.lp
Reading time = 0.01 seconds
: 1934 rows, 5452 columns, 21416 nonzeros


In [59]:
X[0].optimize().objective_value

1.4