In [1]:
import cobra
import re
import gurobipy
from cobra.flux_analysis.gapfilling import GapFiller
from Gapfilling import *
import os
from cobra.io import read_sbml_model
import pandas as pd
import matplotlib as plt
from cobra import exceptions

In [2]:
x = read_sbml_model("BiGG_files/e_coli_core.xml")

Using license file /home/fco/gurobi.lic
Academic license - for non-commercial use only - expires 2021-06-01


In [3]:
r1 = x.reactions.PFK
r2 = x.reactions.ATPS4r
r3 = x.reactions.CO2t
r4 = x.reactions.EX_h_e
r5 = x.reactions.SUCCt2_2

1. the splitted lists must have the same length
2. the metabolites in each one must be the same (besides compartments)

In [4]:
def is_transport(reaction, compartments_list, all_compounds = False, ignore_H = False):
    """
    Takes a cobra model reaction as input and determines if it is a transport reaction.
    all_compounds = True -> both sides of the reaction must be the same (besides compartment)
    all_compounds = False -> at least one compound must be in both sides
    ignore_H = True -> 
    """
    # left part of the reaction
    r = list(str(x) for x in reaction.reactants)
    # right part
    p = list(str(x) for x in reaction.products)
    #if len(r) != len(p):
    #    return False
    c = compartments_list
    # removing terminations
    R = []
    for i in r:
        for e in c:
            i = i.replace(e, "") 
        R.append(i)
    P = []
    for i in p:
        for e in c:
            i = i.replace(e, "")
        P.append(i)
    # we sort the lists to avoid missing transport reactions where the sequence of the compounds is not maintained
    R = sorted(R)
    P = sorted(P)
    if all_compounds == False:
        if ignore_H == True:
            if "h" in R:
                R.remove("h")
            if "h" in P:
                P.remove("h")
        for i in R:
            if i in P:
                return True
            else:
                return False
    else:
        if R == P:
            return True
        else:
            return False

Some points to take into account:
 - this function doesn't assure that reagents and products are into different compartments; it just detects if the compounds are the same
 - transport reactions that involve unilateral metabolites aren't added
 - exchange reactions aren't added (there's another function for that purpose)
 - this function doesn't consider the stoichiometric coefficients 

In [5]:
# All compounds
for reaction in x.reactions:
    print(reaction, is_transport(reaction, ["_c", "_e", "_p"], all_compounds = True))

PFK: atp_c + f6p_c --> adp_c + fdp_c + h_c False
PFL: coa_c + pyr_c --> accoa_c + for_c False
PGI: g6p_c <=> f6p_c False
PGK: 3pg_c + atp_c <=> 13dpg_c + adp_c False
PGL: 6pgl_c + h2o_c --> 6pgc_c + h_c False
ACALD: acald_c + coa_c + nad_c <=> accoa_c + h_c + nadh_c False
AKGt2r: akg_e + h_e <=> akg_c + h_c True
PGM: 2pg_c <=> 3pg_c False
PIt2r: h_e + pi_e <=> h_c + pi_c True
ALCD2x: etoh_c + nad_c <=> acald_c + h_c + nadh_c False
ACALDt: acald_e <=> acald_c True
ACKr: ac_c + atp_c <=> actp_c + adp_c False
PPC: co2_c + h2o_c + pep_c --> h_c + oaa_c + pi_c False
ACONTa: cit_c <=> acon_C_c + h2o_c False
ACONTb: acon_C_c + h2o_c <=> icit_c False
ATPM: atp_c + h2o_c --> adp_c + h_c + pi_c False
PPCK: atp_c + oaa_c --> adp_c + co2_c + pep_c False
ACt2r: ac_e + h_e <=> ac_c + h_c True
PPS: atp_c + h2o_c + pyr_c --> amp_c + 2.0 h_c + pep_c + pi_c False
ADK1: amp_c + atp_c <=> 2.0 adp_c False
AKGDH: akg_c + coa_c + nad_c --> co2_c + nadh_c + succoa_c False
ATPS4r: adp_c + 4.0 h_e + pi_c <=> at

FN:
- GLNabc

In [6]:
# At least one compound
for reaction in x.reactions:
    print(reaction, is_transport(reaction, ["_c", "_e", "_p"], all_compounds = False))

PFK: atp_c + f6p_c --> adp_c + fdp_c + h_c False
PFL: coa_c + pyr_c --> accoa_c + for_c False
PGI: g6p_c <=> f6p_c False
PGK: 3pg_c + atp_c <=> 13dpg_c + adp_c False
PGL: 6pgl_c + h2o_c --> 6pgc_c + h_c False
ACALD: acald_c + coa_c + nad_c <=> accoa_c + h_c + nadh_c False
AKGt2r: akg_e + h_e <=> akg_c + h_c True
PGM: 2pg_c <=> 3pg_c False
PIt2r: h_e + pi_e <=> h_c + pi_c True
ALCD2x: etoh_c + nad_c <=> acald_c + h_c + nadh_c False
ACALDt: acald_e <=> acald_c True
ACKr: ac_c + atp_c <=> actp_c + adp_c False
PPC: co2_c + h2o_c + pep_c --> h_c + oaa_c + pi_c False
ACONTa: cit_c <=> acon_C_c + h2o_c False
ACONTb: acon_C_c + h2o_c <=> icit_c False
ATPM: atp_c + h2o_c --> adp_c + h_c + pi_c False
PPCK: atp_c + oaa_c --> adp_c + co2_c + pep_c False
ACt2r: ac_e + h_e <=> ac_c + h_c True
PPS: atp_c + h2o_c + pyr_c --> amp_c + 2.0 h_c + pep_c + pi_c False
ADK1: amp_c + atp_c <=> 2.0 adp_c False
AKGDH: akg_c + coa_c + nad_c --> co2_c + nadh_c + succoa_c False
ATPS4r: adp_c + 4.0 h_e + pi_c <=> at

FP:
- CYTBD
- NADH16

FN:
- GLNabc



In [7]:
# At least one compound; ignoring protons
for reaction in x.reactions:
    print(reaction, is_transport(reaction, ["_c", "_e", "_p"], all_compounds = False, ignore_H = True))

PFK: atp_c + f6p_c --> adp_c + fdp_c + h_c False
PFL: coa_c + pyr_c --> accoa_c + for_c False
PGI: g6p_c <=> f6p_c False
PGK: 3pg_c + atp_c <=> 13dpg_c + adp_c False
PGL: 6pgl_c + h2o_c --> 6pgc_c + h_c False
ACALD: acald_c + coa_c + nad_c <=> accoa_c + h_c + nadh_c False
AKGt2r: akg_e + h_e <=> akg_c + h_c True
PGM: 2pg_c <=> 3pg_c False
PIt2r: h_e + pi_e <=> h_c + pi_c True
ALCD2x: etoh_c + nad_c <=> acald_c + h_c + nadh_c False
ACALDt: acald_e <=> acald_c True
ACKr: ac_c + atp_c <=> actp_c + adp_c False
PPC: co2_c + h2o_c + pep_c --> h_c + oaa_c + pi_c False
ACONTa: cit_c <=> acon_C_c + h2o_c False
ACONTb: acon_C_c + h2o_c <=> icit_c False
ATPM: atp_c + h2o_c --> adp_c + h_c + pi_c False
PPCK: atp_c + oaa_c --> adp_c + co2_c + pep_c False
ACt2r: ac_e + h_e <=> ac_c + h_c True
PPS: atp_c + h2o_c + pyr_c --> amp_c + 2.0 h_c + pep_c + pi_c False
ADK1: amp_c + atp_c <=> 2.0 adp_c False
AKGDH: akg_c + coa_c + nad_c --> co2_c + nadh_c + succoa_c False
ATPS4r: adp_c + 4.0 h_e + pi_c <=> at

FN:
- GLNabc

In [8]:
x.reactions.CYTBD

0,1
Reaction identifier,CYTBD
Name,Cytochrome oxidase bd (ubiquinol-8: 2 protons)
Memory address,0x07f0a8ccd38b0
Stoichiometry,2.0 h_c + 0.5 o2_c + q8h2_c --> h2o_c + 2.0 h_e + q8_c  2.0 H+ + 0.5 O2 O2 + Ubiquinol-8 --> H2O H2O + 2.0 H+ + Ubiquinone-8
GPR,( b0978 and b0979 ) or ( b0733 and b0734 )
Lower bound,0.0
Upper bound,1000.0


In [9]:
x.reactions.NADH16

0,1
Reaction identifier,NADH16
Name,NADH dehydrogenase (ubiquinone-8 & 3 protons)
Memory address,0x07f0a8cbebbb0
Stoichiometry,4.0 h_c + nadh_c + q8_c --> 3.0 h_e + nad_c + q8h2_c  4.0 H+ + Nicotinamide adenine dinucleotide - reduced + Ubiquinone-8 --> 3.0 H+ + Nicotinamide adenine dinucleotide + Ubiquinol-8
GPR,b2276 and b2277 and b2278 and b2279 and b2280 and b2281 and b2282 and b2283 and b2284 and b2285 a...
Lower bound,0.0
Upper bound,1000.0


In [10]:
print(r1)
is_transport(r1, ['_e', '_c'])

PFK: atp_c + f6p_c --> adp_c + fdp_c + h_c


False

In [11]:
print(r2)
is_transport(r2, ['_e', '_c'])

ATPS4r: adp_c + 4.0 h_e + pi_c <=> atp_c + h2o_c + 3.0 h_c


False

In [12]:
print(r3)
is_transport(r3, ['_e', '_c'])

CO2t: co2_e <=> co2_c


True

In [13]:
print(r4)
is_transport(r4, ['_e', '_c'])

EX_h_e: h_e <=> 


False

In [14]:
print(r5)
is_transport(r5, ['_e', '_c'])

SUCCt2_2: 2.0 h_e + succ_e --> 2.0 h_c + succ_c


True

In [15]:
def add_transport(model, template):
    """
    Adds transport reactions from a template which metabolites are present in the model.
    """
    # PREPARATION
    # template compartments
    t_compartments = list(template.compartments.keys())
    t_Compartments = []
    for i in t_compartments:
        t_Compartments.append("_" + str(i))
    # getting compartments and metabolites from query model for further use
    m_compartments = list(model.compartments.keys())
    m_Compartments = []
    for i in m_compartments:
        m_Compartments.append("_" + str(i))
    m_metabolites = list(str(x) for x in model.metabolites)
    # removing suffixes from metabolites
    m_Metabolites = []
    for i in m_metabolites:
        for e in m_Compartments:
            i = i.replace(e, "")
        m_Metabolites.append(i)
    # sorting and removing duplicates
    m_Metabolites = list(dict.fromkeys(m_Metabolites))
    # REACTION ADDING
    for reaction in template.reactions:
        if is_transport(reaction, t_Compartments, all_compounds=False, ignore_H=True) and reaction not in model.reactions:
            # we will only use reactants as they'll be the same as products (apart from location)
            m = list(str(x) for x in reaction.reactants)
            M = []
            for i in m:
                for e in t_Compartments:
                    i = i.replace(e, "")
                M.append(i)
            if all(x in m_Metabolites for x in M):  
                model.add_reaction(reaction.copy())
    return model     

In [16]:
y = read_sbml_model("BiGG_files/iJN1463.xml")

In [17]:
x

0,1
Name,e_coli_core
Memory address,0x07f0aa4b6c1f0
Number of metabolites,72
Number of reactions,95
Number of groups,0
Objective expression,1.0*BIOMASS_Ecoli_core_w_GAM - 1.0*BIOMASS_Ecoli_core_w_GAM_reverse_712e5
Compartments,"extracellular space, cytosol"


In [18]:
y

0,1
Name,iJN1463
Memory address,0x07f0a8cc888b0
Number of metabolites,2153
Number of reactions,2927
Number of groups,0
Objective expression,1.0*BIOMASS_KT2440_WT3 - 1.0*BIOMASS_KT2440_WT3_reverse_d86d5
Compartments,"cytosol, extracellular space, periplasm"


In [19]:
model = add_transport(x, y)

In [20]:
model

0,1
Name,e_coli_core
Memory address,0x07f0aa4b6c1f0
Number of metabolites,100
Number of reactions,139
Number of groups,0
Objective expression,1.0*BIOMASS_Ecoli_core_w_GAM - 1.0*BIOMASS_Ecoli_core_w_GAM_reverse_712e5
Compartments,"extracellular space, cytosol, p"


In [21]:
# TESTING
x = read_sbml_model("BiGG_files/e_coli_core.xml")
y = read_sbml_model("BiGG_files/iEC1344_C.xml")

In [22]:
x

0,1
Name,e_coli_core
Memory address,0x07f0a6a494070
Number of metabolites,72
Number of reactions,95
Number of groups,0
Objective expression,1.0*BIOMASS_Ecoli_core_w_GAM - 1.0*BIOMASS_Ecoli_core_w_GAM_reverse_712e5
Compartments,"extracellular space, cytosol"


In [23]:
x.optimize().objective_value

0.8739215069684302

In [24]:
x_gp, reactions = homology_gapfilling(x, [y])

Read LP format model from file /tmp/tmpwmt2u7zc.lp
Reading time = 0.00 seconds
: 72 rows, 190 columns, 720 nonzeros
Read LP format model from file /tmp/tmp23im9cy8.lp
Reading time = 0.00 seconds
: 72 rows, 190 columns, 720 nonzeros
Read LP format model from file /tmp/tmpc1as3nmw.lp
Reading time = 0.01 seconds
: 1934 rows, 5452 columns, 21416 nonzeros
Read LP format model from file /tmp/tmpjpfymt50.lp
Reading time = 0.01 seconds
: 1934 rows, 5452 columns, 21416 nonzeros


In [25]:
x_gp

0,1
Name,e_coli_core
Memory address,0x07f0a6a494070
Number of metabolites,72
Number of reactions,95
Number of groups,0
Objective expression,1.0*BIOMASS_Ecoli_core_w_GAM - 1.0*BIOMASS_Ecoli_core_w_GAM_reverse_712e5
Compartments,"extracellular space, cytosol"


In [26]:
x_gp.optimize().objective_value

0.8739215069684302

In [27]:
x_t = add_transport(x, y)

In [28]:
x_t

0,1
Name,e_coli_core
Memory address,0x07f0a6a494070
Number of metabolites,103
Number of reactions,148
Number of groups,0
Objective expression,1.0*BIOMASS_Ecoli_core_w_GAM - 1.0*BIOMASS_Ecoli_core_w_GAM_reverse_712e5
Compartments,"extracellular space, cytosol, p"


In [29]:
x_t.optimize().objective_value

1.3159193341448168

In [30]:
x_t_gp, reactions = homology_gapfilling(x_t, [y])

Read LP format model from file /tmp/tmpaoaz1jpq.lp
Reading time = 0.00 seconds
: 103 rows, 296 columns, 1016 nonzeros
Read LP format model from file /tmp/tmpnbqfeld4.lp
Reading time = 0.00 seconds
: 103 rows, 296 columns, 1016 nonzeros
Read LP format model from file /tmp/tmp3o9gcxis.lp
Reading time = 0.01 seconds
: 1934 rows, 5452 columns, 21416 nonzeros
Read LP format model from file /tmp/tmpa01o9kmb.lp
Reading time = 0.01 seconds
: 1934 rows, 5452 columns, 21416 nonzeros


In [31]:
x_t_gp

0,1
Name,e_coli_core
Memory address,0x07f0a6a494070
Number of metabolites,103
Number of reactions,148
Number of groups,0
Objective expression,1.0*BIOMASS_Ecoli_core_w_GAM - 1.0*BIOMASS_Ecoli_core_w_GAM_reverse_712e5
Compartments,"extracellular space, cytosol, p"


In [56]:
x_t_gp.optimize().objective_value

1.3159193341448168