In [1]:
import cobra
import re
import gurobipy
from cobra.flux_analysis.gapfilling import GapFiller
from Gapfilling import *
import os
from cobra.io import read_sbml_model
import pandas as pd
import matplotlib as plt
from cobra import exceptions

In [2]:
# Load the E. coli core model from its SBML file; `x` is the model that later receives the transport reactions.
x = read_sbml_model("BiGG_files/e_coli_core.xml")


--------------------------------------------
--------------------------------------------

Using license file /home/fco/gurobi.lic
Academic license - for non-commercial use only - expires 2021-03-12


In [3]:
# Print every reaction of the model, one per line, as "id: equation".
for reaction in x.reactions:
    print(reaction)

PFK: atp_c + f6p_c --> adp_c + fdp_c + h_c
PFL: coa_c + pyr_c --> accoa_c + for_c
PGI: g6p_c <=> f6p_c
PGK: 3pg_c + atp_c <=> 13dpg_c + adp_c
PGL: 6pgl_c + h2o_c --> 6pgc_c + h_c
ACALD: acald_c + coa_c + nad_c <=> accoa_c + h_c + nadh_c
AKGt2r: akg_e + h_e <=> akg_c + h_c
PGM: 2pg_c <=> 3pg_c
PIt2r: h_e + pi_e <=> h_c + pi_c
ALCD2x: etoh_c + nad_c <=> acald_c + h_c + nadh_c
ACALDt: acald_e <=> acald_c
ACKr: ac_c + atp_c <=> actp_c + adp_c
PPC: co2_c + h2o_c + pep_c --> h_c + oaa_c + pi_c
ACONTa: cit_c <=> acon_C_c + h2o_c
ACONTb: acon_C_c + h2o_c <=> icit_c
ATPM: atp_c + h2o_c --> adp_c + h_c + pi_c
PPCK: atp_c + oaa_c --> adp_c + co2_c + pep_c
ACt2r: ac_e + h_e <=> ac_c + h_c
PPS: atp_c + h2o_c + pyr_c --> amp_c + 2.0 h_c + pep_c + pi_c
ADK1: amp_c + atp_c <=> 2.0 adp_c
AKGDH: akg_c + coa_c + nad_c --> co2_c + nadh_c + succoa_c
ATPS4r: adp_c + 4.0 h_e + pi_c <=> atp_c + h2o_c + 3.0 h_c
PTAr: accoa_c + pi_c <=> actp_c + coa_c
PYK: adp_c + h_c + pep_c --> atp_c + pyr_c
BIOMASS_Ecoli_cor

In [9]:
# Sample reactions (looked up by id) used further down to exercise the transport check.
r1 = x.reactions.PFK
r2 = x.reactions.ATPS4r
r3 = x.reactions.CO2t
r4 = x.reactions.EX_h_e
r5 = x.reactions.SUCCt2_2

In [10]:
# Metabolite -> stoichiometric coefficient mapping (negative: consumed, positive: produced).
r5.metabolites

{<Metabolite h_e at 0x7f5d0d290d30>: -2.0,
 <Metabolite succ_e at 0x7f5d952392e0>: -1.0,
 <Metabolite h_c at 0x7f5d0d290bb0>: 2.0,
 <Metabolite succ_c at 0x7f5d952393d0>: 1.0}

In [11]:
# Metabolite ids of the reaction as plain strings.
l = list(map(str, r5.metabolites))
l

['h_e', 'succ_e', 'h_c', 'succ_c']

In [12]:
# Number of metabolites taking part in the reaction.
length = len(l)
length

4

In [8]:
# Midpoint index (integer division) used to cut the metabolite list into two halves.
half = length//2
half

2

In [10]:
# Cut the metabolite list at the midpoint: first half and second half.
l1, l2 = l[:half], l[half:]
print(l1, l2)

['h_e', 'succ_e'] ['h_c', 'succ_c']


1. The two split lists must have the same length.
2. Both lists must contain the same metabolites (ignoring the compartment suffix).

In [12]:
# Compartment id -> compartment name mapping of the model.
x.compartments

{'e': 'extracellular space', 'c': 'cytosol'}

In [13]:
# Compartment ids only (iterating a dict yields its keys).
c = list(x.compartments)
c

['e', 'c']

In [14]:
# Turn each compartment id into the suffix used in metabolite ids, e.g. 'e' -> '_e'.
cc = ["_" + str(element) for element in c]

In [15]:
# Show the compartment suffixes built in the previous cell.
cc

['_e', '_c']

In [16]:
s = "x_e"
# str.replace returns a new string and leaves `s` untouched; "ewe" does not occur in s, so the result equals s.
s.replace("ewe", "e")

'x_e'

In [17]:
# Strip each compartment suffix in turn, printing the suffix and the string after each pass.
print(s)
for i in cc:
    print(i)
    s = s.replace(i, "")  # str.replace returns a new string, so s has to be rebound
    print(s)

x_e
_e
x
_c
x


In [18]:
# Strip the compartment suffixes from every metabolite id of the first half.
print(l1)
l11 = []
for met_id in l1:
    # The loop variable is deliberately not called `c`: that name holds the
    # list of compartment ids built in an earlier cell, and rebinding it here
    # would silently break any later re-run of the cell that builds `cc`.
    for suffix in cc:
        met_id = met_id.replace(suffix, "")
    l11.append(met_id)
print(l11)

['h_e', 'succ_e']
['h', 'succ']


In [19]:
# Two lists holding the same items in a different order compare equal once sorted.
L1 = sorted(["b", "d", "a", "5", "19", "c"])
L2 = sorted(["a", "19", "c", "d", "5", "b"])
assert L1 == L2

In [20]:
# Lists with different contents do not compare equal. The failed assertion is
# caught and reported so that this demonstration no longer aborts a
# "Restart & Run All" execution of the notebook.
a = ["3", "a", "i"]
b = ["5", "k"]
try:
    assert a == b
except AssertionError as err:
    print(f"AssertionError: {err}")

AssertionError: 

In [17]:
def is_transport(reaction, compartments_list):
    """
    Takes a cobra model reaction as input and determines if it is a transport reaction.

    A reaction is reported as transport when, once the compartment suffix of
    every metabolite id has been removed, the reactants and the products are
    the same compounds. Stoichiometric coefficients are ignored, and the
    function does not check that both sides sit in different compartments.

    Parameters
    ----------
    reaction : object with ``reactants`` and ``products`` iterables
        Each item must stringify to a metabolite id such as ``"co2_e"``.
    compartments_list : iterable of str
        Compartment suffixes as they appear in metabolite ids,
        e.g. ``['_e', '_c']``.

    Returns
    -------
    bool
        True if both sides contain the same compounds, False otherwise
        (including reactions with no metabolites at all).
    """
    # Longest suffix first, so that a suffix which is itself the tail of
    # another one can never shadow the longer match.
    suffixes = sorted(compartments_list, key=len, reverse=True)

    def strip_compartment(met_id):
        # Remove only a *trailing* suffix. str.replace would also delete the
        # same characters in the middle of an id ("ab_cd_e" -> "abd") and
        # make unrelated compounds look identical.
        for suffix in suffixes:
            if suffix and met_id.endswith(suffix):
                return met_id[: -len(suffix)]
        return met_id

    # Sorting makes the comparison independent of the order in which the
    # compounds are written on each side of the equation.
    reactants = sorted(strip_compartment(str(met)) for met in reaction.reactants)
    products = sorted(strip_compartment(str(met)) for met in reaction.products)

    # A reaction without metabolites moves nothing, so it is not a transport.
    if not reactants:
        return False
    # Lists of different length never compare equal, which also rejects
    # exchange reactions (one side empty).
    return reactants == products

Some points to take into account:
 - this function does not check that reactants and products are in different compartments; it only checks that the same compounds appear on both sides
 - transport reactions involving metabolites that appear on only one side of the equation (e.g. ATPS4r) are not detected, so they are not added
 - exchange reactions are not added either (there is another function for that purpose)
 - this function does not consider the stoichiometric coefficients

In [18]:
# PFK has two reactants and three products, so it is not a transport reaction.
print(r1)
is_transport(r1, ['_e', '_c'])

PFK: atp_c + f6p_c --> adp_c + fdp_c + h_c


False

In [19]:
# ATPS4r has three compounds on each side, but they differ (adp/pi vs atp/h2o), so the check fails.
print(r2)
is_transport(r2, ['_e', '_c'])

ATPS4r: adp_c + 4.0 h_e + pi_c <=> atp_c + h2o_c + 3.0 h_c


False

In [20]:
# CO2t has the same compound on both sides, differing only in the compartment suffix.
print(r3)
is_transport(r3, ['_e', '_c'])

CO2t: co2_e <=> co2_c


True

In [21]:
# An exchange reaction has an empty product side, so the length check rejects it.
print(r4)
is_transport(r4, ['_e', '_c'])

EX_h_e: h_e <=> 


False

In [22]:
# SUCCt2_2 moves several compounds at once; both sides match after removing the suffixes.
print(r5)
is_transport(r5, ['_e', '_c'])

SUCCt2_2: 2.0 h_e + succ_e --> 2.0 h_c + succ_c


True

In [13]:
def add_transport(model, template):
    """
    Adds transport reactions from a template which metabolites are present in the model.

    A template reaction is copied into ``model`` when
      * ``is_transport`` reports it as a transport reaction,
      * no reaction with the same id is in ``model.reactions`` yet, and
      * every reactant, ignoring its compartment suffix, matches a metabolite
        that the model already has in *some* compartment.

    Only the compound is checked, not the compartment, so the copied
    reactions can bring along metabolites located in compartments the model
    did not have before.

    Parameters
    ----------
    model : cobra model
        Query model; it is modified in place.
    template : cobra model
        Model the transport reactions are taken from; only copies of its
        reactions are added to ``model``.

    Returns
    -------
    The same ``model`` object that was passed in.
    """
    def compartment_suffixes(cobra_model):
        # Compartment ids appear in metabolite ids as a "_<id>" suffix.
        return ["_" + str(comp) for comp in cobra_model.compartments.keys()]

    def base_id(met_id, suffixes):
        # Remove only a *trailing* compartment suffix (the longest one that
        # matches). str.replace would also delete the same characters in the
        # middle of an id and merge unrelated compounds.
        matching = [suffix for suffix in suffixes if met_id.endswith(suffix)]
        if not matching:
            return met_id
        return met_id[: -len(max(matching, key=len))]

    # PREPARATION
    template_suffixes = compartment_suffixes(template)
    model_suffixes = compartment_suffixes(model)
    # Compounds known to the model, compartment-independent. A set removes
    # duplicates and gives constant-time membership tests.
    known_compounds = {
        base_id(str(met), model_suffixes) for met in model.metabolites
    }

    # REACTION ADDING
    selected = []
    for reaction in template.reactions:
        if is_transport(reaction, template_suffixes) and reaction not in model.reactions:
            # Reactants are enough: for a transport reaction the products are
            # the same compounds in another location.
            compounds = [
                base_id(str(met), template_suffixes) for met in reaction.reactants
            ]
            if all(compound in known_compounds for compound in compounds):
                selected.append(reaction.copy())
    # Single batched call; Model.add_reaction (singular) is deprecated in cobrapy.
    model.add_reactions(selected)
    return model

In [57]:
# Load the iJN1463 model; `y` serves as the template that provides the transport reactions.
y = read_sbml_model("BiGG_files/iJN1463.xml")

In [69]:
# Summary of the query model before any transport reaction is added.
x

0,1
Name,e_coli_core
Memory address,0x07fcfa9636670
Number of metabolites,72
Number of reactions,95
Number of groups,0
Objective expression,1.0*BIOMASS_Ecoli_core_w_GAM - 1.0*BIOMASS_Ecoli_core_w_GAM_reverse_712e5
Compartments,"extracellular space, cytosol"


In [70]:
# Summary of the template model; note that it has a periplasm compartment the query model lacks.
y

0,1
Name,iJN1463
Memory address,0x07fcf882b52e0
Number of metabolites,2153
Number of reactions,2927
Number of groups,0
Objective expression,1.0*BIOMASS_KT2440_WT3 - 1.0*BIOMASS_KT2440_WT3_reverse_d86d5
Compartments,"cytosol, extracellular space, periplasm"


In [71]:
# Copy matching transport reactions from the template `y` into `x`; `x` is modified in place.
add_transport(x, y)

In [72]:
# Inspect the model again: metabolite and reaction counts grew and a new compartment "p" appeared.
x

0,1
Name,e_coli_core
Memory address,0x07fcfa9636670
Number of metabolites,100
Number of reactions,140
Number of groups,0
Objective expression,1.0*BIOMASS_Ecoli_core_w_GAM - 1.0*BIOMASS_Ecoli_core_w_GAM_reverse_712e5
Compartments,"extracellular space, cytosol, p"
