In [1]:
import cobra
import re
import gurobipy
from cobra.flux_analysis.gapfilling import GapFiller
from Gapfilling import *
import os
from cobra.io import read_sbml_model
import pandas as pd
import matplotlib as plt
from cobra import exceptions

In [2]:
# Load the E. coli core model from the local BiGG SBML file.
x = read_sbml_model("BiGG_files/e_coli_core.xml")

Using license file /home/fco/gurobi.lic
Academic license - for non-commercial use only - expires 2021-03-12


In [3]:
# Print every reaction of the model (identifier and equation), one per line.
for reaction in x.reactions:
    print(reaction)

PFK: atp_c + f6p_c --> adp_c + fdp_c + h_c
PFL: coa_c + pyr_c --> accoa_c + for_c
PGI: g6p_c <=> f6p_c
PGK: 3pg_c + atp_c <=> 13dpg_c + adp_c
PGL: 6pgl_c + h2o_c --> 6pgc_c + h_c
ACALD: acald_c + coa_c + nad_c <=> accoa_c + h_c + nadh_c
AKGt2r: akg_e + h_e <=> akg_c + h_c
PGM: 2pg_c <=> 3pg_c
PIt2r: h_e + pi_e <=> h_c + pi_c
ALCD2x: etoh_c + nad_c <=> acald_c + h_c + nadh_c
ACALDt: acald_e <=> acald_c
ACKr: ac_c + atp_c <=> actp_c + adp_c
PPC: co2_c + h2o_c + pep_c --> h_c + oaa_c + pi_c
ACONTa: cit_c <=> acon_C_c + h2o_c
ACONTb: acon_C_c + h2o_c <=> icit_c
ATPM: atp_c + h2o_c --> adp_c + h_c + pi_c
PPCK: atp_c + oaa_c --> adp_c + co2_c + pep_c
ACt2r: ac_e + h_e <=> ac_c + h_c
PPS: atp_c + h2o_c + pyr_c --> amp_c + 2.0 h_c + pep_c + pi_c
ADK1: amp_c + atp_c <=> 2.0 adp_c
AKGDH: akg_c + coa_c + nad_c --> co2_c + nadh_c + succoa_c
ATPS4r: adp_c + 4.0 h_e + pi_c <=> atp_c + h2o_c + 3.0 h_c
PTAr: accoa_c + pi_c <=> actp_c + coa_c
PYK: adp_c + h_c + pep_c --> atp_c + pyr_c
BIOMASS_Ecoli_cor

In [4]:
# Sample reactions used as test cases further down.
# PFK: atp_c + f6p_c --> adp_c + fdp_c + h_c (everything in the cytosol)
r1 = x.reactions.PFK
# ATPS4r: adp_c + 4.0 h_e + pi_c <=> atp_c + h2o_c + 3.0 h_c
r2 = x.reactions.ATPS4r
# CO2t: co2_e <=> co2_c
r3 = x.reactions.CO2t
# EX_h_e: h_e <=> (metabolite on one side only)
r4 = x.reactions.EX_h_e
# SUCCt2_2: 2.0 h_e + succ_e --> 2.0 h_c + succ_c
r5 = x.reactions.SUCCt2_2

In [5]:
r5.metabolites

{<Metabolite h_e at 0x7f9ad21a7e20>: -2.0,
 <Metabolite succ_e at 0x7f9b5a1f41c0>: -1.0,
 <Metabolite h_c at 0x7f9ad21a7ca0>: 2.0,
 <Metabolite succ_c at 0x7f9b5a1f4fd0>: 1.0}

In [6]:
# Identifiers of the metabolites taking part in r5, as plain strings.
l = list(map(str, r5.metabolites))
l

['h_e', 'succ_e', 'h_c', 'succ_c']

In [7]:
# Number of metabolites in the reaction.
length = len(l)
length

4

In [8]:
# Midpoint of the metabolite list (integer division), used to split it in two.
half = length//2
half

2

In [9]:
# First half and second half of the metabolite list.
l1 = l[:half]
l2 = l[half:]
print(l1, l2)

['h_e', 'succ_e'] ['h_c', 'succ_c']


1. the two split lists must have the same length
2. the metabolites in each list must be the same (apart from their compartments)

In [10]:
x.compartments

{'e': 'extracellular space', 'c': 'cytosol'}

In [11]:
# Compartment identifiers of the model (the keys of the compartments mapping).
c = list(x.compartments)
c

['e', 'c']

In [12]:
# Turn each compartment identifier into the suffix used in metabolite ids
# (e.g. "e" -> "_e"). A comprehension avoids leaving a stray loop variable
# behind in the notebook namespace.
cc = ["_" + str(element) for element in c]

In [13]:
cc

['_e', '_c']

In [14]:
# str.replace returns the string unchanged when the substring is not found.
s = "x_e"
s.replace("ewe", "e")

'x_e'

In [15]:
# Remove every compartment suffix from `s`, printing the intermediate result
# after each one. The work is done on a separate variable so `s` keeps its
# original value and the cell prints the same thing when it is re-run.
print(s)
stripped = s
for suffix in cc:
    print(suffix)
    stripped = stripped.replace(suffix, "")
    print(stripped)

x_e
_e
x
_c
x


In [16]:
# Strip the compartment suffixes from every metabolite id in the first half.
# The loop variables have their own names so that the compartment list `c`
# defined in an earlier cell is not overwritten.
print(l1)
l11 = []
for met_id in l1:
    for suffix in cc:
        met_id = met_id.replace(suffix, "")
    l11.append(met_id)
print(l11)

['h_e', 'succ_e']
['h', 'succ']


In [17]:
# Two lists holding the same items in a different order compare equal once sorted.
L1 = sorted(["b", "d", "a", "5", "19", "c"])
L2 = sorted(["a", "19", "c", "d", "5", "b"])
assert L1 == L2

In [38]:
# Counter-example: lists with different contents are not equal, so this
# assert is expected to raise AssertionError.
a = ["3", "a", "i"]
b = ["5", "k"]
assert a == b

AssertionError: 

In [72]:
def is_transport(reaction, compartments_list):
    """
    Determine whether a cobra model reaction is a transport reaction.

    A reaction counts as a transport reaction when the compounds it consumes
    and the compounds it produces are the same once the compartment suffix
    (e.g. ``_c`` or ``_e``) is removed from each metabolite identifier.
    Stoichiometric coefficients are only used to tell the two sides of the
    reaction apart; their magnitudes are not compared.

    Parameters
    ----------
    reaction : cobra.Reaction
        Reaction whose ``metabolites`` attribute maps each metabolite to its
        stoichiometric coefficient (negative = consumed, positive = produced).
    compartments_list : iterable
        Compartment identifiers of the model, e.g. ``['e', 'c']``.

    Returns
    -------
    bool
        True if both sides hold the same compounds, False otherwise
        (including reactions with an empty side, such as exchange reactions,
        and reactions without metabolites).
    """
    # Longest suffix first so that a longer compartment id is never shadowed
    # by a shorter one that happens to be its ending.
    suffixes = sorted(
        ("_" + str(comp) for comp in compartments_list), key=len, reverse=True
    )

    def strip_compartment(met_id):
        # Only a trailing suffix is removed; "_c"/"_e" occurring in the middle
        # of an identifier is part of the compound name and must be kept.
        for suffix in suffixes:
            if met_id.endswith(suffix):
                return met_id[:-len(suffix)]
        return met_id

    # Split the metabolites by the sign of their coefficient rather than by
    # their position, which depends on the order they were added in.
    consumed = []
    produced = []
    for met, coeff in reaction.metabolites.items():
        name = strip_compartment(str(met))
        if coeff < 0:
            consumed.append(name)
        elif coeff > 0:
            produced.append(name)

    # A transport reaction needs something on both sides.
    if not consumed or not produced:
        return False

    # Sorting makes the comparison independent of the order of the compounds.
    return sorted(consumed) == sorted(produced)

Some points to take into account:
 - this function does not guarantee that reactants and products are in different compartments; it only detects whether the compounds are the same
 - transport reactions that also involve unilateral metabolites (compounds present on only one side of the reaction, e.g. ATPS4r) are not detected
 - exchange reactions are not detected (there is another function for that purpose)
 - this function does not take the stoichiometric coefficients into account

In [73]:
# PFK has different compounds on each side: expected result is False.
print(r1)
is_transport(r1, ['e', 'c'])

PFK: atp_c + f6p_c --> adp_c + fdp_c + h_c


False

In [74]:
# ATPS4r moves h_e to h_c but also converts other compounds: expected result is False.
print(r2)
is_transport(r2, ['e', 'c'])

ATPS4r: adp_c + 4.0 h_e + pi_c <=> atp_c + h2o_c + 3.0 h_c


False

In [75]:
# CO2t has the same compound in two compartments: expected result is True.
print(r3)
is_transport(r3, ['e', 'c'])

CO2t: co2_e <=> co2_c


True

In [76]:
# EX_h_e has a metabolite on one side only: expected result is False.
print(r4)
is_transport(r4, ['e', 'c'])

EX_h_e: h_e <=> 


False

In [77]:
# SUCCt2_2 carries h and succ from compartment e to compartment c: expected result is True.
print(r5)
is_transport(r5, ['e', 'c'])

SUCCt2_2: 2.0 h_e + succ_e --> 2.0 h_c + succ_c


True