In [1]:
species = 'lumbricus_rubellus' # either 'lumbricus_rubellus' or 'lumbricus_terrestris'

In [2]:
import cobra
from functools import reduce
import os
import warnings
from collections import defaultdict, Counter
from cobra.core.dictlist import DictList
cobra.Configuration.solver = 'glpk'
warnings.simplefilter("ignore", category=UserWarning)

def get_biocyc_id(met: cobra.Metabolite):
    """Return the BioCyc ID of `met` without its database prefix.

    The 'biocyc' annotation is expected to look like '<PREFIX>:<ID>'; everything
    after the first ':' is returned. A missing or empty annotation is returned
    unchanged (None or the empty value). A non-empty annotation without ':'
    raises ValueError (from `.index`).
    """
    raw = met.annotation.get('biocyc')
    if not raw:
        return raw
    colon_at = raw.index(':')
    return raw[colon_at + 1:]

def annotation_map(items: cobra.DictList, key: str) -> defaultdict:
    """Group the IDs of `items` by the value of their annotation `key`.

    Only items whose annotation under `key` is a non-empty string are collected
    (list-valued or missing annotations are skipped). For metabolites this yields
    all compartment instances that share the same annotation value.

    Returns a defaultdict(list) mapping annotation value -> list of item IDs.
    """
    ids_by_annotation = defaultdict(list)
    for item in items:
        value = item.annotation.get(key)
        if not isinstance(value, str) or not value:
            continue
        ids_by_annotation[value].append(item.id)
    return ids_by_annotation

def replace_metabolite(reac: cobra.Reaction, old: cobra.Metabolite, new: cobra.Metabolite):
    """Substitute metabolite `old` by `new` in reaction `reac`, keeping the coefficient.

    Works through a single `add_metabolites` call that subtracts the coefficient of
    `old` and adds the same coefficient for `new`.
    """
    stoich = reac.metabolites[old]
    swap = {old: -stoich, new: stoich}
    reac.add_metabolites(swap)

def move_metabolite_compartments(model, source, target):
    """Move all metabolites whose ID ends with '_<source>' into compartment `target`.

    A metabolite is simply renamed (ID suffix and compartment) when no metabolite
    with the target ID exists yet. Otherwise it is replaced by the existing target
    metabolite in all of its reactions and removed from the model afterwards.
    Every affected ID pair is printed.
    """
    superseded = []
    for met in model.metabolites:
        if not met.id.endswith('_' + source):
            continue
        target_id = met.id[:-len(source)] + target
        print(met.id, '->', target_id)
        if target_id in model.metabolites:
            # target already exists: rewire the reactions and drop the source metabolite later
            for rxn in met.reactions:
                replace_metabolite(rxn, met, target_id)
            superseded.append(met)
        else:
            met.id = target_id
            met.compartment = target
    model.remove_metabolites(superseded)

def metabolite_id_for_compartment(met: cobra.Metabolite, compartment: str) -> str:
    """Return the ID that `met` would have in `compartment`.

    Regular case: the ID ends with '_<met.compartment>' and that suffix is swapped.
    Otherwise (the ID suffix differs from the compartment ID) everything after the
    last '_' is treated as the compartment suffix and replaced.

    Raises:
        ValueError: if the ID neither ends with its compartment nor contains '_'.
    """
    own = met.compartment
    # Fixed: the suffix length is taken from `met` itself. The previous code read a
    # global `m`, which only worked when a same-length compartment happened to be bound.
    if own and met.id.endswith('_' + own): # this is the regular case
        return met.id[:-len(own)] + compartment
    idx = met.id.rfind('_')
    if idx < 0:
        raise ValueError("Cannot derive a compartment suffix from metabolite ID %r" % (met.id,))
    return met.id[:idx + 1] + compartment

# this function is copied from cnapy.core so that a complete installation of cnapy won't be necessary
def replace_ids(dict_list: DictList, annotation_key: str, unambiguous_only: bool = False,
                unique_only: bool = True, candidates_separator: str ="") -> None:
    """Rename reactions or metabolites to IDs taken from their annotation.

    For each entry the value stored under `annotation_key` provides the candidate
    IDs: a list is used as is, a string is split at `candidates_separator` (if that
    is non-empty) or used as a single candidate. Entries that have a `compartment`
    attribute get '_<compartment>' appended to every candidate.

    unique_only: only candidates proposed exactly once over the whole `dict_list`
        are considered.
    unambiguous_only: entries that still have more than one candidate are skipped.

    The first candidate that can be assigned wins; a ValueError raised by the
    assignment (ID already in use) moves on to the next candidate. On success the
    previous ID is stored in annotation['original ID']. Messages are printed for
    entries whose ID already equals a candidate and for entries where no candidate
    could be assigned.
    """
    usage: Counter = Counter()
    per_entry = []
    for entry in dict_list:
        proposed = entry.annotation.get(annotation_key, [])
        if not isinstance(proposed, list):
            if len(candidates_separator) > 0:
                proposed = proposed.split(candidates_separator)
            else:
                proposed = [proposed]
        if len(proposed) > 0 and hasattr(entry, 'compartment'):
            proposed = [p + "_" + entry.compartment for p in proposed]
        if unique_only:
            usage.update(proposed)
        per_entry.append(proposed)

    for entry, proposed in zip(dict_list, per_entry):
        if unique_only:
            proposed = [p for p in proposed if usage[p] == 1]
        if unambiguous_only and len(proposed) > 1:
            continue
        previous_id = entry.id
        for new_id in proposed:
            if new_id == previous_id:
                print(previous_id, "remains unchanged")
                break
            try:
                entry.id = new_id
                entry.annotation['original ID'] = previous_id
            except ValueError: # new_id already in use
                continue
            break
        if len(proposed) > 0 and new_id != previous_id and previous_id == entry.id:
            print("Could not find a new ID for", entry.id, "in", proposed)

In [3]:
icel = cobra.io.read_sbml_model("wormflux_iCEL1314/iCEL1314.xml.gz")

In [4]:
# load the MetaFlux SBML export of the selected species and print its objective value as loaded
model = cobra.io.read_sbml_model(os.path.join(species, "MetaFlux/SBML_export_augmented.sbml"))
print(model.slim_optimize())
# prevent free ATP generation (check via ATPASE_RXN)
model.reactions.RXN_10862.lower_bound = 0
model.reactions.RXN_14218.lower_bound = 0
# objective value after both reactions have been made irreversible
model.slim_optimize()

9.27869407879553


0.9653119288525618

In [5]:
# remove CPD-16459 (H2PO4), which is only involved in the reaction H2PO4 = HPO4 + H+;
# otherwise this metabolite would interfere with the correct mapping of phosphate
# because it is just a different protonation state
# (destructive=True also removes the reactions this metabolite takes part in)
model.remove_metabolites([model.metabolites.CPD_16459_c], destructive=True)

Validate the current BiGG mapping (for unique IDs only) between the model and iCEL1314.

In [6]:
# update some annotations
icel.metabolites.get_by_id('g6p-A_c').annotation['bigg.metabolite'] = 'g6p_A'
icel.metabolites.get_by_id('g6p-B_c').annotation['bigg.metabolite'] = 'g6p_B'

In [7]:
icel_bigg_compounds = annotation_map(icel.metabolites, 'bigg.metabolite')

In [8]:
# add ETF (basically an equivalent of FAD) annotations
# in the earthworm model, FAD is only a metabolite in its synthesis pathway while ETF are the electron carriers
for met_id, bigg_id in (('ETF_Reduced_c', 'etfrd'), ('ETF_Oxidized_c', 'etfox')):
    # one assignment per metabolite is enough; attribute access and get_by_id address the same object
    model.metabolites.get_by_id(met_id).annotation['bigg.metabolite'] = bigg_id

In [9]:
# this block sets up the mapping from base BioCyc ID (without compartment identifier)
# to all iCEL1314 metabolites (with compartment variants) and shows charge and formula differences
# NOTE(review): biocyc_id is None for metabolites without a 'biocyc' annotation; such a metabolite
# with a BiGG match would be stored under the key None here (the KEGG cell skips them) - confirm
# that this cannot happen or is intended.
metabolite_compartments = defaultdict(set) # BioCyc ID -> iCEL1314 compartments in which a mapped metabolite exists
biocyc2icel = defaultdict(set) # mapping from BioCyc ID to iCEL1314 metabolites
for m in model.metabolites:
    biocyc_id = get_biocyc_id(m)
    bigg_id = m.annotation.get('bigg.metabolite', None)
    if bigg_id and isinstance(bigg_id, str): # list-valued (non-unique) BiGG annotations are skipped
        bigg_id = bigg_id.replace('__', '-') # the model annotation uses e.g. glc__D, iCEL1314 glc-D
        for icel_met in icel.metabolites.get_by_any(icel_bigg_compounds.get(bigg_id, [])):
            if m.charge != icel_met.charge:
                # check whether adding d hydrogens to the iCEL1314 formula reproduces the model formula
                d = m.charge - icel_met.charge
                el = defaultdict(int)
                el.update(icel_met.elements)
                el['H'] += d
                if el['H'] == 0:
                    del el['H']
                if el == m.elements: # charge difference because of protonation difference
                    print(m.id, bigg_id, icel_met.id, d)
                else:
                    print(m.id, bigg_id, icel_met.id, d, m.formula, icel_met.formula)
            elif m.elements != icel_met.elements:
                # same charge but different composition
                print(m.id, bigg_id, icel_met.id, m.formula, icel_met.formula)
            metabolite_compartments[biocyc_id].add(icel_met.compartment)
            biocyc2icel[biocyc_id].add(icel_met)

3_HYDROXY_ANTHRANILATE_c 3hanthrn 3hanthrn_c -1
3_HYDROXY_ANTHRANILATE_c 3hanthrn 3hanthrn_e -1
3_SULFINOALANINE_c 3sala 3sala_m 1
3_SULFINOALANINE_c 3sala 3sala_c 1
5_PHOSPHORIBOSYL_5_AMINOIMIDAZOLE_c air air_c 1
ALA_tRNAs_c trnaala trnaala_c -3 C28H34N11O21P3Uj ROH
ALA_tRNAs_c trnaala trnaala_m -3 C28H34N11O21P3Uj ROH
ASP_tRNAs_c trnaasp trnaasp_c -3 C28H34N11O21P3Ug ROH
ASP_tRNAs_c trnaasp trnaasp_m -3 C28H34N11O21P3Ug ROH
CO3_c hco3 hco3_e -1
CO3_c hco3 hco3_c -1
CO3_c hco3 hco3_m -1
CPD_13612_c sphgn sphgn_ce_c C18H40NO2 C19H42NO2
CPD_13612_c sphgn sphgn_ce_e C18H40NO2 C19H42NO2
CPD_3743_c tchola tchola_c -1
CPD_3743_c tchola tchola_e -1
CPD_649_c sph1p sph1p_ce_c C18H39NO5P C19H41NO5P
CPD_649_c sph1p sph1p_ce_e C18H39NO5P C19H41NO5P
CPD_678_c seln seln_c 1
CPD_846_c h2s h2s_m -1
CPD_846_c h2s h2s_c -1
DEHYDROSPHINGANINE_c 3dsphgn 3dsphgn_ce_c C18H38NO2 C19H40NO2
ETF_Oxidized_c etfox etfox_m -1 C13H10N4O2Wn R
ETF_Reduced_c etfrd etfrd_m C13H13N4O2Wn RH2
FAD_c fad fad_e -1
FAD_c fa

In [10]:
biocyc2icel['Glucopyranose']

{<Metabolite glc-D-A_c at 0x2a3c2c0e880>,
 <Metabolite glc-D-B_c at 0x2a3c2b90670>,
 <Metabolite glc-D_c at 0x2a3b2240e50>,
 <Metabolite glc-D_e at 0x2a3c2b90940>}

In [11]:
# Map generic glucose to the cytosolic and the extracellular glc-D only (do not map to the alpha/beta instances).
# Fixed: the ID list previously contained 'glc-D_e' twice and therefore dropped 'glc-D_c'.
biocyc2icel['Glucopyranose'] = set(icel.metabolites.get_by_any(['glc-D_c', 'glc-D_e']))

In [12]:
# use KEGG mappings to find additional relations between earthworm model and iCEL1314
# (charge and formula differences are printed in the same way as for the BiGG mapping)
icel_kegg_compounds = annotation_map(icel.metabolites, 'kegg.compound')
for m in model.metabolites:
    # use the shared helper instead of repeating the prefix stripping inline
    biocyc_id = get_biocyc_id(m)
    if not biocyc_id:
        continue # metabolites without BioCyc ID cannot be keyed in biocyc2icel
    kegg_id = m.annotation.get('kegg.compound', None)
    if kegg_id and isinstance(kegg_id, str):
        for icel_met in icel.metabolites.get_by_any(icel_kegg_compounds.get(kegg_id, [])):
            if m.charge != icel_met.charge:
                d = m.charge - icel_met.charge
                el = defaultdict(int)
                el.update(icel_met.elements)
                el['H'] += d
                if el['H'] == 0:
                    del el['H']
                if el == m.elements: # charge difference because of protonation difference
                    print(m.id, kegg_id, icel_met.id, d)
                else:
                    print(m.id, kegg_id, icel_met.id, m.formula, icel_met.formula)
            elif m.elements != icel_met.elements:
                print(m.annotation.get('biocyc', m.id), kegg_id, icel_met.id, m.formula, icel_met.formula)
            # the redundant second add() of the same metabolite was dropped
            biocyc2icel[biocyc_id].add(icel_met)
            metabolite_compartments[biocyc_id].add(icel_met.compartment)

LRU:1-PHOSPHATIDYL-1D-MYO-INOSITOL-34-BISPH C11554 pail34p_c C11H14O19RRP3 C46.1308H77.1556O19.0P3.0
LRU:1-PHOSPHATIDYL-1D-MYO-INOSITOL-35-BISPH C11556 pail35p_c C11H14O19RRP3 C46.1308H77.1556O19.0P3.0
LRU:1-PHOSPHATIDYL-1D-MYO-INOSITOL-5-PHOSPHA C11557 pail5p_c C11H15O16RRP2 C46.1308H78.1556O16.0P2.0
LRU:2-Lysophosphatidylcholines C04230 lpchol_e C9H19NO7RP C26.5654H50.5778N1.0O7.0P1.0
LRU:2-Lysophosphatidylcholines C04230 lpchol_c C9H19NO7RP C26.5654H50.5778N1.0O7.0P1.0
3_HYDROXY_ANTHRANILATE_c C00632 3hanthrn_c -1
3_HYDROXY_ANTHRANILATE_c C00632 3hanthrn_e -1
3_SULFINOALANINE_c C00606 3sala_m 1
3_SULFINOALANINE_c C00606 3sala_c 1
5_PHOSPHORIBOSYL_5_AMINOIMIDAZOLE_c C03373 air_c 1
LRU:ACYL-COA C00040 Rtotalcoa_c C22H31N7O17RP3S C39.5654H62.5778N7.0O17.0P3.0S1.0
ALA_tRNAs_c C01635 trnaala_c C28H34N11O21P3Uj ROH
ALA_tRNAs_c C01635 trnaala_m C28H34N11O21P3Uj ROH
AMMONIA_c C00014 nh4_e -1
AMMONIA_c C00014 nh4_c -1
AMMONIA_c C00014 nh4_m -1
ASP_tRNAs_c C01638 trnaasp_c C28H34N11O21P3Ug RO

In [13]:
len(biocyc2icel)

454

check that the glucose mappings are correct

In [14]:
print(biocyc2icel['Glucopyranose'])
print(biocyc2icel['ALPHA-GLUCOSE'])
print(biocyc2icel['GLC'])
biocyc2icel['GLC-6-P'] # should be beta-glucose

{<Metabolite glc-D_e at 0x2a3c2b90940>, <Metabolite glc-D_c at 0x2a3b2240e50>}
{<Metabolite glc-D-A_c at 0x2a3c2c0e880>}
{<Metabolite glc-D-B_c at 0x2a3c2b90670>}


{<Metabolite g6p-B_c at 0x2a3c2cba220>}

In [15]:
# metabolites only in mitochondrion according to iCEL1314
only_mitochondrial_metabolites = [k for k,v in biocyc2icel.items() if not any(m.compartment == 'c' for m in v) and any(m.compartment == 'm' for m in v)]
len(only_mitochondrial_metabolites)

33

In [16]:
# extend the set by model metabolites in compartment 'm' that have no cytosolic counterpart in the model
# NOTE(review): the entries collected so far are BioCyc IDs (keys of biocyc2icel) while the entries
# added here are model metabolite IDs, so the set mixes two ID namespaces - confirm this is intended.
only_mitochondrial_metabolites = set(only_mitochondrial_metabolites)
only_mitochondrial_metabolites.update(
    [m.id for m in model.metabolites if m.compartment == 'm' and metabolite_id_for_compartment(m, 'c') not in model.metabolites])

In [17]:
fully_mapped_reactions = [r for r in model.reactions if all(get_biocyc_id(m) in biocyc2icel for m in r.metabolites)]
len(fully_mapped_reactions)

568

In [18]:
# non-boundary, fully mapped reactions whose metabolites all have a mitochondrial instance in iCEL1314
mitochondrial = [r for r in fully_mapped_reactions if not r.boundary and all('m' in metabolite_compartments[get_biocyc_id(m)] for m in r.metabolites)]
len(mitochondrial) # actually only possibly mitochondrial, a corresponding mitochondrial reaction should also be in iCEL1314

183

In [19]:
# pick one mitochondrial and one cytosolic iCEL1314 representative per BioCyc ID
biocyc2icel_mit = dict()
biocyc2icel_cyt = dict()
for ml, mi in biocyc2icel.items():
    # mi is a set, so its iteration order is not guaranteed to be the same in every run;
    # sorting by ID makes the chosen representative reproducible
    mi_sorted = sorted(mi, key=lambda met: met.id)
    mi_mit = [m for m in mi_sorted if m.compartment == 'm']
    if len(mi_mit):
        biocyc2icel_mit[ml] = mi_mit[0]
    mi_cyt = [m for m in mi_sorted if m.compartment == 'c']
    if len(mi_cyt):
        biocyc2icel_cyt[ml] = mi_cyt[0]
print(len(biocyc2icel_mit), len(biocyc2icel_cyt))

204 421


In [20]:
# show differences of reactions that occur both in the cytosol and in the mitochondrion of iCEL1314
# (reaction IDs are 'RC...'/'RM...'; the part after this prefix identifies the pair)
icel_cyto_and_mito_rxns = set()
for rxn in icel.reactions:
    if rxn.id.startswith('RM') and ('RC' + rxn.id[2:]) in icel.reactions:
        icel_cyto_and_mito_rxns.add(rxn.id[2:])
for r in icel_cyto_and_mito_rxns:
    rc = icel.reactions.get_by_id('RC' + r)
    rm = icel.reactions.get_by_id('RM' + r)
    # compare metabolite IDs without their last two characters (the '_c' / '_m' suffix)
    mc = {m.id[:-2] for m in rc.metabolites}
    mm = {m.id[:-2] for m in rm.metabolites}
    if mc == mm:
        continue
    print(rc.name, rm.name, mc - mm, mm - mc)

Beta oxidation of fa17c9 (peroxisomal) Beta oxidation of fa17c9 (mitochondrial) {'o2', 'h2o2'} {'fadh2', 'fad'}
Beta oxidation of fa17p0iso (peroxisomal) Beta oxidation of fa17p0iso (mitochondrial) {'o2', 'h2o2'} {'fadh2', 'fad'}
Beta oxidation of stcoa (peroxisomal) Beta oxidation of stcoa (mitochondrial) {'o2', 'h2o2'} {'fadh2', 'fad'}
Beta oxidation of intermediary fatty acid C14:2 (peroxisomal) Beta oxidation of intermediary fatty acid C14:2 (mitochondrial) {'o2', 'h2o2'} {'fadh2', 'fad'}
Beta oxidation of eicosapentanoic acid (peroxisomal) Beta oxidation of eicosapentanoic acid (mitochondrial) {'o2', 'h2o2'} {'fadh2', 'fad'}
Beta oxidation of fa19c11 (peroxisomal) Beta oxidation of fa19c11 (mitochondrial) {'o2', 'h2o2'} {'fadh2', 'fad'}
Beta oxidation of oleic acid (peroxisomal) Beta oxidation of oleic acid (mitochondrial) {'o2', 'h2o2'} {'fadh2', 'fad'}
Beta oxidation of linoleic acid (peroxisomal) Beta oxidation of linoleic acid (mitochondrial) {'o2', 'h2o2'} {'fadh2', 'fad'}
Be

Differences occur only in the electron acceptors of beta-oxidation: cytosolic (or rather peroxisomal) versus mitochondrial.

In [21]:
# it is only checked that a reaction in iCEL1314 exists that has the same metabolites,
# except for protons, no stoichiometry check
# BioCyc metabolite and compartment information is ignored here
# a leading "* " in the printed output marks reactions without cytosolic ('RC') counterpart in iCEL1314
mito_rxns = set() # model reactions that have an instance in the mitochondrion according to iCEL1314 ('set' in case there are isozymes in iCEL1314)
only_mito_rxns = set() # subset of mito_rxns without a cytosolic counterpart
model_rxn2icel_rxn = defaultdict(list) # model reaction (object) -> IDs of matching iCEL1314 reactions
for r in model.reactions:
    if not r.boundary:
        if '[CCO_MIT_LUM]' in r.id:
            # reaction is already flagged as mitochondrial by its ID
            mito_rxns.add(r.id)
            if r.id.replace('[CCO_MIT_LUM]', '[CCO_CYTOSOL]') not in model.reactions:
                only_mito_rxns.add(r.id)
        elif all(get_biocyc_id(m) in biocyc2icel_mit for m in r.metabolites):
            # collect all mitochondrial iCEL1314 reactions per metabolite (excluding protons) of model reaction r
            rxns = [{ri for ri in biocyc2icel_mit[get_biocyc_id(m)].reactions if ri.id.startswith("RM")} for m in r.metabolites if m.name != 'H+']
            if len(rxns):
                rxns = reduce(set.intersection, rxns) # reduce to mitochondrial iCEL1314 reactions that share all metabolites
                for ri in rxns:
                    # remove every iCEL1314 metabolite that is matched by a model metabolite; what is left is unmatched
                    mi = list(ri.metabolites)
                    for ml in r.metabolites:
                        if ml.name != 'H+' and biocyc2icel_mit[get_biocyc_id(ml)] in mi:
                            mi.remove(biocyc2icel_mit[get_biocyc_id(ml)])
                    if len(mi) == 0 or (len(mi) == 1 and mi[0].name == 'H+'): # identified mitochondrial iCEL1314 reaction with the same metabolites (excluding protons)
                        mito_rxns.add(r.id)
                        model_rxn2icel_rxn[r].append(ri.id)
                        if ('RC' + ri.id[2:]) not in icel.reactions:
                            only_mito_rxns.add(r.id)
                            print("* ", end="")
                        print(r.id, r.build_reaction_string(), ri.id, ri.build_reaction_string())
print(len(mito_rxns), len(only_mito_rxns))

* BUTYRYL_COA_DEHYDROGENASE_RXN BUTYRYL_COA_c + ETF_Oxidized_c + PROTON_c <=> CROTONYL_COA_c + ETF_Reduced_c RM01175 btcoa_m + etfox_m <=> b2coa_m + etfrd_m
* BRANCHED_CHAINAMINOTRANSFERLEU_RXN 2_KETOGLUTARATE_c + LEU_c <=> 2K_4CH3_PENTANOATE_c + GLT_c RM01090 akg_m + leu-L_m <=> 4mop_m + glu-L_m
* BRANCHED_CHAINAMINOTRANSFERILEU_RXN 2_KETOGLUTARATE_c + ILE_c <=> 2_KETO_3_METHYL_VALERATE_c + GLT_c RM02199 akg_m + ile-L_m <=> 3mop_m + glu-L_m
FADSYN_RXN ATP_c + FMN_c + PROTON_c <=> FAD_c + PPI_c RM00161 atp_m + fmn_m + h_m --> fad_m + ppi_m
* 2_METHYLACYL_COA_DEHYDROGENASE_RXN 2_METHYL_BUTYRYL_COA_c + ETF_Oxidized_c + PROTON_c <=> CPD_1083_c + ETF_Reduced_c RM03172 2mbcoa_m + etfox_m <=> 2mb2coa_m + etfrd_m
4_HYDROXYGLUTAMATE_AMINOTRANSFERASE_RXN 2_KETOGLUTARATE_c + L_ERYTHRO_4_HYDROXY_GLUTAMATE_c <=> D_4_HYDROXY_2_KETO_GLUTARATE_c + GLT_c RM05052 akg_m + e4hglu_m --> 4h2oglt_m + glu-L_m
* RXN_9958_MALONATE_S_ALD/CO_A/NADP//ACETYL_COA/CARBON_DIOXIDE/NADPH.58. CO_A_c + MALONATE_S_ALD_c +

In [22]:
model.reactions.query(lambda x: '[CCO_CYTOSOL]' in x, attribute='id')

[<Reaction RXN66_1[CCO_CYTOSOL]_ETOH/HYDROGEN_PEROXIDE//ACETALD/WATER.51. at 0x2a3c51dd820>,
 <Reaction ACONITATEHYDR_RXN[CCO_CYTOSOL]_CIS_ACONITATE/WATER//THREO_DS_ISO_CITRATE.55. at 0x2a3c51ddeb0>,
 <Reaction ADENYL_KIN_RXN[CCO_CYTOSOL]_ATP/AMP//ADP.26. at 0x2a3c5390e20>,
 <Reaction FUMHYDR_RXN[CCO_CYTOSOL]_MAL//FUM/WATER.28. at 0x2a3c53cbe80>,
 <Reaction RXN_13451[CCO_CYTOSOL]_CPD_22003/ETF_Oxidized/PROTON//CPD_22006/ETF_Reduced.66. at 0x2a3c54cd430>,
 <Reaction ISOCITRATE_DEHYDROGENASE_NAD+_RXN[CCO_CYTOSOL]_THREO_DS_ISO_CITRATE/NAD//2_KETOGLUTARATE/CARBON_DIOXIDE/NADH.75. at 0x2a3c54d9340>,
 <Reaction RXN_13451[CCO_CYTOSOL]_CPD_22020/ETF_Oxidized/PROTON//CPD_22023/ETF_Reduced.66. at 0x2a3c55c5ca0>,
 <Reaction ACONITATEDEHYDR_RXN[CCO_CYTOSOL]_CIT//CIS_ACONITATE/WATER.38. at 0x2a3c55ad130>,
 <Reaction CITSYN_RXN[CCO_CYTOSOL]_ACETYL_COA/OXALACETIC_ACID/WATER//CIT/CO_A/PROTON.63. at 0x2a3c56a2f10>,
 <Reaction CARNITINE_O_PALMITOYLTRANSFERASE_RXN[CCO_CYTOSOL]_PALMITYL_COA/CARNITINE//CPD

In [23]:
model.reactions.query(lambda x: '[CCO_MIT_LUM]' in x, attribute='id')

[<Reaction ORNCARBAMTRANSFER_RXN[CCO_MIT_LUM]_L_ORNITHINE/CARBAMOYL_P//L_CITRULLINE/Pi/PROTON.61. at 0x2a3c51ea880>,
 <Reaction SUCCCOASYN_RXN[CCO_MIT_LUM]_SUC/CO_A/ATP//SUC_COA/ADP/Pi.42. at 0x2a3c5231460>,
 <Reaction RXN_13451[CCO_MIT_LUM]_CPD_22020/ETF_Oxidized/PROTON//CPD_22023/ETF_Reduced.66. at 0x2a3c533c340>,
 <Reaction MALATE_DEH_RXN[CCO_MIT_LUM]_MAL/NAD//OXALACETIC_ACID/NADH/PROTON.50. at 0x2a3c536cd00>,
 <Reaction ACONITATEHYDR_RXN[CCO_MIT_LUM]_CIS_ACONITATE/WATER//THREO_DS_ISO_CITRATE.55. at 0x2a3c5478c10>,
 <Reaction CARNITINE_O_PALMITOYLTRANSFERASE_RXN[CCO_MIT_LUM]_PALMITYL_COA/CARNITINE//CPD_419/CO_A.50. at 0x2a3c5556100>,
 <Reaction RXN_13451[CCO_MIT_LUM]_CPD1G_277/ETF_Oxidized/PROTON//CPD_14283/ETF_Reduced.66. at 0x2a3c5636e20>,
 <Reaction RXN_13451[CCO_MIT_LUM]_CPD_22011/ETF_Oxidized/PROTON//CPD_22014/ETF_Reduced.66. at 0x2a3c57089d0>,
 <Reaction SUCCINATE__COA_LIGASE_GDP_FORMING_RXN[CCO_MIT_LUM]_SUC/GTP/CO_A//SUC_COA/GDP/Pi.42. at 0x2a3c5751ac0>,
 <Reaction RXN_13037[

In [24]:
# same matching as for the mitochondrial reactions, but against cytosolic ('RC') iCEL1314 reactions;
# only model_rxn2icel_rxn is extended here, nothing is printed
for r in model.reactions:
    if not r.boundary and all(get_biocyc_id(m) in biocyc2icel_cyt for m in r.metabolites):
        # cytosolic iCEL1314 reactions per metabolite (excluding protons)
        rxns = [{ri for ri in biocyc2icel_cyt[get_biocyc_id(m)].reactions if ri.id.startswith("RC")} for m in r.metabolites if m.name != 'H+']
        if len(rxns):
            rxns = reduce(set.intersection, rxns) # iCEL1314 reactions that contain all of these metabolites
            for ri in rxns:
                # remove matched metabolites; the remainder must be empty or a single proton
                mi = list(ri.metabolites)
                for ml in r.metabolites:
                    if ml.name != 'H+' and biocyc2icel_cyt[get_biocyc_id(ml)] in mi:
                        mi.remove(biocyc2icel_cyt[get_biocyc_id(ml)])
                if len(mi) == 0 or (len(mi) == 1 and mi[0].name == 'H+'):
                    model_rxn2icel_rxn[r].append(ri.id)

ATPSYN_RXN is not contained in mito_rxns

In [25]:
# iCEL1314 transport reactions with ID prefix 'TCM' whose metabolites are all mapped to the model
mapped_icel_mets = reduce(set.union, biocyc2icel.values())
cyt_mit_trans = [
    r for r in icel.reactions
    if r.id.startswith("TCM") and all(m in mapped_icel_mets for m in r.metabolites)
]
len(cyt_mit_trans)

182

In [26]:
model.compartments

{'c': 'CCO-CYTOSOL',
 'm': 'CCO-MIT-LUM',
 'i': 'CCO-MIT-IM-SPC',
 'T': 'T',
 'CCO__45__MIT__45__IMEM': 'CCO-MIT-IMEM',
 'CCO__45__IN': 'CCO-IN',
 'CCO__45__OUT': 'CCO-OUT',
 'e': 'CCO-EXTRACELLULAR'}

In [27]:
additional_mito_rxns = [r for r in model.reactions.list_attr('id') if 'CCO_MIT_LUM' in r]

In [28]:
# TODO: are these only mitochondrial?
additional_mito_rxns.extend([
    '2OXOGLUTDECARB_RXN', 'RXN0_1147', 'RXN_7716', # 2-oxoglutarate dehydrogenase is split into three subreactions
    'SUCCINATE_DEHYDROGENASE_UBIQUINONE_RXN_SUC/UBIQUINONE_8//FUM/CPD_9956.31.', # missing because of incomplete quinone mapping
    'RXN_20154', 'RXN_20155' # subreactions of acetyl-CoA-acetyltransferase
])
# move beta-oxidation (reactions of pathway FAO-PWY) of regular fatty acids
# into mitochondrion (peroxisome, if present, is probably used for (very-)long chain FA)
fao_rxn_ids = [r.id for r in model.reactions if 'FAO-PWY' in r.annotation.get('biocyc.pathway', [])]
additional_mito_rxns.extend(fao_rxn_ids)
only_mito_rxns.update(fao_rxn_ids)

iCEL1314 has lumped palmitate synthesis in the cytosol (RCC0020), and beta-oxidation is also located in the cytosol. There are some FA-related reactions in the mitochondrion. ACP metabolites are not in this model.

In [29]:
# make decarboxylases irreversible (most already are)
if species == 'lumbricus_rubellus':
    for r in model.reactions.get_by_any([
        'ASPDECARBOX_RXN', 'PHOSPHASERDECARB_RXN', 'SULFINOALANINE_DECARBOXYLASE_RXN']):
        r.lower_bound = 0

In [30]:
# make some reactions from respiration irreversible in their physiological direction
for r in model.reactions.get_by_any([
    'RXN_15829_CPD_9956/Oxidized_CycA1_cytochromes/PROTON//UBIQUINONE_8/Reduced_CycA1_cytochromes/PROTON.90.', 'NADH_DEHYDROG_A_RXN_NADH/UBIQUINONE_8/PROTON//NAD/CPD_9956/PROTON.46.']):
    r.lower_bound = 0

In [31]:
# properly name exchange reactions (with BiGG ID where possible)
# NOTE(review): when a metabolite has no 'bigg.metabolite' annotation the fallback is m.id, to which
# the compartment is appended again; if m.id already ends with its compartment this yields a doubled
# suffix (e.g. 'EX_X_c_c'). A list-valued annotation would raise a TypeError in the concatenation - verify.
for r in model.boundary:
    m = list(r.metabolites.keys())[0] # the first metabolite of the boundary reaction
    r.id = 'EX_' + m.annotation.get('bigg.metabolite', m.id) + '_' + m.compartment

Pyruvate is transported into the mitochondrion and PDC produces Acetyl-CoA there. A malate/aspartate shuttle transports NADH from the cytosol to the mitochondrion lumen.

In iCEL1314, respiration and ATP synthase exchange protons between mitochondrion and cytosol, not the intermembrane space. 46 ATP per glucose are being produced which is too high.

In [32]:
# Give every metabolite with a mitochondrial counterpart in iCEL1314 a copy in compartment 'm'.
# Iterate over a snapshot: model.add_metabolites() extends model.metabolites, and growing a
# container while looping over it makes the loop visit the freshly added copies as well.
for m in list(model.metabolites):
    biocyc_id = get_biocyc_id(m)
    if biocyc_id in biocyc2icel and any(mi.compartment == 'm' for mi in biocyc2icel[biocyc_id]): # always copy to prevent reactions with mixed metabolites
        mm_id = metabolite_id_for_compartment(m, 'm')
        if mm_id in model.metabolites:
            continue # mitochondrial instance already exists (or m itself is mitochondrial)
        mm = m.copy()
        mm.id = mm_id
        mm.compartment = 'm'
        model.add_metabolites([mm])

In [33]:
# add additional mitochondrial metabolites where necessary
# (the union of all metabolites of the reactions in additional_mito_rxns is built first, so the
# model is not modified while one of its containers is being iterated)
for m in reduce(set.union, (set(r.metabolites) for r in model.reactions.get_by_any(additional_mito_rxns))):
    mm_id = metabolite_id_for_compartment(m, 'm')
    if mm_id not in model.metabolites: # only create a copy when no mitochondrial instance exists yet
        mm = m.copy()
        mm.id = mm_id
        mm.compartment = 'm'
        model.add_metabolites([mm])

In [34]:
# mapping of metabolite ID stems (ID without the part after the last '_'): iCEL1314 stem -> model stem
icel2model = {}
for m in model.metabolites:
    biocyc_id = get_biocyc_id(m)
    if biocyc_id not in biocyc2icel:
        continue
    model_stem = m.id[:m.id.rfind('_')]
    stem = {mi.id[:mi.id.rfind('_')] for mi in biocyc2icel[biocyc_id]}
    if len(stem) != 1:
        # several different iCEL1314 stems are mapped to the same BioCyc ID
        print(biocyc_id, stem)
    icel2model.update(dict.fromkeys(stem, model_stem))

In [35]:
# add the cytosol/mitochondrion transport reactions of iCEL1314 to the model, with the iCEL1314
# metabolite IDs translated to model IDs (model stem + '_' + iCEL1314 compartment)
for r in cyt_mit_trans:
    rl = cobra.Reaction(r.id, name=r.name)
    rl.bounds = r.bounds
    # the reaction is added before its metabolites are set, presumably so that the string IDs
    # passed below can be resolved against the model - TODO confirm
    model.add_reactions([rl])
    # raises KeyError if an iCEL1314 stem has no entry in icel2model
    rl.add_metabolites({icel2model[m.id[:m.id.rfind('_')]]+'_'+m.compartment: c for m,c in r.metabolites.items()})
    mb = rl.check_mass_balance()
    if len(mb): # report transporters that are not mass/charge balanced with the model metabolites
        print(rl.id, mb, rl.build_reaction_string())

In [36]:
# create a mitochondrial copy (ID suffix '_m') of every reaction in mito_rxns / additional_mito_rxns
for r in model.reactions.get_by_any(list(mito_rxns.union(additional_mito_rxns))):
    if any(m.id.endswith("_CCO-OUT") for m in r.metabolites):
        # reactions involving a '_CCO-OUT' metabolite are only printed and not copied
        print(r.id, r.build_reaction_string())
        continue
    rm = r.copy() # for now make copies in all cases
    rm.id = rm.id + '_m'
    model.add_reactions([rm])
    for m in list(rm.metabolites.keys()): # make a list copy because rm.metabolites changes in this loop
        # get_by_id raises KeyError if the mitochondrial instance of a metabolite does not exist
        replace_metabolite(rm, m, model.metabolites.get_by_id(metabolite_id_for_compartment(m, 'm')))

In [37]:
model.reactions.EX_co2_c.bounds = (-1, 1000)

In [38]:
replace_ids(model.metabolites, 'bigg.metabolite', unique_only=True)

ACP_c remains unchanged
ACP_m remains unchanged


ATP hydrolysis in the cytosol produces one proton while ATP synthesis in the mitochondrion takes up one proton. To ensure that this is equilibrated, the phosphate transport between these compartments is modified (cf. Edmund RS Kunji, Alan J Robinson, Coupling of proton and substrate translocation in the transport cycle of mitochondrial carriers, Current Opinion in Structural Biology, 2010).

In BiGG the typical eukaryotic mitochondrial phosphate transport is also a proton symport.

In [149]:
# turn the iCEL1314 phosphate transporter TCM1292 into a proton symport named PIt2m
pi_transport = model.reactions.TCM1292
pi_transport.id = 'PIt2m'
pi_transport.name = "Phosphate transporter mitochondrial"
pi_transport.build_reaction_from_string("pi_c + h_c <=> pi_m + h_m")
pi_transport.annotation.clear() # the previous annotation no longer describes the redefined reaction

In [150]:
model.remove_reactions(only_mito_rxns, remove_orphans=True)

In [151]:
# there is only mitochondrial NAD-dependent pyruvate dehydrogenase (RM00209) in the iCEL1314 model
# -> redefine the NADP+ variant of the model as this mitochondrial reaction
pdh = model.reactions.get_by_id('PYRUVATE_DEHYDROGENASE_NADP+_RXN')
pdh.id = 'PDHm'
pdh.name = 'Pyruvate dehydrogenase'
pdh.build_reaction_from_string("coa_m + nad_m + pyr_m --> accoa_m + co2_m + nadh_m")
pdh.annotation.clear()
pdh.annotation['kegg.reaction'] = 'R00209'

In [152]:
# in BiGG the typical eukaryotic pyruvate mitochondrial transport is via proton symport.
# without the proton symport the model uses weird ways to equilibrate the proton imbalance.
pyr_transport = model.reactions.TCM1280
pyr_transport.id = 'PYRt2m'
pyr_transport.name = "Pyruvate mitochondrial transport via proton symport"
pyr_transport.build_reaction_from_string("pyr_c + h_c <=> pyr_m + h_m")
pyr_transport.annotation.clear()

In [153]:
# remove obsolete metabolites of obsolete compartments and associated reactions
obsolete_compartments = ('T', 'CCO__45__IN', 'CCO__45__OUT')
obsolete = [m for m in model.metabolites if m.compartment in obsolete_compartments]
model.remove_metabolites(obsolete, destructive=True)

In [154]:
# reactions without metabolites
for r in model.reactions:
    if len(r.metabolites) == 0:
        print(r)

RXN_14812_FRUCTOSE_6P//FRUCTOSE_6P.25.:  <=> 


In [155]:
model.remove_reactions(['RXN_14812_FRUCTOSE_6P//FRUCTOSE_6P.25.'])

In [156]:
# Annotate model reactions with the iCEL1314 reaction(s) that involve exactly the same
# metabolites (protons are ignored, stoichiometry is not checked). Reactions without a
# unique match are printed together with their candidates.
# Fixed: the loop used to stop at 'BUTYRYL_COA_DEHYDROGENASE_RXN_m' (leftover debugging break),
# which left all reactions after it without annotation.
for r in model.reactions:
    if r.boundary:
        continue
    if not all('bigg.metabolite' in m.annotation for m in r.metabolites):
        continue
    non_proton_mets = [m for m in r.metabolites if m.name != 'H+']
    if not non_proton_mets:
        continue # proton-only reaction: nothing to intersect (reduce() would fail on an empty sequence)
    if not all(m.id in icel.metabolites for m in non_proton_mets):
        continue
    # iCEL1314 reactions that contain all non-proton metabolites of r
    rxns = reduce(set.intersection, (set(icel.metabolites.get_by_id(m.id).reactions) for m in non_proton_mets))
    model_mets = {m.id for m in non_proton_mets}
    # keep only those iCEL1314 reactions that contain no further non-proton metabolites
    candidates = [ri.id for ri in rxns if model_mets == {mi.id for mi in ri.metabolites if mi.name != 'H+'}]
    if len(candidates) != 1:
        print(r, candidates)
    r.annotation['iCEL1314.reaction'] = candidates

RXN_14187: dcdp_c + h2o_c --> dcmp_c + h_c + pi_c []
GLYOXYLATE_REDUCTASE_NADP+_RXN: glx_c + h_c + nadph_c --> glyclt_c + nadp_c []
RXN_18356: 3sala_c + h2o_c + nad_c --> Lcyst_c + h_c + nadh_c []
DEOXYGUANOSINE_KINASE_RXN: atp_c + dgsn_c --> adp_c + dgmp_c + h_c []
NUCLEOSIDE_TRIPHOSPHATASE_RXN_ITP/WATER//IDP/Pi/PROTON.25.: h2o_c + itp_c --> h_c + idp_c + pi_c []
OXALODECARB_RXN: h_c + oaa_c --> co2_c + pyr_c []
ATPSYN_RXN[CCO_EXTRACELLULAR_CCO_CYTOSOL]_ATP/WATER/PROTON//ADP/Pi/PROTON.63.: adp_c + 4.0 h_e + pi_c --> atp_c + h2o_c + 3.0 h_c ['RCC0005', 'TCE1132']
RXN_7904_STEARIC_ACID/CO_A/ATP//STEAROYL_COA/AMP/PPI.44.: atp_c + coa_c + ocdca_c --> amp_c + ppi_c + stcoa_c []
DCTP_PYROPHOSPHATASE_RXN: dctp_c + h2o_c --> dcmp_c + h_c + ppi_c []
RXN_14217: dgtp_c + h2o_c --> dgdp_c + h_c + pi_c []
RXN_14214: datp_c + h2o_c --> dadp_c + h_c + pi_c []
GLYC3PDEHYDROGBIOSYN_RXN_GLYCEROL_3P/NADP//DIHYDROXY_ACETONE_PHOSPHATE/NADPH/PROTON.59.: dhap_c + h_c + nadph_c --> glyc3p_c + nadp_c []
2.7.4

In [157]:
# remove ATP synthase in outer membrane
model.remove_reactions(['ATPSYN_RXN[CCO_EXTRACELLULAR_CCO_CYTOSOL]_ATP/WATER/PROTON//ADP/Pi/PROTON.63.'])

In [158]:
# prevent unrealistic solutions that lead to a P/O ratio of 3
model.reactions.get_by_id('FORMYLTETRAHYDROFOLATE_DEHYDROGENASE_RXN').lower_bound = 0

Some metabolites like ubiquinone are more specific in LRU compared to iCEL1314 and therefore not mapped, for these an 'iCEL1314.metabolite' annotation could be introduced.

PDH is not a complex in the earthworm models because BioCyc models the three steps of the PDH complex separately as distinct reactions.

In [159]:
for r in model.reactions:
    if not r.boundary:
        mb = r.check_mass_balance()
        if len(mb):
            print(r.id, mb, r.build_reaction_string())

BIOMASS_EQUATION {'charge': 0.8254660530000137, 'C': -6.6968572335000545, 'H': -11.093942165000044, 'N': -2.1429917300000056, 'O': -5.013521882000248, 'P': -0.6358068730000141, 'S': -0.078938609} 1e-05 CPD1G_277_c + 1e-05 CPD_22020_c + 0.070647635 ala__L_c + 0.011312218 amet_c + 0.040727146 arg__L_c + 0.033155132 asn__L_c + 0.033155132 asp__L_c + 20.03057 atp_c + 0.011312218 coa_c + 0.026018102 ctp_c + 0.012571015 cys__L_c + 0.010855213 datp_c + 0.01120361 dctp_c + 0.01120361 dgtp_c + 0.010855213 dttp_c + 0.011312218 fad_c + 0.036169235 gln__L_c + 0.036169235 glu__L_c + 0.084174335 gly_c + 0.03500503 gtp_c + 20.0 h2o_c + 1e-05 hdca_c + 0.013085618 his__L_c + 0.039918486 ile__L_c + 0.061972897 leu__L_c + 0.047122926 lys__L_c + 0.021098722 met__L_c + 0.011312218 nad_c + 0.011312218 nadp_c + 1e-05 ocdca_c + 0.025509601 phe__L_c + 0.030435089 pro__L_c + 0.011312218 pydx5p_c + 0.011312218 ribflv_c + 0.029626427 ser__L_c + 0.011312218 so4_c + 0.011312218 thmpp_c + 0.034845974 thr__L_c + 0.00

In [160]:
# remove metabolites that do not take part in any reaction
# Fixed: the orphaned metabolites were passed to remove_reactions(), which cannot remove them.
orphaned_metabolites = [m for m in model.metabolites if len(m.reactions) == 0]
model.remove_metabolites(orphaned_metabolites)

In [161]:
len(model.reactions)

1819

In [162]:
# list irreversible reactions that convert exactly one metabolite into one other with opposite
# coefficients (isomerisations, transports)
for r in model.reactions:
    if len(r.metabolites) == 2:
        coeff = list(r.metabolites.values())
        # Fixed: 'and' binds tighter than 'or', so the irreversibility test needs parentheses;
        # without them every two-metabolite reaction with upper_bound == 0 was listed as well.
        if coeff[0] == -coeff[1] and (r.lower_bound == 0 or r.upper_bound == 0):
            print(r.id, r.build_reaction_string())

RXN2PN3_45 CPD_26676_c --> lgt__S_c
RXN_15124 2amac_c --> CPD_16015_c
RXN_12752 CPD0_2472_c --> CPD_653_c
D_PPENTOMUT_RXN 2dr1p_c --> 2dr5p_c
MYO_INOSITOL_1_PHOSPHATE_SYNTHASE_RXN D_glucopyranose_6_phosphate_c --> mi3p__D_c
RXN_14501 CPD_15373_c --> CPD_12601_c
UDP_N_ACETYLGLUCOSAMINE_4_EPIMERASE_RXN uacgam_c --> CPD_14795_c
ALDOSE1EPIM_RXN gal_bD_c --> gal_c
MYO_INOSITOL_1_PHOSPHATE_SYNTHASE_RXN_ALPHA_GLC_6_P//1_L_MYO_INOSITOL_1_P.36. g6p_A_c --> mi3p__D_c
PHOSMANMUT_RXN man6p_c --> man1p_c
RXN_13142 CPD_14133_c --> CPD0_2474_c
RXN_15121 CPD_15056_c --> CPD_16013_c
RXN66_579 GLC_6_P_c --> mi3p__D_c
MALEYLACETOACETATE_ISOMERASE_RXN 4mlacac_c --> 4fumacac_c
PPENTOMUT_RXN r1p_c --> RIBOSE_5P_c
FCLEPIM_RXN gdpddman_c --> gdpofuc_c
TCM0210 occoa_c --> occoa_m
TCM0170 4mop_c --> 4mop_m
TCM0177 CPD_466_m --> CPD_466_c
TCM0169 fe2_c --> fe2_m
TCM1283 crn_m --> crn_c
TCM1288 glyc3p_c --> glyc3p_m
TCM5058 2obut_m --> 2obut_c
TCM1073 2oxoadp_m --> 2oxoadp_c
TCM0141 gln__L_c --> gln__L_m
TCM5060 

In [163]:
# make some isomerases reversible (which they actually are in MetaCyc)
model.reactions.METHYLMALONYL_COA_MUT_RXN_m.lower_bound = -3000
model.reactions.METHYLMALONYL_COA_EPIM_RXN_m.lower_bound = -3000

In [164]:
# write the final model to <species>/metabolic_model/ under the species-specific file name
model_file_names = {'lumbricus_rubellus': "LRU1.sbml", 'lumbricus_terrestris': "LRT1.sbml"}
if species not in model_file_names:
    raise ValueError("Unsupported species.")
cobra.io.write_sbml_model(model, os.path.join(species, "metabolic_model", model_file_names[species]))