# Notebook C: Find GSM flux bounds by constraining the GSM with 13C-MFA
This Jupyter notebook uses Flux Variability Analysis (FVA) to find the bounds of each reaction in a Genome Scale Model (GSM) 
when 13C-MFA reaction fluxes are used as GSM boundaries.

### Objective
The primary objective of this analysis is to use the GSM to extrapolate the rates of reactions outside the scope of 13C-MFA. We explore the genome scale metabolic flux profiles when Y. lipolytica consumes glucose, glycerol, acetate, and oleic acid. 

### Important Functions
- `load_and_preprocess_data`: Loads genome-scale models and 13C-MFA data, and performs necessary preprocessing.
- `perform_CFBA`: Executes Constrained Flux Balance Analysis and prepares the data for comparison.
- `compare_flux_analysis`: Compares CFBA results with 13C-MFA data, highlighting key similarities and differences.
- `analyze_carbon_sources`: Analyzes the impact of different carbon sources on metabolic fluxes within the CFBA model.
- `integrate_strain_design`: Integrates StrainDesign predictions to enhance the understanding of the metabolic model.


### Load imports

In [1]:
import cobra
import straindesign as sd
import pandas as pd
import numpy as np
import sys


source_dir = '../src'
sys.path.append(source_dir)
from generate_gsm_constraints_from_MFA import generate_gsm_constraints_from_MFA
from add_gsm_bounds_from_mfa import add_gsm_bounds_from_mfa
from get_gsm_df_from_constraints import get_gsm_df_from_constraints

from get_min_max_flux_expression_from_ids import get_min_max_flux_expression_from_ids
from add_flux_column_to_13c_flux_df import add_flux_column_to_13c_flux_df
from add_fva_columns_to_13c_flux_df import add_fva_columns_to_13c_flux_df
from flux_prediction_scatterplot import flux_prediction_scatterplot
from make_rxn_constraint_string import make_rxn_constraint_string
from generate_flux_map import generate_flux_map

### Load the genome scale model

In [2]:
# read the genome scale model from its JSON file
gsm_json_path = '../genome_scale_models/iYLI647_corr_3.json'
model = cobra.io.json.load_json_model(gsm_json_path)

### Rename some reactions to remove parentheses
This is because parentheses in reaction ids cause problems with StrainDesign

In [3]:
# Parentheses in reaction ids cause problems with StrainDesign, so the '(e)'
# suffix of each exchange reaction below is replaced with '_e'.
exchange_ids_with_parentheses = [
    'EX_glc(e)',
    'EX_glyc(e)',
    'EX_ocdcea(e)',
    'EX_h2o(e)',
    'EX_h(e)',
    'EX_nh4(e)',
    'EX_o2(e)',
    'EX_pi(e)',
    'EX_so4(e)',
]
for old_id in exchange_ids_with_parentheses:
    model.reactions.get_by_id(old_id).id = old_id.replace('(e)', '_e')

# show one renamed reaction to confirm the new id resolves
model.reactions.get_by_id('EX_glc_e')

0,1
Reaction identifier,EX_glc_e
Name,D Glucose exchange
Memory address,0x2cd5ebd60
Stoichiometry,glc_D[e] <=>  D_Glucose <=>
GPR,YALI0D01111g or YALI0D18876g or YALI0D00132g or YALI0B01342g or YALI0E23287g or YALI0B00396g or...
Lower bound,-10.0
Upper bound,1000.0


### Load 13C-MFA data

In [4]:
# read the 13C-MFA flux table from the Excel workbook
mfa_workbook_path = '../data/13c_mfa/Full MFA data 01192024.xlsx'
central_rxn_df = pd.read_excel(mfa_workbook_path, sheet_name='Full MFA Data 01192024')

# MFA reactions with a non-null 'reaction_ids' entry are the ones mapped to the GSM
mapped_rxn_df = central_rxn_df[central_rxn_df['reaction_ids'].notna()]

print(f'There are {len(mapped_rxn_df)} reactions in the 13C MFA that are mapped to the GSM')

central_rxn_df.head()

There are 46 reactions in the 13C MFA that are mapped to the GSM


Unnamed: 0,Pathway,ID,Equation,reaction_ids,Location on map,Glucose MFA Flux,Glucose MFA LB,Glucose MFA UB,Glycerol MFA Flux,Glycerol MFA LB,Glycerol MFA UB,Oleic Acid MFA Flux,Oleic Acid MFA LB,Oleic Acid MFA UB
0,uptake,gluc uptake,Glucose + ATP -> G6P,reverse_EX_glc_e,"(-1180, 1175)",100.0,100.0,100.0,,,,,,
1,uptake,glyc uptake,Glycerol -> GLYC,reverse_GLYCt,"(-1376, 417)",,,,100.0,100.0,100.0,,,
2,uptake,R3.2,GLYC + ATP -> DHAP + 1.5*ATP,,,,,,100.0,100.0,100.0,,,
3,uptake,OA uptake,OA + ATP -> 9*ACCOAcyt + 8*NADH + 12*ATP,OCDCEAt,"(-55, 111)",,,,,,,100.0,100.0,100.0
4,glycolysis/gluconeogensis,R4 net,G6P <-> F6P,PGI,"(-1180, 960)",18.0175,9.792,23.5403,-41.3947,-41.5761,-36.5906,-305.8491,-319.0201,-256.517


## Glucose

### Generate MFA-based constraint string for glucose

In [5]:
# 13C-MFA reactions, identified by their 'Equation' string in central_rxn_df,
# whose glucose MFA bounds are used to constrain the GSM.
mfa_reactions_to_use_glucose = [    
    # glucose uptake and glycolysis
    'Glucose + ATP -> G6P',
    'G6P <-> F6P',
    'F6P + ATP <-> FBP',
    'FBP <-> DHAP + GAP',
    'DHAP <-> GAP',
    'GAP <-> G3P + ATP + NADH',
    'G3P <-> PEP',

    # pentose phosphate pathway
    'G6P -> PG6 + NADPH',
    'PG6 -> Ru5P + CO2 + NADPH',
    'Ru5P <-> R5P',
    'Ru5P <-> X5P',
    'X5P <-> GAP + TKC2',
    'E4P + TKC2 <-> F6P',
    'R5P + TKC2 <-> S7P',
    'GAP + TAC3 <-> F6P',
    'S7P <-> E4P + TAC3',

    # TCA cycle
    'PYRmit -> ACCOAmit + CO2 + NADH',
    'ACCOAmit + OAAmit -> CITmit',
    'CITmit <-> ICTmit',
    'ICTmit -> AKG + CO2 + NADH',
    'ICTmit -> AKG + CO2 + NADPH',
    'AKG -> 0.5*SUCmit + 0.5*SUCmit + CO2 + ATP + NADH',
    'SUCmit <-> FUMmit + 1.5*ATP',
    '0.5*FUMmit + 0.5*FUMmit <-> 0.5*MALmit + 0.5*MALmit',
    'MALmit <-> OAAmit + NADH',

    # glyoxylate shunt
    'ICTcyt -> GLYOXYcyt + SUCcyt',
    'ACCOAcyt + GLYOXYcyt -> MALcyt', # malate synthase causes problem

    # atp citrate lyase
    'CITcyt + ATP -> OAAcyt + ACCOAcyt',

    # CO2 transport (commented out, so not used as a glucose constraint)
    # 'CO2 -> CO2_EX',
]

# build one comma-separated constraint string from the glucose MFA bounds of the selected reactions
glucose_constraint_string = generate_gsm_constraints_from_MFA(mfa_reactions_to_use = mfa_reactions_to_use_glucose, central_rxn_df = central_rxn_df, substrate = 'Glucose')
# display the constraint string as the cell output
glucose_constraint_string

'-EX_glc_e >= 100.0, -EX_glc_e <= 100.0, PGI >= 9.792, PGI <= 23.5403, PFK - FBP >= 53.3756, PFK - FBP <= 59.0811, FBA >= 53.3756, FBA <= 59.0811, TPI >= 51.7849, TPI <= 57.7445, GAPD >= 125.282, GAPD <= 135.6607, ENO >= 120.3684, ENO <= 131.5478, G6PDH2 >= 55.2801, G6PDH2 <= 69.7078, GND >= 55.2801, GND <= 69.7078, -RPI >= 20.775, -RPI <= 25.492, RPE >= 34.5248, RPE <= 44.223, TKT1 + TKT2 >= 34.5248, TKT1 + TKT2 <= 44.223, TKT2 >= 16.0395, TKT2 <= 20.9632, TKT1 >= 18.4561, TKT1 <= 23.2611, TALA >= 18.4561, TALA <= 23.2611, TALA >= 18.4561, TALA <= 23.2611, PDHm >= 86.6598, PDHm <= 102.7414, CSm >= 81.9247, CSm <= 98.5614, ACONTm >= 13.2107, ACONTm <= 53.4877, ICDHxm >= 0.0, ICDHxm <= 18.0326, ICDHym >= 0.0, ICDHym <= 33.3765, -SUCOASm >= 0.3408, -SUCOASm <= 42.8441, SUCD2_u6m + SUCD1m >= 20.6873, SUCD2_u6m + SUCD1m <= 49.1634, FUMm >= 23.0833, FUMm <= 51.1684, MDHm >= 90.7308, MDHm <= 112.1369, ICL >= 5.3272, ICL <= 21.8437, MALS >= 5.3272, MALS <= 21.8437, ATPCitL >= 32.966, ATPCitL 

### Add GSM bounds from MFA for glucose

In [6]:
# append the glucose 'MFA-Constrained GSM LB' / 'MFA-Constrained GSM UB' columns to the MFA table
central_rxn_df = add_gsm_bounds_from_mfa(
    model=model,
    central_rxn_df=central_rxn_df,
    substrate='Glucose',
    constraints=glucose_constraint_string,
)
central_rxn_df

Unnamed: 0,Pathway,ID,Equation,reaction_ids,Location on map,Glucose MFA Flux,Glucose MFA LB,Glucose MFA UB,Glycerol MFA Flux,Glycerol MFA LB,Glycerol MFA UB,Oleic Acid MFA Flux,Oleic Acid MFA LB,Oleic Acid MFA UB,Glucose MFA-Constrained GSM LB,Glucose MFA-Constrained GSM UB
0,uptake,gluc uptake,Glucose + ATP -> G6P,reverse_EX_glc_e,"(-1180, 1175)",100.000000,100.0000,100.0000,,,,,,,100.0,100.0
1,uptake,glyc uptake,Glycerol -> GLYC,reverse_GLYCt,"(-1376, 417)",,,,1.000000e+02,100.0000,100.0000,,,,-6.2735,-0.0
2,uptake,R3.2,GLYC + ATP -> DHAP + 1.5*ATP,,,,,,1.000000e+02,100.0000,100.0000,,,,,
3,uptake,OA uptake,OA + ATP -> 9*ACCOAcyt + 8*NADH + 12*ATP,OCDCEAt,"(-55, 111)",,,,,,,1.000000e+02,100.0000,100.0000,0.0,-0.0
4,glycolysis/gluconeogensis,R4 net,G6P <-> F6P,PGI,"(-1180, 960)",18.017500,9.7920,23.5403,-4.139470e+01,-41.5761,-36.5906,-3.058491e+02,-319.0201,-256.5170,21.59084,23.5403
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67,biomass formation,R80 gluc 1207 avg AA 10%lip,0.355*ALA + 0.13*ARG + 0.1725*ASN + 0.1725*ASP...,biomass_glucose or biomass_oil,,12.075900,10.6005,12.7776,5.189700e+00,5.1208,5.1897,3.333290e+01,31.7890,33.3229,10.6005,12.498455
68,energy metabolism,R81,ATP -> ATP_maintenance,ATPM,,777.930800,616.6971,1065.5000,7.326892e+02,728.5648,746.4218,4.958800e+03,4913.0000,5235.4000,0.0,218.874647
69,energy metabolism,R82,NADH -> NADPH,,,0.000002,0.0000,20.6486,1.000000e-07,0.0000,9.5711,9.987400e-08,0.0000,104.9593,,
70,energy metabolism,R83,NADH -> 2.5*ATP,,"(-1670, -1155)",308.147900,264.7777,398.4924,2.042585e+02,202.6647,208.1858,1.520900e+03,1503.1000,1612.3000,,


### Generate GSM bounds from 13C-MFA for Glucose

In [7]:
# table of GSM reactions with their flux and bounds under the glucose MFA constraints
glucose_gsm_df_from_constraints = get_gsm_df_from_constraints(model=model, substrate='Glucose', constraints=glucose_constraint_string)

# write the table to disk without the row index
glucose_gsm_csv_path = '../results/glucose_gsm_fluxes.csv'
glucose_gsm_df_from_constraints.to_csv(glucose_gsm_csv_path, index=False)

glucose_gsm_df_from_constraints.head()

Unnamed: 0,reaction_id,reaction_name,full_reaction,Glucose MFA-Constrained GSM flux,Glucose MFA-Constrained GSM LB,Glucose MFA-Constrained GSM UB
0,13BGH,Endo 1 3 beta glucan glucohydrase,13BDglcn[c] + h2o[c] --> glc_D[c],0.0,0.0,109.437324
1,13BGHe,Exo 1 3 beta glucan glucohydrase,13BDglcn[e] + h2o[e] --> glc_D[e],0.0,0.0,-0.0
2,ASADi,aspartate semialdehyde dehydrogenase irreversible,4pasp[c] + h[c] + nadph[c] --> aspsa[c] + nadp...,0.0,3.575628,47.544022
3,2DDA7Ptm,2 Dehydro 3 deoxy D arabino heptonate7 phohsph...,2dda7p[c] <=> 2dda7p[m],0.0,-4.16546,-0.0
4,2DHPtm,2 Dehydropantoate mitochondrial transport,2dhp[c] <=> 2dhp[m],0.0,0.0,6.269987


## Glycerol

### Generate MFA-based constraint string for glycerol

In [8]:
# 13C-MFA reactions, identified by their 'Equation' string in central_rxn_df,
# whose glycerol MFA bounds are used to constrain the GSM. Entries that are
# commented out are not passed to the constraint generator.
mfa_reactions_to_use_glycerol = [    
    # glycerol uptake and glycolysis
    'Glycerol -> GLYC',
    # 'G6P <-> F6P',
    # 'F6P + ATP <-> FBP',
    # 'FBP <-> DHAP + GAP',
    'DHAP <-> GAP',
    # 'GAP <-> G3P + ATP + NADH',
    # 'G3P <-> PEP',

    # pentose phosphate pathway
    'G6P -> PG6 + NADPH',
    # 'PG6 -> Ru5P + CO2 + NADPH',
    # 'Ru5P <-> R5P',
    # 'Ru5P <-> X5P',
    # 'X5P <-> GAP + TKC2',
    # 'E4P + TKC2 <-> F6P',
    # 'R5P + TKC2 <-> S7P',
    # 'GAP + TAC3 <-> F6P',
    # 'S7P <-> E4P + TAC3',

    # TCA cycle
    # 'PYRmit -> ACCOAmit + CO2 + NADH',
    # 'ACCOAmit + OAAmit -> CITmit',
    # 'CITmit <-> ICTmit',
    # 'ICTmit -> AKG + CO2 + NADH',
    # 'ICTmit -> AKG + CO2 + NADPH',
    # 'AKG -> 0.5*SUCmit + 0.5*SUCmit + CO2 + ATP + NADH',
    # 'SUCmit <-> FUMmit + 1.5*ATP',
    # '0.5*FUMmit + 0.5*FUMmit <-> 0.5*MALmit + 0.5*MALmit',
    # 'MALmit <-> OAAmit + NADH',

    # glyoxylate shunt
    # 'ICTcyt -> GLYOXYcyt + SUCcyt',
    # 'ACCOAcyt + GLYOXYcyt -> MALcyt', # malate synthase causes problem

    # atp citrate lyase
    # 'CITcyt + ATP -> OAAcyt + ACCOAcyt',

    # CO2 transport
    # 'CO2 -> CO2_EX',
]

# build one comma-separated constraint string from the glycerol MFA bounds of the selected reactions
glycerol_constraint_string = generate_gsm_constraints_from_MFA(mfa_reactions_to_use = mfa_reactions_to_use_glycerol, central_rxn_df = central_rxn_df, substrate = 'Glycerol')
# display the constraint string as the cell output
glycerol_constraint_string

'-GLYCt >= 100.0, -GLYCt <= 100.0, TPI >= 78.6429, TPI <= 80.3046, G6PDH2 >= 27.7687, G6PDH2 <= 32.7664, biomass_glucose >= 5.1208, biomass_glucose <= 5.1897, biomass_oil = 0, biomass_C = 0, biomass_N = 0'

### Add GSM bounds from MFA for glycerol

In [9]:
# append the glycerol 'MFA-Constrained GSM LB' / 'MFA-Constrained GSM UB' columns to the MFA table
central_rxn_df = add_gsm_bounds_from_mfa(
    model=model,
    central_rxn_df=central_rxn_df,
    substrate='Glycerol',
    constraints=glycerol_constraint_string,
)

# show the first and the last 36 rows of the updated table
display(central_rxn_df.head(36), central_rxn_df.tail(36))

Unnamed: 0,Pathway,ID,Equation,reaction_ids,Location on map,Glucose MFA Flux,Glucose MFA LB,Glucose MFA UB,Glycerol MFA Flux,Glycerol MFA LB,Glycerol MFA UB,Oleic Acid MFA Flux,Oleic Acid MFA LB,Oleic Acid MFA UB,Glucose MFA-Constrained GSM LB,Glucose MFA-Constrained GSM UB,Glycerol MFA-Constrained GSM LB,Glycerol MFA-Constrained GSM UB
0,uptake,gluc uptake,Glucose + ATP -> G6P,reverse_EX_glc_e,"(-1180, 1175)",100.0,100.0,100.0,,,,,,,100.0,100.0,0.0,-0.0
1,uptake,glyc uptake,Glycerol -> GLYC,reverse_GLYCt,"(-1376, 417)",,,,100.0,100.0,100.0,,,,-6.2735,-0.0,100.0,100.0
2,uptake,R3.2,GLYC + ATP -> DHAP + 1.5*ATP,,,,,,100.0,100.0,100.0,,,,,,,
3,uptake,OA uptake,OA + ATP -> 9*ACCOAcyt + 8*NADH + 12*ATP,OCDCEAt,"(-55, 111)",,,,,,,100.0,100.0,100.0,0.0,-0.0,0.0,-0.0
4,glycolysis/gluconeogensis,R4 net,G6P <-> F6P,PGI,"(-1180, 960)",18.0175,9.792,23.5403,-41.3947,-41.5761,-36.5906,-305.8491,-319.0201,-256.517,21.59084,23.5403,-36.776517,-31.998287
5,glycolysis/gluconeogensis,R5 net,F6P + ATP <-> FBP,PFK or reverse_FBP,"(-1180, 700)",56.7218,53.3756,59.0811,-20.6531,-20.7135,-19.0518,-144.5891,-148.9667,-128.1481,53.3756,59.0811,-24.99009,-19.152749
6,glycolysis/gluconeogensis,R6 net,FBP <-> DHAP + GAP,FBA,"(-1184, 515)",56.7218,53.3756,59.0811,-20.6531,-20.7135,-19.0518,-144.5891,-148.9667,-128.1481,53.3756,59.0811,-24.99009,-19.152749
7,glycolysis/gluconeogensis,R7 net,DHAP <-> GAP,TPI,"(-984, 273)",55.2244,51.7849,57.7445,78.7033,78.6429,80.3046,-152.8556,-157.2308,-136.4145,51.7849,57.178615,79.605612,80.3046
8,glycolysis/gluconeogensis,R8 net,GAP <-> G3P + ATP + NADH,GAPD,"(-700, 250)",130.1644,125.282,135.6607,67.9337,67.8732,69.5346,-218.6947,-223.0502,-202.2536,125.282,135.6607,70.295388,71.14059
9,glycolysis/gluconeogensis,R9 net,G3P <-> PEP,ENO,"(-980, 15)",125.4818,120.3684,131.5478,66.3897,66.3727,67.9837,-224.0012,-224.3677,-206.2233,120.3684,131.5478,39.367417,43.451415


Unnamed: 0,Pathway,ID,Equation,reaction_ids,Location on map,Glucose MFA Flux,Glucose MFA LB,Glucose MFA UB,Glycerol MFA Flux,Glycerol MFA LB,Glycerol MFA UB,Oleic Acid MFA Flux,Oleic Acid MFA LB,Oleic Acid MFA UB,Glucose MFA-Constrained GSM LB,Glucose MFA-Constrained GSM UB,Glycerol MFA-Constrained GSM LB,Glycerol MFA-Constrained GSM UB
36,glyoxylate shunt,R43.2,ICTcyt -> GLYOXYcyt + SUCcyt,ICL,"(1589, 38)",16.0091,5.3272,21.8437,6.7755,5.9554,7.755,380.8702,354.773,421.8486,5.3272,21.8437,0.0,1.166857
37,glyoxylate shunt,R45,ACCOAcyt + GLYOXYcyt -> MALcyt,MALS,"(1078, 294)",16.0091,5.3272,21.8437,6.7755,5.9554,7.755,380.8702,354.773,421.8486,5.3272,21.8437,0.0,1.296507
38,glyoxylate shunt,R46 net,MALcyt <-> OAAcyt + NADH,MDH,"(-511, -814)",-57.4445,-65.0182,-45.4439,1.4664,0.458,3.1383,305.9919,235.409,406.0944,-267.316334,107.542895,-152.446822,151.399678
39,compartment transport,R18,PYRcyt -> PYRmit,PYRt2m,"(-234, -230)",110.5956,101.3061,118.4185,58.9536,58.1367,60.5901,74.2508,6.2159,153.1349,-128.020856,120.055308,16.72943,27.525659
40,compartment transport,R42 net,ACCOAcyt <-> ACCOAmit,CSNATifm or reverse_CSNATirm,"(494, -94)",,,,-19.0021,-19.9564,-17.9197,352.2789,315.9699,383.4,0.0,31.479762,0.0,7.779044
41,compartment transport,cit transp,CITmit -> CITcyt,CITtam or CITtbm or CITtcm,"(1129, -302)",63.1827,38.3595,75.9248,7.0052,6.1852,8.1005,380.8702,354.773,421.8486,-85.3507,-28.437,-1009.715436,995.683201
42,compartment transport,R44,SUCcyt -> SUCmit,SUCCtm,"(1687, -677)",16.0525,5.3659,21.8883,6.7941,5.9741,7.7735,380.9902,354.8905,421.9682,0.0,48.8226,0.0,1.166857
43,compartment transport,R47,MALcyt -> MALmit,MALtm or reverse_CITtam or reverse_DICtm,"(-98, -843)",73.4536,56.0663,84.4458,5.309,4.2109,6.4196,74.8783,15.046,154.6591,41.5674,302.461973,-132.836744,171.009756
44,amino acid biosynthesis,R48.2,G3P + GLU -> SER + AKG + NADH,,,4.6826,4.0149,5.1929,1.544,1.4715,1.6458,5.3065,0.0,7.5053,,,,
45,amino acid biosynthesis,R50 net,SER <-> GLY + MTHF,,,1.9558,1.4824,2.363,0.3722,0.2996,0.474,1.1133,-4.192,3.3059,,,,


### Generate GSM bounds from 13C-MFA for Glycerol

In [10]:
# table of GSM reactions with their flux and bounds under the glycerol MFA constraints
glycerol_gsm_df_from_constraints = get_gsm_df_from_constraints(model=model, substrate='Glycerol', constraints=glycerol_constraint_string)

# write the table to disk without the row index
glycerol_gsm_csv_path = '../results/glycerol_gsm_fluxes.csv'
glycerol_gsm_df_from_constraints.to_csv(glycerol_gsm_csv_path, index=False)

glycerol_gsm_df_from_constraints.head()

Unnamed: 0,reaction_id,reaction_name,full_reaction,Glycerol MFA-Constrained GSM flux,Glycerol MFA-Constrained GSM LB,Glycerol MFA-Constrained GSM UB
0,13BGH,Endo 1 3 beta glucan glucohydrase,13BDglcn[c] + h2o[c] --> glc_D[c],0.0,0.0,2.917142
1,13BGHe,Exo 1 3 beta glucan glucohydrase,13BDglcn[e] + h2o[e] --> glc_D[e],0.0,0.0,-0.0
2,ASADi,aspartate semialdehyde dehydrogenase irreversible,4pasp[c] + h[c] + nadph[c] --> aspsa[c] + nadp...,4.503118,2.622592,5.614604
3,2DDA7Ptm,2 Dehydro 3 deoxy D arabino heptonate7 phohsph...,2dda7p[c] <=> 2dda7p[m],0.0,-1.160637,-0.0
4,2DHPtm,2 Dehydropantoate mitochondrial transport,2dhp[c] <=> 2dhp[m],0.0,0.0,0.105479


## Oleic acid

### Generate MFA-based constraint string for oleic acid

In [11]:
# 13C-MFA reactions, identified by their 'Equation' string in central_rxn_df,
# whose oleic acid MFA bounds are used to constrain the GSM. Entries marked
# 'causes problem' are commented out and therefore not used as constraints.
mfa_reactions_to_use_oleic_acid = [    
    # uptake reaction
    'OA + ATP -> 9*ACCOAcyt + 8*NADH + 12*ATP',

    # glycolysis
    'G6P <-> F6P',
    'F6P + ATP <-> FBP',
    'FBP <-> DHAP + GAP',
    'DHAP <-> GAP',
    # 'GAP <-> G3P + ATP + NADH', # causes problem
    # 'G3P <-> PEP', # causes problem

    # pentose phosphate pathway
    'G6P -> PG6 + NADPH',
    'PG6 -> Ru5P + CO2 + NADPH',
    'Ru5P <-> R5P',
    'Ru5P <-> X5P',
    'X5P <-> GAP + TKC2',
    'E4P + TKC2 <-> F6P',
    'R5P + TKC2 <-> S7P',
    'GAP + TAC3 <-> F6P',
    'S7P <-> E4P + TAC3',

    # TCA cycle
    # 'PYRmit -> ACCOAmit + CO2 + NADH', # causes problem
    'ACCOAmit + OAAmit -> CITmit',
    'CITmit <-> ICTmit',
    'ICTmit -> AKG + CO2 + NADH',
    'ICTmit -> AKG + CO2 + NADPH',
    'AKG -> 0.5*SUCmit + 0.5*SUCmit + CO2 + ATP + NADH',
    'SUCmit <-> FUMmit + 1.5*ATP',
    '0.5*FUMmit + 0.5*FUMmit <-> 0.5*MALmit + 0.5*MALmit',
    'MALmit <-> OAAmit + NADH',

    # glyoxylate shunt
    'ICTcyt -> GLYOXYcyt + SUCcyt',
    'ACCOAcyt + GLYOXYcyt -> MALcyt', 

    # atp citrate lyase
    # 'CITcyt + ATP -> OAAcyt + ACCOAcyt', # causes problem

    # CO2 transport
    # 'CO2 -> CO2_EX', # causes problem
]

# build one comma-separated constraint string from the oleic acid MFA bounds of the selected reactions
oleic_acid_constraint_string = generate_gsm_constraints_from_MFA(mfa_reactions_to_use = mfa_reactions_to_use_oleic_acid, central_rxn_df = central_rxn_df, substrate = 'Oleic Acid')
# display the constraint string as the cell output
oleic_acid_constraint_string

'OCDCEAt >= 10.0, OCDCEAt <= 10.0, PGI >= -31.90201, PGI <= -25.651699999999998, PFK - FBP >= -14.89667, PFK - FBP <= -12.81481, FBA >= -14.89667, FBA <= -12.81481, TPI >= -15.72308, TPI <= -13.64145, G6PDH2 >= 19.98452, G6PDH2 <= 26.237119999999997, GND >= 19.98452, GND <= 26.237119999999997, -RPI >= 7.147920000000001, -RPI <= 9.23178, RPE >= 12.8368, RPE <= 17.00534, TKT1 + TKT2 >= 12.8368, TKT1 + TKT2 <= 17.00534, TKT2 >= 6.22361, TKT2 <= 8.3074, TKT1 >= 6.61369, TKT1 <= 8.69794, TALA >= 6.61369, TALA <= 8.69794, TALA >= 6.61369, TALA <= 8.69794, CSm >= 41.925599999999996, CSm <= 44.216480000000004, ACONTm >= 1.89638, ACONTm <= 7.52743, ICDHxm >= 0.0, ICDHxm <= 7.527089999999999, ICDHym >= 0.0, ICDHym <= 6.888599999999999, -SUCOASm >= 0.0, -SUCOASm <= 5.58544, SUCD2_u6m + SUCD1m >= 39.9543, SUCD2_u6m + SUCD1m <= 42.33663, FUMm >= 40.35453, FUMm <= 42.71788, MDHm >= 42.46343, MDHm <= 47.5596, ICL >= 35.4773, ICL <= 42.18486, MALS >= 35.4773, MALS <= 42.18486, biomass_oil >= 3.1789, b

### Add GSM bounds from MFA for oleic acid

In [12]:
# append the oleic acid 'MFA-Constrained GSM LB' / 'MFA-Constrained GSM UB' columns to the MFA table
central_rxn_df = add_gsm_bounds_from_mfa(
    model=model,
    central_rxn_df=central_rxn_df,
    substrate='Oleic Acid',
    constraints=oleic_acid_constraint_string,
)
central_rxn_df

Unnamed: 0,Pathway,ID,Equation,reaction_ids,Location on map,Glucose MFA Flux,Glucose MFA LB,Glucose MFA UB,Glycerol MFA Flux,Glycerol MFA LB,Glycerol MFA UB,Oleic Acid MFA Flux,Oleic Acid MFA LB,Oleic Acid MFA UB,Glucose MFA-Constrained GSM LB,Glucose MFA-Constrained GSM UB,Glycerol MFA-Constrained GSM LB,Glycerol MFA-Constrained GSM UB,Oleic Acid MFA-Constrained GSM LB,Oleic Acid MFA-Constrained GSM UB
0,uptake,gluc uptake,Glucose + ATP -> G6P,reverse_EX_glc_e,"(-1180, 1175)",100.000000,100.0000,100.0000,,,,,,,100.0,100.0,0.0,-0.0,0.0,-0.0
1,uptake,glyc uptake,Glycerol -> GLYC,reverse_GLYCt,"(-1376, 417)",,,,1.000000e+02,100.0000,100.0000,,,,-6.2735,-0.0,100.0,100.0,-0.063537,-0.0
2,uptake,R3.2,GLYC + ATP -> DHAP + 1.5*ATP,,,,,,1.000000e+02,100.0000,100.0000,,,,,,,,,
3,uptake,OA uptake,OA + ATP -> 9*ACCOAcyt + 8*NADH + 12*ATP,OCDCEAt,"(-55, 111)",,,,,,,1.000000e+02,100.0000,100.0000,0.0,-0.0,0.0,-0.0,100.0,100.0
4,glycolysis/gluconeogensis,R4 net,G6P <-> F6P,PGI,"(-1180, 960)",18.017500,9.7920,23.5403,-4.139470e+01,-41.5761,-36.5906,-3.058491e+02,-319.0201,-256.5170,21.59084,23.5403,-36.776517,-31.998287,-257.125896,-256.517
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67,biomass formation,R80 gluc 1207 avg AA 10%lip,0.355*ALA + 0.13*ARG + 0.1725*ASN + 0.1725*ASP...,biomass_glucose or biomass_oil,,12.075900,10.6005,12.7776,5.189700e+00,5.1208,5.1897,3.333290e+01,31.7890,33.3229,10.6005,12.498455,5.1208,5.149818,31.789,31.795453
68,energy metabolism,R81,ATP -> ATP_maintenance,ATPM,,777.930800,616.6971,1065.5000,7.326892e+02,728.5648,746.4218,4.958800e+03,4913.0000,5235.4000,0.0,218.874647,0.0,5.834283,0.0,0.913344
69,energy metabolism,R82,NADH -> NADPH,,,0.000002,0.0000,20.6486,1.000000e-07,0.0000,9.5711,9.987400e-08,0.0000,104.9593,,,,,,
70,energy metabolism,R83,NADH -> 2.5*ATP,,"(-1670, -1155)",308.147900,264.7777,398.4924,2.042585e+02,202.6647,208.1858,1.520900e+03,1503.1000,1612.3000,,,,,,


### Generate GSM bounds from 13C-MFA for oleic acid

In [13]:
# table of GSM reactions with their flux and bounds under the oleic acid MFA constraints
oleic_acid_gsm_df_from_constraints = get_gsm_df_from_constraints(model=model, substrate='Oleic Acid', constraints=oleic_acid_constraint_string)

# write the table to disk without the row index
oleic_acid_gsm_csv_path = '../results/oleic_acid_gsm_fluxes.csv'
oleic_acid_gsm_df_from_constraints.to_csv(oleic_acid_gsm_csv_path, index=False)

oleic_acid_gsm_df_from_constraints.head()

Unnamed: 0,reaction_id,reaction_name,full_reaction,Oleic Acid MFA-Constrained GSM flux,Oleic Acid MFA-Constrained GSM LB,Oleic Acid MFA-Constrained GSM UB
0,13BGH,Endo 1 3 beta glucan glucohydrase,13BDglcn[c] + h2o[c] --> glc_D[c],0.0,0.0,0.456672
1,13BGHe,Exo 1 3 beta glucan glucohydrase,13BDglcn[e] + h2o[e] --> glc_D[e],0.0,0.0,-0.0
2,ASADi,aspartate semialdehyde dehydrogenase irreversible,4pasp[c] + h[c] + nadph[c] --> aspsa[c] + nadp...,6.251885,6.250616,6.598557
3,2DDA7Ptm,2 Dehydro 3 deoxy D arabino heptonate7 phohsph...,2dda7p[c] <=> 2dda7p[m],0.0,-2.342634,-0.0
4,2DHPtm,2 Dehydropantoate mitochondrial transport,2dhp[c] <=> 2dhp[m],0.0,0.0,0.027469


## Save the GSM bounds that were constrained by the 13C-MFA

In [14]:
# write the MFA table, including the MFA-constrained GSM bound columns, to disk
central_bounds_csv_path = '../results/central_fluxes/mfa_constrained_gsm_central_bounds_01212024.csv'
central_rxn_df.to_csv(central_bounds_csv_path)

# Glucose

### Make constraint string from 13C-MFA data for glucose

In [None]:
# 13C-MFA reactions, identified by their 'Equation' string in central_rxn_df,
# whose glucose MFA bounds are turned into GSM constraints
mfa_reactions_to_use = [    
    # glycolysis
    'Glucose + ATP -> G6P',
    'G6P <-> F6P',
    'F6P + ATP <-> FBP',
    'FBP <-> DHAP + GAP',
    'DHAP <-> GAP',
    'GAP <-> G3P + ATP + NADH',
    'G3P <-> PEP',

    # pentose phosphate pathway
    'G6P -> PG6 + NADPH',
    'PG6 -> Ru5P + CO2 + NADPH',
    'Ru5P <-> R5P',
    'Ru5P <-> X5P',
    'X5P <-> GAP + TKC2',
    'E4P + TKC2 <-> F6P',
    'R5P + TKC2 <-> S7P',
    'GAP + TAC3 <-> F6P',
    'S7P <-> E4P + TAC3',

    # TCA cycle
    'PYRmit -> ACCOAmit + CO2 + NADH',
    'ACCOAmit + OAAmit -> CITmit',
    'CITmit <-> ICTmit',
    'ICTmit -> AKG + CO2 + NADH',
    'ICTmit -> AKG + CO2 + NADPH',
    'AKG -> 0.5*SUCmit + 0.5*SUCmit + CO2 + ATP + NADH',
    'SUCmit <-> FUMmit + 1.5*ATP',
    '0.5*FUMmit + 0.5*FUMmit <-> 0.5*MALmit + 0.5*MALmit',
    'MALmit <-> OAAmit + NADH',

    # glyoxylate shunt
    'ICTcyt -> GLYOXYcyt + SUCcyt',
    'ACCOAcyt + GLYOXYcyt -> MALcyt', # malate synthase causes problem

    # atp citrate lyase
    # (the trailing comma matters: without it Python joins this string with the
    # next one into a single equation that matches no row of the table)
    'CITcyt + ATP -> OAAcyt + ACCOAcyt',

    # CO2 transport
    'CO2 -> CO2_EX',
]

constraint_strings = []

for mfa_reaction in mfa_reactions_to_use:
    # get the reaction row
    reaction_row = central_rxn_df[central_rxn_df['Equation'] == mfa_reaction]

    # fail with a readable message instead of an IndexError on `.values[0]`
    if reaction_row.empty:
        raise ValueError(f'No 13C-MFA reaction found with equation: {mfa_reaction!r}')

    # get reaction IDs
    reaction_ids = reaction_row['reaction_ids'].values[0]

    # get the glucose lower bound
    glucose_lb = reaction_row['Glucose MFA LB'].values[0]

    # get the glucose upper bound
    glucose_ub = reaction_row['Glucose MFA UB'].values[0]

    # generate a constraint string
    constraint_string = make_rxn_constraint_string(reaction_ids, glucose_lb, glucose_ub)

    # add the constraint string to the list
    constraint_strings.append(constraint_string)

# add biomass constraint (first row whose pathway is 'biomass formation')
biomass_glucose_row = central_rxn_df[central_rxn_df['Pathway'] == 'biomass formation']
biomass_glucose_lb = biomass_glucose_row['Glucose MFA LB'].values[0]
biomass_glucose_ub = biomass_glucose_row['Glucose MFA UB'].values[0]

constraint_strings.append(f'biomass_glucose <= {biomass_glucose_ub}')
constraint_strings.append(f'biomass_glucose >= {biomass_glucose_lb}')

# block other biomass reactions
constraint_strings.append('biomass_oil = 0')
constraint_strings.append('biomass_C = 0')
constraint_strings.append('biomass_N = 0')

# join the individual constraints into one comma-separated string
glucose_constraint_string = ', '.join(constraint_strings)

glucose_constraint_string

### Define a function that adds MFA-constrained GSM bounds to the 13C-MFA dataframe

In [None]:
# Module-level values for the glucose case. Note that the function defined
# next in this cell takes its own `substrate` and `constraints` parameters,
# which shadow these names inside the function body.
substrate = 'Glucose'
constraints = glucose_constraint_string

def add_gsm_bounds_from_mfa(model=None, central_rxn_df=None, substrate=None, constraints=None):
    """
    Return a copy of `central_rxn_df` with two extra columns holding the GSM
    lower and upper flux bound of every mapped 13C-MFA reaction.

    Parameters
    ----------
    model : GSM whose `medium` is overwritten with a minimal medium containing
        `substrate` as the only carbon source (this change is left on the model).
    central_rxn_df : 13C-MFA table with a 'reaction_ids' column; it is not modified.
    substrate : 'Glucose', 'Glycerol' or 'Oleic Acid'; any other value leaves all
        three carbon uptakes at 0.
    constraints : constraint string handed to `get_min_max_flux_expression_from_ids`.

    Returns
    -------
    DataFrame with '<substrate> MFA-Constrained GSM LB' and
    '<substrate> MFA-Constrained GSM UB' columns added. Rows without a
    'reaction_ids' value get an empty string in both columns.
    """
    central_rxn_df = central_rxn_df.copy()

    # minimal medium: one carbon source, everything else effectively unlimited
    medium_settings = {
        'EX_glc_e': 100 if substrate == 'Glucose' else 0,
        'EX_glyc_e': 100 if substrate == 'Glycerol' else 0,
        'EX_ocdcea_e': 10 if substrate == 'Oleic Acid' else 0,
        'EX_h2o_e': 10000,
        'EX_h_e': 10000,
        'EX_nh4_e': 10000,
        'EX_o2_e': 10000,
        'EX_pi_e': 10000,
        'EX_so4_e': 10000,
        'trehalose_c_tp': 0,
    }
    medium = model.medium
    for exchange_id, uptake_limit in medium_settings.items():
        medium[exchange_id] = uptake_limit
    model.medium = medium

    # Oleic acid bounds are multiplied by 10 (its uptake is set to 10 above,
    # versus 100 for the other substrates).
    is_oleic_acid = substrate == 'Oleic Acid'

    gsm_lbs, gsm_ubs = [], []
    for _, mfa_row in central_rxn_df.iterrows():
        mapped_ids = mfa_row['reaction_ids']

        if pd.isna(mapped_ids):
            # reaction is not mapped to the GSM: keep the row but leave it blank
            lower, upper = '', ''
        else:
            lower, upper = get_min_max_flux_expression_from_ids(model, mapped_ids, constraints)
            if is_oleic_acid:
                lower, upper = 10 * lower, 10 * upper

        gsm_lbs.append(lower)
        gsm_ubs.append(upper)

    # attach the collected bounds as new columns
    central_rxn_df[f'{substrate} MFA-Constrained GSM LB'] = gsm_lbs
    central_rxn_df[f'{substrate} MFA-Constrained GSM UB'] = gsm_ubs

    return central_rxn_df

# display(central_rxn_df.head(36))
# display(central_rxn_df.tail(36))

In [None]:
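# define a variant of add_gsm_bounds_from_mfa that builds its constraints from an uptake reaction and a biomass cutoff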

import pandas as pd
from get_min_max_flux_expression_from_ids import get_min_max_flux_expression_from_ids
from add_mfa_bound_feasibility_column import add_mfa_bound_feasibility_column

def add_gsm_bounds_from_mfa(model=None, central_rxn_df=None, substrate=None, uptake_reaction=None, biomass_cutoff=None, constraints=None):
    """
    Return a copy of `central_rxn_df` with two extra columns holding the GSM
    lower and upper flux bound of every mapped 13C-MFA reaction.

    Parameters
    ----------
    model : GSM whose `medium` is overwritten with a minimal medium containing
        `substrate` as the only carbon source (this change is left on the model).
    central_rxn_df : 13C-MFA table with 'Pathway' and 'reaction_ids' columns;
        it is not modified.
    substrate : 'Glucose', 'Glycerol' or 'Oleic Acid'.
    uptake_reaction : GSM reaction id used for the default uptake constraint
        (only needed when `constraints` is not given).
    biomass_cutoff : minimum biomass flux used for the default biomass
        constraint (only needed when `constraints` is not given).
    constraints : optional ready-made constraint string. When given it is used
        as-is, so calls written as
        `add_gsm_bounds_from_mfa(model=..., central_rxn_df=..., substrate=..., constraints=...)`
        work with this definition too. When omitted, the constraints are built
        from `uptake_reaction` and `biomass_cutoff`.

    Returns
    -------
    DataFrame with '<substrate> GSM LB' and '<substrate> GSM UB' columns added.
    Rows without a 'reaction_ids' value get an empty string in both columns.

    Raises
    ------
    ValueError
        If `substrate` is unknown, or if neither `constraints` nor both
        `uptake_reaction` and `biomass_cutoff` are provided.
    """
    # validate the inputs before touching the model or copying the dataframe
    if substrate == 'Glucose' or substrate == 'Glycerol':
        biomass_reaction_id = 'biomass_glucose'
    elif substrate == 'Oleic Acid':
        biomass_reaction_id = 'biomass_oil'
    else:
        raise ValueError(f'Unknown substrate: {substrate}')

    if constraints is None:
        if uptake_reaction is None or biomass_cutoff is None:
            raise ValueError('Provide either `constraints` or both `uptake_reaction` and `biomass_cutoff`')

        # define parts of the constraints string
        if substrate == 'Oleic Acid':
            uptake_string = f'-{uptake_reaction} >= 10.0, -{uptake_reaction} <= 10.0'
            biomass_string = f'biomass_glucose = 0, biomass_oil >= {biomass_cutoff}, biomass_C = 0, biomass_N = 0'
        else:
            uptake_string = f'-{uptake_reaction} >= 100.0, -{uptake_reaction} <= 100.0'
            biomass_string = f'biomass_glucose >= {biomass_cutoff}, biomass_oil = 0, biomass_C = 0, biomass_N = 0'

        # define the constraints string
        constraints = f'{uptake_string}, {biomass_string}'

    central_rxn_df = central_rxn_df.copy()

    # update the media to minimal medium with the specified sole carbon source
    medium = model.medium
    medium['EX_glc_e'] = 100 if substrate == 'Glucose' else 0
    medium['EX_glyc_e'] = 100 if substrate == 'Glycerol' else 0
    medium['EX_ocdcea_e'] = 10 if substrate == 'Oleic Acid' else 0
    medium['EX_h2o_e'] = 10000
    medium['EX_h_e'] = 10000
    medium['EX_nh4_e'] = 10000
    medium['EX_o2_e'] = 10000
    medium['EX_pi_e'] = 10000
    medium['EX_so4_e'] = 10000
    medium['trehalose_c_tp'] = 0
    model.medium = medium

    # ensure the proper biomass reaction is used in the GSM
    central_rxn_df.loc[central_rxn_df['Pathway'] == 'biomass formation', 'reaction_ids'] = biomass_reaction_id

    # Oleic acid bounds are multiplied by 10 (its uptake is set to 10 above,
    # versus 100 for the other substrates).
    # NOTE(review): presumably this rescales to the MFA basis of 100 uptake units - confirm.
    scale_by_ten = substrate == 'Oleic Acid'

    gsm_lbs = []
    gsm_ubs = []

    # loop over MFA reactions and get the GSM bounds
    for _, row in central_rxn_df.iterrows():

        # check if the reaction is mapped to the GSM
        reaction_ids = row['reaction_ids']
        if pd.isna(reaction_ids):
            gsm_lbs.append('')
            gsm_ubs.append('')
            continue

        # get the reaction bounds
        rxn_lb, rxn_ub = get_min_max_flux_expression_from_ids(model, reaction_ids, constraints)

        gsm_lbs.append(10 * rxn_lb if scale_by_ten else rxn_lb)
        gsm_ubs.append(10 * rxn_ub if scale_by_ten else rxn_ub)

    # add the GSM bounds to the dataframe
    central_rxn_df[f'{substrate} GSM LB'] = gsm_lbs
    central_rxn_df[f'{substrate} GSM UB'] = gsm_ubs

    # central_rxn_df = add_mfa_bound_feasibility_column(central_rxn_df, substrate)

    return central_rxn_df

In [None]:
# add the glucose GSM bound columns to the MFA table
central_rxn_df = add_gsm_bounds_from_mfa(
    model=model,
    central_rxn_df=central_rxn_df,
    substrate='Glucose',
    constraints=glucose_constraint_string,
)

central_rxn_df

### Run Glucose pFBA with MFA constraints

In [None]:
# minimal medium with glucose as the sole carbon source
glucose_medium_settings = {
    'EX_glc_e': 100,
    'EX_glyc_e': 0,
    'EX_ocdcea_e': 0,
    'EX_h2o_e': 10000,
    'EX_h_e': 10000,
    'EX_nh4_e': 10000,
    'EX_o2_e': 10000,
    'EX_pi_e': 10000,
    'EX_so4_e': 10000,
    'trehalose_c_tp': 0,
}
medium = model.medium
for exchange_id, uptake_limit in glucose_medium_settings.items():
    medium[exchange_id] = uptake_limit
model.medium = medium

# print the medium composition as "<limit> <exchange id>" lines
for exchange_id, uptake_limit in model.medium.items():
    print(uptake_limit, exchange_id)

# maximize glucose biomass under the MFA constraints (pfba=1)
glucose_fba_solution = sd.fba(
    model,
    constraints=glucose_constraint_string,
    obj='biomass_glucose',
    obj_sense='maximize',
    pfba=1,
)

print()

max_glucose_biomass_flux = glucose_fba_solution['biomass_glucose']
PGI_flux = glucose_fba_solution['PGI']
print(f'Maximum biomass flux: {max_glucose_biomass_flux}.')
print(f'PGI flux: {PGI_flux}.')

# a reaction counts as active when its absolute flux exceeds 0.1
active_reaction_count = sum(abs(flux_value) > 0.1 for flux_value in glucose_fba_solution.fluxes.values())
print(f'The number of active reactions in pFBA: {active_reaction_count}')

# one record per reaction: id, name, equation, flux, and |flux| as a sort key
reactions = [
    {
        'reaction_id': rxn_id,
        'reaction_name': model.reactions.get_by_id(rxn_id).name,
        'full_reaction': model.reactions.get_by_id(rxn_id).reaction,
        'glucose_GSM_flux': flux_value,
        'absolute_flux': abs(flux_value),
    }
    for rxn_id, flux_value in glucose_fba_solution.fluxes.items()
]

# largest absolute flux first; the helper sort column is dropped afterwards
glucose_gsm_df = (
    pd.DataFrame(reactions)
    .sort_values(by=['absolute_flux'], ascending=False)
    .drop(columns=['absolute_flux'])
)

glucose_gsm_df.head()

### Run Glucose FVA with MFA constraints

In [None]:
# run FVA on the GSM subject to the glucose MFA constraint string
glucose_fva_solution = sd.fva(model, constraints=glucose_constraint_string)

def is_active(row):
    """Return True when either FVA bound of `row` is larger than 0.1 in magnitude."""
    return any(abs(bound) > 0.1 for bound in (row.maximum, row.minimum))

active_fva_count = sum(is_active(fva_row) for _, fva_row in glucose_fva_solution.iterrows())
print(f'The number of active reactions in FVA: {active_fva_count}')

# look up the FVA minimum / maximum of each reaction, in the row order of glucose_gsm_df
ordered_reaction_ids = list(glucose_gsm_df['reaction_id'])
fva_lower_bounds = [glucose_fva_solution.loc[rxn_id, 'minimum'] for rxn_id in ordered_reaction_ids]
fva_upper_bounds = [glucose_fva_solution.loc[rxn_id, 'maximum'] for rxn_id in ordered_reaction_ids]

# attach the bounds next to the pFBA flux
glucose_gsm_df['glucose_GSM_LB'] = fva_lower_bounds
glucose_gsm_df['glucose_GSM_UB'] = fva_upper_bounds

# save the dataframe to a csv file
# glucose_gsm_df.to_csv('../results/gsm_fluxes/glucose_gsm_13C_fba.csv', index=False)

# show the dataframe with the new bound columns
glucose_gsm_df


In [None]:
# keep only the rows whose reaction_id contains 'biomass'
has_biomass_in_id = glucose_gsm_df['reaction_id'].str.contains('biomass')
biomass_rows = glucose_gsm_df[has_biomass_in_id]
biomass_rows

In [None]:
# inspect the glucose exchange (EX_glc_e) together with the PFK and FBP reactions
reactions_of_interest = ['EX_glc_e', 'PFK', 'FBP']
is_reaction_of_interest = glucose_gsm_df['reaction_id'].isin(reactions_of_interest)
reaction_df = glucose_gsm_df[is_reaction_of_interest]

reaction_df

### Add glucose GSM data to dataframe

In [None]:
# Add the glucose GSM flux column and the FVA lower/upper bound columns from
# glucose_gsm_df to the central-reaction dataframe.
# NOTE(review): central_rxn_df is rebound to each helper's return value, so
# re-running this cell feeds the already-extended dataframe back in — confirm
# the helpers tolerate that.
central_rxn_df = add_flux_column_to_13c_flux_df(central_rxn_df, glucose_gsm_df, 'glucose_GSM_flux')
central_rxn_df = add_fva_columns_to_13c_flux_df(central_rxn_df, glucose_gsm_df, 'glucose_GSM_LB', 'glucose_GSM_UB')

# show the first six rows
central_rxn_df.head(6)

### Plot glucose MFA bounds

In [None]:
# Draw the flux map from the 'Glucose MFA LB' / 'Glucose MFA UB' columns of central_rxn_df.
# NOTE(review): the title carries a hard-coded date and file_name points at a
# generic test path — confirm both are still wanted.
glucose_mfa_bounds_map = generate_flux_map(
    flux_df=central_rxn_df, 
    flux_column=['Glucose MFA LB', 'Glucose MFA UB'],
    title_string='Glucose 13C-MFA Bounds 2023-12-21',
    file_name='../figures/test_flux_map.png',
)

### Plot glucose GSM bounds constrained by 13C-MFA

In [None]:
# Draw the flux map from the GSM FVA bounds ('glucose_GSM_LB' / 'glucose_GSM_UB').
# The result gets its own name so the 13C-MFA map held in glucose_mfa_bounds_map
# is not replaced by this one.
# NOTE(review): file_name is the same path the 13C-MFA bounds plot uses; if
# generate_flux_map writes to it, this figure replaces that one on disk —
# confirm that is intended.
glucose_gsm_bounds_map = generate_flux_map(
    flux_df=central_rxn_df,
    flux_column=['glucose_GSM_LB', 'glucose_GSM_UB'],
    title_string='Glucose GSM Constrained by MFA Bounds 2024-01-18',
    file_name='../figures/test_flux_map.png',
)

### Build glucose constraint string from 13C-MFA data

In [None]:
# Switch the model to minimal medium with glucose as the sole carbon source.
medium = model.medium
medium.update({
    'EX_glc_e': 100,
    'EX_glyc_e': 0,
    'EX_ocdcea_e': 0,
    'EX_h2o_e': 10000,
    'EX_h_e': 10000,
    'EX_nh4_e': 10000,
    'EX_o2_e': 10000,
    'EX_pi_e': 10000,
    'EX_so4_e': 10000,
    'trehalose_c_tp': 0,
})
model.medium = medium

# Print the medium composition (model.medium is read once, then looped over).
for exchange_id, uptake_bound in model.medium.items():
    print(uptake_bound, exchange_id)

# Columns of central_rxn_df that hold the 13C-MFA lower and upper bounds.
# NOTE(review): the plotting cell reads 'Glucose MFA LB' / 'Glucose MFA UB';
# confirm 'glucose_LB' / 'glucose_UB' are the intended column names here.
lower_bound_column = 'glucose_LB'
upper_bound_column = 'glucose_UB'

constraint_strings = []

# Build one constraint string per usable row of the central flux dataframe.
for _, row in central_rxn_df.iterrows():
    reaction_ids = row['reaction_ids']  # GSM reaction mapping for this row
    lower_bound = row[lower_bound_column]
    upper_bound = row[upper_bound_column]

    # Skip rows without a GSM mapping, transport rows, and rows missing a bound.
    if pd.isna(reaction_ids) or row['pathway'] == 'transport':
        continue
    if pd.isna(lower_bound) or pd.isna(upper_bound):
        continue

    print(f'13C-MFA bounds: {lower_bound} - {upper_bound}')

    constraint_string = make_rxn_constraint_string(reaction_ids, lower_bound, upper_bound)

    # Diagnostic only: maximum biomass when this single constraint is applied.
    # It is stored under its own name so the full pFBA result kept in
    # glucose_fba_solution is not overwritten on every iteration.
    single_constraint_solution = sd.fba(
        model,
        constraints=constraint_string,
        obj='biomass_glucose',
        obj_sense='maximize',
        pfba=1,
    )

    constraint_strings.append(constraint_string)
    print(constraint_string)
    print(f"max biomass flux: {single_constraint_solution['biomass_glucose']}")
    print()

# Join the individual constraints; the printed number is the character length
# of the combined string, not the number of constraints.
full_constraint_string = ', '.join(constraint_strings)
print(len(full_constraint_string))
full_constraint_string

### Calculate glucose GSM pFBA solution

### Calculate glucose GSM FVA bounds at 90% of the pFBA biomass optimum

In [None]:
# FVA on the GSM with glucose exchange fixed at -100 and biomass_C held to at
# least biomass_fraction of max_glucose_biomass_flux.
# NOTE(review): the glucose pFBA cell reads max_glucose_biomass_flux from
# 'biomass_glucose', while this constraint is placed on 'biomass_C' — confirm
# the two are meant to be combined.
biomass_fraction = 0.9
glucose_fva_constraints = f'EX_glc_e = -100.000, biomass_C >= {biomass_fraction * max_glucose_biomass_flux}'
glucose_fva_solution = sd.fva(
    model,
    constraints=glucose_fva_constraints,
)


def is_active(row):
    """Return a truthy value when either FVA bound of `row` exceeds 0.1 in magnitude."""
    return abs(row.maximum) > 0.1 or abs(row.minimum) > 0.1


active_reaction_count = sum(is_active(row) for _, row in glucose_fva_solution.iterrows())
print(f'The number of active reactions in FVA: {active_reaction_count}')

# Pull the FVA bounds in the row order of glucose_gsm_fba_df with one
# label-based lookup per bound.
# NOTE(review): glucose_gsm_fba_df is not created in this part of the notebook;
# presumably an earlier cell defines it — confirm.
gsm_reaction_ids = glucose_gsm_fba_df['reaction_id'].tolist()

# add the upper and lower bounds to the dataframe
glucose_gsm_fba_df['fva_upper_bound'] = glucose_fva_solution.loc[gsm_reaction_ids, 'maximum'].to_numpy()
glucose_gsm_fba_df['fva_lower_bound'] = glucose_fva_solution.loc[gsm_reaction_ids, 'minimum'].to_numpy()

# save the dataframe to a csv file
# glucose_gsm_fba_df.to_csv('../results/gsm_fluxes/glucose_gsm_13C_fba.csv', index=False)

# display updated dataframe
glucose_gsm_fba_df


### Add glucose pFBA columns to 13C-MFA data

In [None]:
# add the GSM pFBA flux predictions to the 13C-MFA dataframe
central_rxn_df = add_flux_column_to_13c_flux_df(central_rxn_df, glucose_gsm_fba_df, 'glucose_pFBA_flux')

# add the GSM FVA bounds to the 13C-MFA dataframe
# (with biomass_fraction = 0.9 the label formats as 'glucose_pFBA_90.0%')
# NOTE(review): this call passes a single label, whereas the "Add glucose GSM
# data" cell passes separate lower- and upper-bound column names to the same
# helper — confirm which form the helper's signature accepts.
central_rxn_df = add_fva_columns_to_13c_flux_df(central_rxn_df, glucose_gsm_fba_df, f'glucose_pFBA_{100*biomass_fraction}%')

central_rxn_df.head()

# Glycerol

### Calculate glycerol GSM pFBA solution

In [None]:
# Switch the model to minimal medium with glycerol as the sole carbon source.
medium = model.medium
medium.update({
    'EX_glc_e': 0,
    'EX_glyc_e': 100,
    'EX_ocdcea_e': 0,
    'EX_h2o_e': 10000,
    'EX_h_e': 10000,
    'EX_nh4_e': 10000,
    'EX_o2_e': 10000,
    'EX_pi_e': 10000,
    'EX_so4_e': 10000,
    'trehalose_c_tp': 0,
})
model.medium = medium

# Print the medium composition. model.medium is read once and looped over
# explicitly instead of using a comprehension purely for its side effects.
for exchange_id, uptake_bound in model.medium.items():
    print(uptake_bound, exchange_id)

# run biomass-maximizing pFBA with the glycerol exchange fixed at -100
glycerol_fba_solution = sd.fba(
    model,
    constraints='EX_glyc_e = -100.000',
    obj='biomass_C',
    obj_sense='maximize',
    pfba=1,
)

max_glycerol_biomass_flux = glycerol_fba_solution['biomass_C']
print(f'Maximum biomass flux: {max_glycerol_biomass_flux}.')

# A reaction counts as active when its flux magnitude exceeds 0.1.
active_reaction_count = sum(abs(flux) > 0.1 for flux in glycerol_fba_solution.fluxes.values())
print(f'The number of active reactions in pFBA: {active_reaction_count}')

# One record per reaction: id, name, reaction string, flux, and absolute flux.
flux_records = []
for reaction_id, flux in glycerol_fba_solution.fluxes.items():
    reaction = model.reactions.get_by_id(reaction_id)  # look the reaction up once
    flux_records.append({
        'reaction_id': reaction_id,
        'reaction_name': reaction.name,
        'full_reaction': reaction.reaction,
        'flux': flux,
        'absolute_flux': abs(flux),  # used for sorting only, dropped below
    })

# Build the dataframe, order it by descending |flux|, then drop the sort helper.
glycerol_gsm_fba_df = (
    pd.DataFrame(flux_records)
    .sort_values(by=['absolute_flux'], ascending=False)
    .drop(columns=['absolute_flux'])
)

glycerol_gsm_fba_df.head()

### Calculate glycerol GSM FVA bounds at 90% of the pFBA biomass optimum

In [None]:
# FVA on the GSM with glycerol exchange fixed at -100 and biomass_C held to at
# least biomass_fraction of the pFBA biomass flux.
biomass_fraction = 0.9
glycerol_fva_constraints = f'EX_glyc_e = -100.000, biomass_C >= {biomass_fraction * max_glycerol_biomass_flux}'
glycerol_fva_solution = sd.fva(
    model,
    constraints=glycerol_fva_constraints,
)


def is_active(row):
    """Return a truthy value when either FVA bound of `row` exceeds 0.1 in magnitude."""
    return abs(row.maximum) > 0.1 or abs(row.minimum) > 0.1


active_reaction_count = sum(is_active(row) for _, row in glycerol_fva_solution.iterrows())
print(f'The number of active reactions in FVA: {active_reaction_count}')

# Pull the FVA bounds in the row order of glycerol_gsm_fba_df with one
# label-based lookup per bound.
gsm_reaction_ids = glycerol_gsm_fba_df['reaction_id'].tolist()

# add the upper and lower bounds to the dataframe
glycerol_gsm_fba_df['fva_upper_bound'] = glycerol_fva_solution.loc[gsm_reaction_ids, 'maximum'].to_numpy()
glycerol_gsm_fba_df['fva_lower_bound'] = glycerol_fva_solution.loc[gsm_reaction_ids, 'minimum'].to_numpy()

# save the dataframe to a csv file
# glycerol_gsm_fba_df.to_csv('../results/gsm_fluxes/glycerol_gsm_13C_fba.csv', index=False)

# display updated dataframe
glycerol_gsm_fba_df

### Add glycerol pFBA columns to 13C-MFA data

In [None]:
# add the GSM pFBA flux predictions to the 13C-MFA dataframe
central_rxn_df = add_flux_column_to_13c_flux_df(central_rxn_df, glycerol_gsm_fba_df, 'glycerol_pFBA_flux')

# add the GSM FVA bounds to the 13C-MFA dataframe
# (with biomass_fraction = 0.9 the label formats as 'glycerol_pFBA_90.0%')
# NOTE(review): this call passes a single label, whereas the "Add glucose GSM
# data" cell passes separate lower- and upper-bound column names to the same
# helper — confirm which form the helper's signature accepts.
central_rxn_df = add_fva_columns_to_13c_flux_df(central_rxn_df, glycerol_gsm_fba_df, f'glycerol_pFBA_{100*biomass_fraction}%')

central_rxn_df.head()

# Oleic Acid

### Calculate oleic acid GSM pFBA solution

In [None]:
# Switch the model to minimal medium with oleic acid as the sole carbon source.
medium = model.medium
medium.update({
    'EX_glc_e': 0,
    'EX_glyc_e': 0,
    'EX_ocdcea_e': 10,  # this prevents overflow
    'EX_h2o_e': 10000,
    'EX_h_e': 10000,
    'EX_nh4_e': 10000,
    'EX_o2_e': 10000,
    'EX_pi_e': 10000,
    'EX_so4_e': 10000,
    'trehalose_c_tp': 0,
})
model.medium = medium

# Print the medium composition. model.medium is read once and looped over
# explicitly instead of using a comprehension purely for its side effects.
for exchange_id, uptake_bound in model.medium.items():
    print(uptake_bound, exchange_id)

# run biomass-maximizing pFBA with the oleic acid exchange fixed at -10
oleic_acid_fba_solution = sd.fba(
    model,
    constraints='EX_ocdcea_e = -10.000',
    obj='biomass_C',
    obj_sense='maximize',
    pfba=1,
)

# The stored maximum stays on the 10-unit input scale; only the printed value
# is multiplied by 10.
max_oleic_acid_biomass_flux = oleic_acid_fba_solution['biomass_C']
print(f'Maximum biomass flux: {10 * max_oleic_acid_biomass_flux}.')  # restore 100 input flux

# A reaction counts as active when its (unscaled) flux magnitude exceeds 0.1.
active_reaction_count = sum(abs(flux) > 0.1 for flux in oleic_acid_fba_solution.fluxes.values())
print(f'The number of active reactions in pFBA: {active_reaction_count}')

# One record per reaction: id, name, reaction string, flux, and absolute flux.
flux_records = []
for reaction_id, flux in oleic_acid_fba_solution.fluxes.items():
    reaction = model.reactions.get_by_id(reaction_id)  # look the reaction up once
    flux_records.append({
        'reaction_id': reaction_id,
        'reaction_name': reaction.name,
        'full_reaction': reaction.reaction,
        'flux': 10 * flux,  # restore 100 input flux
        'absolute_flux': abs(flux),  # unscaled; used for sorting only, dropped below
    })

# Build the dataframe, order it by descending |flux|, then drop the sort helper.
oleic_acid_gsm_fba_df = (
    pd.DataFrame(flux_records)
    .sort_values(by=['absolute_flux'], ascending=False)
    .drop(columns=['absolute_flux'])
)

oleic_acid_gsm_fba_df.head()

### Calculate oleic acid GSM FVA bounds at 90% of the pFBA biomass optimum

In [None]:
# FVA on the GSM with oleic acid exchange fixed at -10 and biomass_C held to at
# least biomass_fraction of the (unscaled) pFBA biomass flux.
biomass_fraction = 0.9
oleic_acid_fva_constraints = f'EX_ocdcea_e = -10.000, biomass_C >= {biomass_fraction * max_oleic_acid_biomass_flux}'
oleic_acid_fva_solution = sd.fva(
    model,
    constraints=oleic_acid_fva_constraints,
)


def is_active(row):
    """Return a truthy value when either FVA bound of `row` exceeds 0.1 in magnitude."""
    return abs(row.maximum) > 0.1 or abs(row.minimum) > 0.1


# the activity check is applied to the unscaled FVA bounds
active_reaction_count = sum(is_active(row) for _, row in oleic_acid_fva_solution.iterrows())
print(f'The number of active reactions in FVA: {active_reaction_count}')

# Pull the FVA bounds in the row order of oleic_acid_gsm_fba_df with one
# label-based lookup per bound.
gsm_reaction_ids = oleic_acid_gsm_fba_df['reaction_id'].tolist()

# add the upper and lower bounds to the dataframe, multiplied by 10 to restore 100 input flux
oleic_acid_gsm_fba_df['fva_upper_bound'] = 10 * oleic_acid_fva_solution.loc[gsm_reaction_ids, 'maximum'].to_numpy()
oleic_acid_gsm_fba_df['fva_lower_bound'] = 10 * oleic_acid_fva_solution.loc[gsm_reaction_ids, 'minimum'].to_numpy()

# save the dataframe to a csv file
# oleic_acid_gsm_fba_df.to_csv('../results/gsm_fluxes/oleic_acid_gsm_13C_fba.csv', index=False)

# display updated dataframe
oleic_acid_gsm_fba_df


### Add oleic acid pFBA columns to 13C-MFA data

In [None]:
# add the GSM pFBA flux predictions to the 13C-MFA dataframe
central_rxn_df = add_flux_column_to_13c_flux_df(central_rxn_df, oleic_acid_gsm_fba_df, 'oleic_acid_pFBA_flux')

# add the GSM FVA bounds to the 13C-MFA dataframe
# (with biomass_fraction = 0.9 the label formats as 'oleic_acid_pFBA_90.0%')
# NOTE(review): this call passes a single label, whereas the "Add glucose GSM
# data" cell passes separate lower- and upper-bound column names to the same
# helper — confirm which form the helper's signature accepts.
central_rxn_df = add_fva_columns_to_13c_flux_df(central_rxn_df, oleic_acid_gsm_fba_df, f'oleic_acid_pFBA_{100*biomass_fraction}%')

central_rxn_df.head()

# Save Data

### Save central flux data with pFBA data added

In [None]:
# save the dataframe to a csv file
# central_rxn_df.to_csv('../results/central_fluxes/13C_pfba.csv', index=False, encoding='utf-8-sig')

In [None]:
# Code to delete later

# def build_13c_constraint_string(central_rxn_df, lower_bound_row, upper_bound_row):
    
#     # key node reactions to constrain
#     key_pathway_reactions = {
#         'EMP': 'GAPD',
#         'TCA': 'CSm',
#         'PPP': 'GND',
#     }
    
#     constraint_strings = []

#     for pathway, reaction_ids in key_pathway_reactions.items():
#         row = central_rxn_df.loc[central_rxn_df['reaction_ids'] == reaction_ids]
#         # get the lower bound for this reaction
#         lower_bound = row[lower_bound_row]
#         upper_bound = row[upper_bound_row]

#         constraint_string = make_rxn_constraint_string(reaction_ids, lower_bound, upper_bound)
#         constraint_strings.append(constraint_string)

#     full_constraint_string = ', '.join(constraint_strings)
#     return full_constraint_string

    

# build_13c_constraint_string(central_rxn_df, 'gluc LB', 'gluc UB')
