In [1]:
cd ..

/Users/flamholz/Documents/workspace/carboxecoli


In [2]:
%matplotlib inline

import cobra
import numpy as np
import escher
import optslope_rubisco
import pandas as pd
import pint
import seaborn as sns

from cobra.core import model
from cobra.flux_analysis import phenotype_phase_plane, pfba, flux_variability_analysis
from cobra.io import read_sbml_model
from cobra.io import sbml
from copy import deepcopy
from importlib_resources import read_text
from matplotlib import pyplot as plt
from typing import List, Tuple, Iterable, Any
from scipy import stats

## Flux Balance Analysis of CCMB1 + rubisco + prk metabolism
The purpose of this notebook is to 
* produce an FBA model of the CCMB1 strain with rubisco and prk;
* examine the contrbution of rubisco to 3PG production in this strain in silico.

We use parsimonious FBA (pFBA) to get a single defined flux solution for predictions.

In [3]:
# Read in the WT model - core e. coli model with rubisco and prk
WT_MODEL_FNAME = 'optslope_rubisco/core_model_with_rpp.xml'
wt_model = read_sbml_model(WT_MODEL_FNAME)

# Make a CCMB1 model: WT sans rpi, edd and eda activities
ccmb1_model = read_sbml_model(WT_MODEL_FNAME)
ccmb1_model.reactions.RPI.knock_out()
ccmb1_model.reactions.EDD.knock_out()

# Glycerol is converted to DHAP in E. coli, so we allow DHAP uptake
ccmb1_model.exchanges.EX_dhap_e.bounds = (-1000, 1000)

# Second model that disallows overflow metabolism entirely.
# This gives an upper limit of the fraction of 3PG production due to rubisco.
ccmb1_model_no_overflow = read_sbml_model(WT_MODEL_FNAME)
ccmb1_model_no_overflow.reactions.RPI.knock_out()
ccmb1_model_no_overflow.reactions.EDD.knock_out()
ccmb1_model_no_overflow.reactions.EDA.knock_out()
ccmb1_model_no_overflow.exchanges.EX_dhap_e.bounds = (-1000, 1000)

# Disallow overflow by disabling carbon exchange other than glycerol and CO2.
for ex in ccmb1_model_no_overflow.exchanges:
    # Leave glycerol and CO2 alone
    if ex.id in ('EX_dhap_e', 'EX_co2_e'):
        continue
        
    # Turn off all other carbon exchange
    mb = ex.check_mass_balance()
    if abs(mb.get('C', 0)) >= 1:
        ex.bounds = (0, 0)

In [4]:
# Diagram central metabolic fluxes for a single pFBA prediction for complemented CCMB1 grown on glycerol.
growth_obj = ccmb1_model.reactions.BIOMASS_Ecoli_core_w_GAM
s_max = pfba(ccmb1_model, fraction_of_optimum=0.9999, objective=growth_obj)
escher.Builder(map_name="e_coli_core.Core metabolism", reaction_data=s_max.fluxes)

Downloading Map from https://escher.github.io/1-0-0/6/maps/Escherichia%20coli/e_coli_core.Core%20metabolism.json


Builder(reaction_data={'ACALD': -116.57174016330674, 'ACALDt': -1.3250000000011388, 'ACKr': -156.9210304676978…

In [5]:
# Print exchange fluxes to make sure imports are reasonable.
# Notice that there is substantial secretion of acetate, formate, and ethanol in this example.
# Glycerol is predominantly metabolized aerobically by E. coli and selection for improved glycerol growth 
# tends to increase overflow metabolism (e.g. Cheng et al. Nat. Comm. 2014). For this reason we calculate
# flux predictions below with and without of overflow metabolism in order to get a plausible range. 
print('Growth rate', s_max.objective_value)
exs = ccmb1_model.exchanges
for ex in exs:
    if abs(s_max.fluxes[ex.id]) > 0.1:
        print(ex, s_max.fluxes[ex.id])
        
# Summary of fluxes to/from 3PG helps figure out the directionality.
ccmb1_model.metabolites.get_by_id('3pg_c').summary(s_max.fluxes)

Growth rate 19873.31306018833
EX_ac_e: ac_e -->  156.92103046769782
EX_acald_e: acald_e -->  1.3250000000011388
EX_co2_e: co2_e <=>  664.4976547533506
EX_etoh_e: etoh_e -->  115.2467401633056
EX_h_e: h_e <=>  1000.0
EX_h2o_e: h2o_e <=>  -144.60130646373966
EX_nh4_e: nh4_e <=>  -229.16954162840167
EX_o2_e: o2_e <=>  -500.0
EX_pi_e: pi_e <=>  845.3920934587001
EX_dhap_e: dhap_e <=>  -1000.0


Unnamed: 0_level_0,Unnamed: 1_level_0,PERCENT,FLUX,REACTION_STRING
RXN_STAT,ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
PRODUCING,PGK,80.265612,737.22918,3pg_c + atp_c <=> 13dpg_c + adp_c
PRODUCING,RBC,19.734388,181.257776,co2_c + h2o_c + rubp__D_c --> 2.0 3pg_c + 3.0 h_c
CONSUMING,PGM,93.154646,855.61327,2pg_c <=> 3pg_c
CONSUMING,BIOMASS_Ecoli_core_w_GAM,6.845354,62.873686,1.496 3pg_c + 3.7478 accoa_c + 59.81 atp_c + 0...


In [6]:
# Growth rate has default constraints because we don't have a measured uptake rate.
growth_obj = ccmb1_model.reactions.BIOMASS_Ecoli_core_w_GAM

# Run pFBA over a range "fraction_of_optimum" values to get a sense of fluxes.
# Here we use default media so overflow production is allowed.
# NOTE: no reason to allow f_opt < 0.8 as it doesn't affect the relative rubisco flux.
f_opt = np.arange(0.8, 0.991, 0.01).tolist() + np.arange(0.991, 1.001, 0.001).tolist()
opt_fluxes_overflow_allowed = []
for frac_of_opt in f_opt:    
    s_max = pfba(ccmb1_model, fraction_of_optimum=frac_of_opt, objective=growth_obj)
    opt_fluxes_overflow_allowed.append(s_max.fluxes)

In [7]:
# Make a single prediction without overflow metabolism.
# NOTE: when overflow is disabled, the rubisco flux fraction is independent of 
# the "fraction_of_optimum" parameter since the system has far fewer free variables,
# so we make only one estimate.
growth_obj = ccmb1_model_no_overflow.reactions.BIOMASS_Ecoli_core_w_GAM
opt_fluxes_no_overflow = []
s_max = pfba(ccmb1_model_no_overflow, fraction_of_optimum=0.99, objective=growth_obj)
opt_fluxes_no_overflow.append(s_max.fluxes)

In [8]:
# Reactions producing/consuming 3-phosphoglycerate in the ECC2+rubisco model
# rubisco (RBC) written in the 3pg producing direction - positive flux = production
# phosphoglycerate mutase (PGM) written in the 3pg producing direction - positive flux = production
# phosphoglycerate kinase (PGK) written in 3pg consuming direction - positive flux = consumption
fba_df = pd.DataFrame(opt_fluxes_overflow_allowed+opt_fluxes_no_overflow)
fba_df['fraction_of_optimum'] = f_opt + [0.99]
fba_df['overflow_allowed'] = [True]*len(f_opt)+[False]

In [9]:
# Consistency checks and summary information
rbc_producing = fba_df.RBC >= 0
pgm_consuming = fba_df.PGM <= 0
pgk_producing = fba_df.PGK <= 0

# Check directional consistency
print('3pg producing reactions')
print('\tRubisco producing 3pg at all growth rates:', rbc_producing.all())
print('\tPGK producing 3pg at all growth rates:', pgk_producing.all())

print()
print('3pg consuming reactions')
print('\tPGM consuming 3pg at all growth rates:', pgm_consuming.all())

# Total 3pg production as a function of growth rate - produced by rubisco and pgk.
# Remember that PGK is written in the 3pg consuming direction, hence negative sign.
total_influx = (2*fba_df.RBC - fba_df.PGK)
flux_to_biomass = total_influx+fba_df.PGM

# total influx and outflux are equal of course. 
rub_pct = 100 * 2*fba_df.RBC / total_influx
pgm_pct = 100 * -fba_df.PGM / total_influx
pgk_pct = 100 * -fba_df.PGK / total_influx
# remaining flux to biomass goes to serine from 3pg
pct_to_ser = 100-pgm_pct

fba_df['rub_pct_3pg_prod'] = rub_pct
fba_df['pgk_pct_3pg_prod'] = pgm_pct
fba_df['pgm_pct_3pg_cons'] = pgm_pct
fba_df['ser_pct_3pg_cons'] = pct_to_ser

mean_rub_pct = rub_pct.mean()
mean_ser_pct = pct_to_ser.mean()
rub_pct_range = (fba_df.rub_pct_3pg_prod.min(), fba_df.rub_pct_3pg_prod.max())

print()
print('Mean percent of 3PG production flux through rubisco across all estimates: %.1f%%' % mean_rub_pct)
print('\tRange of rubisco percentages: (%.1f%%, %.1f%%)' % rub_pct_range)
print('Mean percent of 3PG consumption flux to serine across all estimates: %.1f%%' % mean_ser_pct)

# Save resulting dataframe
!mkdir -p notebooks/data/FBA
fba_df.to_csv('notebooks/data/FBA/ccmb1_fluxes.csv')
fba_df.head(5)

3pg producing reactions
	Rubisco producing 3pg at all growth rates: True
	PGK producing 3pg at all growth rates: True

3pg consuming reactions
	PGM consuming 3pg at all growth rates: True

Mean percent of 3PG production flux through rubisco across all estimates: 20.7%
	Range of rubisco percentages: (19.5%, 23.9%)
Mean percent of 3PG consumption flux to serine across all estimates: 7.2%


Unnamed: 0,ACALD,ACALDt,ACKr,ACONTa,ACONTb,ACt2r,ADK1,AKGDH,AKGt2r,ALCD2x,...,r5p_t,xu5p__D_t,2pg_t,dhap_t,fraction_of_optimum,overflow_allowed,rub_pct_3pg_prod,pgk_pct_3pg_prod,pgm_pct_3pg_cons,ser_pct_3pg_cons
fluxes,0.0,0.0,-144.840752,142.39624,142.39624,-144.840752,0.0,106.117522,0.0,0.0,...,0.0,0.0,0.0,737.847663,0.8,True,21.560229,92.521308,92.521308,7.478692
fluxes,0.0,0.0,-146.651262,144.176193,144.176193,-146.651262,0.0,107.443991,0.0,0.0,...,0.0,0.0,0.0,747.070759,0.81,True,21.560229,92.521308,92.521308,7.478692
fluxes,0.0,0.0,-148.461771,145.956146,145.956146,-148.461771,0.0,108.77046,0.0,0.0,...,0.0,0.0,0.0,756.293855,0.82,True,21.560229,92.521308,92.521308,7.478692
fluxes,0.0,0.0,-150.27228,147.736099,147.736099,-150.27228,0.0,110.096929,0.0,0.0,...,0.0,0.0,0.0,765.51695,0.83,True,21.560229,92.521308,92.521308,7.478692
fluxes,0.0,0.0,-152.08279,149.516052,149.516052,-152.08279,0.0,111.423398,0.0,0.0,...,0.0,0.0,0.0,774.740046,0.84,True,21.560229,92.521308,92.521308,7.478692
