# Stage One: Dynamic Differences (Revised Version)

Modifed version of supplementary material from "Brunk, Elizabeth,et al. "Characterizing strain variation in engineered *E. coli* using a multi-omics-based workflow." Cell Systems 2.5 (2016): 335-346.

In [1]:
import os
%matplotlib inline

# COBRA (genome-scale modeling) module and tools
import cobra
import cobra.io
from cobra import Model, Reaction, Metabolite
from cobra.io.mat import load_matlab_model
from cobra.io.mat import save_matlab_model
from scipy.io import loadmat, savemat
from cobra.flux_analysis import variability
import theseus   # 

# Panda python module for dataframe and data storage/manipulation
import pandas as pd
pd.set_option('mode.use_inf_as_na',True)
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 999)
pd.set_option('precision', 3)

# seaborn visualization tools
import seaborn as sns
sns.set(style="white")
c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 = sns.color_palette("Set2", 10)
b1, b2, b3, b4, b5, b6 = sns.color_palette("Blues")

# other
import warnings
warnings.simplefilter('ignore', DeprecationWarning)

# statistical and math toolkits
import numpy as np
import scipy.io
print("I'm using scipy version:" )
scipy.__version__

pd.set_option('display.max_rows', 1000)

I'm using scipy version:


## Loading All Metabolomics Data

In [2]:
EC_data = './data/'

# isopentenol producing strains
I1_data = pd.read_csv(EC_data+'I1.csv',index_col=0)
I2_data = pd.read_csv(EC_data+'I2.csv',index_col=0)
I3_data = pd.read_csv(EC_data+'I3.csv',index_col=0)

# limonene producing strains
L1_data = pd.read_csv(EC_data+'L1.csv',index_col=0)
L2_data = pd.read_csv(EC_data+'L2.csv',index_col=0)
L3_data = pd.read_csv(EC_data+'L3.csv',index_col=0)

# bisabolene producing strains
B1_data = pd.read_csv(EC_data+'B1.csv',index_col=0)
B2_data = pd.read_csv(EC_data+'B2.csv',index_col=0)

# wild-type strain
DH1_data = pd.read_csv(EC_data+'DH1.csv',index_col=0)

# Dictionary of all strain data
met_data ={'I1':I1_data, 'I2':I2_data, 'I3':I3_data, 'L1':L1_data, 'L2':L2_data, 'L3':L3_data, 'B1':B1_data, 'B2':B2_data, 'DH1':DH1_data}

# Pandas dataframe of all strain data
DF_metabolite_conc_all = pd.DataFrame()
for strain in met_data.keys():
    DF_metabolite_conc_all = pd.concat([DF_metabolite_conc_all, met_data[strain]])
DF_metabolite_conc_all

Unnamed: 0,Hour,Strain,Sample,OD600,Intracellular volume / sample,glc__D_e,pyr_e,succ_e,lac__D_e,for_e,ac_e,ipoh_e,bis_e,lim_e,aacoa_c,hmgcoa_c,mev_R_c,5pmev_c,ipdp_c,ip_c,grdp_c,frdp_c,amp_c,adp_c,atp_c,nad_c,nadp_c,accoa_c,fdp_c,3pg_c,pep_c,pyr_c,lac__D_c,fum_c,succ_c,oxalcoa_c,mal__L_c,akg_c,acon__C_c,cit_c,icit_c,glx_c,glyclt_c,dxyl5p_c,2me4p,h2mb4p_c,2mecdp_c,4c2me_c,gly_c,ala__L_c,ser__L_c,pro__L_c,val__L_c,thr__L_c,cys__L_c,leu__L_c,ile__L_c,asn__L_c,asp__L_c,gln__L_c,lys__L_c,glu__L_c,met__L_c,his__L_c,phe__L_c,arg__L_c,tyr__L_c,trp__L_c,Cystine,ex_con.id,ip_e,2mecdp_e,mev_R_e,dxyl5p_e,5pmev_e,h2mb4p_e,grdp_e,frdp_e,ggdp_e,4c2me_e,coa_e,accoa_e,aacoa_e,hmgcoa_e,amp_e,adp_e,atp_e,nad_e,nadh_e,nadp_e,nadph_e,fold_production
0,0,I1,1,1.66,8.3e-07,58.764,0.5597,0.097,0.582,1.406,7.697,0.0,0.0,0.0,0,0.0,3.48e-06,1.65e-05,0.002641,0.0,0.0,0.0,0.0001714,7.17e-05,0.0002214,0.0004793,6.08e-05,0.0001969,0.004755,0.00131,2.38e-05,0.002564,0.014,0.0004481,0.002844,0,0.0003374,3.9e-05,1.71e-05,5.15e-05,1.1e-06,0.0002703,3.69e-05,5.71e-05,0,0,0.0003925,2.93e-05,0.002906,0.001211,0.01365,0.001991,0.0009097,0.0009989,0,0.001498,0.001101,0.005063,0.0,0.0007755,0.0006917,0.01174,0.0002882,0.0006442,0.003269,0.003,0.001517,0.0003734,0,9.64e-05,0.0009263,0.0,0.0,0.0,0.0,0.034,0.0,0.0,0.0,0.0,0.0002085,0.0,0,0.0,0.0,0,0,0.0,0,0.0,0,1
1,2,I1,10,0.0,0.0,52.737,0.05809,1.987,4.418,5.487,15.661,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0,0.0,0,0.0,0,1
2,4,I1,19,2.24,5.6e-07,48.139,0.04464,1.906,6.749,2.06,18.43,0.692,0.0,0.0,0,6.89e-05,0.0002753,0.0009387,0.01825,0.003969,0.0,0.0,0.0003153,0.0001891,0.0003791,0.001829,0.0002212,0.0004923,0.005343,0.002267,5.04e-05,0.0001671,0.124,0.0005649,0.005859,0,0.0004579,2.81e-05,6.22e-05,7.63e-05,4.67e-06,0.0004232,0.0001011,0.000159,0,0,0.0009146,1.9e-05,0.003544,0.001733,0.0001958,0.007942,0.0008314,8.19e-05,0,0.001719,0.0006488,0.0004955,0.0,0.0003902,0.0008716,0.009917,0.0001211,0.0009417,0.003653,0.006,0.001426,0.0003711,0,0.004779,0.03391,0.0002398,0.007932,0.0,0.0,0.0,0.0,0.0,0.00125,0.0009361,0.0,0.0002136,0,0.002,0.0,0,0,0.0,0,0.0,0,1
3,6,I1,28,0.0,0.0,44.761,0.1108,1.936,9.657,2.775,20.056,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.00465,0.04859,0.0006099,0.01194,0.0,0.0,0.0,0.0009151,0.0,0.01008,0.001396,0.0,0.0,0,0.0,0.0,0,0,0.0,0,0.0,0,1
4,8,I1,37,2.69,6.73e-07,40.254,0.7193,1.994,13.328,2.925,21.697,1.275,0.0,0.0,0,0.0,0.0005951,0.001216,0.01716,0.003298,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.003918,0.001785,4.38e-05,0.001946,0.213,0.0005233,0.005969,0,0.0004227,1.52e-05,8.63e-05,7.52e-05,3.52e-06,0.0003312,9.5e-05,0.0001411,0,0,0.0007889,2.56e-05,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.004413,0.02682,0.0,0.02164,0.0,0.0003952,0.0,0.002255,0.0,0.004893,0.002627,0.0,0.0,0,0.0,0.0,0,0,0.0,0,0.0,0,1
5,10,I1,46,0.0,0.0,37.703,0.8736,1.987,14.226,3.534,23.414,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.008866,0.03183,0.003893,0.027,0.0,0.0007721,0.0,0.003241,0.0,0.007019,0.001926,0.0,0.0,0,0.0,0.0,0,0,0.0,0,0.0,0,1
6,12,I1,55,2.816,7.04e-07,37.547,0.8706,1.933,14.142,3.3,25.549,1.333,0.0,0.0,0,5.35e-05,0.000523,0.002647,0.03273,0.004269,4.49e-06,0.0,0.0002303,8.62e-05,0.0001572,0.001931,0.0002648,0.0004491,0.001501,0.0,2.49e-05,0.0003411,0.149,0.0005168,0.002019,0,0.0004706,0.0,5.9e-05,4.73e-05,3.36e-06,0.000289,0.000134,2.35e-05,0,0,0.0002686,1.62e-05,0.001472,0.001696,6.68e-05,0.00169,0.000491,9.56e-05,0,0.001018,0.0002651,0.0002044,0.0,0.0,0.001072,0.0,0.0,0.0003559,0.002655,0.004,0.0008289,0.0002917,0,0.01832,0.07934,0.002336,0.03146,0.0,0.0008422,0.0,0.003418,0.0,0.01176,0.002821,0.0,0.000259,0,0.002,0.0,0,0,0.0,0,0.0,0,1
7,16,I1,64,0.0,0.0,34.698,0.8346,1.846,13.614,3.443,26.765,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.02062,0.07171,0.001064,0.03466,0.0,0.000893,0.0,0.003666,0.0,0.00286,0.003106,0.0,0.0,0,0.0,0.0,0,0,0.0,0,0.0,0,1
8,18,I1,73,2.865,7.16e-07,34.524,0.7727,1.915,13.782,3.166,29.093,1.434,0.0,0.0,0,9.14e-05,0.0006733,0.001759,0.02458,0.002493,4.08e-06,0.0,0.0001501,6.68e-05,9.69e-05,0.001697,0.0001824,0.0003434,0.0,0.0,2.39e-05,0.0005016,0.161,0.0006246,0.0009084,0,0.0006386,0.0,6.89e-05,5.08e-05,2.84e-06,0.0003071,0.0001277,1.27e-05,0,0,0.0002744,0.0,0.001508,0.001774,0.0,0.0012,0.0004968,0.0001005,0,0.001109,0.0002838,0.0,0.0,0.0,0.001,0.0,0.0,0.0002217,0.002702,0.004,0.0006643,0.0003047,0,0.01617,0.06951,0.0009671,0.03379,0.0,0.001317,0.0,0.002962,0.0,0.003625,0.003081,0.0,0.0003354,0,0.001,0.0,0,0,0.0,0,0.0,0,1
9,20,I1,82,0.0,0.0,33.218,0.6427,1.881,13.338,3.121,30.259,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0132,0.1012,0.005215,0.03522,0.0,0.001732,0.0,0.004877,0.0,0.004184,0.002308,0.0,0.0,0,0.0,0.0,0,0,0.0,0,0.0,0,1


### Reading I1 Metabolomics Data

In [3]:
I1_data = pd.read_csv(EC_data+'I1.csv',index_col=0)
I1_data

Unnamed: 0,Hour,Strain,Sample,OD600,Intracellular volume / sample,glc__D_e,pyr_e,succ_e,lac__D_e,for_e,ac_e,ipoh_e,bis_e,lim_e,aacoa_c,hmgcoa_c,mev_R_c,5pmev_c,ipdp_c,ip_c,grdp_c,frdp_c,amp_c,adp_c,atp_c,nad_c,nadp_c,accoa_c,fdp_c,3pg_c,pep_c,pyr_c,lac__D_c,fum_c,succ_c,oxalcoa_c,mal__L_c,akg_c,acon__C_c,cit_c,icit_c,glx_c,glyclt_c,dxyl5p_c,2me4p,h2mb4p_c,2mecdp_c,4c2me_c,gly_c,ala__L_c,ser__L_c,pro__L_c,val__L_c,thr__L_c,cys__L_c,leu__L_c,ile__L_c,asn__L_c,asp__L_c,gln__L_c,lys__L_c,glu__L_c,met__L_c,his__L_c,phe__L_c,arg__L_c,tyr__L_c,trp__L_c,Cystine,ex_con.id,ip_e,2mecdp_e,mev_R_e,dxyl5p_e,5pmev_e,h2mb4p_e,grdp_e,frdp_e,ggdp_e,4c2me_e,coa_e,accoa_e,aacoa_e,hmgcoa_e,amp_e,adp_e,atp_e,nad_e,nadh_e,nadp_e,nadph_e,fold_production
0,0,I1,1,1.66,8.3e-07,58.764,0.56,0.097,0.582,1.406,7.697,0.0,0,0,0,0.0,3.48e-06,1.65e-05,0.003,0.0,0.0,0,0.0001714,7.17e-05,0.0002214,0.0004793,6.08e-05,0.0001969,0.005,0.001,2.38e-05,0.002564,0.014,0.0004481,0.002844,0,0.0003374,3.9e-05,1.71e-05,5.15e-05,1.1e-06,0.0002703,3.69e-05,5.71e-05,0,0,0.0003925,2.93e-05,0.002906,0.001,0.01365,0.001991,0.0009097,0.0009989,0,0.001,0.001101,0.005063,0,0.0007755,0.0006917,0.012,0.0002882,0.0006442,0.003,0.003,0.001517,0.0003734,0,9.64e-05,0.0009263,0.0,0.0,0,0.0,0.034,0.0,0,0.0,0.0,0.0002085,0.0,0,0.0,0.0,0,0,0,0,0,0,1
1,2,I1,10,0.0,0.0,52.737,0.058,1.987,4.418,5.487,15.661,0.0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0,0,0,0,0,1
2,4,I1,19,2.24,5.6e-07,48.139,0.045,1.906,6.749,2.06,18.43,0.692,0,0,0,6.89e-05,0.0002753,0.0009387,0.018,0.003969,0.0,0,0.0003153,0.0001891,0.0003791,0.001829,0.0002212,0.0004923,0.005,0.002,5.04e-05,0.0001671,0.124,0.0005649,0.005859,0,0.0004579,2.81e-05,6.22e-05,7.63e-05,4.67e-06,0.0004232,0.0001011,0.000159,0,0,0.0009146,1.9e-05,0.003544,0.002,0.0001958,0.007942,0.0008314,8.19e-05,0,0.002,0.0006488,0.0004955,0,0.0003902,0.0008716,0.01,0.0001211,0.0009417,0.004,0.006,0.001426,0.0003711,0,0.004779,0.03391,0.0002398,0.008,0,0.0,0.0,0.0,0,0.001,0.0009361,0.0,0.0002136,0,0.002,0.0,0,0,0,0,0,0,1
3,6,I1,28,0.0,0.0,44.761,0.111,1.936,9.657,2.775,20.056,0.0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.00465,0.04859,0.0006099,0.012,0,0.0,0.0,0.0009151,0,0.01,0.001396,0.0,0.0,0,0.0,0.0,0,0,0,0,0,0,1
4,8,I1,37,2.69,6.73e-07,40.254,0.719,1.994,13.328,2.925,21.697,1.275,0,0,0,0.0,0.0005951,0.001216,0.017,0.003298,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.004,0.002,4.38e-05,0.001946,0.213,0.0005233,0.005969,0,0.0004227,1.52e-05,8.63e-05,7.52e-05,3.52e-06,0.0003312,9.5e-05,0.0001411,0,0,0.0007889,2.56e-05,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.004413,0.02682,0.0,0.022,0,0.0003952,0.0,0.002255,0,0.005,0.002627,0.0,0.0,0,0.0,0.0,0,0,0,0,0,0,1
5,10,I1,46,0.0,0.0,37.703,0.874,1.987,14.226,3.534,23.414,0.0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.008866,0.03183,0.003893,0.027,0,0.0007721,0.0,0.003241,0,0.007,0.001926,0.0,0.0,0,0.0,0.0,0,0,0,0,0,0,1
6,12,I1,55,2.816,7.04e-07,37.547,0.871,1.933,14.142,3.3,25.549,1.333,0,0,0,5.35e-05,0.000523,0.002647,0.033,0.004269,4.49e-06,0,0.0002303,8.62e-05,0.0001572,0.001931,0.0002648,0.0004491,0.002,0.0,2.49e-05,0.0003411,0.149,0.0005168,0.002019,0,0.0004706,0.0,5.9e-05,4.73e-05,3.36e-06,0.000289,0.000134,2.35e-05,0,0,0.0002686,1.62e-05,0.001472,0.002,6.68e-05,0.00169,0.000491,9.56e-05,0,0.001,0.0002651,0.0002044,0,0.0,0.001072,0.0,0.0,0.0003559,0.003,0.004,0.0008289,0.0002917,0,0.01832,0.07934,0.002336,0.031,0,0.0008422,0.0,0.003418,0,0.012,0.002821,0.0,0.000259,0,0.002,0.0,0,0,0,0,0,0,1
7,16,I1,64,0.0,0.0,34.698,0.835,1.846,13.614,3.443,26.765,0.0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.02062,0.07171,0.001064,0.035,0,0.000893,0.0,0.003666,0,0.003,0.003106,0.0,0.0,0,0.0,0.0,0,0,0,0,0,0,1
8,18,I1,73,2.865,7.16e-07,34.524,0.773,1.915,13.782,3.166,29.093,1.434,0,0,0,9.14e-05,0.0006733,0.001759,0.025,0.002493,4.08e-06,0,0.0001501,6.68e-05,9.69e-05,0.001697,0.0001824,0.0003434,0.0,0.0,2.39e-05,0.0005016,0.161,0.0006246,0.0009084,0,0.0006386,0.0,6.89e-05,5.08e-05,2.84e-06,0.0003071,0.0001277,1.27e-05,0,0,0.0002744,0.0,0.001508,0.002,0.0,0.0012,0.0004968,0.0001005,0,0.001,0.0002838,0.0,0,0.0,0.001,0.0,0.0,0.0002217,0.003,0.004,0.0006643,0.0003047,0,0.01617,0.06951,0.0009671,0.034,0,0.001317,0.0,0.002962,0,0.004,0.003081,0.0,0.0003354,0,0.001,0.0,0,0,0,0,0,0,1
9,20,I1,82,0.0,0.0,33.218,0.643,1.881,13.338,3.121,30.259,0.0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0132,0.1012,0.005215,0.035,0,0.001732,0.0,0.004877,0,0.004,0.002308,0.0,0.0,0,0.0,0.0,0,0,0,0,0,0,1


In [4]:
met_data

{'I1':     Hour Strain  Sample  OD600  Intracellular volume / sample  glc__D_e  \
 0      0     I1       1  1.660                      8.300e-07    58.764   
 1      2     I1      10  0.000                      0.000e+00    52.737   
 2      4     I1      19  2.240                      5.600e-07    48.139   
 3      6     I1      28  0.000                      0.000e+00    44.761   
 4      8     I1      37  2.690                      6.730e-07    40.254   
 5     10     I1      46  0.000                      0.000e+00    37.703   
 6     12     I1      55  2.816                      7.040e-07    37.547   
 7     16     I1      64  0.000                      0.000e+00    34.698   
 8     18     I1      73  2.865                      7.160e-07    34.524   
 9     20     I1      82  0.000                      0.000e+00    33.218   
 10    24     I1      91  2.763                      6.910e-07    33.674   
 11    36     I1     100  3.000                      7.500e-07    32.485   
 12   

### Drop the columns not used in metabolite analysis

In [5]:
DF_metabolite_conc_all = DF_metabolite_conc_all.drop(columns = ['Cystine','Intracellular volume / sample','OD600','Sample','fold_production'])
print(' ')
print("List of strains in pandas metabolite dataframe:", DF_metabolite_conc_all.Strain.unique().tolist())
DF_metabolite_conc_all[0:10] # Show all the column names that remain

 
List of strains in pandas metabolite dataframe: ['I1', 'I2', 'I3', 'L1', 'L2', 'L3', 'B1', 'B2', 'DH1']


Unnamed: 0,Hour,Strain,glc__D_e,pyr_e,succ_e,lac__D_e,for_e,ac_e,ipoh_e,bis_e,lim_e,aacoa_c,hmgcoa_c,mev_R_c,5pmev_c,ipdp_c,ip_c,grdp_c,frdp_c,amp_c,adp_c,atp_c,nad_c,nadp_c,accoa_c,fdp_c,3pg_c,pep_c,pyr_c,lac__D_c,fum_c,succ_c,oxalcoa_c,mal__L_c,akg_c,acon__C_c,cit_c,icit_c,glx_c,glyclt_c,dxyl5p_c,2me4p,h2mb4p_c,2mecdp_c,4c2me_c,gly_c,ala__L_c,ser__L_c,pro__L_c,val__L_c,thr__L_c,cys__L_c,leu__L_c,ile__L_c,asn__L_c,asp__L_c,gln__L_c,lys__L_c,glu__L_c,met__L_c,his__L_c,phe__L_c,arg__L_c,tyr__L_c,trp__L_c,ex_con.id,ip_e,2mecdp_e,mev_R_e,dxyl5p_e,5pmev_e,h2mb4p_e,grdp_e,frdp_e,ggdp_e,4c2me_e,coa_e,accoa_e,aacoa_e,hmgcoa_e,amp_e,adp_e,atp_e,nad_e,nadh_e,nadp_e,nadph_e
0,0,I1,58.764,0.56,0.097,0.582,1.406,7.697,0.0,0.0,0.0,0,0.0,3.48e-06,1.65e-05,0.003,0.0,0.0,0.0,0.0001714,7.17e-05,0.0002214,0.0004793,6.08e-05,0.0001969,0.005,0.001,2.38e-05,0.002564,0.014,0.0004481,0.002844,0,0.0003374,3.9e-05,1.71e-05,5.15e-05,1.1e-06,0.0002703,3.69e-05,5.71e-05,0,0,0.0003925,2.93e-05,0.003,0.001,0.01365,0.002,0.0009097,0.0009989,0,0.001,0.001101,0.005063,0.0,0.0007755,0.0006917,0.012,0.0002882,0.0006442,0.003,0.003,0.001517,0.0003734,9.64e-05,0.0009263,0.0,0.0,0.0,0.0,0.034,0.0,0.0,0.0,0.0,0.0002085,0.0,0,0.0,0.0,0,0,0.0,0,0.0,0
1,2,I1,52.737,0.058,1.987,4.418,5.487,15.661,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0,0.0,0,0.0,0
2,4,I1,48.139,0.045,1.906,6.749,2.06,18.43,0.692,0.0,0.0,0,6.89e-05,0.0002753,0.0009387,0.018,0.004,0.0,0.0,0.0003153,0.0001891,0.0003791,0.001829,0.0002212,0.0004923,0.005,0.002,5.04e-05,0.0001671,0.124,0.0005649,0.005859,0,0.0004579,2.81e-05,6.22e-05,7.63e-05,4.67e-06,0.0004232,0.0001011,0.000159,0,0,0.0009146,1.9e-05,0.004,0.002,0.0001958,0.008,0.0008314,8.19e-05,0,0.002,0.0006488,0.0004955,0.0,0.0003902,0.0008716,0.01,0.0001211,0.0009417,0.004,0.006,0.001426,0.0003711,0.004779,0.03391,0.0002398,0.008,0.0,0.0,0.0,0.0,0.0,0.001,0.0009361,0.0,0.0002136,0,0.002,0.0,0,0,0.0,0,0.0,0
3,6,I1,44.761,0.111,1.936,9.657,2.775,20.056,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00465,0.04859,0.0006099,0.012,0.0,0.0,0.0,0.0009151,0.0,0.01,0.001396,0.0,0.0,0,0.0,0.0,0,0,0.0,0,0.0,0
4,8,I1,40.254,0.719,1.994,13.328,2.925,21.697,1.275,0.0,0.0,0,0.0,0.0005951,0.001216,0.017,0.003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004,0.002,4.38e-05,0.001946,0.213,0.0005233,0.005969,0,0.0004227,1.52e-05,8.63e-05,7.52e-05,3.52e-06,0.0003312,9.5e-05,0.0001411,0,0,0.0007889,2.56e-05,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004413,0.02682,0.0,0.022,0.0,0.0003952,0.0,0.002255,0.0,0.005,0.002627,0.0,0.0,0,0.0,0.0,0,0,0.0,0,0.0,0
5,10,I1,37.703,0.874,1.987,14.226,3.534,23.414,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.008866,0.03183,0.003893,0.027,0.0,0.0007721,0.0,0.003241,0.0,0.007,0.001926,0.0,0.0,0,0.0,0.0,0,0,0.0,0,0.0,0
6,12,I1,37.547,0.871,1.933,14.142,3.3,25.549,1.333,0.0,0.0,0,5.35e-05,0.000523,0.002647,0.033,0.004,4.49e-06,0.0,0.0002303,8.62e-05,0.0001572,0.001931,0.0002648,0.0004491,0.002,0.0,2.49e-05,0.0003411,0.149,0.0005168,0.002019,0,0.0004706,0.0,5.9e-05,4.73e-05,3.36e-06,0.000289,0.000134,2.35e-05,0,0,0.0002686,1.62e-05,0.001,0.002,6.68e-05,0.002,0.000491,9.56e-05,0,0.001,0.0002651,0.0002044,0.0,0.0,0.001072,0.0,0.0,0.0003559,0.003,0.004,0.0008289,0.0002917,0.01832,0.07934,0.002336,0.031,0.0,0.0008422,0.0,0.003418,0.0,0.012,0.002821,0.0,0.000259,0,0.002,0.0,0,0,0.0,0,0.0,0
7,16,I1,34.698,0.835,1.846,13.614,3.443,26.765,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02062,0.07171,0.001064,0.035,0.0,0.000893,0.0,0.003666,0.0,0.003,0.003106,0.0,0.0,0,0.0,0.0,0,0,0.0,0,0.0,0
8,18,I1,34.524,0.773,1.915,13.782,3.166,29.093,1.434,0.0,0.0,0,9.14e-05,0.0006733,0.001759,0.025,0.002,4.08e-06,0.0,0.0001501,6.68e-05,9.69e-05,0.001697,0.0001824,0.0003434,0.0,0.0,2.39e-05,0.0005016,0.161,0.0006246,0.0009084,0,0.0006386,0.0,6.89e-05,5.08e-05,2.84e-06,0.0003071,0.0001277,1.27e-05,0,0,0.0002744,0.0,0.002,0.002,0.0,0.001,0.0004968,0.0001005,0,0.001,0.0002838,0.0,0.0,0.0,0.001,0.0,0.0,0.0002217,0.003,0.004,0.0006643,0.0003047,0.01617,0.06951,0.0009671,0.034,0.0,0.001317,0.0,0.002962,0.0,0.004,0.003081,0.0,0.0003354,0,0.001,0.0,0,0,0.0,0,0.0,0
9,20,I1,33.218,0.643,1.881,13.338,3.121,30.259,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0132,0.1012,0.005215,0.035,0.0,0.001732,0.0,0.004877,0.0,0.004,0.002308,0.0,0.0,0,0.0,0.0,0,0,0.0,0,0.0,0


## Loading Proteomics Data

In [6]:
I1_prot_data_cobra = pd.read_csv(EC_data+'I1_prot.csv',index_col=0)
I2_prot_data_cobra = pd.read_csv(EC_data+'I2_prot.csv',index_col=0)
I3_prot_data_cobra = pd.read_csv(EC_data+'I3_prot.csv',index_col=0)
L1_prot_data_cobra = pd.read_csv(EC_data+'L1_prot.csv',index_col=0)
L2_prot_data_cobra = pd.read_csv(EC_data+'L2_prot.csv',index_col=0)
L3_prot_data_cobra = pd.read_csv(EC_data+'L3_prot.csv',index_col=0)
B1_prot_data_cobra = pd.read_csv(EC_data+'B1_prot.csv',index_col=0)
B2_prot_data_cobra = pd.read_csv(EC_data+'B2_prot.csv',index_col=0)
DH1_prot_data_cobra = pd.read_csv(EC_data+'DH1_prot.csv',index_col=0)

prot_data ={'I1':I1_prot_data_cobra, 'I2':I2_prot_data_cobra, 'I3':I3_prot_data_cobra, 
            'L1':L1_prot_data_cobra, 'L2':L2_prot_data_cobra, 'L3':L3_prot_data_cobra, 
            'B1':B1_prot_data_cobra, 'B2':B2_prot_data_cobra, 'DH1':DH1_prot_data_cobra }

In [7]:
print(' ')
print("Example dataframe of wild-type data:")
DH1_prot_data_cobra[0:10]

 
Example dataframe of wild-type data:


Unnamed: 0,ACACT1r,BISS,HMGR,HMGS,IPDDI,GRTT,LIMS,ERG1,NudB,MVD1,ERG8,GND,FRD2,PPC,FUM,PFK,SUCDi,FDH4pp,ICDHyr,PFL,ACKr,ENO,PGI,PGK,RPI,SUCOAS,TPI,TALA,FRD3,FBA,GAPD,FBP,PTAr,PDH,ACALD,FHL,CS,G6PDH2r,PYK,AKGDH,PDXPP,PPCK,PPS,ACONTa,ME1,TKT1,ACS,TKT2,MDH2,ACONTb,PGM,MDH,ME2,YPL069C,GLYCL
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,-0.771,-0.433,0.572,1.537,-1.497,-0.25,-2.133,-1.919,-0.953,-1.036,-1.82,-0.086,-0.121,0.906,-0.608,1.073,-0.597,2.134,-0.202,-0.768,-1.063,0.428,0.613,0.374,0.456,-0.301,1.424,1.579,0.032,0.314,0.204,-0.893,-0.594,-0.775,0.361,-0.237,0.262,0.159,0.003,-0.634,-0.491,-1.089,0.197,2.092,0.081,-0.16,0.038,3.464,-1.14,-0.179,0.318,0.718,-0.466,-2.447,-0.775
8,-0.051,-0.082,-0.359,1.408,-1.755,-0.057,-2.326,-2.799,0.124,-2.0,-2.758,0.206,0.281,1.21,-0.301,1.234,-0.786,2.25,-0.004,-0.317,-0.913,0.677,0.91,0.59,0.51,-0.151,1.731,1.742,0.537,0.538,0.402,-0.44,-0.398,-0.61,0.646,-0.302,0.546,0.353,0.229,-0.473,-0.903,-0.836,0.375,2.452,0.189,-0.017,0.123,3.665,-0.842,-0.16,0.622,0.803,-0.333,-2.265,-0.61
12,-0.904,-0.363,0.616,1.591,-1.797,0.384,-2.655,-1.61,0.172,-0.898,-2.929,-0.145,0.08,1.135,-0.273,1.099,-0.637,2.475,-0.152,-0.452,-1.091,0.535,0.593,0.449,0.227,-0.314,1.572,1.722,0.046,0.315,0.222,-0.502,-0.689,-0.626,0.503,0.108,0.434,0.208,0.053,-0.586,-0.878,-1.023,0.333,2.18,0.071,-0.186,0.204,3.5,-1.045,-0.538,0.465,0.611,-0.375,-2.594,-0.626
18,-1.068,0.078,-0.935,1.184,-2.061,0.39,-2.705,-2.195,-1.383,-0.898,-5.714,0.036,0.07,1.016,-0.595,1.169,-0.909,2.041,0.06,-0.299,-0.946,0.626,0.715,0.525,0.277,-0.401,1.707,1.716,0.095,0.413,0.303,-0.68,-0.457,-0.563,0.563,0.023,0.422,0.305,0.162,-0.551,-1.02,-1.006,0.336,2.348,0.252,-0.033,0.1,3.659,-0.97,-0.73,0.489,0.663,-0.37,-3.285,-0.563
24,-0.314,0.753,-0.702,1.23,-1.684,0.11,-3.018,-2.585,0.093,-1.427,-3.48,-0.015,0.145,1.116,-0.356,1.164,-1.774,2.205,0.011,-0.101,-0.92,0.616,0.847,0.51,0.143,-0.208,1.59,1.69,0.524,0.362,0.175,-0.651,-0.414,-0.518,0.554,0.036,0.45,0.323,0.331,-0.472,-0.941,-0.725,0.37,2.288,0.221,-0.123,0.402,3.586,-1.095,-0.816,0.488,0.74,-0.43,-2.867,-0.518
36,-1.214,0.176,0.96,0.85,-2.231,0.393,-2.911,-1.662,-1.131,-0.589,-4.548,0.11,0.047,1.18,-0.266,1.132,-0.664,1.973,-0.08,-0.412,-1.095,0.624,0.828,0.495,0.105,-0.095,1.568,1.74,0.532,0.41,0.127,-0.613,-0.346,-0.571,0.63,-0.045,0.463,0.235,0.321,-0.438,-0.993,-0.649,0.383,2.271,0.29,-0.222,0.147,3.619,-0.943,-0.802,0.542,0.771,-0.114,-2.551,-0.571
48,-0.585,0.245,-0.082,1.12,-1.813,0.216,-2.231,-2.489,-0.674,-0.803,-3.216,0.14,0.143,1.174,-0.434,1.371,-0.66,2.037,0.118,0.027,-0.796,0.721,0.925,0.593,0.112,-0.077,1.627,1.8,0.512,0.527,0.312,-0.618,-0.472,-0.563,0.661,0.076,0.578,0.293,0.394,-0.418,-0.841,-0.705,0.621,2.399,0.11,-0.069,-0.093,3.737,-0.669,-0.617,0.577,0.811,-0.22,-2.646,-0.563
72,0.049,0.688,-0.082,1.354,-2.041,0.076,-3.941,-1.421,-0.854,-1.313,-3.258,-0.03,0.068,0.999,-0.613,1.082,-3.178,2.172,-0.116,-0.17,-1.144,0.557,0.759,0.489,-0.075,-0.38,1.455,1.596,0.184,0.328,0.072,-0.701,-0.537,-0.816,0.454,0.087,0.379,0.229,0.225,-0.638,-1.163,-0.854,0.367,2.198,0.046,-0.23,-0.237,3.459,-1.217,-0.746,0.383,0.608,-0.616,-3.244,-0.816


In [8]:
prot_data

{'I1':     ACACT1r   BISS   HMGR   HMGS  IPDDI   GRTT   LIMS   ERG1   NudB   MVD1  \
 0     0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000   
 4     8.754 -0.043  0.526  4.998  0.730  0.131  0.216  1.088  6.746  5.941   
 8     8.643  0.339 -0.372  4.881  0.898 -0.252 -0.047  0.985  6.653  5.788   
 12    8.839  0.322  0.067  5.113  1.607  0.188  1.255  1.220  6.828  6.050   
 18    8.710 -0.387 -0.076  4.857  0.447 -0.153 -0.047  0.920  6.735  5.806   
 24    8.753  0.160 -0.196  4.988  1.199 -0.052  0.600  0.891  6.717  5.865   
 36    8.699  0.667 -0.371  4.916  1.155 -0.177 -1.147  0.920  6.620  5.887   
 48    9.106  1.011  0.236  5.368  1.596  0.258  0.600  1.293  7.155  6.260   
 72    8.696  0.497 -0.231  5.037  1.227 -0.017  0.403  0.825  6.721  5.998   
 
      ERG8    GND   FRD2    PPC    FUM    PFK  SUCDi  FDH4pp  ICDHyr    PFL  \
 0   0.000  0.000  0.000  0.000  0.000  0.000  0.000   0.000   0.000  0.000   
 4   1.251  0.104  0.401  0.892  0.583  1.44

## Compute errors from triplicate measurements in wild-type and isopentenol strains

In [9]:
# estimate error (%RSD) from triplicate proteomic and metabolomic data sets:

endo_data = pd.read_csv('./data/endometabolomic_triplicate_data.csv',index_col=0)
exo_data = pd.read_csv('./data/exometabolomic_triplicate_data.csv',index_col=0)

DF_endo_avg_metabolomics_cobra_conc = pd.read_csv('./data/endometabolomic_error_estimates.csv',index_col=0)
DF_exo_avg_metabolomics_cobra_conc = pd.read_csv('./data/exometabolomic_error_estimates.csv',index_col=0)

proteomics_triplicate_data = pd.read_csv('./data/proteomic_triplicate_data.csv',index_col=0)
DF_proteomics_avg_std_rsd_peak_areas = pd.read_csv('./data/proteomic_triplicate_data.csv', index_col=0)

### Metabolomics data

#### Intracellular metabolomics triplicate data

In [10]:
endo_data[0:30]

Unnamed: 0,Strain,Hour,Replicate,OD600,pyr_c,oaa_c,lac__D_c,succ_c,mal__L_c,akg_c,acon__C_c,cit_c,mev_R_c,ip_c,dxyl5p_c,2me4p_c,5dpmev_c,ex_con.id,h2mb4p_c,2mecdp_c,4c2me_c,gly_c,ala__L_c,pro__L_c,val__L_c,Betaine,thr__L_c,cys__L_c,leu__L_c,ile__L_c,asn__L_c,asp__L_c,gln__L_c,lys__L_p,glu__L_c,met__L_c,his__L_c,phe__L_c,arg__L_c,tyr__L_c,trp__L_c
0,WT,0.0,1,0.714,12.242,0.176,10.847,3.382,1.257,0.097,0.0,0.234,0.0,0.0,0.049,0.0,0.0,0.003,0,0.0,0.023,8.819,3.408,6.86,2.472,1.586,1.61,0.044,7.432,7.83,8.354,2.846,3.249,32.057,9.4,2.499,3.483,9.085,20.919,2.079,1.899
1,WT,0.0,2,0.708,19.639,0.299,18.745,6.009,1.834,0.18,0.0,0.496,0.0,0.0,0.084,0.0,0.0,0.006,0,0.0,0.042,11.611,4.149,8.531,2.791,2.061,2.121,0.069,8.796,9.894,10.485,7.397,3.197,33.746,10.692,3.259,3.909,11.441,21.577,2.841,2.397
2,WT,0.0,3,0.71,12.125,0.32,11.323,4.436,1.503,0.182,0.0,0.468,0.0,0.0,0.077,0.0,0.0,0.004,0,0.0,0.042,8.236,3.317,6.284,2.238,1.67,1.44,0.048,6.548,7.293,6.706,5.018,2.597,34.152,9.397,2.519,3.454,8.043,19.597,1.896,1.658
3,I1,0.0,1,0.501,8.884,0.256,9.971,2.039,1.046,0.098,0.0,0.155,0.0,0.092,0.015,0.0,0.0,0.42,0,0.0,0.009,12.794,4.335,8.959,3.004,2.586,2.798,0.053,9.999,10.751,12.694,14.577,3.939,36.418,11.574,3.595,4.048,12.171,22.391,2.58,2.493
4,I1,0.0,2,0.471,15.926,0.255,17.846,3.74,1.61,0.234,0.0,0.356,0.0,0.162,0.036,0.0,0.009,0.855,0,0.0,0.014,12.557,4.292,8.923,3.024,2.563,2.689,0.048,10.329,10.477,13.665,14.435,3.9,35.78,11.727,3.636,3.96,12.187,21.529,2.584,2.481
5,I1,0.0,3,0.502,12.259,0.309,15.11,3.336,1.483,0.212,0.0,0.384,0.0,0.183,0.038,0.0,0.009,0.837,0,0.0,0.02,9.081,3.371,6.785,2.418,2.201,1.956,0.043,7.462,7.811,8.821,7.833,3.619,34.352,10.378,2.728,3.467,9.034,19.962,1.969,1.881
6,I2,0.0,1,0.519,15.118,0.29,17.621,4.161,1.712,0.181,0.0,0.376,0.0,0.07,0.062,0.035,0.0,0.059,0,0.0,0.031,11.898,4.037,8.407,2.948,2.338,2.461,0.045,9.656,9.934,10.907,11.143,3.698,34.753,10.79,3.386,3.807,11.349,21.78,2.374,2.454
7,I2,0.0,2,0.572,16.617,0.383,24.912,4.726,1.822,0.16,0.0,0.409,0.0,0.062,0.055,0.037,0.005,0.066,0,0.0,0.035,11.259,4.056,8.158,2.796,2.376,2.371,0.057,8.705,9.49,10.18,9.134,3.376,35.733,10.527,3.212,3.702,11.052,21.387,2.597,2.356
8,I2,0.0,3,0.588,12.671,0.208,21.045,4.077,1.525,0.145,0.0,0.318,0.0,0.058,0.048,0.029,0.0,0.048,0,0.0,0.03,10.951,3.881,7.808,2.97,2.413,2.331,0.051,8.479,9.196,9.75,8.409,3.695,35.718,10.352,3.131,3.687,10.618,20.643,2.506,2.214
9,I3,0.0,1,0.443,13.343,0.23,17.665,3.501,1.611,0.182,0.0,0.325,0.0,0.072,0.045,0.029,0.004,0.069,0,0.0,0.02,13.266,4.243,8.896,3.179,2.612,2.783,0.037,10.585,10.655,13.145,12.44,4.68,36.168,10.809,3.692,3.875,12.053,22.086,2.419,2.225


Find triplicate values for a single strain

In [11]:
endo_data[endo_data.Strain == 'WT']

Unnamed: 0,Strain,Hour,Replicate,OD600,pyr_c,oaa_c,lac__D_c,succ_c,mal__L_c,akg_c,acon__C_c,cit_c,mev_R_c,ip_c,dxyl5p_c,2me4p_c,5dpmev_c,ex_con.id,h2mb4p_c,2mecdp_c,4c2me_c,gly_c,ala__L_c,pro__L_c,val__L_c,Betaine,thr__L_c,cys__L_c,leu__L_c,ile__L_c,asn__L_c,asp__L_c,gln__L_c,lys__L_p,glu__L_c,met__L_c,his__L_c,phe__L_c,arg__L_c,tyr__L_c,trp__L_c
0,WT,0.0,1,0.714,12.242,0.176,10.847,3.382,1.257,0.097,0.0,0.234,0.0,0.0,0.049,0.0,0.0,0.003,0,0.0,0.023,8.819,3.408,6.86,2.472,1.586,1.61,0.044,7.432,7.83,8.354,2.846,3.249,32.057,9.4,2.499,3.483,9.085,20.919,2.079,1.899
1,WT,0.0,2,0.708,19.639,0.299,18.745,6.009,1.834,0.18,0.0,0.496,0.0,0.0,0.084,0.0,0.0,0.006,0,0.0,0.042,11.611,4.149,8.531,2.791,2.061,2.121,0.069,8.796,9.894,10.485,7.397,3.197,33.746,10.692,3.259,3.909,11.441,21.577,2.841,2.397
2,WT,0.0,3,0.71,12.125,0.32,11.323,4.436,1.503,0.182,0.0,0.468,0.0,0.0,0.077,0.0,0.0,0.004,0,0.0,0.042,8.236,3.317,6.284,2.238,1.67,1.44,0.048,6.548,7.293,6.706,5.018,2.597,34.152,9.397,2.519,3.454,8.043,19.597,1.896,1.658
12,WT,0.5,1,1.049,7.022,0.706,14.435,6.013,1.165,0.353,0.04,0.763,0.0,0.0,0.082,0.035,0.0,0.004,0,0.0,0.061,10.283,3.882,7.545,2.324,2.725,2.161,0.058,8.338,7.871,6.653,2.648,1.734,34.45,11.766,2.667,3.425,9.543,20.451,1.805,1.667
13,WT,0.5,2,1.09,10.609,0.566,14.214,7.767,1.413,0.331,0.042,0.764,0.0,0.0,0.094,0.048,0.0,0.003,0,0.0,0.068,14.072,4.635,9.633,2.794,3.005,2.576,0.069,10.464,10.29,9.021,3.49,2.286,35.872,11.888,3.264,3.783,12.531,23.04,2.309,2.131
14,WT,0.5,3,1.088,6.66,0.556,10.812,6.886,1.158,0.266,0.042,0.699,0.0,0.0,0.084,0.041,0.0,0.003,0,0.0,0.065,12.157,4.195,8.589,2.477,3.072,2.341,0.0,9.38,8.908,7.476,3.08,1.562,35.972,13.519,3.165,3.695,11.05,21.786,2.035,1.876
24,WT,1.0,1,1.445,0.963,0.89,13.992,10.036,0.924,0.39,0.069,1.328,0.01,0.0,0.155,0.086,0.0,0.004,0,0.0,0.137,12.29,4.221,8.891,2.247,3.45,1.729,0.072,8.91,8.498,5.431,2.099,0.838,35.409,12.563,2.882,3.214,10.395,21.285,1.685,1.6
25,WT,1.0,2,1.542,0.878,1.29,12.596,10.963,0.817,0.375,0.073,1.553,0.011,0.0,0.17,0.09,0.0,0.003,0,0.0,0.151,10.515,4.233,8.851,2.3,3.994,1.862,0.052,8.048,7.943,4.421,1.913,0.843,38.413,14.046,2.859,3.609,9.969,21.19,1.922,1.673
26,WT,1.0,3,1.53,1.1,0.934,14.548,11.231,0.91,0.357,0.078,1.313,0.012,0.0,0.163,0.101,0.0,0.003,0,0.0,0.179,13.042,4.601,9.616,2.422,3.959,1.721,0.067,9.784,9.061,5.581,2.116,0.767,36.361,13.712,3.1,3.502,11.371,19.358,1.771,1.691
36,WT,2.0,1,2.117,3.665,1.805,78.743,20.133,0.955,0.353,0.107,2.253,0.009,0.0,0.22,0.0,0.0,0.0,0,0.0,0.065,7.743,4.575,9.953,1.473,8.415,0.142,0.048,6.527,4.979,1.157,1.933,0.688,32.41,8.912,1.569,2.736,8.7,18.33,1.091,1.187


### Intracellular average  metabolomics concentrations

In [12]:
DF_endo_avg_metabolomics_cobra_conc[0:30]

Unnamed: 0,Hour,avg_conc,metabolite,std_conc,strain,var_conc,%RSD
0,0.0,0.711,OD600,0.002,WT,6.222e-06,0.004
1,0.0,14.669,pyr_c,3.515,WT,12.35,0.24
2,0.0,0.265,oaa_c,0.063,WT,0.004028,0.24
3,0.0,13.638,lac__D_c,3.616,WT,13.08,0.265
4,0.0,4.609,succ_c,1.079,WT,1.165,0.234
5,0.0,1.532,mal__L_c,0.237,WT,0.05596,0.154
6,0.0,0.153,akg_c,0.04,WT,0.001564,0.259
7,0.0,0.0,acon__C_c,0.0,WT,0.0,0.0
8,0.0,0.399,cit_c,0.117,WT,0.01375,0.294
9,0.0,0.0,mev_R_c,0.0,WT,0.0,0.0


Find triplicate values for a single strain

In [13]:
DF_endo_avg_metabolomics_cobra_conc[DF_endo_avg_metabolomics_cobra_conc.strain == 'WT']

Unnamed: 0,Hour,avg_conc,metabolite,std_conc,strain,var_conc,%RSD
0,0.0,0.711,OD600,0.002494,WT,6.222e-06,0.004
1,0.0,14.669,pyr_c,3.515,WT,12.35,0.24
2,0.0,0.265,oaa_c,0.06347,WT,0.004028,0.24
3,0.0,13.638,lac__D_c,3.616,WT,13.08,0.265
4,0.0,4.609,succ_c,1.079,WT,1.165,0.234
5,0.0,1.532,mal__L_c,0.2366,WT,0.05596,0.154
6,0.0,0.153,akg_c,0.03954,WT,0.001564,0.259
7,0.0,0.0,acon__C_c,0.0,WT,0.0,0.0
8,0.0,0.399,cit_c,0.1172,WT,0.01375,0.294
9,0.0,0.0,mev_R_c,0.0,WT,0.0,0.0


### Extracellular metabolomics triplicate data

In [14]:
exo_data[0:30]

Unnamed: 0,Strain,Hour,Replicate,OD600,ipoh_e,glc__D_e,ac_e,for_e,pyr_e,lac__D_e,succ_e,mal__L_e,akg_e,acon__C_e,cit_e,icit_e,mev_R_e,ip_e,dxyl5p_e,2me4p_e,5pmev_e,ex_con.id,h2mb4p_e,2mecdp_e,5dpmev_e,grdp_e,frdp_e,ggdp_e,4c2me_e,gly_e,ala__L_e,ser__L_e,pro__L_e,val__L_e,thr__L_e,cys__L_e,leu__L_e,ile__L_e,asn__L_e,asp__L_e,gln__L_e,glu__L_e,met__L_e,his__L_e,phe__L_e,tyr__L_e,trp__L_e
0,WT,0.0,1,0.714,0.0,10.002,0.563,0.064,862.196,421.648,70.211,37.35,0.0,0.0,10.616,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,547.255,174.494,1582.35,318.177,147.901,177.633,2.261,406.649,343.085,635.002,69.166,139.318,303.411,135.735,77.379,381.826,90.36,36.65
1,WT,0.0,2,0.708,0.0,10.002,0.588,0.063,909.426,455.381,77.781,39.36,0.0,0.0,1.506,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,565.456,171.38,1419.42,330.316,148.345,172.761,2.542,444.076,363.304,653.084,67.93,169.435,329.249,133.042,78.006,385.094,80.925,36.295
2,WT,0.0,3,0.71,0.0,10.002,0.589,0.075,979.137,513.688,80.604,40.986,0.0,0.0,1.651,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,580.637,173.24,1392.087,335.804,148.365,177.594,2.993,433.337,377.307,668.685,65.144,157.834,336.735,135.014,75.158,388.77,80.05,36.377
3,I1,0.0,1,0.501,0.0,10.013,0.358,0.023,552.902,279.927,39.886,31.751,0.0,0.0,2.215,0.0,0.0,3.146,0.0,0.0,0.0,0.117,0.0,0.0,0.0,0.0,0.0,0.0,0.0,579.75,174.558,3806.303,333.966,160.763,194.778,2.381,469.88,392.183,671.145,201.687,168.655,368.674,154.925,77.573,401.202,82.917,40.881
4,I1,0.0,2,0.471,0.0,10.013,0.371,0.033,617.028,300.981,40.771,32.579,0.0,0.0,1.964,0.0,0.0,2.342,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,571.022,173.341,3629.345,334.474,157.971,192.655,2.213,456.471,396.054,668.916,184.085,167.053,359.13,153.764,73.447,402.773,85.397,39.687
5,I1,0.0,3,0.502,0.0,10.013,0.37,0.022,598.667,299.695,41.557,31.916,0.0,0.0,2.91,0.0,0.0,3.151,0.0,0.0,0.0,0.091,0.0,0.0,0.0,0.0,0.0,0.0,0.0,579.995,172.26,3714.189,334.697,153.208,193.188,1.889,472.985,377.571,688.41,189.892,166.546,360.602,153.646,70.525,398.899,83.213,39.693
6,I2,0.0,1,0.519,0.0,10.03,0.389,0.045,579.122,358.495,55.185,37.728,0.0,0.0,2.302,0.0,0.0,2.833,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,555.303,169.359,3123.401,325.231,152.084,184.383,2.796,423.723,363.101,631.13,135.506,178.376,309.924,136.151,65.691,389.464,88.188,40.792
7,I2,0.0,2,0.572,0.0,10.03,0.45,0.038,641.886,406.475,62.71,40.259,0.0,0.0,2.341,0.0,0.0,3.369,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,594.784,172.546,2808.778,340.776,153.634,190.779,2.107,450.316,394.323,663.128,114.214,176.637,335.745,153.058,65.259,400.544,84.231,41.209
8,I2,0.0,3,0.588,0.0,10.03,0.454,0.044,646.626,409.913,61.468,38.808,0.0,0.0,5.25,0.0,0.0,3.594,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,598.896,171.295,2876.124,339.721,153.511,187.291,2.976,464.307,390.263,696.864,122.813,162.11,338.423,152.164,66.644,397.769,81.074,41.079
9,I3,0.0,1,0.443,0.0,10.038,0.309,0.041,468.469,246.248,47.515,28.425,0.0,0.0,4.361,0.0,0.0,3.696,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,606.969,175.316,4410.836,344.362,166.747,205.739,0.0,442.148,401.72,723.854,177.925,183.095,387.152,158.606,67.362,414.671,92.114,36.612


#### Extracellular average metabolomics concentrations

In [15]:
DF_exo_avg_metabolomics_cobra_conc[0:30]

Unnamed: 0,Hour,avg_conc,metabolite,std_conc,strain,var_conc,%RSD
0,0.0,0.711,OD600,0.002,WT,6.222e-06,0.004
1,0.0,0.0,ipoh_e,0.0,WT,0.0,0.0
2,0.0,10.002,glc__D_e,0.0,WT,0.0,0.0
3,0.0,0.58,ac_e,0.012,WT,0.0001432,0.021
4,0.0,0.067,for_e,0.005,WT,2.693e-05,0.077
5,0.0,916.92,pyr_e,48.034,WT,2307.0,0.052
6,0.0,463.572,lac__D_e,38.019,WT,1445.0,0.082
7,0.0,76.199,succ_e,4.388,WT,19.26,0.058
8,0.0,39.232,mal__L_e,1.487,WT,2.212,0.038
9,0.0,0.0,akg_e,0.0,WT,0.0,0.0


### Proteomics data

#### Proteomics average data

In [16]:
print("Number of rows = ",len(DF_proteomics_avg_std_rsd_peak_areas.index))
DF_proteomics_avg_std_rsd_peak_areas[0:30]

Number of rows =  888


Unnamed: 0,%RSD,Hour,m_reaction,mean_area,norm_mean,norm_rsd,norm_std,std_area,strain,var_area
0,0.805,0,PFK,362.667,0.173,0.406,0.07,291.907,I1,85210.0
1,0.657,6,PFK,1399.5,0.266,0.107,0.029,919.595,I1,845700.0
2,0.443,12,PFK,1122.667,0.248,0.175,0.043,497.789,I1,247800.0
3,0.444,24,PFK,1370.333,0.317,0.064,0.02,608.212,I1,369900.0
4,0.49,36,PFK,1448.5,0.329,0.165,0.054,709.339,I1,503200.0
5,0.507,48,PFK,1149.5,0.288,0.136,0.039,582.576,I1,339400.0
6,0.826,0,PFK,674.5,0.184,0.232,0.043,557.424,I2,310700.0
7,0.435,6,PFK,1298.0,0.374,0.166,0.062,564.619,I2,318800.0
8,0.394,12,PFK,1500.667,0.604,0.316,0.191,590.853,I2,349100.0
9,0.446,24,PFK,1680.667,0.783,0.126,0.099,749.63,I2,561900.0


#### Proteomics triplicate data

In [17]:
proteomics_triplicate_data[0:900]

Unnamed: 0,%RSD,Hour,m_reaction,mean_area,norm_mean,norm_rsd,norm_std,std_area,strain,var_area
0,0.805,0,PFK,362.667,0.173,0.406,0.07,291.907,I1,85210.0
1,0.657,6,PFK,1399.5,0.266,0.107,0.029,919.595,I1,845700.0
2,0.443,12,PFK,1122.667,0.248,0.175,0.043,497.789,I1,247800.0
3,0.444,24,PFK,1370.333,0.317,0.064,0.02,608.212,I1,369900.0
4,0.49,36,PFK,1448.5,0.329,0.165,0.054,709.339,I1,503200.0
5,0.507,48,PFK,1149.5,0.288,0.136,0.039,582.576,I1,339400.0
6,0.826,0,PFK,674.5,0.184,0.232,0.043,557.424,I2,310700.0
7,0.435,6,PFK,1298.0,0.374,0.166,0.062,564.619,I2,318800.0
8,0.394,12,PFK,1500.667,0.604,0.316,0.191,590.853,I2,349100.0
9,0.446,24,PFK,1680.667,0.783,0.126,0.099,749.63,I2,561900.0


Find triplicate values for a single reaction

In [18]:
proteomics_triplicate_data[(proteomics_triplicate_data.strain == 'I1') & 
                          (proteomics_triplicate_data.m_reaction == 'PFK')]

Unnamed: 0,%RSD,Hour,m_reaction,mean_area,norm_mean,norm_rsd,norm_std,std_area,strain,var_area
0,0.805,0,PFK,362.667,0.173,0.406,0.07,291.907,I1,85209.867
1,0.657,6,PFK,1399.5,0.266,0.107,0.029,919.595,I1,845654.3
2,0.443,12,PFK,1122.667,0.248,0.175,0.043,497.789,I1,247794.267
3,0.444,24,PFK,1370.333,0.317,0.064,0.02,608.212,I1,369922.267
4,0.49,36,PFK,1448.5,0.329,0.165,0.054,709.339,I1,503161.9
5,0.507,48,PFK,1149.5,0.288,0.136,0.039,582.576,I1,339394.7


## Data Stats

In [19]:
# initialize time points and strain names

all_time_points = [int(i) for i in DF_metabolite_conc_all.Hour.unique().tolist()]
list_strains = [str(i) for i in DF_metabolite_conc_all.Strain.unique().tolist()]

# initialize heterologous pathway metabolite and reaction names
mevalonate_rxns_list = {'Mev-P':'EX_5pmev_e', 'GGPP':'EX_GGDP_e','HMG-coA':'EX_hmgcoa_e', 'hmbdp':'EX_h2mb4p_e', 'Mevalonate':'EX_mev_e', 'Isopentenol':'EX_ipoh_e', 'IP':'EX_ip_e', 'dxyl5p':'EX_dxyl5p_e', 'FPP':'EX_frdp_e', 'GPP':'EX_grdp_e', '2mecdp':'EX_2mecdp_e', '4c2me':'EX_4c2me_e'}
mevalonate_mets_list = ['grdp_e', 'frdp_e', 'ipdp_e', 'h2mb4p_e', 'dmpp_e', 'atp_e', '5dpmev_c', 'mev_R_c', 'mev_R_e', '5dpmev_e', 'accoa_e', 'hmgcoa_c', 'coa_e', 'dxyl5p_e', 'hmgcoa_e', 'nadh_e', 'ggdp_c', 'lim_e', 'ggdp_e', 'lim_c', '2mecdp_e', 'adp_e', 'aacoa_e', 'ip_e', 'ip_c', 'nadph_e', 'nadp_e', 'ipoh_e', 'bis_c', 'ipoh_c', 'bis_e', '4c2me_e', '5pmev_c', '5pmev_e', 'nad_e']


In [20]:
all_time_points

[0, 2, 4, 6, 8, 10, 12, 16, 18, 20, 24, 36, 48, 72]

In [21]:
list_strains

['I1', 'I2', 'I3', 'L1', 'L2', 'L3', 'B1', 'B2', 'DH1']

In [22]:
print("number of proteins in study:", len(I1_prot_data_cobra.loc[0:1].iloc[0:].columns))
print("number of metabolites in study:", len(I1_data.loc[0:1].iloc[5:-1].columns))
print("number of strains in study:", len(list_strains))
print("numer of time points in study:", len(all_time_points))
print("number of unique heterologous reactions:", len(mevalonate_rxns_list.keys()))
print("number of unique heterologous metabolites:", len(mevalonate_mets_list))

number of proteins in study: 55
number of metabolites in study: 92
number of strains in study: 9
numer of time points in study: 14
number of unique heterologous reactions: 12
number of unique heterologous metabolites: 35


## Dictionary of metabolite identifiers¶

In [23]:
metabolite_IDs = pd.read_csv(EC_data+'metabolite_identifiers.csv',index_col=0)
metabolite_IDs.set_index(['name']).head(10)

Unnamed: 0_level_0,BiGG_id,metabolite_name
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Glyoxylate (uM),glx_c,Glyoxylate
3-P Glycerate (uM),3pg_c,3-Phospho-D-glycerate
MEVALONATE extracellular (uM),mev_R_e,Mevalonate
NAD extracellular (uM),nad_e,Nicotinamide adenine dinucleotide
Serine,ser__L_c,L-Serine
DXP extracellular (uM),dxyl5p_e,1-Deoxy-D-xylulose 5-Phosphate
ATP extracellular (uM),atp_e,ATP
Formate g/L,for_e,Formate
IP extracellular (uM),ip_e,Isopentenol-P
Histidine,his__L_c,L-Histidine


# Key Analysis Scripts¶

#### Create a function to provide a progress bar for other functions.

In [24]:
import sys, time
try:
    from IPython.display import clear_output
    have_ipython = True
except ImportError:
    have_ipython = False

class ProgressBar:
    def __init__(self, iterations):
        self.iterations = iterations
        self.prog_bar = '[]'
        self.fill_char = '*'
        self.width = 40
        self.__update_amount(0)
        if have_ipython:
            self.animate = self.animate_ipython
        else:
            self.animate = self.animate_noipython

    def animate_ipython(self, iter):
        print('\r', self),
        sys.stdout.flush()
        self.update_iteration(iter + 1)

    def update_iteration(self, elapsed_iter):
        self.__update_amount((elapsed_iter / float(self.iterations)) * 100.0)
        self.prog_bar += '  %d of %s complete' % (elapsed_iter, self.iterations)

    def __update_amount(self, new_amount):
        percent_done = int(round((new_amount / 100.0) * 100.0))
        all_full = self.width - 2
        num_hashes = int(round((percent_done / 100.0) * all_full))
        self.prog_bar = '[' + self.fill_char * num_hashes + ' ' * (all_full - num_hashes) + ']'
        pct_place = (len(self.prog_bar) // 2) - len(str(percent_done))
        pct_string = '%d%%' % percent_done
        self.prog_bar = self.prog_bar[0:pct_place] + \
            (pct_string + self.prog_bar[pct_place + len(pct_string):])

    def __str__(self):
        return str(self.prog_bar)

#### Convert data to fold changes

In [25]:
def mM_to_foldchange(df):  
    df.index = range(1,len(df) + 1)
    for i in df.columns:
        record = 0
        for j in df.index:
            origin = df[i][1]                
            
            if origin ==0 and df[i][j] == 0.0:
                df[i][j] = 0.00001
                tmp_fold = 0.0
            elif df[i][j] == 0.0:
                df[i][j] = 0.00001
            else:   
                tmp_fold = (np.true_divide(df[i][j], origin))
            
            if record == 0:
                record = 1
            else:
                df[i][j] = tmp_fold    
                
        df[i][1] = 0.0   
    return df

## Error Propagation:

Once error estimates have been assigned to each piece of data, we must then find out how these errors contribute to the error in the result. The error in a quantity may be thought of as a variation or "change" in the value of that quantity. Results are is obtained by mathematical operations on the data, and small changes in any data quantity can affect the value of a result. We say that "errors in the data propagate through the calculations to produce error in the result."

- Quotient rule. When two quantities are divided, the relative determinate error of the quotient is the relative determinate error of the numerator minus the relative determinate error of the denominator.
- Power rule. When a quantity Q is raised to a power, P, the relative determinate error in the result is P times the relative determinate error in Q. This also holds for negative powers, i.e. the relative determinate error in the square root of Q is one half the relative determinate error in Q.
- Sum and difference rule. When two quantities are added (or subtracted), their determinate errors add (or subtract).

### A function to compare a matabolite of a particular strain to it's WT value 

A function to list the strain, metabolite, time_point, difference to WT value (diff_to_wt), difference to WT value standard deviation (diff_to_wt_std), phase, metabolite standard deviation (metabolite_std), and finally the WT standard deviation (wt_std).

In [26]:
def create_difference_dataframe(df_endo, df_exo):
    
    ''' takes in two dataframes (endo vs exo) that provides the error estimates in the metabolite measurements 
    to return a dataframe that computes concentration differences between any two conditions 
    (e.g. wild-type vs engineered strain)
    
    e.g. 
    
    create_difference_dataframe(DF_endo_avg_metabolomics_cobra_conc, DF_exo_avg_metabolomics_cobra_conc)
    '''
    
    react_to_struct = []

    for strain in met_data.keys():
        for c in met_data[strain].columns[5:-1]:
            for i in range(0,len(met_data[strain][str(c)].index)):
                            
                if i < 8:
                    phase = 1
                elif i > 4 and i < 24:
                    phase = 2
                else:
                    phase = 3
                
            # calculate the difference between strain measurement and wt measurement
            
                wt_val = met_data['DH1'][str(c)].reset_index(drop=True)[i]
                strain_val = met_data[strain][str(c)].reset_index(drop=True)[i]
                
                #fold difference is take here (can also be changed to subtraction)
                diff = strain_val/wt_val 
            
            # Note: 
            # if the metabolite is a heterologous intermediate - take strain I3 as reference for RSD:
            # get the scaling factor based on the metabolite measured, if it exists
            
                if str(c) in df_endo.metabolite.unique().tolist():
                
                    if strain not in ['I1','I2','I3']:
                        if str(c) in mevalonate_mets_list:
                            strain_tmp = 'I3'
                        else:
                            strain_tmp = 'WT'
                        
                        endo_scaling_factor = df_endo[(df_endo.metabolite == str(c)) & 
                                                      (df_endo.strain == strain_tmp) & 
                                                      (df_endo.Hour == 0.5)]['%RSD'].values[0]
                    
                        # in some cases, certain mev pathway metabolites are not seen in I1, I2, I3 strains (e.g. ggdp)
                        if endo_scaling_factor == 0.0:
                            endo_scaling_factor = df_endo[(df_endo.strain == strain_tmp) & 
                                                          (df_endo.Hour == 0.5)]['%RSD'].values.mean()
                                       
                    else:
                        if str(c) in mevalonate_mets_list:
                            strain_tmp = 'I3'
                        else:
                            strain_tmp = strain
                        
                        endo_scaling_factor = df_endo[(df_endo.metabolite == str(c)) & 
                                                      (df_endo.strain == strain_tmp) & 
                                                      (df_endo.Hour == 0.5)]['%RSD'].values[0]
                        
                        # in some cases, certain mev pathway metabolites are not seen in I1, I2, I3 strains (e.g. ggdp)
                        if endo_scaling_factor == 0.0:
                            endo_scaling_factor = df_endo[(df_endo.strain == strain_tmp) & 
                                                          (df_endo.Hour == 0.5)]['%RSD'].values.mean()
                        
                        
                elif str(c) in df_exo.metabolite.unique().tolist():
                
                    if strain not in ['I1','I2','I3']:
                        if str(c) in mevalonate_mets_list:
                            strain_tmp = 'I3'
                        else:
                            strain_tmp = 'WT'
                        
                        exo_scaling_factor =  df_exo[(df_exo.metabolite == str(c)) & 
                                                     (df_exo.strain == strain_tmp) & 
                                                     (df_exo.Hour == 0.5)]['%RSD'].values[0]
                        
                        if exo_scaling_factor == 0.0:
                            exo_scaling_factor = df_exo[(df_exo.strain == strain_tmp) & 
                                                        (df_exo.Hour == 0.5)]['%RSD'].values.mean()
                        
                    else:
                        if str(c) in mevalonate_mets_list:
                            strain_tmp = 'I3'
                        else:
                            strain_tmp = strain
                        
                        exo_scaling_factor =  df_exo[(df_exo.metabolite == str(c)) & 
                                                     (df_exo.strain == strain_tmp) & 
                                                     (df_exo.Hour == 0.5)]['%RSD'].values[0]
                        
                        if exo_scaling_factor == 0.0:
                            exo_scaling_factor = df_exo[(df_exo.strain == strain_tmp) & 
                                                        (df_exo.Hour == 0.5)]['%RSD'].values.mean()
                    
                else:
                    if '_c' in str(c):
                        if str(c) in mevalonate_mets_list:
                            strain_tmp = 'I3'
                        else:
                            strain_tmp = 'WT'
                        endo_scaling_factor = df_endo[(df_endo.strain == strain_tmp) & 
                                                      (df_endo.Hour == 0.5)]['%RSD'].values.mean()
                        
                    else:
                        if str(c) in mevalonate_mets_list:
                            strain_tmp = 'I3'
                        else:
                            strain_tmp = 'WT'
                        exo_scaling_factor =  df_exo[(df_exo.strain == strain_tmp) & 
                                                     (df_exo.Hour == 0.5)]['%RSD'].values.mean()
                    

                if '_c' in str(c):
                    tmp_var = (strain_val*endo_scaling_factor)**2
                    wt_var = (wt_val*endo_scaling_factor)**2
                else:
                    tmp_var = (strain_val*exo_scaling_factor)**2
                    wt_var = (wt_val*exo_scaling_factor)**2
                
                    
                diff_std = np.sqrt(abs(tmp_var - wt_var))  #### change to addition for subtraction differences  
                react_to_struct.append({'strain':strain, 'metabolite':c, 'time_point':all_time_points[i], 'diff_to_wt':diff, 'diff_to_wt_std':diff_std, 'phase':phase, 'metabolite_std':np.sqrt(tmp_var), 'wt_std':np.sqrt(wt_var)})

    return pd.DataFrame(react_to_struct)

### A function to plot the metabolomics data

In [27]:
import matplotlib.pyplot as plt

def plot_raw_strain_data(strain_set, test, df_err):
    ''' 
    Plots the raw metabolite concentrations over time per fuel type, or grouping specified by the user.
    Takes in the pre-calculated experimental errors (standard deviations taken from triplicate measurements) 
    from user inputted file
    
    Example inputs:
        strain_set = 'I'
        test = ['I1','I2','I3','DH1']

        strain_set = 'L'
        test = ['L1','L2','L3','DH1']

        strain_set = 'B'
        test = ['B1','B2','DH1']
        '''
    %matplotlib inline
    color_map_dict = {'I1':c1,'I2':c2,'I3':c3,'L1':c4,'L2':c5,'L3':c6,'B1':c7,'B2':c8,'DH1':b3}
    list_rxns = DF_metabolite_conc_all.drop(['Hour','Strain'],axis=1).columns.tolist()
    end = 72

    for strain in ['DH1']:
 
        fig, axes = plt.subplots(nrows=int(np.round(true_divide(len(list_rxns),4))), ncols=4, figsize=(25,90));
        k=0
        af=axes.flatten()
        for a in af:
            if k < len(list_rxns):
                r = list_rxns[k]
                df = DF_metabolite_conc_all[[r,'Hour','Strain']]
            
                colors = [color_map_dict[cc] for cc in test]
                linewidths = [5 for cc in test]
            
                max_val = 0
                min_val = 1000
                for col, color, lw in zip(df[df.Strain.isin(test)].Strain.unique(), colors, linewidths):
                    
                    time_list=df[(df.Strain == col) & (df[r] != 0)].Hour.tolist() # Added
                                    
                    if len(time_list) == 0:
                        time_list = df[df.Strain == col].Hour.tolist()
                    if 0 not in time_list:
                        time_list.append(0)
                    if end not in time_list:
                        time_list.append(end)
                    
                    if col != 'DH1':
                        errors = df_err[(df_err.metabolite==r) & (df_err.strain == col) & 
                                        (df_err.time_point.isin(time_list))].metabolite_std.tolist()  
                    else:
                        errors = df_err[(df_err.metabolite==r) & (df_err.strain == col) & 
                                        (df_err.time_point.isin(time_list))].wt_std.tolist()  
                    if len(errors) <1:
                        errors = [0 for i in df[(df.Strain == col) & (df.Hour.isin(time_list))].Hour.tolist()]
                        
                    df[(df.Strain == col) & (df.Hour.isin(time_list))].set_index('Hour').plot(ax=a, color = 'grey', lw=1)
                    
                    a.plot(df[(df.Strain == col) & (df.Hour.isin(time_list))].Hour, 
                           df[(df.Strain == col) & (df.Hour.isin(time_list))][r], color = color_map_dict[col], lw=4)
                                       
                    if np.max(df[(df.Strain == col) & (df.Hour.isin(time_list))][r]) + np.max(errors) > max_val:
                        max_val = np.max(df[(df.Strain == col) & (df.Hour.isin(time_list))][r]) + np.max(errors)
                         
                    if np.min(df[(df.Strain == col) & (df.Hour.isin(time_list))][r]) - np.min(errors) < min_val:
                        min_val = np.min(df[(df.Strain == col) & (df.Hour.isin(time_list))][r]) - np.min(errors)
                
                a.legend().set_visible(False)
                a.grid(False)
            
                k=k+1
            
                a.set_ylabel("conc. (mM)", fontsize=30) 
                a.set_xlabel("time (hr)", fontsize=30)
                a.set_title(r, fontsize=30)
            
            
                for tick in a.xaxis.get_major_ticks():
                    tick.label.set_fontsize(30) 
                for tick in a.yaxis.get_major_ticks():
                    tick.label.set_fontsize(30)
            
                # set limits
                a.set_xlim(2,end)
                
                if min_val == max_val: # Added
                    max_val += 0.1
                    min_val -= 0.1
                    
                a.set_ylim(min_val,max_val)
            
                #color based on phase
                a.axvspan(0, 6, alpha=0.2, color=c2)
                a.plot((1,1),(min_val,max_val),'grey',lw=1)
                a.axvspan(6, 20, alpha=0.2, color=c3)
                a.axvspan(20, 72, alpha=0.2, color=b3)
                a.plot((2,72),(0,0),'r-',lw=1)
                
    fig.tight_layout();


# Metabolite Z-score Analysis

Phases:

- I: 0 to 4 (0 to 6 hours)
- II: 4 to 10 (8 to 20 hours)
- III: 10 to 13 (24 to 72 hours)

Method:
    
- take the fold difference (concentration) from time 0 - time i across all time points for all measurements
- loop over mets/proteins, calculate the z score (X-mu)/(sigma) for each time point j, where mu and sigma are mean and std over all fold changes in concentration for met/protein j
- given, 8 degrees of freedom (i.e. time points), select all time points with z-scores higher than 1.39 (for 80% or alternatively, 1.86 for 90% confidence level)
- result is which time points have significant fold changes from the starting time point 0
    
Summary:

- in most cases, 8 hours after induction, cells undergo large-scale change in the proteome
- only in few cases do certain proteins have significant fold changes at 4 hours or 48 hours

1. changes across all strains compared to wt - compares which reactions change significantly over wt changes
       - are some strains closer to wt phenotype than others?

- [(time_di, strain_j, data_k) - (time_di, strain_wt, data_k)]

To remind us of the samples and strains that we are working with

List all time points that are measured

In [28]:
all_time_points

[0, 2, 4, 6, 8, 10, 12, 16, 18, 20, 24, 36, 48, 72]

List all the strains that will be tracked

In [29]:
list_strains

['I1', 'I2', 'I3', 'L1', 'L2', 'L3', 'B1', 'B2', 'DH1']

Each strain will include three phases with the following assigned time slots, 

Phase 1: 4 samples at times [0, 2, 4, 6]
Phase 2: 6 samples at times [8, 10, 12, 16, 18, 20]
Phase 3: 4 samples at times [24, 36, 48, 72]

### A function to calculate the z-score

The dataframe required by the calculate_metabolite_Zscore(df) methods includes the following information where df = "metabolomics_diff"

![image-2.png](attachment:image-2.png)

The z-score is added to the table after it is calculated in the method below.

The calculation performed is 

- z = x- mean(p) / std(p)
- mean and std consider differences to WT across all metabolites within a class: 
         (i) intra vs exo; mev-related (dxp vs mev) 
         (ii) non-mev (organic acids vs amino vs other)) across all time points for a given strain
- The populations (p) are different depending on the metabolites
         (i) The extracellular and intracellular are treated separately
         (ii) The mevalonate_mets (mevalonate_mets) and dxp_mets (dxp_mets) are combined to create a unique population
         (iii) The organic acid (organic_acid) metabolites are treated like a unique population
         (iv) The amino acids (amino_acids_all) are treated like a unique population
         (v) If they don't fit into any of the above three categories they are included in a population that includes 
             all the metabolites (either extracelluar or intracellular)
- population mean difference referred to as: mu_p (mean(p)) (p = population)
- population std: std_p (strain vs wt difference for all measurements in a class)
- ex_con = extracellular dmapp/ipdp measurements

In [30]:
# mevalonate_rxns_list = {'Mev-P':'EX_5pmev_e', 'GGPP':'EX_GGDP_e','HMG-coA':'EX_hmgcoa_e', 'hmbdp':'EX_h2mb4p_e', 'Mevalonate':'EX_mev_e', 'Isopentenol':'EX_ipoh_e', 'IP':'EX_ip_e', 'dxyl5p':'EX_dxyl5p_e', 'FPP':'EX_frdp_e', 'GPP':'EX_grdp_e', '2mecdp':'EX_2mecdp_e', '4c2me':'EX_4c2me_e'}
# mevalonate_mets_list = ['grdp_e', 'frdp_e', 'ipdp_e', 'h2mb4p_e', 'dmpp_e', 'atp_e', '5dpmev_c', 'mev_R_c', 'mev_R_e', '5dpmev_e', 'accoa_e', 'hmgcoa_c', 'coa_e', 'dxyl5p_e', 'hmgcoa_e', 'nadh_e', 'ggdp_c', 'lim_e', 'ggdp_e', 'lim_c', '2mecdp_e', 'adp_e', 'aacoa_e', 'ip_e', 'ip_c', 'nadph_e', 'nadp_e', 'ipoh_e', 'bis_c', 'ipoh_c', 'bis_e', '4c2me_e', '5pmev_c', '5pmev_e', 'nad_e']
#dxp_mets = ['dxyl5p_e','dxyl5p_c','h2mb4p_c','h2mb4p_e','2me4p', '2mecdp_c','2mecdp_e', '4c2me_c', '4c2me_e']
#organic_acids = ['ac_e','ac_e','fum_c', 'for_e','glc__D_e', 'glu__L_c','lac__D_c', 'lac__D_e','pyr_c','pyr_e'] +\
#                ['succ_e','succ_c']

def calculate_metabolite_Zscore(df):
    
    read_to_struct = []
    p = ProgressBar(len(df.strain.unique()))
    
    try:
        df = df.drop(['z_score'],axis = 1)
    except:
        pass

    for ind,s in enumerate(df.strain.unique()):
        p.animate(ind+1)
        
        if s != 'DH1':
         
            for t in df[df.strain == s].time_point.unique():
            
                for r in df[(df.strain == s) & (df.time_point == t)].metabolite.unique():
                    tt = 0
                    mu_p_e = 0
                    std_p_e = 0
                    
                # differentiate between extra and intracellular:
                    # is extracellular
                    if '_e' in r or 'ex_con' in r:
                    
                        if r in mevalonate_mets or r in dxp_mets:
                            mu_p_e = df[(pd.notnull(df.diff_to_wt)) & 
                                        (df.strain == s) & (df.metabolite.str.contains('_e')) & 
                                        (df.metabolite.isin(mevalonate_mets+dxp_mets))]['diff_to_wt'].mean()
                            std_p_e = df[(pd.notnull(df.diff_to_wt)) & (df.strain == s) & 
                                         (df.metabolite.str.contains('_e')) & 
                                         (df.metabolite.isin(mevalonate_mets+dxp_mets))]['diff_to_wt'].std()
                        
                        elif r in organic_acids:
                            mu_p_e = df[(pd.notnull(df.diff_to_wt)) & (df.strain == s) & 
                                        (df.metabolite.str.contains('_e')) & 
                                         (df.metabolite.isin(organic_acids))]['diff_to_wt'].mean()
                            std_p_e = df[(pd.notnull(df.diff_to_wt)) & (df.strain == s) & 
                                         (df.metabolite.str.contains('_e')) & 
                                         (df.metabolite.isin(organic_acids))]['diff_to_wt'].std()                        
                        
                        elif r in amino_acids_all:
                            mu_p_e = df[(pd.notnull(df.diff_to_wt)) & (df.strain == s) & 
                                        (df.metabolite.str.contains('_e')) & 
                                        (df.metabolite.isin(amino_acids_all))]['diff_to_wt'].mean()
                            std_p_e = df[(pd.notnull(df.diff_to_wt)) & (df.strain == s) & 
                                         (df.metabolite.str.contains('_e')) & 
                                         (df.metabolite.isin(amino_acids_all))]['diff_to_wt'].std()                       
                                                          
                        else:
                            mu_p_e = df[(pd.notnull(df.diff_to_wt)) & (df.strain == s) & (df.metabolite.str.contains('_e')) & 
                                        (~df.metabolite.isin(mevalonate_mets+dxp_mets)) & 
                                        (~df.metabolite.isin(organic_acids+amino_acids_all))]['diff_to_wt'].mean()
                            std_p_e = df[(pd.notnull(df.diff_to_wt)) & (df.strain == s) & 
                                         (df.metabolite.str.contains('_e')) & 
                                         (~df.metabolite.isin(mevalonate_mets+dxp_mets)) & 
                                         (~df.metabolite.isin(organic_acids+amino_acids_all))]['diff_to_wt'].std()                        
                        
                        tt = df[(pd.notnull(df.diff_to_wt)) & (df.strain == s) & (df.metabolite == r) & 
                                (df.time_point == t)].diff_to_wt.tolist()
                        
                        if len(tt)<1:
                            tmp_z_1 = 0
                            tt = 0
                        else:
                            tt = tt[0]
                            tmp_z_1 = (np.true_divide(tt-mu_p_e,std_p_e))
                    
 
                    # is intracellular     
                    else:
                        mu_p_c = 0
                        std_p_c = 0
                    
                        if r in mevalonate_mets or r in dxp_mets:
                            mu_p_c = df[(pd.notnull(df.diff_to_wt)) & (df.strain == s) & 
                                        (df.metabolite.str.contains('_c')) & 
                                        (df.metabolite.isin(mevalonate_mets+dxp_mets))]['diff_to_wt'].mean()
                            std_p_c = df[(pd.notnull(df.diff_to_wt)) & (df.strain == s) & 
                                         (df.metabolite.str.contains('_c')) & 
                                         (df.metabolite.isin(mevalonate_mets+dxp_mets))]['diff_to_wt'].std()
                    
                    
                        elif r in organic_acids:
                            mu_p_c = df[(pd.notnull(df.diff_to_wt)) & (df.strain == s) & 
                                        (df.metabolite.str.contains('_c')) & 
                                        (df.metabolite.isin(organic_acids))]['diff_to_wt'].mean()
                            std_p_c = df[(pd.notnull(df.diff_to_wt)) & (df.strain == s) & 
                                         (df.metabolite.str.contains('_c')) & 
                                         (df.metabolite.isin(organic_acids))]['diff_to_wt'].std()                     
                    
                    
                        elif r in amino_acids_all:
                            mu_p_c = df[(pd.notnull(df.diff_to_wt)) & (df.strain == s) & 
                                        (df.metabolite.str.contains('_c')) & 
                                        (df.metabolite.isin(amino_acids_all))]['diff_to_wt'].mean()
                            std_p_c = df[(pd.notnull(df.diff_to_wt)) & (df.strain == s) & 
                                         (df.metabolite.str.contains('_c')) & 
                                         (df.metabolite.isin(amino_acids_all))]['diff_to_wt'].std()                    
                    
                        else:
                            mu_p_c =  df[(pd.notnull(df.diff_to_wt)) & (df.strain == s) & 
                                         (df.metabolite.str.contains('_c')) & 
                                         (~df.metabolite.isin(mevalonate_mets+dxp_mets)) & 
                                         (~df.metabolite.isin(organic_acids+amino_acids_all))]['diff_to_wt'].mean()
                            std_p_c = df[(pd.notnull(df.diff_to_wt)) & (df.strain == s) & 
                                         (df.metabolite.str.contains('_c')) & 
                                         (~df.metabolite.isin(mevalonate_mets+dxp_mets)) & 
                                         (~df.metabolite.isin(organic_acids+amino_acids_all))]['diff_to_wt'].std()                        
            
                        tt = df[(pd.notnull(df.diff_to_wt)) & (df.strain == s) & (df.metabolite == r) & 
                                (df.time_point == t)].diff_to_wt.tolist()  

                                        
                        if len(tt)<1:
                            tmp_z_1 = 0
                            tt = 0
                        else:
                            tt = tt[0]
                            tmp_z_1 = (np.true_divide(tt-mu_p_c,std_p_c))
            
                    read_to_struct.append({'strain':s, 'metabolite':r,'z_score':tmp_z_1,'time_point':t})

    df = pd.merge(df, pd.DataFrame(read_to_struct),on=['strain','metabolite','time_point']).drop_duplicates()
    df = df[pd.notnull(df.diff_to_wt)]
    return df

### A function to plot the z-scores

In [31]:
from matplotlib import cm
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
import numpy as np
import matplotlib
import matplotlib.pyplot as plt # Added
import brewer2mpl
import string

def z_score_heat_plot(strain_list,subset_mets,time_point_list, df):
    
    '''
    Example: z_score_heat_plot(strain_list, mets_to_plot, time_point_list, metabolomics_diff)
        where the user can select by most significant z-score:
            metabolomics_diff[(metabolomics_diff.z_score > 1.7) & (metabolomics_diff.z_score < 1.7)]
    
        where:
            strain_list = ['I1','I2','I3','L1','L2','L3','B1','B2']
            mets_to_plot = ['accoa_c','akg_c','acon__C_c','cit_c','icit_c','glx_c','glyclt_c','fdp_c','3pg_c','pep_c','nadp_c','nad_c','amp_c']
            time_point_list = [4,18,48]
    '''

    for tt in time_point_list:
    
        temp_df = df[(df.metabolite.isin(mets_to_plot)) & (df.time_point==tt)]
        
        M = np.array([])
        
        for i in subset_mets:
    
            tmp_list = []
            tmp_strain = temp_df[temp_df.metabolite == i].strain.unique().tolist()
    
            for j in strain_list:
                try:
                    tmp_list.append(np.mean(temp_df[(temp_df.metabolite == i) & 
                                                    (temp_df.strain == j)].diff_to_wt.tolist()))  
                
                except:
                    tmp_list.append(0)
       
                
            if len(M) > 0:
                M = np.vstack((M, tmp_list))
            else:
                M = tmp_list
        
        fig, ax = plt.subplots(figsize=(10,15))
        im = ax.imshow(M, cmap ='Blues')
        
        # Show all ticks and label them with the respective list entries
        ax.set_yticks(np.arange(len(subset_mets)))
        ax.set_xticks(np.arange(len(strain_list)))
        ax.set_yticklabels(subset_mets, fontsize = 16)       
        ax.set_xticklabels(strain_list, fontsize = 16)       
        
        # Loop over data dimensions and create text annotations.
        for i in range(len(subset_mets)):
            for j in range(len(strain_list)):
                text = ax.text(j, i, M[i, j].round(2),
                       ha="center", va="center", color="r", fontsize = 16, fontweight ="bold")
        
        fig.suptitle('Metabolite Z-Scores Changes with Respect to Wild Data', fontweight ="bold", fontsize = 16)
        fig.tight_layout()
        plt.show()

        #name_graph = 'heat_plot_mets_FOLD_diff_wt_strain_all_strains_time_PHASE_%s'%tt
        #fig.savefig('~/%s.svg' %str(name_graph), format='SVG')

### A function to cluster the metabolic shifts

In [32]:
def cluster_metabolite_shifts(df_main):
    ''' code to cluster metabolites based on simple features '''
    
    read_to_struct = []

    for strain in df_main.strain.unique():
        for met in df_main.metabolite.unique():
    
            #df = df_main[df_main.metabolite == met][df_main.strain == strain]
            df = df_main[(df_main.metabolite == met) & (df_main.strain == strain)]

            orig = df.diff_to_wt.tolist()[0]
            std_1 = orig + df.diff_to_wt_std.tolist()[0]
            std_2 = orig - df.diff_to_wt_std.tolist()[0]
    
            # check if met starts at zero
            if min(std_1,std_2) <= 0.0 <= max(std_1,std_2):
                starts_at_zero = True
            else:
                starts_at_zero = False
        
            # check if met ends at zero
            end = df.diff_to_wt.tolist()[-1]
            std_1 = end + df.diff_to_wt_std.tolist()[-1]
            std_2 = end - df.diff_to_wt_std.tolist()[-1]
    
            if min(std_1,std_2) <= 0.0 <= max(std_1,std_2):
                ends_at_zero = True
            else:
                ends_at_zero = False   
        
            #check how many times we leave/return to zero
            leave_zero = 0
            return_to_zero = 0
            count = 0
    
            for t in df.index:
                if count == 0:
                    t_prev_up = np.max([(df.diff_to_wt[t] - df.diff_to_wt_std[t]), (df.diff_to_wt[t] + df.diff_to_wt_std[t])])
                    t_prev_down = np.min([(df.diff_to_wt[t] - df.diff_to_wt_std[t]), (df.diff_to_wt[t] + df.diff_to_wt_std[t])])
                    t_curr_up = np.max([(df.diff_to_wt[t] - df.diff_to_wt_std[t]), (df.diff_to_wt[t] + df.diff_to_wt_std[t])])
                    t_curr_down = np.min([(df.diff_to_wt[t] - df.diff_to_wt_std[t]), (df.diff_to_wt[t] + df.diff_to_wt_std[t])])
                    count +=1
                else:
                    # calculate new time point
                    t_curr_up = np.max([(df.diff_to_wt[t] - df.diff_to_wt_std[t]), (df.diff_to_wt[t] + df.diff_to_wt_std[t])])
                    t_curr_down = np.min([(df.diff_to_wt[t] - df.diff_to_wt_std[t]), (df.diff_to_wt[t] + df.diff_to_wt_std[t])])
            
                    # deviates from zero:
                    # if it was previously zero at time i-1 and now has deviated
                    if t_prev_down <= 0.0 <= t_prev_up:
                        if (t_curr_down > 0.0 and t_curr_up > 0.0) or (t_curr_down < 0.0 and t_curr_up < 0.0):   
                            leave_zero += 1
                
                    else:
                        # return to zero:
                        # if it was previously not at zero at time i-1 and now has returned to zero
                        if t_curr_down <= 0.0 <= t_curr_up:
                            return_to_zero += 1

                    t_prev_up = t_curr_up
                    t_prev_down = t_curr_down
                    count +=1           
                    
            read_to_struct.append({'strain':strain,'metabolite':met,'starts_at_zero':starts_at_zero, 'ends_at_zero':ends_at_zero, 'leave_zero':leave_zero, 'return_to_zero':return_to_zero})
    
    
    DF_MET_clustering = pd.DataFrame(read_to_struct)
    return DF_MET_clustering

### A function to find the shifts in time

In [33]:
def find_shifts_in_time(df, df_omics):    
    ''' script that identifies where shifts occur at specific time points '''
    
    read_to_struct = []
    for r in df.metabolite.unique().tolist():
        for s in df[(DF_MET_clustering.metabolite == r)].strain.unique():

            sig_times = []
            sig_shifts = []
        
            strain_tmp = s 

            df_diff = df_omics[(df_omics.metabolite == r) & (df_omics.strain == strain_tmp)]
            
            tmp_list = df_diff.diff_to_wt.tolist()
    
            for t in df_diff.index:
                t_curr_up = np.max([(df_diff.diff_to_wt[t] - df_diff.diff_to_wt_std[t]), (df_diff.diff_to_wt[t] + df_diff.diff_to_wt_std[t])])
                t_curr_down = np.min([(df_diff.diff_to_wt[t] - df_diff.diff_to_wt_std[t]), (df_diff.diff_to_wt[t] + df_diff.diff_to_wt_std[t])])

                if t_curr_down <= 0.0 <= t_curr_up:
                    pass
                else:
                    sig_times.append(df_diff.time_point[t])
                    sig_shifts.append("%.4f" %np.mean([t_curr_up,t_curr_down]))
    
            read_to_struct.append({ 'metabolite':r, 'strain':strain_tmp, 'significant_shifts':sig_shifts, 'times':sig_times }) 
    
    return(pd.DataFrame(read_to_struct)) 

### A function to plot the profile differences

In [34]:
def plot_dynamic_profile_differences(strain_list, list_mets, df_data):
    '''
    Example: plot_dynamic_profile_differences(strain_list, mets_to_plot)
    
        where:
            strain_list = ['I1','I2','I3','L1','L2','L3','B1','B2']
            mets_to_plot = ['accoa_c','akg_c','acon__C_c','cit_c','icit_c','glx_c','glyclt_c','fdp_c']
    '''
    color_map_dict = {'I1':'black','I2':'black','I3':'black','L1':'black','L2':'black',
                      'L3':'black','B1':'black','B2':'black','DH1':'black'}

    if strain_list == 'all':
        strain_list = ['I1','I2','I3','L1','L2','L3','B1','B2']
    elif strain_list == 'I':
        strain_list = ['I1','I2','I3']
    elif strain_list == 'L':
        strain_list = ['L1','L2','L3']
    elif strain_list == 'B':
        strain_list = ['B1','B2']
    else:
        pass
        
    df_data = df_data.fillna(0) #### Added
    
    for strain in strain_list: 
        fig_len = len(list_mets)*1.1
        num_col = len(list_mets)
        
        fig, axes = plt.subplots(nrows=int(np.round(true_divide(len(list_mets),num_col))), 
                                 ncols=num_col, figsize=(25,fig_len));
        
        nrows = int(np.round(true_divide(len(list_mets),num_col))) # Added
        ncols = num_col # Added
        
        k=0
        af=axes.flatten()       
        for a in af:
            if k < len(list_mets):
                r = list_mets[k]
                
                df = df_data[(df_data.strain == strain) & 
                             (df_data.metabolite == r)].set_index(['time_point'])  ##
                
                
                errors = df.diff_to_wt_std.tolist()[0] 
                if errors < 0.001:
                    errors = df.wt_std.tolist()[0]
                df.diff_to_wt.plot(ax=a, color = color_map_dict[strain],alpha=0.53,legend=False,ls='None', 
                                   marker='o',markersize=10)
                a.errorbar(df.index,df.diff_to_wt, yerr=errors,color = color_map_dict[strain],ls='None', 
                           marker='o',markersize=10)
                a.grid(False);
                k=k+1
            
                a.set_ylabel("diff. (fold)", fontsize=15) 
                a.set_xlabel("time (hr)", fontsize=15)
                a.set_title(r, fontsize=20)
            
            
                for tick in a.xaxis.get_major_ticks():
                    tick.label.set_fontsize(14) 
                for tick in a.yaxis.get_major_ticks():
                    tick.label.set_fontsize(14)
            
                # set limits
                a.set_xlim(2,72)

                max_val = np.max([np.max(df.diff_to_wt.values + max(df.diff_to_wt_std)) , 
                                  min(df.diff_to_wt.values - max(df.diff_to_wt_std))])
                min_val = np.min([np.max(df.diff_to_wt.values + max(df.diff_to_wt_std)) , 
                                  min(df.diff_to_wt.values - max(df.diff_to_wt_std))])
                
                if min_val == max_val:
                    min_val = -0.1
                    max_val = 0.1
                
                
                a.set_ylim(min_val,max_val)
            
                #color based on phase
                a.axvspan(0, 6, alpha=0.2, color=c2)
                a.plot((1,1),(min_val,max_val),'grey',lw=1)
                a.axvspan(6, 20, alpha=0.2, color=c3)
                a.plot((4,4),(min_val,max_val),'grey',lw=1)
                a.axvspan(20, 72, alpha=0.2, color=b3)
                a.plot((2,72),(0,0),'red',lw=1)
        #fig.tight_layout();

# Compute strain differences and store in a pandas dataframe

In [35]:
metabolomics_diff  = create_difference_dataframe(DF_endo_avg_metabolomics_cobra_conc, DF_exo_avg_metabolomics_cobra_conc)
metabolomics_diff[:20]

  diff = strain_val/wt_val
  diff = strain_val/wt_val
  diff = strain_val/wt_val
  diff = strain_val/wt_val


Unnamed: 0,strain,metabolite,time_point,diff_to_wt,diff_to_wt_std,phase,metabolite_std,wt_std
0,I1,glc__D_e,0,0.985,0.342,1,1.952,1.982
1,I1,glc__D_e,2,1.044,0.505,1,1.752,1.678
2,I1,glc__D_e,4,1.096,0.656,1,1.599,1.459
3,I1,glc__D_e,6,1.082,0.568,1,1.487,1.375
4,I1,glc__D_e,8,1.054,0.421,1,1.337,1.269
5,I1,glc__D_e,10,1.044,0.359,1,1.253,1.2
6,I1,glc__D_e,12,1.065,0.431,1,1.247,1.171
7,I1,glc__D_e,16,1.028,0.266,1,1.153,1.122
8,I1,glc__D_e,18,1.102,0.482,2,1.147,1.041
9,I1,glc__D_e,20,1.084,0.427,2,1.104,1.018


# Compute the significance perturbation (z score) for a given metabolite, between two conditions

## Group metabolites based on whether they are organic acids, endogenous vs not endogenous

- How significant their differences are will be influenced by these factors, thus they should be grouped together for further assessment

In [36]:
import cobra.test
from cobrapy_bigg_client import client

# create genome-scale model (GEM) for engineered strain
m = cobra.io.load_json_model("iJO1366_mevalonate_pathways.json")

# cluster metabolite and reaction info for analysis of heterologous pathway effects
mevalonate_mets = ['hmgcoa_c','hmgcoa_e','mev_R_e','mev_R_c', '5pmev_c', '5pmev_e','ex_con.id','ipdp_c','ip_e'] +\
                  ['ip_c','frdp_e','frdp_c','grdp_e', 'grdp_c','ggdp_e','ipoh_e','lim_e', 'lim_c', 'bis_e']
subset_mets = ['aacoa_c','accoa_c','akg_c','acon__C_c','cit_c','icit_c','oxalcoa_c','glx_c','glyclt_c','fdp_c'] +\
              ['3pg_c','pep_c','nadp_c','nadp_e','nad_c','nad_e','amp_c','amp_e','adp_c','adp_e','atp_c','atp_e']
organic_acids = ['ac_e','ac_e','fum_c', 'for_e','glc__D_e', 'glu__L_c','lac__D_c', 'lac__D_e','pyr_c','pyr_e'] +\
                ['succ_e','succ_c']
dxp_mets = ['dxyl5p_e','dxyl5p_c','h2mb4p_c','h2mb4p_e','2me4p', '2mecdp_c','2mecdp_e', '4c2me_c', '4c2me_e']
reaction_arr_subset = ['SUCDi','MDH','ICDHyr','FUM','FRD3','CS','AKGDH','ACONTb','ACONTa','FRD2','PDH','GAPD','ENO'] +\
                      ['PGM','PGK','TPI','PYK','GLYAT','GLNS','THRA2','PGL','GND','G6PDH2r','PFK_3','FBA3','RPI'] +\
                      ['TKT1','TKT2','TALA']

# create dataframe to manage subsets
read_to_struct = []
rxn_list = []
ss_list = {}

for r in reaction_arr_subset:
    read_to_struct.append({'rxn':r, 'subsystem':m.reactions.get_by_id(r).subsystem})    
reaction_arr_subset = pd.DataFrame(read_to_struct)

# create groupings by metabolic subsystem
for s in reaction_arr_subset.subsystem.unique():
    rxn_list = rxn_list + [t for t in reaction_arr_subset[reaction_arr_subset.subsystem == s].rxn.tolist()]
    ss_list[s] = [t for t in reaction_arr_subset[reaction_arr_subset.subsystem == s].rxn.tolist()]
    
amino_acids_all = []
for i in metabolomics_diff.metabolite.unique():
    if '__L' in i or 'gly_' in i:
        if 'mal__L_c' not in i and 'glu__L_c' not in i:
            amino_acids_all.append(str(i))

Using license file c:\gurobi910\gurobi.lic
Academic license - for non-commercial use only - expires 2022-11-21


List of the amino acid metabolites

In [37]:
amino_acids_all

['gly_c',
 'ala__L_c',
 'ser__L_c',
 'pro__L_c',
 'val__L_c',
 'thr__L_c',
 'cys__L_c',
 'leu__L_c',
 'ile__L_c',
 'asn__L_c',
 'asp__L_c',
 'gln__L_c',
 'lys__L_c',
 'met__L_c',
 'his__L_c',
 'phe__L_c',
 'arg__L_c',
 'tyr__L_c',
 'trp__L_c']

## Compute Z-scores for each grouping of metabolite for all engineered strains (test vs control)

- z = x- mean(p) / std(p)
- mean and std consider differences to WT across all metabolites within a class: 
         (i) intra vs exo; mev-related (dxp vs mev) 
         (ii) non-mev (organic acids vs amino vs other)) across all time points for a given strain
- population mean difference referred to as: mu_p (mean(p)) (p = population)
- population std: std_p (strain vs wt difference for all measurements in a class)
- ex_con = extracellular dmapp/ipdp measurements

In [38]:
metabolomics_diff[:20]

Unnamed: 0,strain,metabolite,time_point,diff_to_wt,diff_to_wt_std,phase,metabolite_std,wt_std
0,I1,glc__D_e,0,0.985,0.342,1,1.952,1.982
1,I1,glc__D_e,2,1.044,0.505,1,1.752,1.678
2,I1,glc__D_e,4,1.096,0.656,1,1.599,1.459
3,I1,glc__D_e,6,1.082,0.568,1,1.487,1.375
4,I1,glc__D_e,8,1.054,0.421,1,1.337,1.269
5,I1,glc__D_e,10,1.044,0.359,1,1.253,1.2
6,I1,glc__D_e,12,1.065,0.431,1,1.247,1.171
7,I1,glc__D_e,16,1.028,0.266,1,1.153,1.122
8,I1,glc__D_e,18,1.102,0.482,2,1.147,1.041
9,I1,glc__D_e,20,1.084,0.427,2,1.104,1.018


#### Add z-score to "metabolites_diff" dataframe

In [None]:
# note, this takes roughly 5-10 min - note the progress bar below to help track computation

metabolomics_diff = calculate_metabolite_Zscore(metabolomics_diff)
metabolomics_diff[:20]

 [                  0%                  ]
 [********         22%                  ]  2 of 9 complete


  tmp_z_1 = (np.true_divide(tt-mu_p_e,std_p_e))


 [*************    33%                  ]  3 of 9 complete
 [*****************44%                  ]  4 of 9 complete


### Notes on what the Z score means:

For differences based on subtraction:
- negative Z score indicates depletion: WT flux > strain flux (strain - WT < 0)
- positive Z score indicates enrichment: WT flux < strain flux (strain - WT > 0)

For differences based on division:
- negative Z score indicates depletion: WT flux > strain flux (strain / WT < 1)
- positive Z score indicates enrichment: WT flux < strain flux (strain / WT > 1)    
    
## Characterize shift mechanisms into six different dynamic profile categories

In this section, we use the above dataframe to compute dynamic difference profiles:
    
    (i) no change
    (ii) constant change
    (iii) deviation
    (iv) return
    (v) shift
    (vi) transient
    
![image.png](attachment:image.png)

The following is computed for each metabolite per strain:
- number of deviations from 0 (float)
- number of returns to 0 (float)
- start at 0 (True/False)
- end at 0 (True/False)

In [None]:
DF_MET_clustering = cluster_metabolite_shifts(metabolomics_diff)
DF_MET_clustering.head(20)

## Profile 1: "No change"

In [None]:
df = DF_MET_clustering[(DF_MET_clustering.ends_at_zero == True) & (DF_MET_clustering.starts_at_zero == True) & 
                       (DF_MET_clustering.leave_zero == 0) & (DF_MET_clustering.return_to_zero == 0)]

print("Non-changing metabolites:", df.metabolite.unique().tolist())
df.set_index(['strain','metabolite']).head(30)

## Profile 2: "Constant change"

![image.png](attachment:image.png)

In [None]:
df = DF_MET_clustering[(DF_MET_clustering.ends_at_zero == False) & (DF_MET_clustering.starts_at_zero == False) 
                       & (DF_MET_clustering.leave_zero == 0) & (DF_MET_clustering.return_to_zero == 0)]

print("Constant changing metabolites:", df.metabolite.unique().tolist())
df.set_index(['strain','metabolite']).head(30)

When do these shifts occur? (since they are constant- all time points)

In [None]:
df_omics = metabolomics_diff
find_shifts_in_time(df, df_omics).set_index(['strain','metabolite']).head(30)

## Profile 3: "Deviation"

![image.png](attachment:image.png)

In [None]:
df = DF_MET_clustering[(DF_MET_clustering.ends_at_zero == False) & (DF_MET_clustering.starts_at_zero == True) & 
                       (DF_MET_clustering.leave_zero == 1) & (DF_MET_clustering.return_to_zero == 0)]

df.set_index(['strain','metabolite']).head(30)

When do these shifts occur?

In [None]:
df_omics = metabolomics_diff
find_shifts_in_time(df, df_omics).set_index(['strain','metabolite']).head(30)

## Profile 4: "Return"

![image.png](attachment:image.png)

In [None]:
DF_MET_clustering[(DF_MET_clustering.ends_at_zero == True) & 
                  (DF_MET_clustering.starts_at_zero == False) & (DF_MET_clustering.leave_zero == 0) & 
                  (DF_MET_clustering.return_to_zero == 1)]

## Profile 5: "Shift"

![image.png](attachment:image.png)

In [None]:
DF_MET_clustering[(DF_MET_clustering.ends_at_zero == True) & (DF_MET_clustering.starts_at_zero == True) & 
                  (DF_MET_clustering.leave_zero == 1) & (DF_MET_clustering.return_to_zero == 1)]

## Profile 6: "Transient"

![image.png](attachment:image.png)

In [None]:
DF_MET_clustering[(DF_MET_clustering.ends_at_zero == False) & (DF_MET_clustering.starts_at_zero == False) & 
                  (DF_MET_clustering.leave_zero > 1) & (DF_MET_clustering.return_to_zero > 1)]

# Visualization

### Plot raw metabolite concentrations over time, loops over all strains, takes error estimates from previous step

In [None]:
metabolomics_diff[:20]

In [None]:
from numpy import true_divide

plot_raw_strain_data('I',['I1','I2','I3','DH1'], metabolomics_diff);

### Plot heat map showing the significant changing metabolites per time point (hour = 4), for all strains

In [None]:
strain_list = ['I1','I2','I3','L1','L2','L3','B1','B2']
mets_to_plot = ['accoa_c','akg_c','acon__C_c','cit_c','icit_c','glx_c','glyclt_c','fdp_c','3pg_c','pep_c',
                'nadp_c','nad_c','amp_c']
time_list = [4,18,48] # Available time = [0, 2, 4, 6, 8, 10, 12, 16, 18, 20, 24, 36, 48, 72]

z_score_heat_plot(strain_list, mets_to_plot, time_list, metabolomics_diff)

## Plot dynamic difference profiles between a strain and WT metabolite

### Deviations profile
The rows are selected as "all" which means 'I1','I2','I3','L1','L2','L3','B1',and 'B2'in order. Plotting the reactions associated with the "Deviation difference profile."

![image.png](attachment:image.png)

### The list of metabolites with the "Deviations" profile

In [None]:
DF_MET_clustering[(DF_MET_clustering.ends_at_zero == False) & (DF_MET_clustering.starts_at_zero == True) 
                       & (DF_MET_clustering.leave_zero == True) & (DF_MET_clustering.return_to_zero == False)]

### Plot the data for the "Deviations" profile

In [None]:
df = DF_MET_clustering[(DF_MET_clustering.ends_at_zero == False) & (DF_MET_clustering.starts_at_zero == True) 
                       & (DF_MET_clustering.leave_zero == True) & (DF_MET_clustering.return_to_zero == False)]

tmp_df = find_shifts_in_time(df, metabolomics_diff)

list_mets = tmp_df[~tmp_df.metabolite.isin(mevalonate_mets)].metabolite.unique()[:4]

# All = ['I1','I2','I3','L1','L2','L3','B1','B2']
plot_dynamic_profile_differences('all', list_mets, metabolomics_diff)

### Constant Change profile

![image.png](attachment:image.png)

### The list of metabolites in the "Constant change" profile

In [None]:
DF_MET_clustering[(DF_MET_clustering.ends_at_zero == False) & (DF_MET_clustering.starts_at_zero == False) 
                       & (DF_MET_clustering.leave_zero == False) & (DF_MET_clustering.return_to_zero == False)]

In [None]:
df = DF_MET_clustering[(DF_MET_clustering.ends_at_zero == False) & (DF_MET_clustering.starts_at_zero == False) 
                       & (DF_MET_clustering.leave_zero == False) & (DF_MET_clustering.return_to_zero == False)]

tmp_df = find_shifts_in_time(df, metabolomics_diff)

list_mets = tmp_df[~tmp_df.metabolite.isin(mevalonate_mets)].metabolite.unique()[:4]

# All = ['I1','I2','I3','L1','L2','L3','B1','B2']
plot_dynamic_profile_differences('all', list_mets, metabolomics_diff)

### Shift profile

In [None]:
DF_MET_clustering[(DF_MET_clustering.ends_at_zero == True) & (DF_MET_clustering.starts_at_zero == True) & 
                  (DF_MET_clustering.leave_zero == 1) & (DF_MET_clustering.return_to_zero == 1)]

In [None]:
df = DF_MET_clustering[(DF_MET_clustering.ends_at_zero == True) & (DF_MET_clustering.starts_at_zero == True) & 
                  (DF_MET_clustering.leave_zero == 1) & (DF_MET_clustering.return_to_zero == 1)]

tmp_df = find_shifts_in_time(df, metabolomics_diff)

list_mets = tmp_df[~tmp_df.metabolite.isin(mevalonate_mets)].metabolite.unique()[:4]

# All = ['I1','I2','I3','L1','L2','L3','B1','B2']
plot_dynamic_profile_differences('all', list_mets, metabolomics_diff)