# Pseudo batch transformation


## loading fedbatch data

In [37]:
import sys
import os
import pathlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
from patsy import dmatrices
import statsmodels.api as sm

from pseudobatch import pseudobatch_transform_pandas
from pseudobatch.datasets._dataloaders import _prepare_simulated_dataset

In [38]:
FIGURES_DIR = pathlib.Path('../figures')

In [39]:
# import the dataset from the article/data folder, this makes sure if the simulations are rerun the new data is used
data_path = pathlib.Path('../data/standard_fed-batch_process.csv')
fedbatch_df = _prepare_simulated_dataset(data_path)

To calculate the consumed glucose we require a measurement of the glucose concentration when the feeding is initiated (time 0). Here we assume that we have a measurement at time 0, but if a measurement is not available therefore several options:
1. Assume that initial glucose is 0. If the fed-batch is properly setup, the glucose concentration should be small at time 0. Thus, this assumption should not bias the results significantly. 
2. Estimate the concentration based on experience from similar batch cultures

As stated earlier fed-batch fermentations typically has a low substrate concentration when the feeding is initiated. 

The the initial glucose concentration typically constitute a small fraction of the substrate add over the hole course of the fed-batch process. Thus in most cases it is not crucial to have a very accurate estimate of the initial substrate concentration. One should be cautious if the feeding phase the short, or if the initial substrate concentration is significant compared to the substrate added through feeding.

In [41]:
def consumed_substrate(concentration_in_feed, accumulated_feed_volume, measured_substrate_mass, initial_substrate_mass):
    return initial_substrate_mass + accumulated_feed_volume * concentration_in_feed - measured_substrate_mass

glucose_in_feed = fedbatch_df.s_f.iloc[0] # 100 g/L, substrate (glucose) concentration in feed is store in the dataframe 

fedbatch_df['m_Glucose_consumed'] = consumed_substrate(
    concentration_in_feed=glucose_in_feed,
    accumulated_feed_volume=fedbatch_df['v_Feed_accum'],
    measured_substrate_mass=fedbatch_df['m_Glucose'],
    initial_substrate_mass=0
)

In [42]:
fedbatch_df_measurements_only = (fedbatch_df
    .query('sample_volume > 0')
    .copy()
    .reset_index(drop=True)
)

In [44]:
fedbatch_df_measurements_only[["c_Biomass_pseudo", "c_Glucose_pseudo", "c_Product_pseudo", "c_CO2_pseudo"]] = pseudobatch_transform_pandas(
    fedbatch_df_measurements_only,
    measured_concentration_colnames=["c_Biomass", "c_Glucose", "c_Product", "c_CO2"],
    reactor_volume_colname="v_Volume",
    accumulated_feed_colname="v_Feed_accum",
    sample_volume_colname="sample_volume",
    concentration_in_feed=[0, glucose_in_feed, 0, 0],
)

In [45]:
fedbatch_df_measurements_only

Unnamed: 0,Kc_s,mu_max,Yxs,Yxp,Yxco2,F0,mu0,s_f,sample_volume,timestamp,...,c_Glucose,c_Biomass,c_Product,c_CO2,mu_true,m_Glucose_consumed,c_Biomass_pseudo,c_Glucose_pseudo,c_Product_pseudo,c_CO2_pseudo
0,0.15,0.3,1.85,0.82151,0.045193,0.159031,0.1,100.0,100.0,10.0,...,0.075012,1.337854,0.694737,0.038219,0.100011,1514.398639,1.337854,-1.490688,0.694737,0.038219
1,0.15,0.3,1.85,0.82151,0.045193,0.159031,0.1,100.0,100.0,14.545455,...,0.07501,2.078116,1.308552,0.071987,0.100009,2826.408824,2.107737,-2.91497,1.327204,0.073013
2,0.15,0.3,1.85,0.82151,0.045193,0.159031,0.1,100.0,100.0,19.090909,...,0.075111,3.203015,2.241303,0.1233,0.100099,4667.783417,3.320595,-5.158758,2.323579,0.127826
3,0.15,0.3,1.85,0.82151,0.045193,0.159031,0.1,100.0,100.0,23.636364,...,0.07502,4.879772,3.631646,0.199786,0.100018,7223.688645,5.231542,-8.69401,3.893442,0.214188
4,0.15,0.3,1.85,0.82151,0.045193,0.159031,0.1,100.0,100.0,28.181818,...,0.075064,7.307921,5.645033,0.310547,0.100057,10726.438732,8.24205,-14.263449,6.366604,0.350243
5,0.15,0.3,1.85,0.82151,0.045193,0.159031,0.1,100.0,100.0,32.727273,...,0.07501,10.681703,8.442525,0.464444,0.100009,15462.638428,12.98505,-23.038,10.263028,0.564595
6,0.15,0.3,1.85,0.82151,0.045193,0.159031,0.1,100.0,100.0,37.272727,...,0.075015,15.109112,12.113671,0.666403,0.100014,21782.768703,20.457389,-36.861827,16.401631,0.902295
7,0.15,0.3,1.85,0.82151,0.045193,0.159031,0.1,100.0,100.0,41.818182,...,0.075044,20.503292,16.586449,0.912462,0.100039,30127.024138,32.229719,-58.640638,26.072721,1.434326
8,0.15,0.3,1.85,0.82151,0.045193,0.159031,0.1,100.0,100.0,46.363636,...,0.075043,26.510976,21.567936,1.186506,0.100038,41085.977233,50.776554,-92.952283,41.309136,2.272519
9,0.15,0.3,1.85,0.82151,0.045193,0.159031,0.1,100.0,100.0,50.909091,...,0.075049,32.568141,26.590453,1.462807,0.100044,55524.66644,79.996288,-147.00879,65.313447,3.593056


## Calculate the growth rate using the pseudo batch transformation

Now we can calculate the corrected biomass using the pseudo batch transformation

In [46]:
def fit_ols_model(formula_like: str, data: pd.DataFrame) -> sm.regression.linear_model.RegressionResultsWrapper:
    y, X = dmatrices(formula_like, data)
    model = sm.OLS(endog=y, exog=X)
    res = model.fit()
    return res

Now we can fit the growth rate for both the transform and raw biomass data.

In [47]:
res_mu_hat_corrected = fit_ols_model("np.log(c_Biomass_pseudo) ~ timestamp", fedbatch_df_measurements_only)
res_mu_hat_noncorrected = fit_ols_model("np.log(m_Biomass) ~ timestamp", fedbatch_df_measurements_only)

## Calculate yields using the corrected fedbatch data
....

We can calculate the consumed pseudo batch glucose simply by subtracting the initial pseudo batch transformed glucose by the pseudo batch transformed measurements at each time point.

In [61]:
fedbatch_df_measurements_only['c_Glucose_consumed_pseudo'] = fedbatch_df_measurements_only['c_Glucose_pseudo'].iloc[0] - fedbatch_df_measurements_only['c_Glucose_pseudo']
res_yxs_noncorrected = fit_ols_model(formula_like = "m_Glucose_consumed ~ m_Biomass", data= fedbatch_df_measurements_only)
res_yxs_corrected = fit_ols_model(formula_like = "c_Glucose_consumed_pseudo ~ c_Biomass_pseudo", data= fedbatch_df_measurements_only)

print(f"Fitted Yxs from raw data: {res_yxs_noncorrected.params[1].round(5)}")
print(f"Fitted Yxs from pseudo batch transformed data: {res_yxs_corrected.params[1].round(5)}")
print(f"True Yxs: {fedbatch_df.Yxs.iloc[0].round(5)}")

Fitted Yxs from raw data: 2.68841
Fitted Yxs from pseudo batch transformed data: 1.85
True Yxs: 1.85


In [51]:
res_yxp_noncorrected = fit_ols_model(formula_like = "m_Product ~ m_Biomass", data= fedbatch_df_measurements_only)
res_yxp_corrected = fit_ols_model(formula_like = "c_Product_pseudo ~ c_Biomass_pseudo", data= fedbatch_df_measurements_only)

print(f"Fitted Yxp from raw data: {res_yxp_noncorrected.params[1].round(5)}")
print(f"Fitted Yxp from pseudo batch transformed data: {res_yxp_corrected.params[1].round(5)}")
print(f"True Yxp: {fedbatch_df.Yxp.iloc[0].round(5)}")

Fitted Yxp from raw data: 0.82967
Fitted Yxp from pseudo batch transformed data: 0.82151
True Yxp: 0.82151


Calculating the specific glucose uptake rate using np.gradient() which calculates the finite difference between the points.

The method perform reasonably well, but is slightly biased. This is mostly due to the centering of the biomass division. This introduce a bias because the cells grow exponentially and not linearly.

## Calculate product yield

In [52]:
res_yxp_noncorrected = fit_ols_model(formula_like = "m_Product ~ m_Biomass", data= fedbatch_df_measurements_only)
res_yxp_corrected = fit_ols_model(formula_like = "c_Product_pseudo ~ c_Biomass_pseudo", data= fedbatch_df_measurements_only)

print(res_yxp_noncorrected.params[1])
print(res_yxp_corrected.params[1])
print(fedbatch_df.Yxp.iloc[0])

0.8296739566642375
0.8215102466751044
0.8215102466751038


## Calculate CO2 yield

In [53]:
res_yxco2_noncorrected = fit_ols_model(formula_like = "m_CO2 ~ m_Biomass", data= fedbatch_df_measurements_only)
res_yxco2_corrected = fit_ols_model(formula_like = "c_CO2_pseudo ~ c_Biomass_pseudo", data= fedbatch_df_measurements_only)

print(res_yxco2_noncorrected.params[1])
print(res_yxco2_corrected.params[1])
print(fedbatch_df.Yxco2.iloc[0])

0.04564243854099134
0.045193332445214146
0.0451933324452141


In [54]:
overview_table_raw = pd.DataFrame.from_dict({
        "Yxs": [res_yxs_noncorrected.params[1], res_yxs_corrected.params[1], fedbatch_df.Yxs.iloc[0]],
        "Yxp": [res_yxp_noncorrected.params[1], res_yxp_corrected.params[1], fedbatch_df.Yxp.iloc[0]],
        "Yxco2": [res_yxco2_noncorrected.params[1], res_yxco2_corrected.params[1], fedbatch_df.Yxco2.iloc[0]],
        "mu": [res_mu_hat_noncorrected.params[1], res_mu_hat_corrected.params[1], fedbatch_df.mu_true.iloc[0]],
    }, 
    columns=[
        "Non-corrected", "Corrected", "True"
    ],
    orient="index",
)

In [55]:
def relative_error(true_value, predicted_value):
    return (true_value - predicted_value) / true_value

def combine_value_and_error(value: float, error: float)-> str:
    return f"{value:.2f} ({error:.2f})"

def prepare_output_strings(true_value: float, predicted_value: float)-> str:
    error = relative_error(true_value, predicted_value)
    error_in_percent = error * 100
    return [combine_value_and_error(value=v, error=e) for v, e in zip(predicted_value, error)]


In [57]:
overview_table_clean = (
    overview_table_raw
    .assign(rel_error_noncorrected=prepare_output_strings(overview_table_raw["True"], overview_table_raw["Non-corrected"]))
    .assign(rel_error_corrected=prepare_output_strings(overview_table_raw["True"], overview_table_raw["Corrected"]))
    .round(2)
    .rename(columns={
        "rel_error_noncorrected": "Non-corrected (rel. error %)",
        "rel_error_corrected": "Corrected (rel. error %)"
    })
    .drop(columns=["Non-corrected", "Corrected"])
)
overview_table_clean

Unnamed: 0,True,Non-corrected (rel. error %),Corrected (rel. error %)
Yxs,1.85,2.69 (-0.45),-1.85 (2.00)
Yxp,0.82,0.83 (-0.01),0.82 (-0.00)
Yxco2,0.05,0.05 (-0.01),0.05 (-0.00)
mu,0.1,0.07 (0.34),0.10 (-0.00)
