# Articles tables
This notebook contains the code required to reproduce the table shown in the pseudo batch transformation article. 

## loading fedbatch data

In [8]:
import pathlib
import pandas as pd
from patsy import dmatrices
import statsmodels.api as sm
import numpy as np

from pseudobatch import pseudobatch_transform_pandas
from pseudobatch.datasets._dataloaders import _prepare_simulated_dataset

In [9]:
# import the dataset from the article/data folder, this makes sure if the simulations are rerun the new data is used
data_path = pathlib.Path('../data/standard_fed-batch_process.csv')
fedbatch_df = _prepare_simulated_dataset(data_path)

Make a dataframe that only contains the measurements at the sampling time points.

In [10]:
fedbatch_df_measurements_only = (fedbatch_df
    .query('sample_volume > 0')
    .copy()
    .reset_index(drop=True)
)

We will now do the pseudo batch transformation of the simulated measurements.

In [11]:
glucose_in_feed = fedbatch_df_measurements_only['s_f'].iloc[0]

fedbatch_df_measurements_only[["c_Biomass_pseudo", "c_Glucose_pseudo", "c_Product_pseudo", "c_CO2_pseudo"]] = pseudobatch_transform_pandas(
    fedbatch_df_measurements_only,
    measured_concentration_colnames=["c_Biomass", "c_Glucose", "c_Product", "c_CO2"],
    reactor_volume_colname="v_Volume",
    accumulated_feed_colname="v_Feed_accum",
    sample_volume_colname="sample_volume",
    concentration_in_feed=[0, glucose_in_feed, 0, 0],
)

## Calculate the growth rate using the pseudo batch transformation

Now we can calculate the corrected biomass using the pseudo batch transformation

In [12]:
def fit_ols_model(formula_like: str, data: pd.DataFrame) -> sm.regression.linear_model.RegressionResultsWrapper:
    y, X = dmatrices(formula_like, data)
    model = sm.OLS(endog=y, exog=X)
    res = model.fit()
    return res

Now we can fit the growth rate for both the transform and raw biomass data.

In [13]:
res_mu_hat_corrected = fit_ols_model("np.log(c_Biomass_pseudo) ~ timestamp", fedbatch_df_measurements_only)
res_mu_hat_noncorrected = fit_ols_model("np.log(m_Biomass) ~ timestamp", fedbatch_df_measurements_only)

## Calculate yields using the corrected fedbatch data
....

We will first estimate the biomass based substrate yield. To estimate this for the raw data we first need to calculate the consumed glucose time series. This is simply done using the following equation:

$$
cm_{Glucose}(t) = m_{Glucose}(0) + \int_0^t v_{feed}(t) dt \cdot c_{Glucose \: feed} - m_{Glucose}(t)
$$

where $cm_{Glucose}(t)$ is the consumed mass of glucose at timepoint $t$, $m_{Glucose}(t)$ is the mass of glucose at timepoint $t$, $v_{feed}(t)$ is the feeding profile (the time integral is the accumulated feed), $c_{Glucose \: feed}$ is the concentration of glucose in the feeding medium and $m_{Glucose}(t)$ is the measured mass of glucose at time $t$.

In [18]:
initial_glucose = fedbatch_df_measurements_only['m_Glucose'].iloc[0]
fedbatch_df_measurements_only['m_Glucose_consumed'] = fedbatch_df_measurements_only['v_Feed_accum'] * glucose_in_feed - fedbatch_df_measurements_only['m_Glucose'] - initial_glucose

The pseudobatch transformation automatically integrates the feeding volume, thus for the pseudo concentrations we don't need to do further processing. Now we are ready to estimate the yield coefficients. We will follow the standard of yield coefficient being positive.

In [20]:
res_yxs_noncorrected = fit_ols_model(formula_like = "m_Glucose_consumed ~ m_Biomass", data= fedbatch_df_measurements_only)
res_yxs_corrected = fit_ols_model(formula_like = "c_Glucose_pseudo ~ c_Biomass_pseudo", data= fedbatch_df_measurements_only)

print(f"Fitted Yxs from raw data: {np.abs(res_yxs_noncorrected.params[1]).round(5)}")
print(f"Fitted Yxs from pseudo batch transformed data: {np.abs(res_yxs_corrected.params[1]).round(5)}")
print(f"True Yxs: {fedbatch_df.Yxs.iloc[0].round(5)}")

Fitted Yxs from raw data: 2.68841
Fitted Yxs from pseudo batch transformed data: 1.85
True Yxs: 1.85


We see that substrate yield coefficient is correctly calculated using the pseudo concentrations, while the estimate is wrong using the non-transformed data.

## Calculate product yield
We will now proceed to the biomass based product yield. The simulated fermentation process has to products: a generic product and CO2.

In [21]:
res_yxp_noncorrected = fit_ols_model(formula_like = "m_Product ~ m_Biomass", data= fedbatch_df_measurements_only)
res_yxp_corrected = fit_ols_model(formula_like = "c_Product_pseudo ~ c_Biomass_pseudo", data= fedbatch_df_measurements_only)

print(f"Fitted Yxp from raw data: {res_yxp_noncorrected.params[1].round(5)}")
print(f"Fitted Yxp from pseudo batch transformed data: {res_yxp_corrected.params[1].round(5)}")
print(f"True Yxp: {fedbatch_df.Yxp.iloc[0].round(5)}")

Fitted Yxp from raw data: 0.82967
Fitted Yxp from pseudo batch transformed data: 0.82151
True Yxp: 0.82151


In [23]:
res_yxco2_noncorrected = fit_ols_model(formula_like = "m_CO2 ~ m_Biomass", data= fedbatch_df_measurements_only)
res_yxco2_corrected = fit_ols_model(formula_like = "c_CO2_pseudo ~ c_Biomass_pseudo", data= fedbatch_df_measurements_only)

print(f"Fitted Yxco2 from raw data: {res_yxco2_noncorrected.params[1].round(5)}")
print(f"Fitted Yxco2 from pseudo batch transformed data: {res_yxco2_corrected.params[1].round(5)}")
print(f"True Yxco2: {fedbatch_df.Yxco2.iloc[0].round(5)}")

Fitted Yxco2 from raw data: 0.04564
Fitted Yxco2 from pseudo batch transformed data: 0.04519
True Yxco2: 0.04519


For the product yield coefficient the error of not using the pseudo batch transformation is small, but still the yield based on the pseudo concentrations are more accurate.

## Creating overview table
In this section we simply collects all the above results in a table.

In [24]:
overview_table_raw = pd.DataFrame.from_dict({
        "Yxs": [res_yxs_noncorrected.params[1], res_yxs_corrected.params[1], fedbatch_df.Yxs.iloc[0]],
        "Yxp": [res_yxp_noncorrected.params[1], res_yxp_corrected.params[1], fedbatch_df.Yxp.iloc[0]],
        "Yxco2": [res_yxco2_noncorrected.params[1], res_yxco2_corrected.params[1], fedbatch_df.Yxco2.iloc[0]],
        "mu": [res_mu_hat_noncorrected.params[1], res_mu_hat_corrected.params[1], fedbatch_df.mu_true.iloc[0]],
    }, 
    columns=[
        "Non-corrected", "Corrected", "True"
    ],
    orient="index",
)

In [25]:
def relative_error(true_value, predicted_value):
    return (true_value - predicted_value) / true_value

def combine_value_and_error(value: float, error: float)-> str:
    return f"{value:.2f} ({error:.2f})"

def prepare_output_strings(true_value: float, predicted_value: float)-> str:
    error = relative_error(true_value, predicted_value)
    error_in_percent = error * 100
    return [combine_value_and_error(value=v, error=e) for v, e in zip(predicted_value, error)]


In [26]:
overview_table_clean = (
    overview_table_raw
    .assign(rel_error_noncorrected=prepare_output_strings(overview_table_raw["True"], overview_table_raw["Non-corrected"]))
    .assign(rel_error_corrected=prepare_output_strings(overview_table_raw["True"], overview_table_raw["Corrected"]))
    .round(2)
    .rename(columns={
        "rel_error_noncorrected": "Non-corrected (rel. error %)",
        "rel_error_corrected": "Corrected (rel. error %)"
    })
    .drop(columns=["Non-corrected", "Corrected"])
)
overview_table_clean

Unnamed: 0,True,Non-corrected (rel. error %),Corrected (rel. error %)
Yxs,1.85,2.69 (-0.45),-1.85 (2.00)
Yxp,0.82,0.83 (-0.01),0.82 (-0.00)
Yxco2,0.05,0.05 (-0.01),0.05 (-0.00)
mu,0.1,0.07 (0.34),0.10 (-0.00)
