#### Definition of the model

* finding the optimal parameters
* finding the optimal equations for mu and qs
* applying **Latin Hypercube Sampling** for generating parameter sets
* calculating the **RMSE** of model and experiment

#### Code

In [1]:
import pandas as pd
import numpy as np
import yaml
from sklearn.metrics import mean_squared_error
from C_model import get_LHS_samples, model_optimization, plot_estimation

In [2]:
# Read the combined experimental measurements and keep the two observed
# series (biomass and glucose concentration) that the fit is scored against.
df_exp = pd.read_csv('data/data_combined.csv')
biomass_exp, substrate_exp = df_exp['Biomass [g/L]'], df_exp['Glucose [g/L]']

In [3]:
# Load the model parameters from the YAML configuration file.
# safe_load only builds plain Python objects; the result is passed to
# model_optimization as `param`.
with open('config/parameters.yml', 'r') as file:
    param = yaml.safe_load(file)

![Cell growth model](images/correlation_muqs.png)

The growth rate and the substrate uptake rate depend on each other. Glucose is present in the medium, and the cell takes it up at the glucose uptake rate $q_s$. The energy obtained from glucose supplies many processes, such as growth (described by the growth rate $\mu$), maintenance, and product formation. Consequently, we search for the optimal equation for the glucose uptake rate and calculate the growth rate from it as $\mu = q_s \cdot Y_{xs}$.

In [4]:
def qs0(qs_max, c_glucose, Ks):
    """Monod substrate-uptake rate: ``qs_max * c_glucose / (Ks + c_glucose)``."""
    return qs_max * c_glucose / (Ks + c_glucose)


def qs1(qs_max, c_glucose, Ks, Ki, glu_met):
    """Monod uptake multiplied by the non-competitive inhibition factor ``Ki / (Ki + glu_met)``."""
    return qs_max * c_glucose / (Ks + c_glucose) * (Ki / (Ki + glu_met))


def qs2(qs_max, c_glucose, Ks, c_biomass, lag):
    """Monod uptake damped by the factor ``1 / exp(c_biomass * lag)``.

    NOTE(review): this equation was originally labelled "MONOD + METABOLIZED GLU",
    but the expression depends on ``c_biomass`` and ``lag`` rather than on a
    metabolized-glucose term -- confirm the intended name.
    """
    return qs_max * c_glucose / (Ks + c_glucose) * (1 / (np.exp(c_biomass * lag)))

In [5]:
# Collect the candidate uptake equations in one list; the position of an
# equation in this list is the number used to identify it later on.
qs_all = [qs0, qs1, qs2]

In [6]:
# Root mean squared error is the objective function
def objective_function(parameters, qs_eq, num_qs):
    """Simulate the model for one parameter set and score it against the experiment.

    Parameters
    ----------
    parameters
        Candidate parameter values, forwarded unchanged to ``model_optimization``.
    qs_eq : callable
        Substrate-uptake equation, forwarded unchanged to ``model_optimization``.
    num_qs
        Identifier of ``qs_eq``, forwarded unchanged to ``model_optimization``.

    Returns
    -------
    tuple
        ``(rmse, time_pred, biomass_pred, substrate_pred, volume_pred)``.
        ``rmse`` is the square root of the average of the biomass MSE and the
        substrate MSE; the remaining items are the outputs of
        ``model_optimization``.

    Notes
    -----
    Reads the module-level ``param``, ``biomass_exp`` and ``substrate_exp``.
    Measured and predicted values are paired by index label and rows with a
    missing value on either side are dropped before the error is computed.
    NOTE(review): this presumes predictions and measurements share the same
    row index / sampling grid -- confirm against ``model_optimization``.
    """
    def _paired_mse(measured, predicted, column_keys):
        # Put both series side by side and keep only the complete rows.
        paired = pd.concat([measured, pd.Series(predicted)], axis=1, keys=column_keys).dropna()
        observed = paired[column_keys[0]].values
        simulated = paired[column_keys[1]].values
        return mean_squared_error(observed, simulated)

    # Run the simulation with the candidate parameters and uptake equation.
    time_pred, biomass_pred, substrate_pred, volume_pred = model_optimization(param, parameters, qs_eq, num_qs)

    mse_x = _paired_mse(biomass_exp, biomass_pred, ['biomass_exp', 'biomass_pred'])
    mse_s = _paired_mse(substrate_exp, substrate_pred, ['substrate_exp', 'substrate_pred'])

    # Both state variables contribute with equal weight to the combined score.
    rmse = np.sqrt((mse_x + mse_s) / 2)
    return rmse, time_pred, biomass_pred, substrate_pred, volume_pred

In [7]:
# Number of parameter sets to generate
num_samples = 100

# Define the range for each parameter (one row per parameter)
parameter_bounds = [
    [0.3, 0.5],      # Range for parameter 0 - Yxs
    [0.1, 1.5],      # Range for parameter 1 - qs_max
    [0.1, 20.0],     # Range for parameter 2 - Ks
    [0.1, 20.0],     # Range for parameter 3 - Ki
    [0.0, 0.2],      # Range for parameter 4 - m_s
    [0.001, 1.0],    # Range for parameter 5 - lag
]

# Derived from the bounds so the count cannot drift out of sync when a
# parameter range is added or removed.
num_parameters = len(parameter_bounds)

In [8]:
# Generate the parameter sets with the project's Latin Hypercube helper and
# display the shape of the result as a quick sanity check.
# NOTE(review): no random seed is set in the visible cells -- confirm whether
# get_LHS_samples seeds its sampler; otherwise each run draws different sets.
LHS_samples = get_LHS_samples(num_samples, num_parameters, parameter_bounds)
LHS_samples.shape

(100, 6)

In [10]:
# Evaluate every sampled parameter set with every candidate qs equation and
# collect one result row per (set, equation) combination.
RESULT_COLUMNS = ['set', 'qs', 'Yxs', 'qs_max', 'Ks', 'Ki', 'm_s', 'lag', 'rmse']
RMSE_PLOT_THRESHOLD = 5.0  # only fits with an RMSE at or below this value are plotted

result_rows = []
for set_num in range(LHS_samples.shape[0]):
    init_p = list(LHS_samples[set_num, :])
    for j, qs_eq in enumerate(qs_all):
        # Make the predictions and calculate the error
        rmse, time_pred, biomass_pred, substrate_pred, volume_pred = objective_function(init_p, qs_eq, j)

        # Record the parameters together with the (rounded) error
        result_rows.append([set_num, j, *init_p[:6], round(rmse, 3)])

        # Plot and save only sufficiently good fits. The labels are built
        # inside the guard so a non-finite RMSE cannot break int(rmse).
        if rmse <= RMSE_PLOT_THRESHOLD:
            title = f'set{set_num}/ qs{j} - rmse: {round(rmse, 3)}'
            plot_name = f'set{set_num}_qs{j}_rmse{int(rmse)}'
            plot_estimation(time_pred, biomass_pred, substrate_pred, volume_pred, title, plot_name, set_num)

# Build the frame once from all rows instead of concatenating inside the loop
# (avoids quadratic copying and concatenation with an empty frame).
df_all_sets = pd.DataFrame(result_rows, columns=RESULT_COLUMNS)

# Best fits first; the original row index is kept and written to the CSV.
df_all_sets = df_all_sets.sort_values(by=['rmse'], ascending=True)
df_all_sets.to_csv('data/estimation/0207_1/data.csv')

df_all_sets.head(10)

Unnamed: 0,set,qs,Yxs,qs_max,Ks,Ki,m_s,lag,rmse
269,89,2,0.406619,0.84671,1.355904,13.652274,0.086777,0.265662,5.834
32,10,2,0.450759,1.348163,16.248245,18.284727,0.011123,0.305023,5.859
41,13,2,0.397879,1.15137,4.309711,14.500328,0.095019,0.369708,5.93
5,1,2,0.453494,1.056616,1.233745,8.56869,0.033991,0.398989,5.99
293,97,2,0.358769,1.049629,17.114502,19.181768,0.028571,0.180705,6.229
104,34,2,0.393007,1.075469,7.31797,6.362284,0.013123,0.35936,6.306
266,88,2,0.478655,0.893121,5.093273,0.723069,0.056429,0.417355,6.478
257,85,2,0.345029,0.93214,8.583261,13.174963,0.002042,0.297929,6.496
143,47,2,0.408087,1.244597,16.68108,16.672167,0.163676,0.289621,6.5
65,21,2,0.47143,0.562694,2.25367,3.852938,0.129934,0.37326,6.507


In [None]:
# Read a results table from the LHS_sampling folder.
# NOTE(review): this path differs from 'data/estimation/0207_1/data.csv', the
# file written by the evaluation loop -- presumably it holds the results of an
# earlier run; confirm that it exists and is the intended input.
df_LHS = pd.read_csv('data/estimation/LHS_sampling/data.csv')