In [175]:
import pandas as pd
import numpy as np
from sklearn.metrics import root_mean_squared_error, r2_score
import plotly.io as pio
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.model_selection import KFold
from sklearn.neural_network import MLPRegressor
from tqdm import tqdm
import warnings
# Silence every warning for the rest of the notebook.
# NOTE(review): this presumably also hides model-convergence warnings —
# consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

# Default Plotly template applied to every figure created afterwards.
pio.templates.default = "plotly_white"
# Qualitative colour palettes; `pcolors` supplies the bar colours in
# plot_relative_rmse_by_variables.
pcolors = px.colors.qualitative.T10
pcolors25 = px.colors.qualitative.Alphabet


# Conclusion

* Instant-ANN performs best among the models compared below.

In [176]:
def r2(y, y_pred):
    """Return the R^2 score between `y` and `y_pred`, rounded to 3 decimals.

    Both inputs are converted to NumPy arrays and flattened to 1-D before
    scoring, so every element is treated as one sample regardless of the
    original shape. Any array-like (ndarray, list, pandas object) is accepted.

    Args:
        y: Observed values.
        y_pred: Predicted values with the same number of elements as `y`.

    Returns:
        The coefficient of determination rounded to three decimals.
    """
    observed = np.asarray(y).ravel()
    predicted = np.asarray(y_pred).ravel()
    return round(r2_score(observed, predicted), 3)


def absolute_rmse(y, y_pred):
    """Return the root-mean-squared error over all elements, rounded to 3 decimals.

    Inputs are flattened first. Without flattening, scikit-learn treats a 2-D
    input as a multi-output problem and averages one RMSE per column, which is
    a different quantity from the element-wise RMSE and disagrees with `r2`,
    which also flattens its inputs.

    Args:
        y: Observed values (any array-like).
        y_pred: Predicted values with the same number of elements as `y`.

    Returns:
        The RMSE rounded to three decimals.
    """
    observed = np.asarray(y).ravel()
    predicted = np.asarray(y_pred).ravel()
    return round(root_mean_squared_error(observed, predicted), 3)


def relative_rmse(y, y_pred):
    """Return RMSE divided by the standard deviation of `y`, rounded to 3 decimals.

    Inputs are flattened first so that numerator (RMSE) and denominator
    (population standard deviation, ``np.std``) are both computed over the same
    set of elements. Previously a 2-D input made scikit-learn average one RMSE
    per column while the standard deviation was taken over all elements.

    If `y` is constant its standard deviation is zero and the result is
    ``inf`` or ``nan``.

    Args:
        y: Observed values (any array-like).
        y_pred: Predicted values with the same number of elements as `y`.

    Returns:
        The relative RMSE rounded to three decimals.
    """
    observed = np.asarray(y).ravel()
    predicted = np.asarray(y_pred).ravel()
    return round(root_mean_squared_error(observed, predicted) / np.std(observed), 3)


def transform_owu(owu, t_steps=15, batch_first=False):
    """Pack the "X:" columns of a (run, time)-indexed frame into a 3-D array.

    Args:
        owu: DataFrame whose index has the levels "run" and "time".
        t_steps: Number of rows every run must contain.
        batch_first: If True the result has shape (runs, t_steps, variables),
            otherwise (t_steps, variables, runs).

    Returns:
        A tuple ``(X, X_columns)`` with the packed float array and the list of
        column names containing "X:" in their original order.

    Raises:
        ValueError: If any run does not have exactly `t_steps` rows.
    """
    X_columns = [name for name in owu.columns if "X:" in name]
    states = owu[X_columns].copy().sort_index(level=["run", "time"])

    n_vars = len(X_columns)
    n_runs = states.index.get_level_values("run").nunique()
    shape = (n_runs, t_steps, n_vars) if batch_first else (t_steps, n_vars, n_runs)
    X = np.zeros(shape)

    # groupby iterates runs in sorted order; `pos` is the run's slot in X.
    for pos, (run, block) in enumerate(states.groupby(level="run")):
        if len(block) != t_steps:
            raise ValueError(f"Run {run} does not have {t_steps} time steps.")

        if batch_first:
            X[pos] = block.values
        else:
            X[..., pos] = block.values

    return X, X_columns



def plot_rmse_by_hidden_dims(
    all_train_eval,
    all_valid_eval,
    hidden_dims_options,
    title="RMSE by Hidden Dimensions Over Time",
    xaxis_title="Time Steps",
):
    """Plot train and validation RMSE for each hidden-layer configuration.

    Args:
        all_train_eval: Mapping ``key -> {tuple(hidden_dims): rmse}`` with the
            training scores. The keys become the x-axis values.
        all_valid_eval: Mapping with the same layout holding validation scores;
            it is looked up with the keys of `all_train_eval`.
        hidden_dims_options: Iterable of hidden-layer size sequences; each one
            produces a solid "Train" line and a dashed "Validation" line.
        title: Figure title.
        xaxis_title: Label of the x-axis. Override it when the mapping keys are
            not time steps (for example variable indices).

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    x_values = list(all_train_eval.keys())
    series_specs = (
        ("Train", all_train_eval, "solid"),
        ("Validation", all_valid_eval, "dash"),
    )

    fig = go.Figure()
    for hidden_dims in hidden_dims_options:
        dims_key = tuple(hidden_dims)
        for label, scores, dash in series_specs:
            fig.add_trace(
                go.Scatter(
                    x=x_values,
                    y=[scores[key][dims_key] for key in x_values],
                    mode="lines+markers",
                    name=f"{label} - {hidden_dims}",
                    line=dict(dash=dash),
                )
            )

    fig.update_layout(
        title=title,
        xaxis_title=xaxis_title,
        yaxis_title="RMSE",
        legend_title="Evaluation Type",
        width=1000,
        height=600,
    )

    fig.show()


def plot_multi_step_ann_model_eval(
    X,
    X_pred,
    X_test,
    X_test_pred,
    X_columns=None,
):
    """Show observed-vs-predicted scatter plots for the train and test sets.

    One two-panel figure (train on the left, test on the right) is displayed
    per entry of `X_columns`. All four arrays are indexed as
    ``[:, variable, run]``; every run is drawn as its own marker trace. The
    subplot titles report the values returned by the `r2`, `absolute_rmse` and
    `relative_rmse` helpers for that variable. A dashed identity line spanning
    the combined observed/predicted range is drawn in each panel.

    Args:
        X: Observed training data, 3-D array.
        X_pred: Predicted training data, same shape as `X`.
        X_test: Observed test data, 3-D array.
        X_test_pred: Predicted test data, same shape as `X_test`.
        X_columns: Names of the variables along axis 1.

    Returns:
        None. Each figure is displayed with ``fig.show()``.
    """
    num_train = X.shape[2]
    num_test = X_test.shape[2]

    for var_idx, col in enumerate(X_columns):
        # (label, observed, predicted, number of runs) for the two panels.
        panels = (
            ("Train", X[:, var_idx, :], X_pred[:, var_idx, :], num_train),
            ("Test", X_test[:, var_idx, :], X_test_pred[:, var_idx, :], num_test),
        )

        titles = tuple(
            f"{label} Set - {col} <br> R^2 = {r2(observed, predicted)}"
            f" <br> Abs RMSE = {absolute_rmse(observed, predicted)}"
            f" <br> Rel RMSE = {relative_rmse(observed, predicted)}"
            for label, observed, predicted, _ in panels
        )
        fig = make_subplots(rows=1, cols=2, subplot_titles=titles)

        for panel_col, (label, observed, predicted, num_runs) in enumerate(panels, start=1):
            # A dedicated loop variable avoids shadowing the variable index.
            for run_idx in range(num_runs):
                fig.add_trace(
                    go.Scatter(
                        x=observed[:, run_idx].reshape(-1),
                        y=predicted[:, run_idx].reshape(-1),
                        mode="markers",
                        name=f"Run id in {label} {run_idx}",
                        legendgroup=f"{label.lower()}_{run_idx}",
                    ),
                    row=1,
                    col=panel_col,
                )

            # Identity line covering both observed and predicted values so it
            # spans the whole visible data range of the panel.
            low = float(min(observed.min(), predicted.min()))
            high = float(max(observed.max(), predicted.max()))
            fig.add_shape(
                type="line",
                x0=low,
                y0=low,
                x1=high,
                y1=high,
                layer="above",
                line=dict(dash="dash"),
                row=1,
                col=panel_col,
            )
            fig.update_xaxes(title="Observed values", row=1, col=panel_col)
            fig.update_yaxes(title="Predicted values", row=1, col=panel_col)

        fig.update_layout(width=1600)
        fig.show()


def plot_relative_rmse_by_variables(
    X,
    X_pred,
    X_test,
    X_test_pred,
    X_columns=None,
):
    """Show a grouped bar chart of the relative RMSE per variable.

    Arrays are indexed as ``[:, variable, run]``. For each entry of
    `X_columns`, `relative_rmse` is evaluated on the train pair and on the test
    pair; the two series are drawn side by side.

    Returns:
        None. The figure is displayed with ``fig_rmse.show()``.
    """
    var_indices = range(len(X_columns))
    relative_rmse_train = [
        relative_rmse(X[:, k, :], X_pred[:, k, :]) for k in var_indices
    ]
    relative_rmse_test = [
        relative_rmse(X_test[:, k, :], X_test_pred[:, k, :]) for k in var_indices
    ]

    bar_specs = (
        ("Train Set", relative_rmse_train, pcolors[0]),
        ("Test Set", relative_rmse_test, pcolors[1]),
    )

    fig_rmse = go.Figure()
    for label, scores, colour in bar_specs:
        fig_rmse.add_trace(
            go.Bar(
                x=X_columns,
                y=scores,
                name=label,
                marker_color=colour,
                text=[f"{v:.2f}" for v in scores],
                textposition="outside",
            )
        )

    fig_rmse.update_layout(
        barmode="group",
        title="Relative RMSE for Each Variables",
        xaxis_title="Feature",
        yaxis_title="Relative RMSE",
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5),
    )

    fig_rmse.show()


def plot_predicted_profile(X, X_pred, X_columns, select_runs=(0,), height=1000):
    """Plot observed and predicted time profiles of every variable for chosen runs.

    Subplots are laid out in a grid with at most five columns, one subplot per
    entry of `X_columns`. Each selected run gets one colour; the observed
    profile is drawn solid and the predicted profile dashed.

    Args:
        X: Observed data indexed as ``[time, variable, run]``.
        X_pred: Predicted data with the same indexing as `X`.
        X_columns: Variable names; also used as subplot titles.
        select_runs: Run positions (indices into the last axis) to draw.
        height: Figure height in pixels.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    max_cols_per_row = 5
    num_columns = len(X_columns)
    # Ceiling division: exactly as many rows as needed (the previous formula
    # added an empty row whenever the count was a multiple of five).
    num_rows = (num_columns + max_cols_per_row - 1) // max_cols_per_row

    fig = make_subplots(
        rows=num_rows, cols=min(num_columns, max_cols_per_row), subplot_titles=X_columns
    )

    color_palette = px.colors.qualitative.Plotly
    # Derive the time axis from the data instead of assuming 15 steps.
    time_axis = list(range(X.shape[0]))

    for idx, run in enumerate(select_runs):
        color = color_palette[idx % len(color_palette)]
        for var_idx in range(num_columns):
            row = var_idx // max_cols_per_row + 1
            col = var_idx % max_cols_per_row + 1
            # Only the first subplot contributes legend entries for a run.
            show_legend = var_idx == 0
            fig.add_trace(
                go.Scatter(
                    x=time_axis,
                    y=X[:, var_idx, run],
                    name=f"Run {run} Observed",
                    marker=dict(color=color),
                    showlegend=show_legend,
                    legendgroup=f"group_{run}",
                ),
                row=row,
                col=col,
            )
            fig.add_trace(
                go.Scatter(
                    x=time_axis,
                    y=X_pred[:, var_idx, run],
                    name=f"Run {run} Predicted",
                    line=dict(dash="dash"),
                    marker=dict(color=color),
                    showlegend=show_legend,
                    legendgroup=f"group_{run}",
                ),
                row=row,
                col=col,
            )

    fig.update_layout(
        showlegend=True,
        title_text="Process variable evolution for selected runs",
        height=height,
    )
    fig.show()

# Dataset

In [177]:
# Name of the dataset sub-directory to load from.
data_type = 'interpolation'
# Directory holding the CSV files read by read_owu_v4 / read_doe.
root_path = f'dataset/datahow_2022/{data_type}/'

def read_owu_v4(file, root_path='dataset/datahow_2022/interpolation/', t_steps=15):
    """Load ``<root_path>/<file>.csv`` and index it by (run, time).

    If the CSV already has a ``run`` column, it is indexed by its ``run`` and
    ``time`` columns and no other file is read. Otherwise the number of runs is
    taken from the row count of ``<root_path>/<file>_doe.csv`` and a
    ``run x time`` product index with `t_steps` steps per run is assigned in
    row order.

    Args:
        file: CSV base name without the ``.csv`` extension.
        root_path: Directory containing the CSV files.
        t_steps: Rows per run used when the index has to be generated.

    Returns:
        DataFrame with a MultiIndex whose levels are named "run" and "time".

    Raises:
        ValueError: If the index must be generated and the row count does not
            equal ``number_of_runs * t_steps``.
    """
    owu_df = pd.read_csv(f'{root_path}/{file}.csv')

    if 'run' in owu_df.columns:
        return owu_df.set_index(['run', 'time'])

    # The DoE file is only needed to learn how many runs the data contains.
    num_runs = len(pd.read_csv(f'{root_path}/{file}_doe.csv'))
    if len(owu_df) != num_runs * t_steps:
        raise ValueError(
            f"{file}.csv has {len(owu_df)} rows, expected "
            f"{num_runs} runs x {t_steps} time steps."
        )
    owu_df.index = pd.MultiIndex.from_product(
        [list(range(num_runs)), list(range(t_steps))], names=["run", "time"]
    )
    return owu_df

def read_doe(file, root_path= 'dataset/datahow_2022/interpolation/'):
    """Load the design-of-experiments columns from ``<root_path>/<file>.csv``.

    Only the five design columns listed below are read; any other column in
    the file is ignored.

    Returns:
        A DataFrame with one row per line of the CSV file.
    """
    design_columns = ["feed_start", "feed_end", "Glc_feed_rate", "Glc_0", "VCD_0"]
    csv_path = f'{root_path}/{file}.csv'
    return pd.read_csv(csv_path, usecols=design_columns).copy()

In [178]:
# Process data (indexed by run/time) and design table used to fit the models.
owu = read_owu_v4('owu', root_path=root_path)
doe = read_doe('owu_doe', root_path=root_path)

# Separate data set and design table used only for evaluation.
owu_test = read_owu_v4('owu_test', root_path=root_path)
doe_test = read_doe('owu_test_doe', root_path=root_path)

# Data-Driven Models for Simulation

## Instant-ANN
- One ANN model per variable per time point
- Training: $[Z, X(t = t_{model} - 1), W(t = t_{model} - 1)] \rightarrow ANN_{i, t} \rightarrow X_i(t = t_{model})$
- Testing: $[Z, \hat{X}(t = t_{model} - 1), W(t = t_{model} - 1)] \rightarrow ANN_{i, t} \rightarrow \hat{X}_i(t = t_{model})$

In [179]:
def fit_ann_model(X, y, hidden_dims):
    """Train and return an MLPRegressor on ``(X, y)``.

    The network uses `hidden_dims` as its hidden layer sizes, at most 1000
    iterations and a fixed random state of 42.
    """
    regressor = MLPRegressor(
        hidden_layer_sizes=hidden_dims,
        max_iter=1000,
        random_state=42,
    )
    # fit() returns the fitted estimator itself.
    return regressor.fit(X, y)


def fit_multi_step_ann_model(
    doe,
    owu,
    t_steps,
    hidden_dims,
    X_columns=None,
    W_columns=None,
    best_hidden_dims=None,
):
    """Fit one ANN per (variable, time step) for one-step-ahead prediction.

    For every ``t`` in ``1 .. t_steps - 1`` the input features are the
    horizontal stack of the design values, the `X_columns` at ``t - 1`` and the
    `W_columns` at ``t - 1``; the target is one `X_columns` entry at ``t``.
    Time steps with no rows at ``t - 1`` or ``t`` are skipped.

    Args:
        doe: Design table; ``doe.values`` must have one row per run, in the
            same order as the runs appear in `owu`.
        owu: Process data with a "time" index level.
        t_steps: Number of time steps (models are fitted for 1 .. t_steps - 1).
        hidden_dims: Hidden layer sizes used when no per-step override applies.
        X_columns: State variable column names.
        W_columns: Control variable column names.
        best_hidden_dims: Optional mapping ``t -> hidden layer sizes``. A
            missing time step falls back to `hidden_dims`.

    Returns:
        Dict mapping ``(variable_index, t)`` to the fitted model.
    """
    models = {}
    Z = doe.values
    times = owu.index.get_level_values("time")

    for t in range(1, t_steps):
        # Inputs and targets are the same for every variable at this step,
        # so they are sliced once per time step.
        X_hist = owu.loc[times == (t - 1), X_columns]
        W_hist = owu.loc[times == (t - 1), W_columns]
        y_hist = owu.loc[times == t, X_columns]

        if X_hist.empty or y_hist.empty:
            continue

        X_preproc = np.hstack([Z, X_hist.values, W_hist.values])

        # Resolve the architecture per step without overwriting the caller's
        # default, so a step missing from the mapping still gets `hidden_dims`.
        step_dims = hidden_dims
        if best_hidden_dims:
            override = best_hidden_dims.get(t)
            if override is not None:
                step_dims = override

        for i, col in enumerate(X_columns):
            y = y_hist.loc[:, col].values
            models[(i, t)] = fit_ann_model(X_preproc, y, step_dims)

    return models


def predict_multi_step_ann_model(
    doe, X0, W, multi_step_models, t_steps, X_columns=None
):
    """Roll the per-step models forward from the initial state.

    Starting from `X0`, the state at each ``t`` in ``1 .. t_steps - 1`` is
    predicted from the design values, the previous (measured or predicted)
    state and the controls at ``t - 1``, then appended to the history.

    Args:
        doe: Design table; ``doe.values`` must have one row per run.
        X0: Initial states, indexed by (run, time).
        W: Control values indexed by (run, time); all of its columns are used.
        multi_step_models: Dict ``(variable_index, t) -> fitted model``.
        t_steps: Total number of time steps including the initial one.
        X_columns: State variable column names, in model index order.

    Returns:
        Array produced by `transform_owu` from the full predicted history.

    Raises:
        KeyError: If no model exists for a required (variable, time step).
    """
    X_hist = X0.copy()
    Z = doe.values
    runs = list(X0.index.get_level_values("run").unique())

    for t in range(1, t_steps):
        # The feature matrix is shared by all variables at this step.
        X_t_minus_1 = X_hist.loc[
            X_hist.index.get_level_values("time") == (t - 1), X_columns
        ]
        W_t_minus_1 = W.loc[W.index.get_level_values("time") == (t - 1)]
        X_preproc = np.hstack([Z, X_t_minus_1.values, W_t_minus_1.values])

        predictions = {}
        for i, col in enumerate(X_columns):
            model = multi_step_models.get((i, t))
            if model is None:
                raise KeyError(f"No model for variable index {i} at time step {t}.")
            predictions[col] = model.predict(X_preproc)

        next_pred = pd.DataFrame(predictions)
        next_pred.index = pd.MultiIndex.from_product(
            [runs, [t]],
            names=["run", "time"],
        )
        X_hist = pd.concat([X_hist, next_pred])

    # Pass t_steps through; the default of 15 would reject any other horizon.
    X_pred, _ = transform_owu(X_hist, t_steps=t_steps)
    return X_pred


def cross_val_predict_ann(X, y, hidden_dims, cv_folds):
    """Return mean train and validation RMSE of an ANN over shuffled K folds.

    Args:
        X: Feature array, indexable by integer index arrays.
        y: Target array aligned with `X`.
        hidden_dims: Hidden layer sizes passed to `fit_ann_model`.
        cv_folds: Number of folds (shuffled with random state 42).

    Returns:
        Tuple ``(mean_train_rmse, mean_valid_rmse)`` over the folds.
    """
    splitter = KFold(n_splits=cv_folds, shuffle=True, random_state=42)
    train_scores = []
    valid_scores = []

    for fit_idx, holdout_idx in splitter.split(X):
        X_fit, y_fit = X[fit_idx], y[fit_idx]
        X_holdout, y_holdout = X[holdout_idx], y[holdout_idx]

        net = fit_ann_model(X_fit, y_fit, hidden_dims)

        train_scores.append(absolute_rmse(y_fit, net.predict(X_fit)))
        valid_scores.append(absolute_rmse(y_holdout, net.predict(X_holdout)))

    return np.mean(train_scores), np.mean(valid_scores)


def multi_step_ann_cross_validation_for_each_day(
    doe, owu, X_columns, W_columns, hidden_dims_options, cv_folds=5, t_steps=15
):
    """Cross-validate every hidden-layer option separately for each time step.

    For each ``t`` in ``1 .. t_steps - 1`` and each option, one K-fold
    cross-validation is run per variable and the resulting train/validation
    RMSE values are averaged over the variables. A progress bar is shown per
    time step.

    Args:
        doe: Design table; ``doe.values`` must have one row per run.
        owu: Process data with a "time" index level.
        X_columns: State variable column names.
        W_columns: Control variable column names.
        hidden_dims_options: Iterable of hidden layer size sequences.
        cv_folds: Number of cross-validation folds.
        t_steps: Total number of time steps (previously fixed at 15).

    Returns:
        Tuple ``(all_train_eval, all_valid_eval, hidden_dims_options)`` where
        both dicts map ``t -> {tuple(hidden_dims): mean RMSE}``.
    """
    all_train_eval = {}
    all_valid_eval = {}

    Z = doe.values
    times = owu.index.get_level_values("time")

    for t in range(1, t_steps):
        all_train_eval[t] = {}
        all_valid_eval[t] = {}

        # Features and targets do not depend on the architecture or the
        # variable, so they are built once per time step.
        X_hist = owu.loc[times == (t - 1), X_columns]
        W_hist = owu.loc[times == (t - 1), W_columns]
        y_hist = owu.loc[times == t, X_columns]
        has_data = not X_hist.empty and not y_hist.empty
        X_preproc = (
            np.hstack([Z, X_hist.values, W_hist.values]) if has_data else None
        )

        for hidden_dims in tqdm(hidden_dims_options):
            train_eval, valid_eval = [], []

            if has_data:
                for col in X_columns:
                    y = y_hist.loc[:, col].values
                    rmse_train, rmse_valid = cross_val_predict_ann(
                        X_preproc, y, hidden_dims, cv_folds
                    )
                    train_eval.append(rmse_train)
                    valid_eval.append(rmse_valid)

            all_train_eval[t][tuple(hidden_dims)] = np.mean(train_eval)
            all_valid_eval[t][tuple(hidden_dims)] = np.mean(valid_eval)

    return all_train_eval, all_valid_eval, hidden_dims_options



### Setting

In [180]:
# Hidden layer sizes passed as `hidden_dims` when fitting the models.
HIDDEN_DIMS = [64, 64]
X_columns = ['X:VCD', 'X:Glc', 'X:Lac', 'X:Titer']  # Specific columns to consider
# Control columns appended to the model inputs.
W_columns = ['W:Feed']

### Data

In [181]:
# Pack observed states into (time, variable, run) arrays.
# NOTE: transform_owu also returns every column containing "X:", which
# overwrites the X_columns list defined in the settings cell.
X_train, X_columns = transform_owu(owu, t_steps=15, batch_first=False)
X_test, X_columns = transform_owu(owu_test, t_steps=15, batch_first=False)


### Train

In [182]:
# Fit one model per (variable, time step) with the default architecture.
models = fit_multi_step_ann_model(
    doe,
    owu,
    t_steps=15,
    hidden_dims=HIDDEN_DIMS,
    X_columns=X_columns,
    W_columns=W_columns
)

### Test

In [183]:
# Simulate the training runs starting from their measured initial state.
X0 = owu.loc[owu.index.get_level_values('time') < 1, X_columns]
# Use the configured control columns instead of repeating the column name.
W = owu[W_columns]

X_train_pred = predict_multi_step_ann_model(
    doe,
    X0,
    W,
    X_columns=X_columns,
    multi_step_models=models,
    t_steps=15,
)

# Same roll-out for the held-out runs.
X0_test = owu_test.loc[owu_test.index.get_level_values('time') < 1, X_columns]
W_test = owu_test[W_columns]

X_test_pred = predict_multi_step_ann_model(
    doe_test,
    X0_test,
    W_test,
    X_columns=X_columns,
    multi_step_models=models,
    t_steps=15,
)


In [184]:
# Observed-vs-predicted scatter plots per variable (train and test).
plot_multi_step_ann_model_eval(
    X_train,
    X_train_pred,
    X_test,
    X_test_pred,
    X_columns=X_columns,
)


In [185]:
# Relative RMSE per variable for train and test as a grouped bar chart.
plot_relative_rmse_by_variables(
    X_train,
    X_train_pred,
    X_test,
    X_test_pred,
    X_columns=X_columns,
)

### K-Fold

In [186]:
# Candidate hidden-layer layouts to compare (one entry per architecture).
hidden_dims_options = [
    [4],
    [5],
    [6],
    [7],
    [8],
    [8, 4],
    [8, 8],
    [9],
    [10],
    [16],
    [16, 4],
    [16, 8],
    [16, 16],
    [32],
    [32, 4],
    [32, 8],
    [32, 16],
    [32, 32],
    [64],
    [64, 4],
    [64, 8],
    [64, 16],
    [64, 32],
]

# K-fold cross-validation of every candidate at every time step.
all_train_eval, all_valid_eval, hidden_dims_options = multi_step_ann_cross_validation_for_each_day(
    doe,
    owu,
    X_columns=X_columns,
    W_columns=W_columns,
    hidden_dims_options=hidden_dims_options,
    cv_folds=5
)


100%|██████████| 23/23 [00:18<00:00,  1.27it/s]
100%|██████████| 23/23 [00:21<00:00,  1.06it/s]
100%|██████████| 23/23 [00:26<00:00,  1.17s/it]
100%|██████████| 23/23 [00:32<00:00,  1.39s/it]
100%|██████████| 23/23 [00:28<00:00,  1.26s/it]
100%|██████████| 23/23 [00:25<00:00,  1.10s/it]
100%|██████████| 23/23 [00:23<00:00,  1.00s/it]
100%|██████████| 23/23 [00:20<00:00,  1.11it/s]
100%|██████████| 23/23 [00:20<00:00,  1.10it/s]
100%|██████████| 23/23 [00:20<00:00,  1.12it/s]
100%|██████████| 23/23 [00:20<00:00,  1.13it/s]
100%|██████████| 23/23 [00:20<00:00,  1.14it/s]
100%|██████████| 23/23 [00:19<00:00,  1.19it/s]
100%|██████████| 23/23 [00:18<00:00,  1.22it/s]


In [187]:
# Train/validation RMSE (averaged over variables) per time step and architecture.
plot_rmse_by_hidden_dims(all_train_eval, all_valid_eval, hidden_dims_options)

# For every time step keep the architecture with the lowest validation RMSE.
best_hidden_dims = {}
for t, evals in all_valid_eval.items():
    best_hidden_dims[t] = min(evals, key=evals.get)

print("Best hidden dimensions for each time step:", best_hidden_dims)

Best hidden dimensions for each time step: {1: (16, 4), 2: (32, 4), 3: (32,), 4: (64,), 5: (8,), 6: (16,), 7: (16,), 8: (16, 4), 9: (16,), 10: (16,), 11: (16,), 12: (16,), 13: (16,), 14: (16,)}


### Retrain

In [188]:
# Refit with the per-time-step architectures selected by cross-validation.
models = fit_multi_step_ann_model(
    doe,
    owu,
    t_steps=15,
    hidden_dims=HIDDEN_DIMS,
    X_columns=X_columns,
    W_columns=W_columns,
    best_hidden_dims=best_hidden_dims,
)

In [189]:
# Simulate the training runs with the retrained models.
X0 = owu.loc[owu.index.get_level_values('time') < 1, X_columns]
# Use the configured control columns instead of repeating the column name.
W = owu[W_columns]

X_train_pred = predict_multi_step_ann_model(
    doe,
    X0,
    W,
    X_columns=X_columns,
    multi_step_models=models,
    t_steps=15,
)

# Same roll-out for the held-out runs.
X0_test = owu_test.loc[owu_test.index.get_level_values('time') < 1, X_columns]
W_test = owu_test[W_columns]

X_test_pred = predict_multi_step_ann_model(
    doe_test,
    X0_test,
    W_test,
    X_columns=X_columns,
    multi_step_models=models,
    t_steps=15,
)


In [190]:
# Observed-vs-predicted scatter plots for the retrained models.
plot_multi_step_ann_model_eval(
    X_train,
    X_train_pred,
    X_test,
    X_test_pred,
    X_columns=X_columns,
)


In [191]:
# Relative RMSE per variable for the retrained models.
plot_relative_rmse_by_variables(
    X_train,
    X_train_pred,
    X_test,
    X_test_pred,
    X_columns=X_columns,
)

In [192]:
# Observed and predicted time profiles for the first five test runs.
plot_predicted_profile(X_test, X_test_pred, X_columns, select_runs=[0, 1, 2, 3, 4], height=500)

## OWU-ANN
- A single model per variable is used for all time points
- Training: $[Z, X(t = t_{model} - 1), W(t = t_{model} - 1)] \rightarrow ANN_i \rightarrow X_i(t = t_{model})$
- Testing: $[Z, \hat{X}(t = t_{model} - 1), W(t = t_{model} - 1)] \rightarrow ANN_i \rightarrow \hat{X}_i(t = t_{model})$

In [193]:
def fit_owu_ann_model(doe, owu, hidden_dims, t_steps=15, X_columns=None, W_columns=None, best_hidden_dims=None):
    """Fit one ANN per variable on the transitions of all time steps pooled.

    Every transition ``t - 1 -> t`` for ``t`` in ``1 .. t_steps - 1``
    contributes one block of samples: features are the horizontal stack of the
    design values, the `X_columns` at ``t - 1`` and the `W_columns` at
    ``t - 1``; the target is one variable at ``t``. Time steps with no rows at
    ``t - 1`` or ``t`` are skipped.

    Args:
        doe: Design table; ``doe.values`` must have one row per run.
        owu: Process data with a "time" index level.
        hidden_dims: Hidden layer sizes used when no per-variable override
            applies.
        t_steps: Total number of time steps.
        X_columns: State variable column names.
        W_columns: Control variable column names.
        best_hidden_dims: Optional mapping ``variable_index -> hidden layer
            sizes``. A missing index falls back to `hidden_dims`.

    Returns:
        Dict mapping variable index to the fitted model.
    """
    Z = doe.values
    times = owu.index.get_level_values('time')

    # The pooled feature matrix is identical for every variable, so it is
    # assembled once together with the matrix of all targets.
    feature_blocks = []
    target_blocks = []
    for t in range(1, t_steps):
        X_t_minus_1 = owu.loc[times == (t - 1), X_columns]
        W_t_minus_1 = owu.loc[times == (t - 1), W_columns]
        y_t = owu.loc[times == t, X_columns]

        if not X_t_minus_1.empty and not y_t.empty:
            feature_blocks.append(np.hstack([Z, X_t_minus_1.values, W_t_minus_1.values]))
            target_blocks.append(y_t.values)

    X_hist_all = np.vstack(feature_blocks)
    y_hist_all = np.vstack(target_blocks)

    models = {}
    for i, col in enumerate(X_columns):
        # Per-variable architecture, without overwriting the shared default.
        variable_dims = hidden_dims
        if best_hidden_dims:
            override = best_hidden_dims.get(i)
            if override is not None:
                variable_dims = override

        models[i] = fit_ann_model(X_hist_all, y_hist_all[:, i], variable_dims)

    return models


def predict_owu_ann_model(doe, X0, W, models, t_steps, X_columns=None):
    """Roll the per-variable models forward from the initial state.

    Starting from `X0`, the state at each ``t`` in ``1 .. t_steps - 1`` is
    predicted from the design values, the previous (measured or predicted)
    state and the controls at ``t - 1``, then appended to the history.

    Args:
        doe: Design table; ``doe.values`` must have one row per run.
        X0: Initial states, indexed by (run, time).
        W: Control values indexed by (run, time); all of its columns are used.
        models: Dict ``variable_index -> fitted model``.
        t_steps: Total number of time steps including the initial one.
        X_columns: State variable column names, in model index order.

    Returns:
        Array produced by `transform_owu` from the full predicted history.

    Raises:
        KeyError: If no model exists for a variable index.
    """
    X_hist = X0.copy()
    Z = doe.values
    runs = list(X0.index.get_level_values('run').unique())

    for t in range(1, t_steps):
        # The feature matrix is shared by all variables at this step.
        X_t_minus_1 = X_hist.loc[X_hist.index.get_level_values('time') == (t - 1), X_columns]
        W_t_minus_1 = W.loc[W.index.get_level_values('time') == (t - 1)]
        X_preproc = np.hstack([Z, X_t_minus_1.values, W_t_minus_1.values])

        predictions = {}
        for i, col in enumerate(X_columns):
            model = models.get(i)
            if model is None:
                raise KeyError(f"No model for variable index {i}.")
            predictions[col] = model.predict(X_preproc)

        next_pred = pd.DataFrame(predictions)
        next_pred.index = pd.MultiIndex.from_product(
            [runs, [t]], names=["run", "time"]
        )
        X_hist = pd.concat([X_hist, next_pred])

    # Pass t_steps through; the default of 15 would reject any other horizon.
    X_pred, _ = transform_owu(X_hist, t_steps=t_steps)
    return X_pred


# NOTE(review): this re-declares cross_val_predict_ann with the same logic as
# the definition in the Instant-ANN section; consider keeping only one.
def cross_val_predict_ann(X, y, hidden_dims, cv_folds):
    """Return mean train and validation RMSE of an ANN over shuffled K folds.

    Args:
        X: Feature array, indexable by integer index arrays.
        y: Target array aligned with `X`.
        hidden_dims: Hidden layer sizes passed to `fit_ann_model`.
        cv_folds: Number of folds (shuffled with random state 42).

    Returns:
        Tuple ``(mean_train_rmse, mean_valid_rmse)`` over the folds.
    """
    splitter = KFold(n_splits=cv_folds, shuffle=True, random_state=42)
    train_scores = []
    valid_scores = []

    for fit_idx, holdout_idx in splitter.split(X):
        X_fit, y_fit = X[fit_idx], y[fit_idx]
        X_holdout, y_holdout = X[holdout_idx], y[holdout_idx]

        net = fit_ann_model(X_fit, y_fit, hidden_dims)

        train_scores.append(absolute_rmse(y_fit, net.predict(X_fit)))
        valid_scores.append(absolute_rmse(y_holdout, net.predict(X_holdout)))

    return np.mean(train_scores), np.mean(valid_scores)

def owu_ann_cross_validation(doe, owu, X_columns, W_columns, hidden_dims_options, cv_folds=5, t_steps=15):
    """Cross-validate every hidden-layer option for each pooled per-variable model.

    Transitions ``t - 1 -> t`` for ``t`` in ``1 .. t_steps - 1`` are pooled
    into one sample set (features: design values, `X_columns` and `W_columns`
    at ``t - 1``). For every variable and every option a K-fold
    cross-validation is run on that pooled set. A progress bar is shown per
    variable.

    Args:
        doe: Design table; ``doe.values`` must have one row per run.
        owu: Process data with a "time" index level.
        X_columns: State variable column names.
        W_columns: Control variable column names.
        hidden_dims_options: Iterable of hidden layer size sequences.
        cv_folds: Number of cross-validation folds.
        t_steps: Total number of time steps (previously fixed at 15).

    Returns:
        Tuple ``(all_train_eval, all_valid_eval, hidden_dims_options)`` where
        both dicts map ``variable_index -> {tuple(hidden_dims): RMSE}``.
    """
    all_train_eval = {}
    all_valid_eval = {}

    Z = doe.values
    times = owu.index.get_level_values('time')

    # The pooled feature matrix is identical for every variable, so it is
    # assembled once together with the matrix of all targets.
    feature_blocks = []
    target_blocks = []
    for t in range(1, t_steps):
        X_t_minus_1 = owu.loc[times == (t - 1), X_columns]
        W_t_minus_1 = owu.loc[times == (t - 1), W_columns]
        y_t = owu.loc[times == t, X_columns]

        if not X_t_minus_1.empty and not y_t.empty:
            feature_blocks.append(np.hstack([Z, X_t_minus_1.values, W_t_minus_1.values]))
            target_blocks.append(y_t.values)

    X_hist_all = np.vstack(feature_blocks)
    y_hist_all = np.vstack(target_blocks)

    for i, col in enumerate(X_columns):
        all_train_eval[i] = {}
        all_valid_eval[i] = {}

        for hidden_dims in tqdm(hidden_dims_options):
            rmse_train, rmse_valid = cross_val_predict_ann(
                X_hist_all, y_hist_all[:, i], hidden_dims, cv_folds
            )
            all_train_eval[i][tuple(hidden_dims)] = rmse_train
            all_valid_eval[i][tuple(hidden_dims)] = rmse_valid

    return all_train_eval, all_valid_eval, hidden_dims_options


### Setting

In [194]:
# Same values as the settings cell of the Instant-ANN section.
# Hidden layer sizes passed as `hidden_dims` when fitting the models.
HIDDEN_DIMS = [64, 64]
X_columns = ['X:VCD', 'X:Glc', 'X:Lac', 'X:Titer']  # Specific columns to consider
# Control columns appended to the model inputs.
W_columns = ['W:Feed']

### Data

In [195]:
# Pack observed states into (time, variable, run) arrays.
# NOTE: transform_owu also returns every column containing "X:", which
# overwrites the X_columns list defined in the settings cell.
X_train, X_columns = transform_owu(owu, t_steps=15, batch_first=False)
X_test, X_columns = transform_owu(owu_test, t_steps=15, batch_first=False)

### Train

In [196]:
# Fit one model per variable on all time steps pooled together.
models = fit_owu_ann_model(
    doe,
    owu,
    t_steps=15,
    hidden_dims=HIDDEN_DIMS,  # initial value
    X_columns=X_columns,
    W_columns=W_columns,
)


### Test

In [197]:
# Simulate the training runs starting from their measured initial state.
X0 = owu.loc[owu.index.get_level_values('time') < 1, X_columns]
# Use the configured control columns instead of repeating the column name.
W = owu[W_columns]

X_train_pred = predict_owu_ann_model(
    doe,
    X0,
    W,
    X_columns=X_columns,
    models=models,
    t_steps=15,
)

# Same roll-out for the held-out runs.
X0_test = owu_test.loc[owu_test.index.get_level_values('time') < 1, X_columns]
W_test = owu_test[W_columns]

X_test_pred = predict_owu_ann_model(
    doe_test,
    X0_test,
    W_test,
    X_columns=X_columns,
    models=models,
    t_steps=15,
)

In [198]:
# Observed-vs-predicted scatter plots per variable for the OWU-ANN models.
plot_multi_step_ann_model_eval(
    X_train,
    X_train_pred,
    X_test,
    X_test_pred,
    X_columns=X_columns,
)


In [199]:
# Relative RMSE per variable for the OWU-ANN models.
plot_relative_rmse_by_variables(
    X_train,
    X_train_pred,
    X_test,
    X_test_pred,
    X_columns=X_columns,
)

In [200]:
# Observed and predicted time profiles for the first five test runs.
plot_predicted_profile(X_test, X_test_pred, X_columns, select_runs=[0, 1, 2, 3, 4], height=500)

### K-Fold

In [201]:
# Candidate hidden-layer layouts to compare for the pooled models.
hidden_dims_options = [
    [64],
    [64, 4],
    [64, 8],
    [64, 16],
    [64, 32], 
]

# K-fold cross-validation of every candidate for every variable.
all_train_eval, all_valid_eval, hidden_dims_options = owu_ann_cross_validation(
    doe,
    owu,
    X_columns=X_columns,
    W_columns=W_columns,
    hidden_dims_options=hidden_dims_options,
    cv_folds=5
)

100%|██████████| 5/5 [00:14<00:00,  2.89s/it]
100%|██████████| 5/5 [00:09<00:00,  1.80s/it]
100%|██████████| 5/5 [00:08<00:00,  1.74s/it]
100%|██████████| 5/5 [00:22<00:00,  4.55s/it]


In [202]:
# NOTE: the evaluation dicts are keyed by variable index here, so the x-axis
# of this plot shows variable indices rather than time steps.
plot_rmse_by_hidden_dims(all_train_eval, all_valid_eval, hidden_dims_options)

# For every variable keep the architecture with the lowest validation RMSE.
best_hidden_dims = {}
for i, evals in all_valid_eval.items():
    best_hidden_dims[i] = min(evals, key=evals.get)

print("Best hidden dimensions for each variable:", best_hidden_dims)

Best hidden dimensions for each variable: {0: (64,), 1: (64, 16), 2: (64, 16), 3: (64, 8)}


### Retrain

In [203]:
# Display the architecture selected for each variable index.
best_hidden_dims

{0: (64,), 1: (64, 16), 2: (64, 16), 3: (64, 8)}

In [204]:
# Refit with the per-variable architectures selected by cross-validation.
models1 = fit_owu_ann_model(
    doe,
    owu,
    t_steps=15,
    hidden_dims=HIDDEN_DIMS,  # initial value; replaced per variable via best_hidden_dims
    X_columns=X_columns,
    W_columns=W_columns,
    best_hidden_dims=best_hidden_dims
)

In [205]:

# Simulate the training runs with the retrained per-variable models.
X0 = owu.loc[owu.index.get_level_values('time') < 1, X_columns]
# Use the configured control columns instead of repeating the column name.
W = owu[W_columns]

X_train_pred = predict_owu_ann_model(
    doe,
    X0,
    W,
    X_columns=X_columns,
    models=models1,
    t_steps=15,
)

# Same roll-out for the held-out runs.
X0_test = owu_test.loc[owu_test.index.get_level_values('time') < 1, X_columns]
W_test = owu_test[W_columns]

X_test_pred = predict_owu_ann_model(
    doe_test,
    X0_test,
    W_test,
    X_columns=X_columns,
    models=models1,
    t_steps=15,
)

In [206]:
# Observed-vs-predicted scatter plots for the retrained OWU-ANN models.
plot_multi_step_ann_model_eval(
    X_train,
    X_train_pred,
    X_test,
    X_test_pred,
    X_columns=X_columns,
)


In [207]:
# Relative RMSE per variable for the retrained OWU-ANN models.
plot_relative_rmse_by_variables(
    X_train,
    X_train_pred,
    X_test,
    X_test_pred,
    X_columns=X_columns,
)


In [208]:
# Observed and predicted time profiles for six selected test runs.
plot_predicted_profile(X_test, X_test_pred, X_columns, select_runs=[0, 1, 2, 3, 4, 10], height=500)