In [339]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import plotly.io as pio
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel, ConstantKernel
from kernels import SubspaceKernel

from sklearn.metrics import mean_squared_error, r2_score, root_mean_squared_error
from scipy.integrate import odeint

pio.templates.default = "plotly_white"
pcolors = px.colors.qualitative.T10
pcolors25 = px.colors.qualitative.Alphabet


In [340]:
# data_type = 'interpolation'
data_type = 'interpolation'
root_path = f'dataset/datahow_2022/{data_type}/'

def read_owu_v4(file, root_path = 'dataset/datahow_2022/interpolation/'):
    data = pd.read_csv(f'{root_path}/{file}.csv')
    owu_df = data.copy()
    num_runs = len(pd.read_csv(f'{root_path}/{file}_doe.csv'))
    if 'run' not in owu_df.columns:
        owu_df.index = pd.MultiIndex.from_product(
            [list(range(num_runs)), list(range(15))], names=["run", "time"]
        )
    else:
        owu_df.set_index(['run', 'time'], inplace=True)
    return owu_df

def read_doe(file, root_path= 'dataset/datahow_2022/interpolation/'):
    data = pd.read_csv(f'{root_path}/{file}.csv', usecols=["feed_start", "feed_end", "Glc_feed_rate", "Glc_0", "VCD_0"])
    doe_df = data.copy()
    return doe_df

In [341]:
def owu_to_tensor(owu_raw, t_steps=15, batch_first=False):

    owu = owu_raw.copy()
    owu = owu.sort_index(level=["run", "time"])
    
    X_columns = [col for col in owu.columns if "X:" in col]
    F_columns = [col for col in owu.columns if "W:" in col]

    C_X = len(X_columns)
    C_F = len(F_columns)
    B = owu.index.get_level_values("run").nunique()
    T = t_steps

    if batch_first:
        X = np.zeros((B, T, C_X))
        F = np.zeros((B, T, C_F))

    else:
        X = np.zeros((T, C_X, B))
        F = np.zeros((T, C_F, B))

    for i, (run, group) in enumerate(owu.groupby(level="run")):
        X_group = group[X_columns].copy()
        F_group = group[F_columns].copy()

        if len(group) != T:
            raise ValueError(f"Run {run} does not have {T} time steps.")

        if batch_first:
            X[i, :, :] = X_group.values
            F[i, :, :] = F_group.values
        else:
            X[:, :, i] = X_group.values
            F[:, :, i] = F_group.values

    return X, F, X_columns, F_columns


def doe_to_tensor(doe_raw, batch_first=False):
    doe = doe_raw.copy()
    doe = doe.sort_index()
    Z_columns = [col for col in doe.columns]

    C_Z = len(Z_columns)
    T = 1
    B = doe.shape[0]

    if batch_first:
        Z = np.zeros((B, T, C_Z))
        Z[:, 0, :] = doe.values
    else:
        Z = np.zeros((T, C_Z, B))
        Z[0, :, :] = doe.values.T

    return Z, Z_columns

In [342]:
def r2(y, y_pred):
    return round(r2_score(y.flatten(), y_pred.flatten()), 3)


def absolute_rmse(y, y_pred):
    return round(root_mean_squared_error(y, y_pred), 3)


def relative_rmse(y, y_pred):
    return round(root_mean_squared_error(y, y_pred) / np.std(np.array(y)), 3)

### **Process Parameters**

Please insert the values of the process manipulated variables:

- Feed start (day): day at which Glc feed is started
- Feed end (day): ay at which Glc feed is stopped
- Feed rate: mass rate (g/L/day) at which Glc is feed (continuous feed over 24 hours)
- Initial Glc concentration (g/L): Glc at time t = 0
- Initial VCD (10^6 cell/mL): VCD at time t = 0

* X:VCD: (10^6 cell/mL)
* X:Glc: (g/L)
* X:Lac: (g/L)
* X:Titer: (mg/L)
* W:Feed: (g/L/day)

In [343]:
owu = read_owu_v4('owu', root_path=root_path)
doe = read_doe('owu_doe', root_path=root_path)

owu_test = read_owu_v4('owu_test', root_path=root_path)
doe_test = read_doe('owu_test_doe', root_path=root_path)

In [344]:
X_train, F_train, X_columns, F_columns = owu_to_tensor(owu, t_steps=15, batch_first=True)
X_test, F_test, X_columns, F_columns = owu_to_tensor(owu_test, t_steps=15, batch_first=True)

Z_train, Z_columns = doe_to_tensor(doe, batch_first=True)
Z_test, Z_columns = doe_to_tensor(doe_test, batch_first=True)

init_volume = 1000 # mL
V_train = (init_volume + (F_train.sum(axis=-1, keepdims=True)).cumsum(axis=1)) / 1000 # L
V_test = (init_volume + (F_test.sum(axis=-1, keepdims=True)).cumsum(axis=1)) / 1000 # L

print(f"\nOriginal Shape: ")
print(f"X Columns: {X_columns}")
print(f"X Shape: {X_train.shape}")
print(f"X Test Shape: {X_test.shape}")
print(f"F Columns: {F_columns}")
print(f"F Shape: {F_train.shape}") # only 1 glc feed
print(f"F Test Shape: {F_test.shape}") # only 1 glc feed
print(f"Z Columns: {Z_columns}")
print(f"Z Shape: {Z_train.shape}")
print(f"Z Test Shape: {Z_test.shape}")
print(f"Volume Columns")
print(f"V Shape: {V_train.shape}")
print(f"V Test Shape: {V_test.shape}")


time_mask = np.ones(15) # T,
feed_mask = np.array([0, 1, 0, 0]) # C, 

F_train = (feed_mask[None, None, :] * F_train) / 24 # [1, 1, C] x [B, T, 1] -> [B, T, C]
F_test = (feed_mask[None, None, :] * F_test) / 24 # [1, 1, C] x [B, T, 1] -> [B, T, C]

Z_train = (time_mask[None, :, None] * Z_train) # [1, T, 1] x [B, 1, C] -> [B, T, C]
Z_test = (time_mask[None, :, None] * Z_test) # [1, T, 1] x [B, 1, C] -> [B, T, C]


print(f"\nUnfolding Shape: ")
print(f"X Columns: {X_columns}")
print(f"X Shape: {X_train.shape}")
print(f"X Test Shape: {X_test.shape}")
print(f"F Columns: {F_columns}")
print(f"F Shape: {F_train.shape}") # only 1 glc feed
print(f"F Test Shape: {F_test.shape}") # only 1 glc feed
print(f"Z Columns: {Z_columns}")
print(f"Z Shape: {Z_train.shape}")
print(f"Z Test Shape: {Z_test.shape}")
print(f"Volume Columns")
print(f"V Shape: {V_train.shape}")
print(f"V Test Shape: {V_test.shape}")



Original Shape: 
X Columns: ['X:VCD', 'X:Glc', 'X:Lac', 'X:Titer']
X Shape: (50, 15, 4)
X Test Shape: (50, 15, 4)
F Columns: ['W:Feed']
F Shape: (50, 15, 1)
F Test Shape: (50, 15, 1)
Z Columns: ['feed_start', 'feed_end', 'Glc_feed_rate', 'Glc_0', 'VCD_0']
Z Shape: (50, 1, 5)
Z Test Shape: (50, 1, 5)
Volume Columns
V Shape: (50, 15, 1)
V Test Shape: (50, 15, 1)

Unfolding Shape: 
X Columns: ['X:VCD', 'X:Glc', 'X:Lac', 'X:Titer']
X Shape: (50, 15, 4)
X Test Shape: (50, 15, 4)
F Columns: ['W:Feed']
F Shape: (50, 15, 4)
F Test Shape: (50, 15, 4)
Z Columns: ['feed_start', 'feed_end', 'Glc_feed_rate', 'Glc_0', 'VCD_0']
Z Shape: (50, 15, 5)
Z Test Shape: (50, 15, 5)
Volume Columns
V Shape: (50, 15, 1)
V Test Shape: (50, 15, 1)


In [345]:
TIME_STEP = 24
sign_mask = np.array([1, -1, 1, 1]) # C, 

In [346]:
def central_differences(X, F, V, sign_mask=None, time_step=24):
	"""
    Calculate central differences for the input data X with feed rates F and sign adjustments using sign_mask.
    
    Parameters:
    X (numpy.ndarray): Input data array of shape (batch_size, time_steps, variables)
    F (numpy.ndarray): Feed rates array of shape (batch_size, time_steps, variables)
    sign_mask (numpy.ndarray): Array to adjust signs for each variable, shape (variables,)
    time_step (int): Time step size for calculating derivatives
    
    Returns:
    numpy.ndarray: Derivatives matrix of the same shape as X
    """

	# Initial derivatives matrix
	Y = np.zeros_like(X)
	
	# Initial time step derivatives
	Y[:, 0, :] = (
		(
			(sign_mask[None, :] * (X[:, 1, :]*V[:, 1, :] - X[:, 0, :]*V[:, 0, :])) / (time_step*V[:, 0, :]) 
			+ F[:, 0, :]
		) 
	)
	
	# Central time step derivatives t=1
	Y[:, 1:-1, :] = (
		(
			(sign_mask[None, None, :] * ((X[:, 2:, :]*V[:, 2:, :] - X[:, :-2, :]*V[:, :-2, :]) / 2)) / (time_step*V[:, 1:-1, :]) 
			+ F[:, 1:-1, :]
		)
	)

	
	# Final time step derivatives
	Y[:, -1, :] = (
		(
			(sign_mask[None, :] * (X[:, -1, :]*V[:, -1, :] - X[:, -2, :]*V[:, -2, :])) / (time_step*V[:, -1, :]) 
			+ F[:, -2, :]
		)
	)

	return Y

In [347]:
Y_train = central_differences(X_train, F_train, V_train, sign_mask=sign_mask, time_step=TIME_STEP)
Y_test = central_differences(X_test, F_test, V_test, sign_mask=sign_mask, time_step=TIME_STEP)

In [348]:
def plot_state_data_color_by_rate(X, Y, t_steps=15, sign_mask=None):
    
    t = np.arange(t_steps)
    NUM_RUNS = X.shape[0]
    behavior = ['growth' if m == 1 else 'consumption' for m in sign_mask]

    # pcolors = px.colors.qualitative.Plotly
    fig = make_subplots(
        rows=2, cols=min(4, len(X_columns)),
        subplot_titles=[
            f"{var} over time - {var} rate" for var in X_columns
        ] + [
            f"{X_columns[0]} vs {var} - {var} {behavior[i]} rate" for i, var in enumerate(X_columns)
        ],
        horizontal_spacing=0.1, vertical_spacing=0.15
    )

    for i, var in enumerate(X_columns):
        fig.add_trace(
            go.Scatter(
                x=np.repeat(t, NUM_RUNS),
                y=X[:, :, i].T.flatten(),
                mode='markers',
                marker=dict(
                    color=Y[:, :, i].T.flatten(),
                    colorscale=px.colors.sequential.Viridis,
                    showscale=True,
                    colorbar=dict(len=1.0, x=0.45 if i % 2 == 0 else 1.0)
                ),
            ),
            row=1, col=i+1
        )
        fig.update_xaxes(title_text="Time", row=1, col=i+1)
        fig.update_yaxes(title_text=var, row=1, col=i+1)


    for i, var in enumerate(X_columns):
        fig.add_trace(
            go.Scatter(
                x=X[:, :, 0].T.flatten(),
                y=X[:, :, i].T.flatten(),
                mode='markers',
                marker=dict(
                    color=Y[:, :, i].T.flatten(),
                    colorscale=px.colors.sequential.Viridis
                ),
            ),
            row=2, col=i+1
        )
        fig.update_xaxes(title_text=X_columns[0], row=2, col=i+1)
        fig.update_yaxes(title_text=var, row=2, col=i+1)

    fig.update_layout(
        title_text="State plots of process colored by growth/consumption rates",
        showlegend=False,
        height=1000
    )

    fig.show()

In [349]:
plot_state_data_color_by_rate(X_train, Y_train, t_steps=15, sign_mask=sign_mask)

In [350]:
plot_state_data_color_by_rate(X_test, Y_test, t_steps=15, sign_mask=sign_mask)

In [351]:
def flatten_dataset(X, Z, F, Y):
	B, T, C = X.shape
	# X_flat = np.concatenate([X.reshape(B*T, C), Z[:,:,:-2].reshape(B*T, -1)], axis=1)
	X_flat = X.reshape(B*T, C)
	Y_flat = Y.reshape(B*T, C)
	F_flat = F.reshape(B*T, C)

	print(f"Flatten Shape: ")
	print(f"X + Z Columns : {X_columns}+{Z_columns}")
	print(f"X Columns: {X_columns}")
	print(f"X Shape: {X_flat.shape}")
	print(f"F Shape: {F_flat.shape}")
	print(f"X Columns: {X_columns}")
	print(f"Derivative X Shape: {Y_flat.shape}")
	return X_flat, F_flat, Y_flat

In [352]:
X_flat, F_flat, Y_flat = flatten_dataset(X_train, Z_train, F_train, Y_train)
X_test_flat, F_test_flat, Y_test_flat = flatten_dataset(X_test, Z_test, F_test, Y_test)

Flatten Shape: 
X + Z Columns : ['X:VCD', 'X:Glc', 'X:Lac', 'X:Titer']+['feed_start', 'feed_end', 'Glc_feed_rate', 'Glc_0', 'VCD_0']
X Columns: ['X:VCD', 'X:Glc', 'X:Lac', 'X:Titer']
X Shape: (750, 4)
F Shape: (750, 4)
X Columns: ['X:VCD', 'X:Glc', 'X:Lac', 'X:Titer']
Derivative X Shape: (750, 4)
Flatten Shape: 
X + Z Columns : ['X:VCD', 'X:Glc', 'X:Lac', 'X:Titer']+['feed_start', 'feed_end', 'Glc_feed_rate', 'Glc_0', 'VCD_0']
X Columns: ['X:VCD', 'X:Glc', 'X:Lac', 'X:Titer']
X Shape: (750, 4)
F Shape: (750, 4)
X Columns: ['X:VCD', 'X:Glc', 'X:Lac', 'X:Titer']
Derivative X Shape: (750, 4)


In [353]:
def fit_gp_model(X, y):
    n_features = X.shape[-1]

    # Apply RBF kernel to the normal features
    raw_feature_kernel = RBF(length_scale=[1e-1] * n_features, length_scale_bounds=(1e-2, 1e2))

    # The SubspaceKernel ensures that the RBF kernel is only applied to the first 10 features
    feature_kernel = SubspaceKernel(
        raw_feature_kernel, ids_to_apply=np.arange(0, n_features)
    )

    # Noise kernal
    noise_kernel = WhiteKernel(noise_level=1e-2, noise_level_bounds=(1e-10, 1e1))

    # Combine the kernel and allow for Noise
    full_kernel = 1**2 * feature_kernel + noise_kernel
    gp_model = GaussianProcessRegressor(kernel=full_kernel, n_restarts_optimizer=3)

    # Fit GP model
    gp_model.fit(X, y)
    return gp_model

In [354]:
models = {}
for i, var_name in tqdm(enumerate(X_columns),
                        desc="Training: ",
                        total=len(X_columns),
                        leave=True,
                        ncols=80):

    model = fit_gp_model(X_flat, Y_flat[:, i])
    models[i] = model


The optimal value found for dimension 3 of parameter k1__k2__length_scale is close to the specified upper bound 100.0. Increasing the bound and calling fit again may find a better value.


The optimal value found for dimension 2 of parameter k1__k2__length_scale is close to the specified upper bound 100.0. Increasing the bound and calling fit again may find a better value.


The optimal value found for dimension 3 of parameter k1__k2__length_scale is close to the specified upper bound 100.0. Increasing the bound and calling fit again may find a better value.


The optimal value found for dimension 1 of parameter k1__k2__length_scale is close to the specified upper bound 100.0. Increasing the bound and calling fit again may find a better value.


The optimal value found for dimension 3 of parameter k1__k2__length_scale is close to the specified upper bound 100.0. Increasing the bound and calling fit again may find a better value.

Training: 100%|███████████████████████████████████| 4/4

In [419]:
def ode_fcn(t, y, models, feeds=None, volumes=None, sign_mask=None):
    """
    --- Inputs ---
    t: Current timestep of the process
    y: Current states of VCD and Glucose
    feed: Feed rate for the experimentf
    g_mdl, k_mdl: Models for derivatives of growth and consumption rate
                  (sklearn expects 2D array, even when predicting on a single observation, use .reshape(1, -1))
    --- Outputs ---
    dVCD_dt, dGlc_df : Derivatives of VCD and Glucose for the next state from current one
    """
    # Current time index
    time_idx = int(t // 24)
    
    # Ensure time index does not exceed feed array bounds
    time_idx = min(time_idx, feeds.shape[0] - 1)

    # Get current feed rate
    current_feeds = feeds[time_idx]

    curr_volume = volumes[time_idx-1][0]
    after_feed_volume = volumes[time_idx][0]

    # mass balances
    dX_dt = np.zeros(len(models))
    for i, model in models.items():
        dX_dt[i] = (
            sign_mask[i] * model.predict(y.reshape(1, -1))[0] * curr_volume  
            + current_feeds[i] * after_feed_volume 
            - (y[i] * (after_feed_volume - curr_volume) / 24)
            ) / after_feed_volume

    return dX_dt


In [420]:



def run_hybrid(X, Z, F, V, models, t_end=14, time_step=24, sign_mask=None):
    """
    --- Inputs ---
    VCD_0, Glc_0: Initial conditions
    feed: Feed rate for the experiment
    t_end: End time of feed/experiment
    g_mdl, k_mdl: Models for derivatives of growth and consumption rate
    --- Outputs ---
    t: array of timesteps
    VCD, Glc: array of VCD/Glc values as evolution over the experiment run
    """
    X_pred = np.zeros_like(X)

    for i in tqdm(range(len(X)), desc="Simulating: ", total=len(X), leave=True, ncols=80):
        # Initial Values
        init_values = np.array([Z[i, 0, -1], Z[i, 0, -2], 0, 0])

        # Get all the time-steps on which we want to predict
        t_eval = np.arange(0, t_end * time_step + time_step, time_step)  # T,

        # Get feed rate data
        feeds = F[i, :, :]

        volumes = V[i, :, :]

        X_pred[i, :, :] = odeint(
            func=ode_fcn,
            y0=init_values,
            t=t_eval,
            args=(models, feeds, volumes, sign_mask),
            tfirst=True,
        )  # T, C

    return X_pred

In [421]:
X_train_pred = run_hybrid(X=X_train, Z=Z_train, F=F_train, V=V_train, models=models, t_end=14, time_step=24, sign_mask=sign_mask)
X_test_pred = run_hybrid(X=X_test, Z=Z_test, F=F_test, V=V_test, models=models, t_end=14, time_step=24, sign_mask=sign_mask)

Simulating: 100%|███████████████████████████████| 50/50 [00:44<00:00,  1.12it/s]
Simulating: 100%|███████████████████████████████| 50/50 [00:45<00:00,  1.10it/s]


In [422]:
print("Training")
for i, var in enumerate(X_columns):
    # Calculate error metrics
    rmse = relative_rmse(X_train[:, :, i], X_train_pred[:, :, i])
    score = r2(X_train[:, :, i], X_train_pred[:, :, i])
    print(f'{var} Train RMSE: {rmse}, R2: {score}')

print("\nTesting")    
for i, var in enumerate(X_columns):
    # Calculate error metrics
    rmse = relative_rmse(X_test[:, :, i], X_test_pred[:, :, i])
    score = r2(X_test[:, :, i], X_test_pred[:, :, i])
    print(f'{var} Test RMSE: {rmse}, R2: {score}')

Training
X:VCD Train RMSE: 0.345, R2: 0.854
X:Glc Train RMSE: 0.3, R2: 0.892
X:Lac Train RMSE: 0.164, R2: 0.962
X:Titer Train RMSE: 0.334, R2: 0.811

Testing
X:VCD Test RMSE: 0.272, R2: 0.914
X:Glc Test RMSE: 0.363, R2: 0.836
X:Lac Test RMSE: 0.125, R2: 0.979
X:Titer Test RMSE: 0.324, R2: 0.815


In [423]:
def plot_multi_step_gp_model_eval(
    X,
    X_pred,
    X_test,
    X_test_pred,
    X_columns=None,
):
    # X shape: [T, C, B]
    for i, col in enumerate(X_columns):
        y = X[:, i, :].copy()
        y_pred = X_pred[:, i, :].copy()
        y_test = X_test[:, i, :].copy()
        y_test_pred = X_test_pred[:, i, :].copy()

        # Metrics for training set
        train_r2 = r2(y, y_pred)
        train_abs_rmse = absolute_rmse(y, y_pred)
        train_rel_rmse = relative_rmse(y, y_pred)

        # Metrics for testing set
        test_r2 = r2(y_test, y_test_pred)
        test_abs_rmse = absolute_rmse(y_test, y_test_pred)
        test_rel_rmse = relative_rmse(y_test, y_test_pred)

        # Plot observed vs predicted
        fig = make_subplots(
            rows=1,
            cols=2,
            subplot_titles=(
                f"Train Set - {col} <br> R^2 = {train_r2} <br> Abs RMSE = {train_abs_rmse} <br> Rel RMSE = {train_rel_rmse}",
                f"Test Set - {col} <br> R^2 = {test_r2} <br> Abs RMSE = {test_abs_rmse} <br> Rel RMSE = {test_rel_rmse}",
            ),
        )

        # Train set plot
        _, _, NUM_TRAIN = X.shape
        for i in range(NUM_TRAIN):
            fig.add_trace(
                go.Scatter(
                    x=y[:, i].reshape(-1),
                    y=y_pred[:, i].reshape(-1),
                    mode="markers",
                    name=f"Run id in Train {i}",
                    legendgroup=f"train_{i}",
                ),
                row=1,
                col=1,
            )
        fig.add_shape(
            type="line",
            x0=y_pred.min(),
            y0=y_pred.min(),
            x1=y_pred.max(),
            y1=y_pred.max(),
            layer="above",
            line=dict(dash="dash"),
        )

        # Test set plot
        _, _, NUM_TEST = X_test.shape
        for j in range(NUM_TEST):
            fig.add_trace(
                go.Scatter(
                    x=y_test[:, j].reshape(-1),
                    y=y_test_pred[:, j].reshape(-1),
                    mode="markers",
                    name=f"Run id in Test {j}",
                    legendgroup=f"test_{j}",
                ),
                row=1,
                col=2,
            )
        fig.add_shape(
            type="line",
            x0=y_test_pred.min(),
            y0=y_test_pred.min(),
            x1=y_test_pred.max(),
            y1=y_test_pred.max(),
            layer="above",
            line=dict(dash="dash"),
            row=1,
            col=2,
        )

        fig.update_layout(width=1600)
        fig.update_xaxes(title="Observed values", row=1, col=1)
        fig.update_xaxes(title="Observed values", row=1, col=2)
        fig.update_yaxes(title="Predicted values", row=1, col=1)
        fig.update_yaxes(title="Predicted values", row=1, col=2)
        fig.show()


def plot_relative_rmse_by_variables(
    X,
    X_pred,
    X_test,
    X_test_pred,
    X_columns=None,
):
    relative_rmse_train = []
    relative_rmse_test = []
    for i, col in enumerate(X_columns):
        y = X[:, i, :].copy()
        y_pred = X_pred[:, i, :].copy()
        y_test = X_test[:, i, :].copy()
        y_test_pred = X_test_pred[:, i, :].copy()

        # Metrics for training set
        train_rel_rmse = relative_rmse(y, y_pred)
        relative_rmse_train.append(train_rel_rmse)

        # Metrics for testing set
        test_rel_rmse = relative_rmse(y_test, y_test_pred)
        relative_rmse_test.append(test_rel_rmse)

    fig_rmse = go.Figure()
    fig_rmse.add_trace(
        go.Bar(
            x=X_columns,
            y=relative_rmse_train,
            name="Train Set",
            marker_color=pcolors[0],
            text=[f"{v:.2f}" for v in relative_rmse_train],
            textposition="outside",
        )
    )

    fig_rmse.add_trace(
        go.Bar(
            x=X_columns,
            y=relative_rmse_test,
            name="Test Set",
            marker_color=pcolors[1],
            text=[f"{v:.2f}" for v in relative_rmse_test],
            textposition="outside",
        )
    )

    fig_rmse.update_layout(
        barmode="group",
        title="Relative RMSE for Each Variables",
        xaxis_title="Feature",
        yaxis_title="Relative RMSE",
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5),
    )

    fig_rmse.show()


def plot_predicted_profile(X, X_pred, X_columns, select_runs=[0], height=1000):
    max_cols_per_row = 5
    num_columns = len(X_columns)
    num_rows = (num_columns + max_cols_per_row) // max_cols_per_row

    fig = make_subplots(
        rows=num_rows, cols=min(num_columns, max_cols_per_row), subplot_titles=X_columns
    )

    color_palette = px.colors.qualitative.Plotly

    for idx, j in enumerate(select_runs):
        color = color_palette[idx % len(color_palette)]
        for i, c in enumerate(X_columns):
            row = i // max_cols_per_row + 1
            col = i % max_cols_per_row + 1
            show_legend = i == 0
            fig.add_trace(
                go.Scatter(
                    x=list(range(15)),
                    y=X[:, i, j],
                    name=f"Run {j} Observed",
                    marker=dict(color=color),
                    showlegend=show_legend,
                    legendgroup=f"group_{j}",
                ),
                row=row,
                col=col,
            )
            fig.add_trace(
                go.Scatter(
                    x=list(range(15)),
                    y=X_pred[:, i, j],
                    name=f"Run {j} Predicted",
                    line=dict(dash="dash"),
                    marker=dict(color=color),
                    showlegend=show_legend,
                    legendgroup=f"group_{j}",
                ),
                row=row,
                col=col,
            )

    fig.update_layout(
        showlegend=True,
        title_text="Process variable evolution for selected runs",
        height=height,
    )
    fig.show()

In [424]:
plot_multi_step_gp_model_eval(
    X_train.transpose(1, 2, 0),
    X_train_pred.transpose(1, 2, 0),
    X_test.transpose(1, 2, 0),
    X_test_pred.transpose(1, 2, 0),
    X_columns=X_columns,
)

plot_relative_rmse_by_variables(
    X_train.transpose(1, 2, 0),
    X_train_pred.transpose(1, 2, 0),
    X_test.transpose(1, 2, 0),
    X_test_pred.transpose(1, 2, 0),
    X_columns = X_columns,
)

In [425]:
plot_predicted_profile(
    X_test.transpose(1, 2, 0), 
	X_test_pred.transpose(1, 2, 0), 
	X_columns, select_runs=[20, 21, 22, 23, 24], height=500
)