In [None]:
# Import the necessary packages
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import ks_2samp
from numpy.random import randn
import seaborn as sns
%matplotlib inline
import tensorflow as tf
print(tf.__version__)
import warnings
warnings.filterwarnings("ignore")
import keras
from keras.layers import Input, Dense, Reshape, Flatten, Dropout, BatchNormalization, Embedding, Activation
from keras.layers import concatenate, LeakyReLU, ReLU
from keras.models import Sequential, Model
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from keras.utils import to_categorical
from keras.utils.vis_utils import plot_model
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder, LabelEncoder
from sklearn.model_selection import train_test_split,GridSearchCV,cross_val_score,cross_val_predict
from sklearn.metrics import r2_score
import scipy.stats

from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor

In [None]:
 # load data
# Location of the source workbook, relative to the notebook's working directory.
data_path = "../Data/"
file_name = "Table.xlsx"

# Row 0 of the sheet is skipped, so the header is read from the row after it.
workbook_path = os.path.join(data_path, file_name)
df_main = pd.read_excel(workbook_path, skiprows=[0])
df_main.head()

In [None]:
# Keep only the columns used later in the notebook: the twelve model inputs
# followed by the five model targets. All other workbook columns are dropped.
selected_columns = [
    # inputs
    'Cell_Energy',
    'T_Trigger',
    'P_Max',
    'Delta_T_Ramp',
    'Power_Plate',
    'HeatingTime_Plate',
    'Rth_JR_Shell',
    'Lambda_20',
    'Lambda_200',
    'Lambda_400',
    'Lambda_600',
    'Lambda_800',
    # targets
    'Time_Trigger_T1F',
    'Time_Trigger_T2F',
    'Tmax T1F',
    'Tmax T2F',
    'Tmax Plateau T2F',
]
df_main = df_main[selected_columns]

In [None]:
# Summary statistics of the selected columns, as a quick check of ranges and scales.
df_main.describe()

In [None]:
# Work on a copy; df_main itself is not modified by the steps that follow.
df = df_main.copy()

In [None]:
# Split the table into model inputs (X) and prediction targets (y).
feature_columns = [
    'Cell_Energy',
    'T_Trigger',
    'P_Max',
    'Delta_T_Ramp',
    'Power_Plate',
    'HeatingTime_Plate',
    'Rth_JR_Shell',
    'Lambda_20',
    'Lambda_200',
    'Lambda_400',
    'Lambda_600',
    'Lambda_800',
]
target_columns = [
    'Time_Trigger_T1F',
    'Time_Trigger_T2F',
    'Tmax T1F',
    'Tmax T2F',
    'Tmax Plateau T2F',
]

X = df[feature_columns]
y = df[target_columns]

In [None]:
# One box per raw input column on a shared axis, to compare their ranges.
fig, ax = plt.subplots(figsize=(16, 6))
sns.boxplot(data=X, ax=ax)

    Regression

In [None]:
# Drop inputs that take a single distinct value across the table, then hold
# out 20% of the rows as the test split (fixed seed for a repeatable split).
varies = X.nunique() > 1
X = X.loc[:, varies]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

In [None]:
# Standardise inputs and targets. Each scaler is fitted on the training split
# only and then applied unchanged to the test split.
# Author's note: scaling is needed for the DL model and other models; it was
# not needed for RF (still to be checked).
from sklearn.preprocessing import StandardScaler

xscaler = StandardScaler().fit(X_train)
X_train_scaled = xscaler.transform(X_train)
X_test_scaled = xscaler.transform(X_test)

yscaler = StandardScaler().fit(y_train)
y_train_scaled = yscaler.transform(y_train)
y_test_scaled = yscaler.transform(y_test)

In [None]:
# Box plot of the standardised training inputs. The scaler output carries no
# column names (by default it is a plain NumPy array), so the feature names are
# re-attached here; otherwise the boxes would be labelled 0..n-1.
plt.figure(figsize=(16, 6))
sns.boxplot(data=pd.DataFrame(X_train_scaled, columns=X_train.columns))

    CatBoost

In [None]:
from sklearn.multioutput import MultiOutputRegressor
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error
from catboost import CatBoostRegressor

# Fit one CatBoost regressor per target column on the standardised training
# data. verbose=0 silences CatBoost's per-iteration training log, which would
# otherwise be printed once for every target.
cb = MultiOutputRegressor(CatBoostRegressor(verbose=0))
cb.fit(X_train_scaled, y_train_scaled)

# Evaluate on the held-out test split: predict in scaled space, then map the
# predictions back to the original target units.
y_pred_cb = cb.predict(X_test_scaled)
y_ans_cb = yscaler.inverse_transform(y_pred_cb)
# Reuse y_test's row labels so that predictions and ground truth stay aligned
# in any later index-based pandas operation (y_test keeps the shuffled labels
# produced by train_test_split).
pred_output_cb = pd.DataFrame(y_ans_cb, columns=y_test.columns, index=y_test.index)

cols = pred_output_cb.columns.intersection(y_test.columns)
# Squared Pearson correlation between predicted and true values, per target.
# This measures linear association only; it is not the coefficient of
# determination computed by sklearn's r2_score.
r_squared = {c: scipy.stats.linregress(x=pred_output_cb[c], y=y_test[c]).rvalue ** 2
             for c in cols}

r_squared

In [None]:
# Overall R^2 on the test split in original target units. With r2_score's
# default settings the per-target scores are averaged into a single number.
r2_score(y_test, y_ans_cb)

    Optimization (Bayesian/TPE) 

    Each row of target ("y") values from y_test is used in turn as the desired output: the optimization searches for the inputs whose predicted outputs match that row.

In [None]:
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.early_stop import no_progress_loss

def predict_outputs(x):
    """Predict the model outputs for a single candidate input vector.

    Parameters
    ----------
    x : sequence of float
        One set of input values. It is reshaped into a single row before
        being handed to ``cb.predict``.

    Returns
    -------
    numpy.ndarray
        The prediction of ``cb`` for that row, flattened to one dimension.
    """
    single_row = np.array(x).reshape(1, -1)
    return cb.predict(single_row).flatten()

def objective_function(x, targets):
    """Score a candidate input vector against the desired outputs.

    The loss is the sum of squared differences between ``predict_outputs(x)``
    and ``targets``, so smaller is better.

    Parameters
    ----------
    x : sequence of float
        Candidate input values, forwarded to ``predict_outputs``.
    targets : array-like
        Desired output values, in the same space as the predictions.

    Returns
    -------
    dict
        ``{'loss': <sum of squared errors>, 'status': STATUS_OK}``, the
        result form passed to hyperopt's ``fmin`` by the caller.
    """
    residual = predict_outputs(x) - targets
    squared_error = np.square(residual).sum()
    return {'loss': squared_error, 'status': STATUS_OK}

# Desired outputs, in scaled target space. Only the first n_targets rows of the
# test split are inverted here; set n_targets = len(y_test_scaled) to process
# the whole test split.
n_targets = 5
target_outputs_scaled = y_test_scaled[:n_targets, :]

optimized_inputs = []  # one optimised input vector (original units) per target row

# Everything from here to the loop does not depend on the individual target
# row, so it is built once instead of once per iteration.
n_features = X_train_scaled.shape[1]

# Search bounds: the range each scaled feature covers in the training data.
feature_ranges = [(np.min(X_train_scaled[:, i]), np.max(X_train_scaled[:, i]))
                  for i in range(n_features)]

# hyperopt search space: one uniform variable per input feature, labelled x0, x1, ...
space = [hp.uniform('x{}'.format(i), feature_ranges[i][0], feature_ranges[i][1])
         for i in range(n_features)]

# NOTE(review): this Gaussian process is not used by the TPE search below, which
# evaluates candidates through objective_function. It is kept (fitted once
# rather than once per target row) only so that `kernel` and `model` remain
# defined; consider deleting it.
kernel = RBF(length_scale=1.0, length_scale_bounds=(1e-1, 10.0))
model = GaussianProcessRegressor(kernel=kernel, random_state=0).fit(X_train_scaled, y_train_scaled)

for targets in target_outputs_scaled:
    # Search for the scaled inputs whose prediction is closest to this target
    # row. At most 1000 evaluations are run; no_progress_loss(200) stops the
    # search early when the best loss has stopped improving.
    # NOTE(review): no random state is passed to fmin, so repeated runs may
    # return different optima.
    trials = Trials()
    objective_fn = lambda x: objective_function(x, targets)
    best = fmin(objective_fn, space, algo=tpe.suggest, max_evals=1000, trials=trials,
                early_stop_fn=no_progress_loss(200), verbose=True)

    # Collect the best values in feature order and map them back to original units.
    x_opt_scaled = [best['x{}'.format(i)] for i in range(n_features)]
    x_opt = xscaler.inverse_transform(np.array(x_opt_scaled).reshape(1, -1)).flatten()

    # Store the optimised inputs for this target row.
    optimized_inputs.append(x_opt)


In [None]:
# One row per optimised target row, one column per input feature (values are in
# original units, since each vector was inverse-transformed before being stored).
df_pred = pd.DataFrame(optimized_inputs, columns=X_train.columns) 
df_pred

##### Validation

In [None]:
# Stack the true test inputs and the optimised inputs, tagging every row with
# its origin in column 'C'.
# NOTE: "true" covers every row of X_test, whereas "pred" only contains the
# rows that were actually optimised.
dfs = [X_test, df_pred]
f_names = ["true", "pred"]
df_merge = pd.concat(dfs, ignore_index=True)
df_merge['C'] = np.repeat(f_names, list(map(len, dfs)))

cols = df_merge.columns[:-1]  # every feature column; the trailing 'C' label is excluded
data = df_merge  # same frame for every panel, so assigned once outside the loop

# Size the grid from the number of features, so that zip() cannot silently drop
# a feature and no blank panel is left over. Twelve features give the 4x3 grid
# and 15x7 figure used previously.
n_cols = 3
n_rows = max(1, -(-len(cols) // n_cols))  # ceiling division
fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(15, 1.75 * n_rows),
                         sharex=False, sharey=False, squeeze=False)
axes = axes.ravel()  # array to 1D

for col, ax in zip(cols, axes):
    # fill=True is the current spelling of the deprecated shade=True.
    sns.kdeplot(data=data, x=col, hue='C', fill=True, common_norm=False, ax=ax)
    ax.set(title=f'Distribution of Column: {col}', xlabel=None)

# Remove panels that received no feature.
for unused_ax in axes[len(cols):]:
    fig.delaxes(unused_ax)

fig.tight_layout()
plt.show()

In [None]:
# Analyze errors: per-feature RMSE between the optimised inputs and the true
# test inputs they are meant to reproduce.
from sklearn.metrics import mean_absolute_error, mean_squared_error

rmse_error = []
# df_pred holds one row per optimised target, taken from the start of the test
# split, so compare against the same number of leading X_test rows.
trial = X_test.iloc[:len(df_pred)]
for column in df_pred.columns:
    # RMSE is the square root of the mean *squared* error; the square root of
    # the mean absolute error would be neither RMSE nor MAE.
    rmse = np.sqrt(mean_squared_error(trial[column], df_pred[column]))
    rmse_error.append(rmse)

rmse_error