In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import optuna

from optuna.samplers import TPESampler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression,ElasticNet
from sklearn.cross_decomposition import PLSRegression
from sklearn.kernel_ridge import KernelRidge

from sklearn import metrics
from sklearn.metrics import mean_squared_error, r2_score

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.ensemble import GradientBoostingRegressor

In [8]:
# load the dataset
training_df = pd.read_excel('nonlinear_selected_features.xlsx')

training_df.head()

Unnamed: 0,409,2507,RWC
0,0.0256,0.036,98.373025
1,0.0263,0.0349,99.923489
2,0.0394,0.0428,96.795629
3,0.0342,0.0232,94.518349
4,0.0377,0.036,95.493562


In [9]:
# ---------------------------------
# Define predictors and target
# ---------------------------------
X = training_df.drop(columns = ['RWC'])
y = training_df['RWC']

In [10]:
# Feature scaling
sc = StandardScaler()
X_scaled = sc.fit_transform(X)

# Splitting the Dataset
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.1, random_state=42)

# <font color = Orange> Multiple Linear Regression - PLSR

In [11]:
# Define the objective function for Optuna
def objective(trial):
    # Set the range for n_components to be integers between 1 and the number of features
    n_components = trial.suggest_int('n_components', 1, X_train.shape[1])
    # Set the range for max_iter to be between 100 and 1000
    max_iter = trial.suggest_int('max_iter', 100, 1000)
    
    # Create the PLSR model with the specified hyperparameters
    model = PLSRegression(n_components=n_components, max_iter=max_iter)
    
    # Fit the model to the training data
    model.fit(X_train, y_train)
    
    # Get the MSE of the model on the testing data
    mse = metrics.mean_squared_error(y_test, model.predict(X_test))
    
    # Return mse as the objective value
    return mse

# Create the Optuna study
study = optuna.create_study(direction='minimize', sampler=TPESampler(seed=37))

# Optimize the objective function
study.optimize(objective, n_trials=100)

# Print the number of finished trials
print("Number of finished trials: ", len(study.trials))

# Print the best trial and save as a variable
print("Best trial:")
trial = study.best_trial
print("  Value: {}".format(trial.value))
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))
    
# Create the model using the best parameters
best_plsr = PLSRegression(n_components=trial.params['n_components'], max_iter=trial.params['max_iter'])

# Fit the model
best_plsr.fit(X_train, y_train)

# Make predictions
y_pred = best_plsr.predict(X_test)

# Model Evaluation
print('Mean Squared Error:', mean_squared_error(y_test, y_pred))
print('Root Mean Squared Error:', np.sqrt(mean_squared_error(y_test, y_pred)))
print('R-Squared value:', r2_score(y_test, y_pred))

[I 2024-03-13 12:02:11,207] A new study created in memory with name: no-name-d876ceab-f1bb-40ce-a750-320b68f5be9b
[I 2024-03-13 12:02:11,267] Trial 0 finished with value: 399.32528251196436 and parameters: {'n_components': 2, 'max_iter': 518}. Best is trial 0 with value: 399.32528251196436.
[I 2024-03-13 12:02:11,273] Trial 1 finished with value: 306.5379003877869 and parameters: {'n_components': 1, 'max_iter': 624}. Best is trial 1 with value: 306.5379003877869.
[I 2024-03-13 12:02:11,283] Trial 2 finished with value: 399.32528251196436 and parameters: {'n_components': 2, 'max_iter': 716}. Best is trial 1 with value: 306.5379003877869.
[I 2024-03-13 12:02:11,289] Trial 3 finished with value: 306.5379003877869 and parameters: {'n_components': 1, 'max_iter': 771}. Best is trial 1 with value: 306.5379003877869.
[I 2024-03-13 12:02:11,342] Trial 4 finished with value: 306.5379003877869 and parameters: {'n_components': 1, 'max_iter': 778}. Best is trial 1 with value: 306.5379003877869.
[I 

[I 2024-03-13 12:02:12,924] Trial 46 finished with value: 306.5379003877869 and parameters: {'n_components': 1, 'max_iter': 244}. Best is trial 1 with value: 306.5379003877869.
[I 2024-03-13 12:02:12,967] Trial 47 finished with value: 306.5379003877869 and parameters: {'n_components': 1, 'max_iter': 951}. Best is trial 1 with value: 306.5379003877869.
[I 2024-03-13 12:02:13,018] Trial 48 finished with value: 306.5379003877869 and parameters: {'n_components': 1, 'max_iter': 489}. Best is trial 1 with value: 306.5379003877869.
[I 2024-03-13 12:02:13,069] Trial 49 finished with value: 306.5379003877869 and parameters: {'n_components': 1, 'max_iter': 427}. Best is trial 1 with value: 306.5379003877869.
[I 2024-03-13 12:02:13,104] Trial 50 finished with value: 306.5379003877869 and parameters: {'n_components': 1, 'max_iter': 572}. Best is trial 1 with value: 306.5379003877869.
[I 2024-03-13 12:02:13,155] Trial 51 finished with value: 306.5379003877869 and parameters: {'n_components': 1, 'ma

[I 2024-03-13 12:02:14,799] Trial 93 finished with value: 306.5379003877869 and parameters: {'n_components': 1, 'max_iter': 355}. Best is trial 1 with value: 306.5379003877869.
[I 2024-03-13 12:02:14,831] Trial 94 finished with value: 306.5379003877869 and parameters: {'n_components': 1, 'max_iter': 449}. Best is trial 1 with value: 306.5379003877869.
[I 2024-03-13 12:02:14,866] Trial 95 finished with value: 306.5379003877869 and parameters: {'n_components': 1, 'max_iter': 222}. Best is trial 1 with value: 306.5379003877869.
[I 2024-03-13 12:02:14,901] Trial 96 finished with value: 306.5379003877869 and parameters: {'n_components': 1, 'max_iter': 973}. Best is trial 1 with value: 306.5379003877869.
[I 2024-03-13 12:02:14,935] Trial 97 finished with value: 306.5379003877869 and parameters: {'n_components': 1, 'max_iter': 501}. Best is trial 1 with value: 306.5379003877869.
[I 2024-03-13 12:02:14,975] Trial 98 finished with value: 306.5379003877869 and parameters: {'n_components': 1, 'ma

Number of finished trials:  100
Best trial:
  Value: 306.5379003877869
  Params: 
    n_components: 1
    max_iter: 624
Mean Squared Error: 306.5379003877869
Root Mean Squared Error: 17.50822379305756
R-Squared value: 0.588797075491406


# <font color = green> (Non-linear) Kernel Ridge Regression

In [27]:
# Define the objective function for Optuna
def objective(trial):
    # Set the range for alpha to be uniform between 0.0 and 1.0
    alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
    # Set the range for gamma to be loguniform between 1e-6 and 1e6
    gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
    # Set the range for kernel to be categorical between 'linear' and 'rbf'
    kernel = trial.suggest_categorical('kernel', ['linear', 'rbf'])
    
    # Create the Kernel Ridge Regression model with the specified hyperparameters
    model = KernelRidge(alpha=alpha, gamma=gamma, kernel=kernel)
    
    # Fit the model to the training data
    model.fit(X_train, y_train)
    
    # Get the MSE of the model on the testing data
    mse = metrics.mean_squared_error(y_test, model.predict(X_test))
    
    # Return mse as the objective value
    return mse

# Create the Optuna study
study = optuna.create_study(direction='minimize', sampler=TPESampler(seed=37))

# Optimize the objective function
study.optimize(objective, n_trials=100)

# Print the number of finished trials
print("Number of finished trials: ", len(study.trials))

# Print the best trial and save as a variable
print("Best trial:")
trial = study.best_trial
print("  Value: {}".format(trial.value))
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))
    
# Create the model using the best parameters
best_krr = KernelRidge(alpha=trial.params['alpha'], gamma=trial.params['gamma'], kernel=trial.params['kernel'])

# Fit the model
best_krr.fit(X_train, y_train)

# Make predictions
y_pred = best_krr.predict(X_test)

# Model Evaluation
print('Mean Squared Error:', mean_squared_error(y_test, y_pred))
print('Root Mean Squared Error:', np.sqrt(mean_squared_error(y_test, y_pred)))
print('R-Squared value:', r2_score(y_test, y_pred))

[I 2024-03-13 14:55:55,822] A new study created in memory with name: no-name-63e293c7-5fbb-42c9-a06d-bc4afd00a74e
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-13 14:55:55,832] Trial 0 finished with value: 97.21645754303192 and parameters: {'alpha': 0.9444966028573069, 'gamma': 0.3708327584362581, 'kernel': 'rbf'}. Best is trial 0 with value: 97.21645754303192.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-13 14:55:55,839] Trial 1 finished with value: 5116.937993175014 and parameters: {'alpha': 0.6200842101736271, 'gamma': 162.4382154062861, 'kernel': 'rbf'}. Best is trial 0 with value: 97.21645754303192.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-13 14:55:55,859] Trial 2 finished with value: 5804.037183558948 and parameters: {'alpha': 0.28197907007423595, 'gamma': 1099.246

  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-13 14:55:56,132] Trial 10 finished with value: 476.55553936794837 and parameters: {'alpha': 0.9822334069719648, 'gamma': 0.009819189785151882, 'kernel': 'rbf'}. Best is trial 0 with value: 97.21645754303192.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-13 14:55:56,190] Trial 11 finished with value: 441.2919367320876 and parameters: {'alpha': 0.9693357342873596, 'gamma': 0.011542322822511587, 'kernel': 'rbf'}. Best is trial 0 with value: 97.21645754303192.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-13 14:55:56,242] Trial 12 finished with value: 184.874238298669 and parameters: {'alpha': 0.9968213568244305, 'gamma': 0.041919492894116506, 'kernel': 'rbf'}. Best is trial 0 with value: 97.21645754303192.
  alpha = trial.suggest_unif

[I 2024-03-13 14:55:56,460] Trial 19 finished with value: 823.5045752071185 and parameters: {'alpha': 0.526877474211422, 'gamma': 0.00016990904674617837, 'kernel': 'rbf'}. Best is trial 0 with value: 97.21645754303192.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-13 14:55:56,483] Trial 20 finished with value: 3719.924915287064 and parameters: {'alpha': 0.8805907894240441, 'gamma': 13.526720903384325, 'kernel': 'rbf'}. Best is trial 0 with value: 97.21645754303192.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-13 14:55:56,511] Trial 21 finished with value: 101.604306848173 and parameters: {'alpha': 0.7125193632914463, 'gamma': 0.41588121781804044, 'kernel': 'rbf'}. Best is trial 0 with value: 97.21645754303192.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-13 14:55:56,531] Tri

[I 2024-03-13 14:55:56,747] Trial 29 finished with value: 4702.799281400352 and parameters: {'alpha': 0.3021429423137746, 'gamma': 87.50561581908923, 'kernel': 'rbf'}. Best is trial 0 with value: 97.21645754303192.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-13 14:55:56,775] Trial 30 finished with value: 695.8625008298545 and parameters: {'alpha': 0.567007771591952, 'gamma': 0.0021182738066858786, 'kernel': 'rbf'}. Best is trial 0 with value: 97.21645754303192.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-13 14:55:56,797] Trial 31 finished with value: 105.69198817382853 and parameters: {'alpha': 0.7646221384515794, 'gamma': 0.4629878523328369, 'kernel': 'rbf'}. Best is trial 0 with value: 97.21645754303192.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-13 14:55:56,814] Tria

  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-13 14:55:57,099] Trial 39 finished with value: 5330.395984793137 and parameters: {'alpha': 0.8362696638638502, 'gamma': 893.2061759640305, 'kernel': 'rbf'}. Best is trial 33 with value: 84.1511037304582.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-13 14:55:57,122] Trial 40 finished with value: 5787.260475164807 and parameters: {'alpha': 0.7333550491696668, 'gamma': 0.0005267114401992216, 'kernel': 'linear'}. Best is trial 33 with value: 84.1511037304582.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-13 14:55:57,146] Trial 41 finished with value: 91.86951336297713 and parameters: {'alpha': 0.7601786726097931, 'gamma': 0.2111053368622758, 'kernel': 'rbf'}. Best is trial 33 with value: 84.1511037304582.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_logun

[I 2024-03-13 14:55:57,310] Trial 48 finished with value: 289.37487265549936 and parameters: {'alpha': 0.17458551245150522, 'gamma': 0.026575977078216982, 'kernel': 'rbf'}. Best is trial 33 with value: 84.1511037304582.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-13 14:55:57,336] Trial 49 finished with value: 100.80423474125327 and parameters: {'alpha': 0.6414523572711452, 'gamma': 0.20196086167775534, 'kernel': 'rbf'}. Best is trial 33 with value: 84.1511037304582.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-13 14:55:57,356] Trial 50 finished with value: 3702.254518360436 and parameters: {'alpha': 0.679254675303435, 'gamma': 14.404209254132905, 'kernel': 'rbf'}. Best is trial 33 with value: 84.1511037304582.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-13 14:55:57,383] T

  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-13 14:55:57,549] Trial 58 finished with value: 5280.599277306696 and parameters: {'alpha': 0.3604729361930965, 'gamma': 288.99971262054873, 'kernel': 'rbf'}. Best is trial 33 with value: 84.1511037304582.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-13 14:55:57,583] Trial 59 finished with value: 1732.4081840912922 and parameters: {'alpha': 0.6579882100987228, 'gamma': 5.741502854991124, 'kernel': 'rbf'}. Best is trial 33 with value: 84.1511037304582.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-13 14:55:57,609] Trial 60 finished with value: 4223.841048741079 and parameters: {'alpha': 0.6249715722153316, 'gamma': 23.412733536327956, 'kernel': 'rbf'}. Best is trial 33 with value: 84.1511037304582.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform

[I 2024-03-13 14:55:57,987] Trial 67 finished with value: 91.6539420679979 and parameters: {'alpha': 0.7662535821298053, 'gamma': 0.2073426883326104, 'kernel': 'rbf'}. Best is trial 33 with value: 84.1511037304582.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-13 14:55:58,013] Trial 68 finished with value: 606.6871586060253 and parameters: {'alpha': 0.8180612056835694, 'gamma': 0.004649121224846228, 'kernel': 'rbf'}. Best is trial 33 with value: 84.1511037304582.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-13 14:55:58,041] Trial 69 finished with value: 5778.9787313055385 and parameters: {'alpha': 0.9668470921346836, 'gamma': 1.1602627347207377, 'kernel': 'linear'}. Best is trial 33 with value: 84.1511037304582.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-13 14:55:58,075] T

  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-13 14:55:58,285] Trial 77 finished with value: 140.64381245473092 and parameters: {'alpha': 0.7295003803852418, 'gamma': 0.06927978383612524, 'kernel': 'rbf'}. Best is trial 33 with value: 84.1511037304582.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-13 14:55:58,309] Trial 78 finished with value: 94.35630318049792 and parameters: {'alpha': 0.8647645884556984, 'gamma': 0.3425985668823662, 'kernel': 'rbf'}. Best is trial 33 with value: 84.1511037304582.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-13 14:55:58,333] Trial 79 finished with value: 5780.891280351023 and parameters: {'alpha': 0.9122699158403474, 'gamma': 0.966847913905526, 'kernel': 'linear'}. Best is trial 33 with value: 84.1511037304582.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguni

[I 2024-03-13 14:55:58,521] Trial 86 finished with value: 94.06772766288037 and parameters: {'alpha': 0.8615659207287988, 'gamma': 0.3390947386382973, 'kernel': 'rbf'}. Best is trial 33 with value: 84.1511037304582.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-13 14:55:58,564] Trial 87 finished with value: 99.99400153598404 and parameters: {'alpha': 0.85381182461696, 'gamma': 0.40087312739278447, 'kernel': 'rbf'}. Best is trial 33 with value: 84.1511037304582.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-13 14:55:58,606] Trial 88 finished with value: 133.96281492858552 and parameters: {'alpha': 0.820130856994874, 'gamma': 0.06978479537128476, 'kernel': 'rbf'}. Best is trial 33 with value: 84.1511037304582.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-13 14:55:58,637] Trial 

  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-13 14:55:58,893] Trial 96 finished with value: 88.91244624372386 and parameters: {'alpha': 0.8693891555285476, 'gamma': 0.16539301839454218, 'kernel': 'rbf'}. Best is trial 33 with value: 84.1511037304582.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-13 14:55:58,921] Trial 97 finished with value: 5330.408910988087 and parameters: {'alpha': 0.865094133386189, 'gamma': 3532.7761182111744, 'kernel': 'rbf'}. Best is trial 33 with value: 84.1511037304582.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-13 14:55:58,954] Trial 98 finished with value: 2057.2384543780786 and parameters: {'alpha': 0.9813125123262761, 'gamma': 5.857461722269437, 'kernel': 'rbf'}. Best is trial 33 with value: 84.1511037304582.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform

Number of finished trials:  100
Best trial:
  Value: 84.1511037304582
  Params: 
    alpha: 0.9346357780756928
    gamma: 0.20355053439988252
    kernel: rbf
Mean Squared Error: 84.1511037304582
Root Mean Squared Error: 9.173391070398024
R-Squared value: 0.8871161448198882


# Polynomial Regression

In [19]:
from sklearn.preprocessing import PolynomialFeatures

# Define the objective function for Optuna
def objective(trial):
    # Set the range for the degree of polynomial features to be between 1 and 5
    degree = trial.suggest_int('degree', 1, 5)
    
    # Create the polynomial features
    poly_features = PolynomialFeatures(degree=degree)
    X_train_poly = poly_features.fit_transform(X_train)
    X_test_poly = poly_features.transform(X_test)
    
    # Create the Polynomial Regression model
    model = LinearRegression()
    
    # Fit the model to the training data
    model.fit(X_train_poly, y_train)
    
    # Get the MSE of the model on the testing data
    mse = metrics.mean_squared_error(y_test, model.predict(X_test_poly))
    
    # Return mse as the objective value
    return mse

# Create the Optuna study
study = optuna.create_study(direction='minimize', sampler=TPESampler(seed=37))

# Optimize the objective function
study.optimize(objective, n_trials=100)

# Print the number of finished trials
print("Number of finished trials: ", len(study.trials))

# Print the best trial and save as a variable
print("Best trial:")
trial = study.best_trial
print("  Value: {}".format(trial.value))
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))
    
# Create the polynomial features using the best degree
best_degree = trial.params['degree']
poly_features = PolynomialFeatures(degree=best_degree)
X_train_poly = poly_features.fit_transform(X_train)
X_test_poly = poly_features.transform(X_test)

# Create the Polynomial Regression model using the best degree
best_model = LinearRegression()

# Fit the model
best_model.fit(X_train_poly, y_train)

# Make predictions
y_pred = best_model.predict(X_test_poly)

# Model Evaluation
print('Mean Squared Error:', mean_squared_error(y_test, y_pred))
print('Root Mean Squared Error:', np.sqrt(mean_squared_error(y_test, y_pred)))
print('R-Squared value:', r2_score(y_test, y_pred))


[I 2024-03-13 12:05:43,867] A new study created in memory with name: no-name-47c2aad8-0b2f-437e-a20b-62d3d5d87360
[I 2024-03-13 12:05:43,877] Trial 0 finished with value: 354833316.438535 and parameters: {'degree': 5}. Best is trial 0 with value: 354833316.438535.
[I 2024-03-13 12:05:43,884] Trial 1 finished with value: 961.5671267687733 and parameters: {'degree': 3}. Best is trial 1 with value: 961.5671267687733.
[I 2024-03-13 12:05:43,952] Trial 2 finished with value: 399.3252825119644 and parameters: {'degree': 1}. Best is trial 2 with value: 399.3252825119644.
[I 2024-03-13 12:05:43,987] Trial 3 finished with value: 961.5671267687733 and parameters: {'degree': 3}. Best is trial 2 with value: 399.3252825119644.
[I 2024-03-13 12:05:44,008] Trial 4 finished with value: 126015.33778758363 and parameters: {'degree': 4}. Best is trial 2 with value: 399.3252825119644.
[I 2024-03-13 12:05:44,016] Trial 5 finished with value: 126015.33778758363 and parameters: {'degree': 4}. Best is trial 2

[I 2024-03-13 12:05:45,369] Trial 53 finished with value: 399.3252825119644 and parameters: {'degree': 1}. Best is trial 2 with value: 399.3252825119644.
[I 2024-03-13 12:05:45,408] Trial 54 finished with value: 399.3252825119644 and parameters: {'degree': 1}. Best is trial 2 with value: 399.3252825119644.
[I 2024-03-13 12:05:45,447] Trial 55 finished with value: 399.3252825119644 and parameters: {'degree': 1}. Best is trial 2 with value: 399.3252825119644.
[I 2024-03-13 12:05:45,483] Trial 56 finished with value: 1224.764478506658 and parameters: {'degree': 2}. Best is trial 2 with value: 399.3252825119644.
[I 2024-03-13 12:05:45,509] Trial 57 finished with value: 399.3252825119644 and parameters: {'degree': 1}. Best is trial 2 with value: 399.3252825119644.
[I 2024-03-13 12:05:45,535] Trial 58 finished with value: 1224.764478506658 and parameters: {'degree': 2}. Best is trial 2 with value: 399.3252825119644.
[I 2024-03-13 12:05:45,549] Trial 59 finished with value: 399.3252825119644 

Number of finished trials:  100
Best trial:
  Value: 399.3252825119644
  Params: 
    degree: 1
Mean Squared Error: 399.3252825119644
Root Mean Squared Error: 19.983124943610907
R-Squared value: 0.46432815064168675


# Support Vector Regression (SVR)

In [20]:
from sklearn.svm import SVR

# Define the objective function for Optuna
def objective(trial):
    # Set the range for C (regularization parameter) to be between 0.1 and 10
    C = trial.suggest_float('C', 0.1, 10)
    # Set the range for epsilon (epsilon in the epsilon-SVR model) to be between 0.01 and 1
    epsilon = trial.suggest_float('epsilon', 0.01, 1)
    # Set the range for gamma (kernel coefficient) to be between 0.01 and 1
    gamma = trial.suggest_float('gamma', 0.01, 1)
    
    # Create the SVR model with the specified hyperparameters
    model = SVR(kernel='rbf', C=C, epsilon=epsilon, gamma=gamma)
    
    # Fit the model to the training data
    model.fit(X_train, y_train)
    
    # Get the MSE of the model on the testing data
    mse = metrics.mean_squared_error(y_test, model.predict(X_test))
    
    # Return mse as the objective value
    return mse

# Create the Optuna study
study = optuna.create_study(direction='minimize', sampler=TPESampler(seed=37))

# Optimize the objective function
study.optimize(objective, n_trials=100)

# Print the number of finished trials
print("Number of finished trials: ", len(study.trials))

# Print the best trial and save as a variable
print("Best trial:")
trial = study.best_trial
print("  Value: {}".format(trial.value))
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))
    
# Create the model using the best parameters
best_model = SVR(kernel='rbf', C=trial.params['C'], epsilon=trial.params['epsilon'], gamma=trial.params['gamma'])

# Fit the model
best_model.fit(X_train, y_train)

# Make predictions
y_pred = best_model.predict(X_test)

# Model Evaluation
print('Mean Squared Error:', mean_squared_error(y_test, y_pred))
print('Root Mean Squared Error:', np.sqrt(mean_squared_error(y_test, y_pred)))
print('R-Squared value:', r2_score(y_test, y_pred))

[I 2024-03-13 12:06:01,505] A new study created in memory with name: no-name-9fdfc497-b9e0-4701-ad8d-e7acfb362217
[I 2024-03-13 12:06:01,516] Trial 0 finished with value: 561.4861643168963 and parameters: {'C': 9.450516368287337, 'epsilon': 0.46945719256136353, 'gamma': 0.20086707866758605}. Best is trial 0 with value: 561.4861643168963.
[I 2024-03-13 12:06:01,535] Trial 1 finished with value: 844.4696787588192 and parameters: {'C': 5.86075928111192, 'epsilon': 0.6238833680718908, 'gamma': 0.6873817772885423}. Best is trial 0 with value: 561.4861643168963.
[I 2024-03-13 12:06:01,554] Trial 2 finished with value: 967.3693932967083 and parameters: {'C': 1.124031794697981, 'epsilon': 0.7480210086215892, 'gamma': 0.2891592793734936}. Best is trial 0 with value: 561.4861643168963.
[I 2024-03-13 12:06:01,561] Trial 3 finished with value: 758.6957345135879 and parameters: {'C': 7.558903412728403, 'epsilon': 0.7947785732750089, 'gamma': 0.6311265903165661}. Best is trial 0 with value: 561.4861

[I 2024-03-13 12:06:02,892] Trial 36 finished with value: 608.0513631291681 and parameters: {'C': 8.49221699867119, 'epsilon': 0.7059141050133255, 'gamma': 0.16371799925403938}. Best is trial 12 with value: 541.6288844689492.
[I 2024-03-13 12:06:02,933] Trial 37 finished with value: 596.6688988009857 and parameters: {'C': 9.23390186237786, 'epsilon': 0.8371497645805519, 'gamma': 0.09930364491786847}. Best is trial 12 with value: 541.6288844689492.
[I 2024-03-13 12:06:02,991] Trial 38 finished with value: 855.5384005323904 and parameters: {'C': 6.664162806101645, 'epsilon': 0.1325780952828347, 'gamma': 0.9923005866188921}. Best is trial 12 with value: 541.6288844689492.
[I 2024-03-13 12:06:03,020] Trial 39 finished with value: 632.3341186723721 and parameters: {'C': 8.120219377624847, 'epsilon': 0.5038351735222603, 'gamma': 0.24598279041508567}. Best is trial 12 with value: 541.6288844689492.
[I 2024-03-13 12:06:03,065] Trial 40 finished with value: 701.4795000279091 and parameters: {'C

[I 2024-03-13 12:06:04,427] Trial 73 finished with value: 574.9138154098811 and parameters: {'C': 9.19770111157144, 'epsilon': 0.4391523501478579, 'gamma': 0.16422404450911363}. Best is trial 12 with value: 541.6288844689492.
[I 2024-03-13 12:06:04,459] Trial 74 finished with value: 636.8076306400445 and parameters: {'C': 9.728337798247022, 'epsilon': 0.47958544035167855, 'gamma': 0.06822264457742638}. Best is trial 12 with value: 541.6288844689492.
[I 2024-03-13 12:06:04,502] Trial 75 finished with value: 831.4701343904972 and parameters: {'C': 8.690513469879372, 'epsilon': 0.5658664608935556, 'gamma': 0.023342944992056552}. Best is trial 12 with value: 541.6288844689492.
[I 2024-03-13 12:06:04,526] Trial 76 finished with value: 584.0132019005257 and parameters: {'C': 9.362394519489362, 'epsilon': 0.3975596114393985, 'gamma': 0.27409783997602205}. Best is trial 12 with value: 541.6288844689492.
[I 2024-03-13 12:06:04,559] Trial 77 finished with value: 565.9871298701053 and parameters:

Number of finished trials:  100
Best trial:
  Value: 541.6288844689492
  Params: 
    C: 9.968531432561862
    epsilon: 0.40121805953432715
    gamma: 0.19455786446799173
Mean Squared Error: 541.6288844689492
Root Mean Squared Error: 23.27292170031406
R-Squared value: 0.27343607100392064


# <font color = blue> Decision Trees

In [26]:
from sklearn.tree import DecisionTreeRegressor

# Define the objective function for Optuna
def objective(trial):
    # Set the range for max_depth (maximum depth of the tree) to be between 1 and 10
    max_depth = trial.suggest_int('max_depth', 1, 10)
    
    # Create the Decision Tree Regression model with the specified hyperparameters
    model = DecisionTreeRegressor(max_depth=max_depth, random_state=0)
    
    # Fit the model to the training data
    model.fit(X_train, y_train)
    
    # Get the MSE of the model on the testing data
    mse = metrics.mean_squared_error(y_test, model.predict(X_test))
    
    # Return mse as the objective value
    return mse

# Create the Optuna study
study = optuna.create_study(direction='minimize', sampler=TPESampler(seed=37))

# Optimize the objective function
study.optimize(objective, n_trials=100)

# Print the number of finished trials
print("Number of finished trials: ", len(study.trials))

# Print the best trial and save as a variable
print("Best trial:")
trial = study.best_trial
print("  Value: {}".format(trial.value))
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))
    
# Create the model using the best parameters
best_model = DecisionTreeRegressor(max_depth=trial.params['max_depth'], random_state=0)

# Fit the model
best_model.fit(X_train, y_train)

# Make predictions
y_pred = best_model.predict(X_test)

# Model Evaluation
print('Mean Squared Error:', mean_squared_error(y_test, y_pred))
print('Root Mean Squared Error:', np.sqrt(mean_squared_error(y_test, y_pred)))
print('R-Squared value:', r2_score(y_test, y_pred))

[I 2024-03-13 12:28:48,651] A new study created in memory with name: no-name-f9dfd516-3405-4860-a73a-cf99750301bc
[I 2024-03-13 12:28:48,709] Trial 0 finished with value: 262.7462064485456 and parameters: {'max_depth': 10}. Best is trial 0 with value: 262.7462064485456.
[I 2024-03-13 12:28:48,729] Trial 1 finished with value: 271.7438015041508 and parameters: {'max_depth': 5}. Best is trial 0 with value: 262.7462064485456.
[I 2024-03-13 12:28:48,758] Trial 2 finished with value: 361.5686666886378 and parameters: {'max_depth': 2}. Best is trial 0 with value: 262.7462064485456.
[I 2024-03-13 12:28:48,766] Trial 3 finished with value: 249.30925639576748 and parameters: {'max_depth': 6}. Best is trial 3 with value: 249.30925639576748.
[I 2024-03-13 12:28:48,776] Trial 4 finished with value: 260.25800688651975 and parameters: {'max_depth': 7}. Best is trial 3 with value: 249.30925639576748.
[I 2024-03-13 12:28:48,795] Trial 5 finished with value: 260.25800688651975 and parameters: {'max_dep

[I 2024-03-13 12:28:50,146] Trial 51 finished with value: 197.82674433925473 and parameters: {'max_depth': 1}. Best is trial 19 with value: 197.82674433925473.
[I 2024-03-13 12:28:50,185] Trial 52 finished with value: 361.5686666886378 and parameters: {'max_depth': 2}. Best is trial 19 with value: 197.82674433925473.
[I 2024-03-13 12:28:50,205] Trial 53 finished with value: 197.82674433925473 and parameters: {'max_depth': 1}. Best is trial 19 with value: 197.82674433925473.
[I 2024-03-13 12:28:50,240] Trial 54 finished with value: 361.5686666886378 and parameters: {'max_depth': 2}. Best is trial 19 with value: 197.82674433925473.
[I 2024-03-13 12:28:50,257] Trial 55 finished with value: 264.59978917328806 and parameters: {'max_depth': 9}. Best is trial 19 with value: 197.82674433925473.
[I 2024-03-13 12:28:50,273] Trial 56 finished with value: 260.25800688651975 and parameters: {'max_depth': 7}. Best is trial 19 with value: 197.82674433925473.
[I 2024-03-13 12:28:50,289] Trial 57 finis

Number of finished trials:  100
Best trial:
  Value: 197.82674433925473
  Params: 
    max_depth: 1
Mean Squared Error: 197.82674433925473
Root Mean Squared Error: 14.06508956029981
R-Squared value: 0.7346268252134329
