In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import optuna

from optuna.samplers import TPESampler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression,ElasticNet
from sklearn.cross_decomposition import PLSRegression
from sklearn.kernel_ridge import KernelRidge

from sklearn import metrics
from sklearn.metrics import mean_squared_error, r2_score

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.ensemble import GradientBoostingRegressor

In [11]:
# load the dataset
training_df = pd.read_excel('rfe_selected_features.xlsx')

training_df.head()

Unnamed: 0,2102,2117,Water Potential (Mpa)
0,0.1191,0.1275,-0.089632
1,0.126,0.133,-0.148237
2,0.0846,0.0893,-0.128243
3,0.1205,0.1281,-0.164785
4,0.1049,0.111,-0.326122


In [12]:
# ---------------------------------
# Define predictors and target
# ---------------------------------
X = training_df.drop(columns = ['Water Potential (Mpa)'])
y = training_df['Water Potential (Mpa)']

In [13]:
# Feature scaling
sc = StandardScaler()
X_scaled = sc.fit_transform(X)

# Splitting the Dataset
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.1, random_state=42)

# <font color = Orange> Multiple Linear Regression - PLSR

In [14]:
# Define the objective function for Optuna
def objective(trial):
    # Set the range for n_components to be integers between 1 and the number of features
    n_components = trial.suggest_int('n_components', 1, X_train.shape[1])
    # Set the range for max_iter to be between 100 and 1000
    max_iter = trial.suggest_int('max_iter', 100, 1000)
    
    # Create the PLSR model with the specified hyperparameters
    model = PLSRegression(n_components=n_components, max_iter=max_iter)
    
    # Fit the model to the training data
    model.fit(X_train, y_train)
    
    # Get the MSE of the model on the testing data
    mse = metrics.mean_squared_error(y_test, model.predict(X_test))
    
    # Return mse as the objective value
    return mse

# Create the Optuna study
study = optuna.create_study(direction='minimize', sampler=TPESampler(seed=37))

# Optimize the objective function
study.optimize(objective, n_trials=100)

# Print the number of finished trials
print("Number of finished trials: ", len(study.trials))

# Print the best trial and save as a variable
print("Best trial:")
trial = study.best_trial
print("  Value: {}".format(trial.value))
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))
    
# Create the model using the best parameters
best_plsr = PLSRegression(n_components=trial.params['n_components'], max_iter=trial.params['max_iter'])

# Fit the model
best_plsr.fit(X_train, y_train)

# Make predictions
y_pred = best_plsr.predict(X_test)

# Model Evaluation
print('Mean Squared Error:', mean_squared_error(y_test, y_pred))
print('Root Mean Squared Error:', np.sqrt(mean_squared_error(y_test, y_pred)))
print('R-Squared value:', r2_score(y_test, y_pred))

[I 2024-03-12 11:33:41,732] A new study created in memory with name: no-name-3dba0fba-e469-40ae-88e0-f33ac494c5ad
[I 2024-03-12 11:33:41,768] Trial 0 finished with value: 1.9989822734537084 and parameters: {'n_components': 2, 'max_iter': 518}. Best is trial 0 with value: 1.9989822734537084.
[I 2024-03-12 11:33:41,786] Trial 1 finished with value: 3.9984493908814147 and parameters: {'n_components': 1, 'max_iter': 624}. Best is trial 0 with value: 1.9989822734537084.
[I 2024-03-12 11:33:41,815] Trial 2 finished with value: 1.9989822734537084 and parameters: {'n_components': 2, 'max_iter': 716}. Best is trial 0 with value: 1.9989822734537084.
[I 2024-03-12 11:33:41,835] Trial 3 finished with value: 3.9984493908814147 and parameters: {'n_components': 1, 'max_iter': 771}. Best is trial 0 with value: 1.9989822734537084.
[I 2024-03-12 11:33:41,845] Trial 4 finished with value: 3.9984493908814147 and parameters: {'n_components': 1, 'max_iter': 778}. Best is trial 0 with value: 1.99898227345370

[I 2024-03-12 11:33:45,560] Trial 46 finished with value: 1.9989822734537084 and parameters: {'n_components': 2, 'max_iter': 773}. Best is trial 0 with value: 1.9989822734537084.
[I 2024-03-12 11:33:45,655] Trial 47 finished with value: 1.9989822734537084 and parameters: {'n_components': 2, 'max_iter': 389}. Best is trial 0 with value: 1.9989822734537084.
[I 2024-03-12 11:33:45,812] Trial 48 finished with value: 1.9989822734537084 and parameters: {'n_components': 2, 'max_iter': 193}. Best is trial 0 with value: 1.9989822734537084.
[I 2024-03-12 11:33:45,934] Trial 49 finished with value: 1.9989822734537084 and parameters: {'n_components': 2, 'max_iter': 527}. Best is trial 0 with value: 1.9989822734537084.
[I 2024-03-12 11:33:46,110] Trial 50 finished with value: 1.9989822734537084 and parameters: {'n_components': 2, 'max_iter': 669}. Best is trial 0 with value: 1.9989822734537084.
[I 2024-03-12 11:33:46,245] Trial 51 finished with value: 1.9989822734537084 and parameters: {'n_componen

[I 2024-03-12 11:33:50,712] Trial 92 finished with value: 1.9989822734537084 and parameters: {'n_components': 2, 'max_iter': 424}. Best is trial 0 with value: 1.9989822734537084.
[I 2024-03-12 11:33:50,892] Trial 93 finished with value: 1.9989822734537084 and parameters: {'n_components': 2, 'max_iter': 115}. Best is trial 0 with value: 1.9989822734537084.
[I 2024-03-12 11:33:51,005] Trial 94 finished with value: 1.9989822734537084 and parameters: {'n_components': 2, 'max_iter': 476}. Best is trial 0 with value: 1.9989822734537084.
[I 2024-03-12 11:33:51,136] Trial 95 finished with value: 1.9989822734537084 and parameters: {'n_components': 2, 'max_iter': 501}. Best is trial 0 with value: 1.9989822734537084.
[I 2024-03-12 11:33:51,259] Trial 96 finished with value: 1.9989822734537084 and parameters: {'n_components': 2, 'max_iter': 580}. Best is trial 0 with value: 1.9989822734537084.
[I 2024-03-12 11:33:51,413] Trial 97 finished with value: 1.9989822734537084 and parameters: {'n_componen

Number of finished trials:  100
Best trial:
  Value: 1.9989822734537084
  Params: 
    n_components: 2
    max_iter: 518
Mean Squared Error: 1.9989822734537084
Root Mean Squared Error: 1.4138536959154255
R-Squared value: 0.5034285959078163


# <font color = Red> Non-linear Kernel Ridge Regression (KRR)

In [16]:
# Define the objective function for Optuna
def objective(trial):
    # Set the range for alpha to be uniform between 0.0 and 1.0
    alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
    # Set the range for gamma to be loguniform between 1e-6 and 1e6
    gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
    # Set the range for kernel to be categorical between 'linear' and 'rbf'
    kernel = trial.suggest_categorical('kernel', ['linear', 'rbf'])
    
    # Create the Kernel Ridge Regression model with the specified hyperparameters
    model = KernelRidge(alpha=alpha, gamma=gamma, kernel=kernel)
    
    # Fit the model to the training data
    model.fit(X_train, y_train)
    
    # Get the MSE of the model on the testing data
    mse = metrics.mean_squared_error(y_test, model.predict(X_test))
    
    # Return mse as the objective value
    return mse

# Create the Optuna study
study = optuna.create_study(direction='minimize', sampler=TPESampler(seed=37))

# Optimize the objective function
study.optimize(objective, n_trials=100)

# Print the number of finished trials
print("Number of finished trials: ", len(study.trials))

# Print the best trial and save as a variable
print("Best trial:")
trial = study.best_trial
print("  Value: {}".format(trial.value))
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))
    
# Create the model using the best parameters
best_krr = KernelRidge(alpha=trial.params['alpha'], gamma=trial.params['gamma'], kernel=trial.params['kernel'])

# Fit the model
best_krr.fit(X_train, y_train)

# Make predictions
y_pred = best_krr.predict(X_test)

# Model Evaluation
print('Mean Squared Error:', mean_squared_error(y_test, y_pred))
print('Root Mean Squared Error:', np.sqrt(mean_squared_error(y_test, y_pred)))
print('R-Squared value:', r2_score(y_test, y_pred))

[I 2024-03-12 11:34:05,623] A new study created in memory with name: no-name-40328d99-587a-4e21-ba2c-bf30dd141284
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-12 11:34:05,633] Trial 0 finished with value: 5.246216173774505 and parameters: {'alpha': 0.9444966028573069, 'gamma': 0.3708327584362581, 'kernel': 'rbf'}. Best is trial 0 with value: 5.246216173774505.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-12 11:34:05,641] Trial 1 finished with value: 9.39708408673029 and parameters: {'alpha': 0.6200842101736271, 'gamma': 162.4382154062861, 'kernel': 'rbf'}. Best is trial 0 with value: 5.246216173774505.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-12 11:34:05,751] Trial 2 finished with value: 7.495528122048817 and parameters: {'alpha': 0.28197907007423595, 'gamma': 1099.2463

[I 2024-03-12 11:34:05,842] Trial 9 finished with value: 7.829932278835866 and parameters: {'alpha': 0.6402713827422477, 'gamma': 1.6968526427052133e-06, 'kernel': 'linear'}. Best is trial 0 with value: 5.246216173774505.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-12 11:34:05,890] Trial 10 finished with value: 5.184188702342875 and parameters: {'alpha': 0.9822334069719648, 'gamma': 0.009819189785151882, 'kernel': 'rbf'}. Best is trial 10 with value: 5.184188702342875.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-12 11:34:06,006] Trial 11 finished with value: 5.136954512866111 and parameters: {'alpha': 0.9693357342873596, 'gamma': 0.011542322822511587, 'kernel': 'rbf'}. Best is trial 11 with value: 5.136954512866111.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-12 11:34:06

  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-12 11:34:06,500] Trial 19 finished with value: 9.993883130959047 and parameters: {'alpha': 0.7119836943626693, 'gamma': 1031.0147426306964, 'kernel': 'rbf'}. Best is trial 15 with value: 4.537197987609212.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-12 11:34:06,563] Trial 20 finished with value: 8.55377687194945 and parameters: {'alpha': 0.8767841059707147, 'gamma': 2.8072967702708422, 'kernel': 'rbf'}. Best is trial 15 with value: 4.537197987609212.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-12 11:34:06,608] Trial 21 finished with value: 4.545237895262397 and parameters: {'alpha': 0.6853411537814692, 'gamma': 0.23122146190484735, 'kernel': 'rbf'}. Best is trial 15 with value: 4.537197987609212.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_logunif

  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-12 11:34:07,231] Trial 29 finished with value: 4.456639639639617 and parameters: {'alpha': 0.6715100899814398, 'gamma': 0.03897014864323282, 'kernel': 'rbf'}. Best is trial 25 with value: 4.351423054542829.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-12 11:34:07,294] Trial 30 finished with value: 6.138151944811567 and parameters: {'alpha': 0.6719221987874346, 'gamma': 4.381310027581516e-06, 'kernel': 'rbf'}. Best is trial 25 with value: 4.351423054542829.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-12 11:34:07,364] Trial 31 finished with value: 4.457561011393711 and parameters: {'alpha': 0.7336314480900816, 'gamma': 0.051185429448682686, 'kernel': 'rbf'}. Best is trial 25 with value: 4.351423054542829.
  alpha = trial.suggest_u

  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-12 11:34:07,788] Trial 38 finished with value: 7.81962745097823 and parameters: {'alpha': 0.6198260698673357, 'gamma': 24.431954044424433, 'kernel': 'linear'}. Best is trial 34 with value: 4.308623095257254.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-12 11:34:07,845] Trial 39 finished with value: 5.421223321321636 and parameters: {'alpha': 0.5603581535398895, 'gamma': 0.0019346487645805024, 'kernel': 'rbf'}. Best is trial 34 with value: 4.308623095257254.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-12 11:34:07,900] Trial 40 finished with value: 9.25517855866129 and parameters: {'alpha': 0.31676862693665686, 'gamma': 151.92417154287673, 'kernel': 'rbf'}. Best is trial 34 with value: 4.308623095257254.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_lo

[I 2024-03-12 11:34:08,373] Trial 47 finished with value: 6.130573400067097 and parameters: {'alpha': 0.027585438022680736, 'gamma': 0.012522736515244855, 'kernel': 'linear'}. Best is trial 46 with value: 3.356936045464392.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-12 11:34:08,440] Trial 48 finished with value: 5.0905790576342484 and parameters: {'alpha': 0.17458551245150522, 'gamma': 0.0009936823133233164, 'kernel': 'rbf'}. Best is trial 46 with value: 3.356936045464392.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-12 11:34:08,484] Trial 49 finished with value: 10.249366130513868 and parameters: {'alpha': 0.17776684918212393, 'gamma': 85498.63588704451, 'kernel': 'rbf'}. Best is trial 46 with value: 3.356936045464392.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-12 11:3

  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-12 11:34:09,087] Trial 58 finished with value: 6.881741445162755 and parameters: {'alpha': 0.10144308862432444, 'gamma': 0.006991031422412552, 'kernel': 'linear'}. Best is trial 46 with value: 3.356936045464392.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-12 11:34:09,155] Trial 59 finished with value: 6.087750178596227 and parameters: {'alpha': 0.2389987473970168, 'gamma': 1.5896668657085188e-05, 'kernel': 'rbf'}. Best is trial 46 with value: 3.356936045464392.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-12 11:34:09,224] Trial 60 finished with value: 5.217636389776911 and parameters: {'alpha': 0.13273084692548062, 'gamma': 0.0005698231175131856, 'kernel': 'rbf'}. Best is trial 46 with value: 3.356936045464392.
  alpha = trial.s

[I 2024-03-12 11:34:09,842] Trial 67 finished with value: 1.8623141886734693 and parameters: {'alpha': 0.000532105698242095, 'gamma': 0.012310219003380975, 'kernel': 'rbf'}. Best is trial 67 with value: 1.8623141886734693.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-12 11:34:09,910] Trial 68 finished with value: 3.082037918727155 and parameters: {'alpha': 0.07524408595644583, 'gamma': 0.018978181324132305, 'kernel': 'rbf'}. Best is trial 67 with value: 1.8623141886734693.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-12 11:34:09,958] Trial 69 finished with value: 1.9865141622836175 and parameters: {'alpha': 0.0023784449435464644, 'gamma': 0.018889791472361212, 'kernel': 'rbf'}. Best is trial 67 with value: 1.8623141886734693.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-12 

  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-12 11:34:10,580] Trial 78 finished with value: 2.703314752109033 and parameters: {'alpha': 0.031277851048741825, 'gamma': 0.3500613132807537, 'kernel': 'rbf'}. Best is trial 67 with value: 1.8623141886734693.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-12 11:34:10,611] Trial 79 finished with value: 7.991816371300018 and parameters: {'alpha': 0.02085343945636562, 'gamma': 2.583567141347694, 'kernel': 'rbf'}. Best is trial 67 with value: 1.8623141886734693.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-12 11:34:10,691] Trial 80 finished with value: 8.656620411910334 and parameters: {'alpha': 0.040937101844383504, 'gamma': 10.368665734374328, 'kernel': 'rbf'}. Best is trial 67 with value: 1.8623141886734693.
  alpha = trial.suggest_

  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-12 11:34:11,256] Trial 87 finished with value: 7.790334687028351 and parameters: {'alpha': 0.07214448707758069, 'gamma': 2.088538611183289, 'kernel': 'rbf'}. Best is trial 67 with value: 1.8623141886734693.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-12 11:34:11,314] Trial 88 finished with value: 7.14897838758155 and parameters: {'alpha': 0.15361730854564992, 'gamma': 0.08947868405172354, 'kernel': 'linear'}. Best is trial 67 with value: 1.8623141886734693.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-12 11:34:11,378] Trial 89 finished with value: 3.432293718030715 and parameters: {'alpha': 0.2978639791702058, 'gamma': 0.11673899137761447, 'kernel': 'rbf'}. Best is trial 67 with value: 1.8623141886734693.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest

  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-12 11:34:11,971] Trial 97 finished with value: 8.666348211472519 and parameters: {'alpha': 0.0515360908793915, 'gamma': 3.8381192359924, 'kernel': 'rbf'}. Best is trial 67 with value: 1.8623141886734693.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-12 11:34:12,030] Trial 98 finished with value: 2.303953303351841 and parameters: {'alpha': 0.08020672503178777, 'gamma': 0.15590558327543302, 'kernel': 'rbf'}. Best is trial 67 with value: 1.8623141886734693.
  alpha = trial.suggest_uniform('alpha', 0.0, 1.0)
  gamma = trial.suggest_loguniform('gamma', 1e-6, 1e6)
[I 2024-03-12 11:34:12,129] Trial 99 finished with value: 2.2361575942578913 and parameters: {'alpha': 0.07442862297931667, 'gamma': 0.16807599029987821, 'kernel': 'rbf'}. Best is trial 67 with value: 1.8623141886734693.


Number of finished trials:  100
Best trial:
  Value: 1.8623141886734693
  Params: 
    alpha: 0.000532105698242095
    gamma: 0.012310219003380975
    kernel: rbf
Mean Squared Error: 1.8623141886734693
Root Mean Squared Error: 1.364666328694846
R-Squared value: 0.537378603196605


# Polynomial Regression 

In [23]:
from sklearn.preprocessing import PolynomialFeatures

# Define the objective function for Optuna
def objective(trial):
    # Set the range for the degree of polynomial features to be between 1 and 5
    degree = trial.suggest_int('degree', 1, 5)
    
    # Create the polynomial features
    poly_features = PolynomialFeatures(degree=degree)
    X_train_poly = poly_features.fit_transform(X_train)
    X_test_poly = poly_features.transform(X_test)
    
    # Create the Polynomial Regression model
    model = LinearRegression()
    
    # Fit the model to the training data
    model.fit(X_train_poly, y_train)
    
    # Get the MSE of the model on the testing data
    mse = metrics.mean_squared_error(y_test, model.predict(X_test_poly))
    
    # Return mse as the objective value
    return mse

# Create the Optuna study
study = optuna.create_study(direction='minimize', sampler=TPESampler(seed=37))

# Optimize the objective function
study.optimize(objective, n_trials=100)

# Print the number of finished trials
print("Number of finished trials: ", len(study.trials))

# Print the best trial and save as a variable
print("Best trial:")
trial = study.best_trial
print("  Value: {}".format(trial.value))
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))
    
# Create the polynomial features using the best degree
best_degree = trial.params['degree']
poly_features = PolynomialFeatures(degree=best_degree)
X_train_poly = poly_features.fit_transform(X_train)
X_test_poly = poly_features.transform(X_test)

# Create the Polynomial Regression model using the best degree
best_model = LinearRegression()

# Fit the model
best_model.fit(X_train_poly, y_train)

# Make predictions
y_pred = best_model.predict(X_test_poly)

# Model Evaluation
print('Mean Squared Error:', mean_squared_error(y_test, y_pred))
print('Root Mean Squared Error:', np.sqrt(mean_squared_error(y_test, y_pred)))
print('R-Squared value:', r2_score(y_test, y_pred))


[I 2024-03-12 11:46:49,253] A new study created in memory with name: no-name-abcde2df-e4a6-4e01-b47b-0400e71f0277
[I 2024-03-12 11:46:49,277] Trial 0 finished with value: 327.6588384045944 and parameters: {'degree': 5}. Best is trial 0 with value: 327.6588384045944.
[I 2024-03-12 11:46:49,294] Trial 1 finished with value: 24.407551020655827 and parameters: {'degree': 3}. Best is trial 1 with value: 24.407551020655827.
[I 2024-03-12 11:46:49,313] Trial 2 finished with value: 1.998982273453711 and parameters: {'degree': 1}. Best is trial 2 with value: 1.998982273453711.
[I 2024-03-12 11:46:49,339] Trial 3 finished with value: 24.407551020655827 and parameters: {'degree': 3}. Best is trial 2 with value: 1.998982273453711.
[I 2024-03-12 11:46:49,350] Trial 4 finished with value: 350.0253004842595 and parameters: {'degree': 4}. Best is trial 2 with value: 1.998982273453711.
[I 2024-03-12 11:46:49,360] Trial 5 finished with value: 350.0253004842595 and parameters: {'degree': 4}. Best is tria

[I 2024-03-12 11:46:51,485] Trial 53 finished with value: 1.998982273453711 and parameters: {'degree': 1}. Best is trial 2 with value: 1.998982273453711.
[I 2024-03-12 11:46:51,528] Trial 54 finished with value: 1.998982273453711 and parameters: {'degree': 1}. Best is trial 2 with value: 1.998982273453711.
[I 2024-03-12 11:46:51,572] Trial 55 finished with value: 1.998982273453711 and parameters: {'degree': 1}. Best is trial 2 with value: 1.998982273453711.
[I 2024-03-12 11:46:51,600] Trial 56 finished with value: 8.425151700914546 and parameters: {'degree': 2}. Best is trial 2 with value: 1.998982273453711.
[I 2024-03-12 11:46:51,619] Trial 57 finished with value: 1.998982273453711 and parameters: {'degree': 1}. Best is trial 2 with value: 1.998982273453711.
[I 2024-03-12 11:46:51,690] Trial 58 finished with value: 8.425151700914546 and parameters: {'degree': 2}. Best is trial 2 with value: 1.998982273453711.
[I 2024-03-12 11:46:51,724] Trial 59 finished with value: 1.998982273453711 

Number of finished trials:  100
Best trial:
  Value: 1.998982273453711
  Params: 
    degree: 1
Mean Squared Error: 1.998982273453711
Root Mean Squared Error: 1.4138536959154264
R-Squared value: 0.5034285959078156


# Support Vector Regression (SVR)

In [25]:
from sklearn.svm import SVR

# Define the objective function for Optuna
def objective(trial):
    # Set the range for C (regularization parameter) to be between 0.1 and 10
    C = trial.suggest_float('C', 0.1, 10)
    # Set the range for epsilon (epsilon in the epsilon-SVR model) to be between 0.01 and 1
    epsilon = trial.suggest_float('epsilon', 0.01, 1)
    # Set the range for gamma (kernel coefficient) to be between 0.01 and 1
    gamma = trial.suggest_float('gamma', 0.01, 1)
    
    # Create the SVR model with the specified hyperparameters
    model = SVR(kernel='rbf', C=C, epsilon=epsilon, gamma=gamma)
    
    # Fit the model to the training data
    model.fit(X_train, y_train)
    
    # Get the MSE of the model on the testing data
    mse = metrics.mean_squared_error(y_test, model.predict(X_test))
    
    # Return mse as the objective value
    return mse

# Create the Optuna study
study = optuna.create_study(direction='minimize', sampler=TPESampler(seed=37))

# Optimize the objective function
study.optimize(objective, n_trials=100)

# Print the number of finished trials
print("Number of finished trials: ", len(study.trials))

# Print the best trial and save as a variable
print("Best trial:")
trial = study.best_trial
print("  Value: {}".format(trial.value))
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))
    
# Create the model using the best parameters
best_model = SVR(kernel='rbf', C=trial.params['C'], epsilon=trial.params['epsilon'], gamma=trial.params['gamma'])

# Fit the model
best_model.fit(X_train, y_train)

# Make predictions
y_pred = best_model.predict(X_test)

# Model Evaluation
print('Mean Squared Error:', mean_squared_error(y_test, y_pred))
print('Root Mean Squared Error:', np.sqrt(mean_squared_error(y_test, y_pred)))
print('R-Squared value:', r2_score(y_test, y_pred))

[I 2024-03-12 11:49:36,063] A new study created in memory with name: no-name-0307a26a-7799-41b0-a581-ebe4518d0d0b
[I 2024-03-12 11:49:36,141] Trial 0 finished with value: 3.2388522068544994 and parameters: {'C': 9.450516368287337, 'epsilon': 0.46945719256136353, 'gamma': 0.20086707866758605}. Best is trial 0 with value: 3.2388522068544994.
[I 2024-03-12 11:49:36,147] Trial 1 finished with value: 3.653954179638923 and parameters: {'C': 5.86075928111192, 'epsilon': 0.6238833680718908, 'gamma': 0.6873817772885423}. Best is trial 0 with value: 3.2388522068544994.
[I 2024-03-12 11:49:36,153] Trial 2 finished with value: 5.524598198859851 and parameters: {'C': 1.124031794697981, 'epsilon': 0.7480210086215892, 'gamma': 0.2891592793734936}. Best is trial 0 with value: 3.2388522068544994.
[I 2024-03-12 11:49:36,161] Trial 3 finished with value: 2.834943170233039 and parameters: {'C': 7.558903412728403, 'epsilon': 0.7947785732750089, 'gamma': 0.6311265903165661}. Best is trial 3 with value: 2.83

[I 2024-03-12 11:49:38,857] Trial 36 finished with value: 3.3669894400597733 and parameters: {'C': 5.402283423725547, 'epsilon': 0.8215931993504941, 'gamma': 0.5814896644669809}. Best is trial 24 with value: 2.0539364861779204.
[I 2024-03-12 11:49:38,934] Trial 37 finished with value: 2.579631714848507 and parameters: {'C': 7.57140582927733, 'epsilon': 0.9306286523080083, 'gamma': 0.2721760678287165}. Best is trial 24 with value: 2.0539364861779204.
[I 2024-03-12 11:49:39,013] Trial 38 finished with value: 3.2983334401195585 and parameters: {'C': 8.245264904634547, 'epsilon': 0.7654420078428912, 'gamma': 0.9923005866188921}. Best is trial 24 with value: 2.0539364861779204.
[I 2024-03-12 11:49:39,151] Trial 39 finished with value: 3.049423805225345 and parameters: {'C': 7.137184821062819, 'epsilon': 0.6670242955994157, 'gamma': 0.45807958489651823}. Best is trial 24 with value: 2.0539364861779204.
[I 2024-03-12 11:49:39,218] Trial 40 finished with value: 4.134284303161542 and parameters

[I 2024-03-12 11:49:42,238] Trial 72 finished with value: 2.1256499132323783 and parameters: {'C': 9.728180641631592, 'epsilon': 0.9681841922295518, 'gamma': 0.4024964122243991}. Best is trial 63 with value: 2.0511029048477414.
[I 2024-03-12 11:49:42,320] Trial 73 finished with value: 2.192712074641673 and parameters: {'C': 9.819514544865221, 'epsilon': 0.923217716970084, 'gamma': 0.2533601950015478}. Best is trial 63 with value: 2.0511029048477414.
[I 2024-03-12 11:49:42,412] Trial 74 finished with value: 2.2344005952907136 and parameters: {'C': 9.301609844064535, 'epsilon': 0.9675708533091492, 'gamma': 0.45637988379503136}. Best is trial 63 with value: 2.0511029048477414.
[I 2024-03-12 11:49:42,506] Trial 75 finished with value: 2.271058316932487 and parameters: {'C': 9.749922499355401, 'epsilon': 0.8162626251383888, 'gamma': 0.31248819110622156}. Best is trial 63 with value: 2.0511029048477414.
[I 2024-03-12 11:49:42,584] Trial 76 finished with value: 4.410880368044168 and parameter

Number of finished trials:  100
Best trial:
  Value: 2.0511029048477414
  Params: 
    C: 9.974658190400055
    epsilon: 0.9996892294206571
    gamma: 0.36118651177601996
Mean Squared Error: 2.0511029048477414
Root Mean Squared Error: 1.4321672056180246
R-Squared value: 0.4904811998967503
