##Initial Code

In [1]:
# Importing necessary libraries for data analysis and manipulation
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


# For handling warnings
import warnings
warnings.filterwarnings('ignore')

In [2]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [3]:

df_aapl = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/stocks/AAPL.csv')

In [4]:
import numpy as np
from scipy.stats import boxcox

df_aapl['Close_log'] = np.log(df_aapl['Close'] + 1)
df_aapl['Close_sqrt'] = np.sqrt(df_aapl['Close'])
df_aapl['Close_boxcox'], _ = boxcox(df_aapl['Close'] + 1)


In [5]:

skew_original = df_aapl['Close'].skew()
skew_log = df_aapl['Close_log'].skew()
skew_sqrt = df_aapl['Close_sqrt'].skew()
skew_boxcox = pd.Series(df_aapl['Close_boxcox']).skew()

print(f"Original Skewness: {skew_original}")
print(f"Log Transformation Skewness: {skew_log}")
print(f"Square Root Transformation Skewness: {skew_sqrt}")
print(f"Box-Cox Transformation Skewness: {skew_boxcox}")


Original Skewness: 2.5045276102319933
Log Transformation Skewness: 0.8535555176510303
Square Root Transformation Skewness: 1.6211545809555206
Box-Cox Transformation Skewness: 0.43527466713563334


In [6]:

df_aapl['Open_log'] = np.log(df_aapl['Open'])
df_aapl['High_log'] = np.log(df_aapl['High'])
df_aapl['Low_log'] = np.log(df_aapl['Low'])
df_aapl['Adj Close_log'] = np.log(df_aapl['Adj Close'])
df_aapl['Volume_log'] = np.log(df_aapl['Volume'])


df_aapl['Open_sqrt'] = np.sqrt(df_aapl['Open'])
df_aapl['High_sqrt'] = np.sqrt(df_aapl['High'])
df_aapl['Low_sqrt'] = np.sqrt(df_aapl['Low'])
df_aapl['Adj Close_sqrt'] = np.sqrt(df_aapl['Adj Close'])
df_aapl['Volume_sqrt'] = np.sqrt(df_aapl['Volume'])

from scipy.stats import boxcox
df_aapl['Open_boxcox'], _ = boxcox(df_aapl['Open'])
df_aapl['High_boxcox'], _ = boxcox(df_aapl['High'])
df_aapl['Low_boxcox'], _ = boxcox(df_aapl['Low'])
df_aapl['Adj Close_boxcox'], _ = boxcox(df_aapl['Adj Close'])

In [7]:

skewness_before = df_aapl[['Open', 'High', 'Low', 'Adj Close', 'Volume']].skew()
skewness_after = df_aapl[['Open_log', 'High_log', 'Low_log', 'Adj Close_log',
                          'Open_sqrt', 'High_sqrt', 'Low_sqrt', 'Adj Close_sqrt', 'Volume_sqrt',
                          'Open_boxcox', 'High_boxcox', 'Low_boxcox', 'Adj Close_boxcox']].skew()

print("Skewness Before Transformation:\n", skewness_before)
print("\nSkewness After Transformation:\n", skewness_after)


Skewness Before Transformation:
 Open         2.504632
High         2.502208
Low          2.506714
Adj Close    2.550677
Volume       3.565699
dtype: float64

Skewness After Transformation:
 Open_log            0.482872
High_log            0.481997
Low_log             0.484246
Adj Close_log       0.494009
Open_sqrt           1.620771
High_sqrt           1.621456
Low_sqrt            1.620661
Adj Close_sqrt      1.679402
Volume_sqrt         1.299776
Open_boxcox         0.181226
High_boxcox         0.179749
Low_boxcox          0.182882
Adj Close_boxcox    0.180085
dtype: float64


In [8]:
from scipy import stats

df_aapl['Open_boxcox'], _ = stats.boxcox(df_aapl['Open'] + 1)
df_aapl['High_boxcox'], _ = stats.boxcox(df_aapl['High'] + 1)
df_aapl['Low_boxcox'], _ = stats.boxcox(df_aapl['Low'] + 1)
df_aapl['Adj Close_boxcox'], _ = stats.boxcox(df_aapl['Adj Close'] + 1)
df_aapl['Close_boxcox'], _ = stats.boxcox(df_aapl['Close'] + 1)

skewness_after_boxcox = df_aapl[['Open_boxcox', 'High_boxcox', 'Low_boxcox', 'Adj Close_boxcox', 'Close_boxcox']].skew()

print("Skewness After Box-Cox Transformation:")
print(skewness_after_boxcox)


Skewness After Box-Cox Transformation:
Open_boxcox         0.435237
High_boxcox         0.433381
Low_boxcox          0.437331
Adj Close_boxcox    0.458762
Close_boxcox        0.435275
dtype: float64


In [9]:

df_aapl_cleaned = df_aapl[['Date', 'Open', 'High', 'Low', 'Adj Close', 'Close', 'Volume',
                           'Open_boxcox', 'High_boxcox', 'Low_boxcox', 'Adj Close_boxcox',
                           'Close_boxcox']]

print(df_aapl_cleaned.head())


         Date      Open      High       Low  Adj Close     Close     Volume  \
0  1980-12-12  0.128348  0.128906  0.128348   0.098943  0.128348  469033600   
1  1980-12-15  0.122210  0.122210  0.121652   0.093781  0.121652  175884800   
2  1980-12-16  0.113281  0.113281  0.112723   0.086898  0.112723  105728000   
3  1980-12-17  0.115513  0.116071  0.115513   0.089049  0.115513   86441600   
4  1980-12-18  0.118862  0.119420  0.118862   0.091630  0.118862   73449600   

   Open_boxcox  High_boxcox  Low_boxcox  Adj Close_boxcox  Close_boxcox  
0     0.117689     0.118173    0.117674          0.092374      0.117689  
1     0.112503     0.112516    0.112016          0.087857      0.112030  
2     0.104886     0.104897    0.104395          0.081785      0.104407  
3     0.106798     0.107287    0.106786          0.083688      0.106798  
4     0.109657     0.110145    0.109644          0.085966      0.109657  


##Train Validation Test

In [11]:
from sklearn.model_selection import train_test_split

X = df_aapl_cleaned[['Open_boxcox', 'High_boxcox', 'Low_boxcox']]
Y = df_aapl_cleaned['Close_boxcox']

X_train, X_temp, Y_train, Y_temp = train_test_split(X, Y, test_size=0.3, shuffle=False)
X_val, X_test, Y_val, Y_test = train_test_split(X_temp, Y_temp, test_size=0.5, shuffle=False)

print(f"Training set: {X_train.shape}, Validation set: {X_val.shape}, Test set: {X_test.shape}")


Training set: (7736, 3), Validation set: (1658, 3), Test set: (1658, 3)


#LSTM

## Initial

In [12]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import time
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error

# Define LSTM Model
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)  # LSTM output directly connected to FC layer

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])  # Take the last time step output
        return out

# Function to calculate metrics
def calculate_metrics(y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    mape = mean_absolute_percentage_error(y_true, y_pred) * 100
    return mae, mse, rmse, r2, mape

# Convert datasets to PyTorch tensors
Y_train_torch = torch.tensor(Y_train.values, dtype=torch.float32).unsqueeze(1)
Y_val_torch = torch.tensor(Y_val.values, dtype=torch.float32).unsqueeze(1)
Y_test_torch = torch.tensor(Y_test.values, dtype=torch.float32).unsqueeze(1)

X_train_torch = torch.tensor(X_train.values, dtype=torch.float32).unsqueeze(1)
X_val_torch = torch.tensor(X_val.values, dtype=torch.float32).unsqueeze(1)
X_test_torch = torch.tensor(X_test.values, dtype=torch.float32).unsqueeze(1)

# LSTM Configurations (2, 3, and 5 layers)
lstm_layers = [2, 3, 5]
hidden_dim = 64
output_dim = 1
input_dim = X_train.shape[1]

# Dictionary to store results
lstm_results = {}

for num_layers in lstm_layers:
    print(f"\nTraining LSTM with {num_layers} layers...")
    model = LSTMModel(input_dim, hidden_dim, num_layers, output_dim)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    num_epochs = 100

    start_train = time.time()
    for epoch in range(num_epochs):
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train_torch)
        loss = criterion(outputs, Y_train_torch)
        loss.backward()
        optimizer.step()
    end_train = time.time()
    train_time = end_train - start_train

    # Evaluation
    model.eval()
    with torch.no_grad():
        start_val = time.time()
        Y_train_pred = model(X_train_torch).numpy()
        end_val = time.time()
        train_eval_time = end_val - start_val

        start_val = time.time()
        Y_val_pred = model(X_val_torch).numpy()
        end_val = time.time()
        val_time = end_val - start_val

        start_test = time.time()
        Y_test_pred = model(X_test_torch).numpy()
        end_test = time.time()
        test_time = end_test - start_test

    # Calculate Metrics
    train_metrics = calculate_metrics(Y_train, Y_train_pred)
    val_metrics = calculate_metrics(Y_val, Y_val_pred)
    test_metrics = calculate_metrics(Y_test, Y_test_pred)

    # Store results
    lstm_results[num_layers] = {
        "train_metrics": train_metrics,
        "val_metrics": val_metrics,
        "test_metrics": test_metrics,
        "train_time": train_time,
        "val_time": val_time,
        "test_time": test_time
    }

    # Print Results
    print(f"\nLSTM ({num_layers} layers) Metrics:")
    print(f"Training Time: {train_time:.4f} sec, Validation Time: {val_time:.4f} sec, Testing Time: {test_time:.4f} sec")

    print("Training set metrics:")
    print(f"MAE: {train_metrics[0]:.4f}, MSE: {train_metrics[1]:.4f}, RMSE: {train_metrics[2]:.4f}, R²: {train_metrics[3]:.4f}, MAPE: {train_metrics[4]:.2f}%")

    print("Validation set metrics:")
    print(f"MAE: {val_metrics[0]:.4f}, MSE: {val_metrics[1]:.4f}, RMSE: {val_metrics[2]:.4f}, R²: {val_metrics[3]:.4f}, MAPE: {val_metrics[4]:.2f}%")

    print("Test set metrics:")
    print(f"MAE: {test_metrics[0]:.4f}, MSE: {test_metrics[1]:.4f}, RMSE: {test_metrics[2]:.4f}, R²: {test_metrics[3]:.4f}, MAPE: {test_metrics[4]:.2f}%")



Training LSTM with 2 layers...

LSTM (2 layers) Metrics:
Training Time: 21.6387 sec, Validation Time: 0.0173 sec, Testing Time: 0.0142 sec
Training set metrics:
MAE: 0.0334, MSE: 0.0018, RMSE: 0.0419, R²: 0.9900, MAPE: 19.14%
Validation set metrics:
MAE: 0.0726, MSE: 0.0053, RMSE: 0.0731, R²: 0.0919, MAPE: 4.14%
Test set metrics:
MAE: 0.0866, MSE: 0.0075, RMSE: 0.0866, R²: -0.2437, MAPE: 4.30%

Training LSTM with 3 layers...

LSTM (3 layers) Metrics:
Training Time: 31.3031 sec, Validation Time: 0.0184 sec, Testing Time: 0.0190 sec
Training set metrics:
MAE: 0.0471, MSE: 0.0030, RMSE: 0.0547, R²: 0.9829, MAPE: 19.93%
Validation set metrics:
MAE: 0.1160, MSE: 0.0135, RMSE: 0.1161, R²: -1.2908, MAPE: 6.65%
Test set metrics:
MAE: 0.0980, MSE: 0.0097, RMSE: 0.0987, R²: -0.6153, MAPE: 4.89%

Training LSTM with 5 layers...

LSTM (5 layers) Metrics:
Training Time: 58.5818 sec, Validation Time: 0.0529 sec, Testing Time: 0.0424 sec
Training set metrics:
MAE: 0.0490, MSE: 0.0033, RMSE: 0.0571, R

##Optuna

In [None]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.2.1-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.1-py3-none-any.whl.metadata (7.2 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.9-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.2.1-py3-none-any.whl (383 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m383.6/383.6 kB[0m [31m23.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.15.1-py3-none-any.whl (231 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m231.8/231.8 kB[0m [31m16.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Downloading Mako-1.3.9-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.5/78.5 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: M

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import time
import optuna
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error

# Define LSTM Model
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])
        return out

# Function to calculate metrics
def calculate_metrics(y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    mape = mean_absolute_percentage_error(y_true, y_pred) * 100
    return mae, mse, rmse, r2, mape

# LSTM Configurations (2, 3, and 5 layers)
lstm_layers = [2, 3, 5]
output_dim = 1
input_dim = X_train.shape[1]

# Optuna Optimization Function
def objective(trial, num_layers):
    hidden_dim = trial.suggest_int("hidden_dim", 32, 128)
    lr = trial.suggest_loguniform("lr", 1e-4, 1e-2)

    model = LSTMModel(input_dim, hidden_dim, num_layers, output_dim)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    num_epochs = 50
    patience = 5
    best_val_loss = float('inf')
    early_stop_counter = 0

    for epoch in range(num_epochs):
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train_torch)
        loss = criterion(outputs, Y_train_torch)
        loss.backward()
        optimizer.step()

        model.eval()
        with torch.no_grad():
            Y_val_pred = model(X_val_torch).numpy()
            val_loss = mean_squared_error(Y_val, Y_val_pred)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            early_stop_counter = 0
        else:
            early_stop_counter += 1

        if early_stop_counter >= patience:
            break

    return best_val_loss

# Dictionary to store results
lstm_results = {}

for num_layers in lstm_layers:
    print(f"\nOptimizing LSTM with {num_layers} layers using Optuna...")

    study = optuna.create_study(direction="minimize")
    study.optimize(lambda trial: objective(trial, num_layers), n_trials=10)

    best_params = study.best_params
    hidden_dim = best_params["hidden_dim"]
    lr = best_params["lr"]

    print(f"Best Params for {num_layers} layers - Hidden Dim: {hidden_dim}, LR: {lr}")

    model = LSTMModel(input_dim, hidden_dim, num_layers, output_dim)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    num_epochs = 100
    patience = 10
    best_val_loss = float('inf')
    early_stop_counter = 0

    start_train = time.time()
    for epoch in range(num_epochs):
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train_torch)
        loss = criterion(outputs, Y_train_torch)
        loss.backward()
        optimizer.step()

        model.eval()
        with torch.no_grad():
            Y_val_pred = model(X_val_torch).numpy()
            val_loss = mean_squared_error(Y_val, Y_val_pred)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            early_stop_counter = 0
        else:
            early_stop_counter += 1

        if early_stop_counter >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

    end_train = time.time()
    train_time = end_train - start_train

    model.eval()
    with torch.no_grad():
        start_val = time.time()
        Y_train_pred = model(X_train_torch).numpy()
        end_val = time.time()
        train_eval_time = end_val - start_val

        start_val = time.time()
        Y_val_pred = model(X_val_torch).numpy()
        end_val = time.time()
        val_time = end_val - start_val

        start_test = time.time()
        Y_test_pred = model(X_test_torch).numpy()
        end_test = time.time()
        test_time = end_test - start_test

    train_metrics = calculate_metrics(Y_train, Y_train_pred)
    val_metrics = calculate_metrics(Y_val, Y_val_pred)
    test_metrics = calculate_metrics(Y_test, Y_test_pred)

    lstm_results[num_layers] = {
        "train_metrics": train_metrics,
        "val_metrics": val_metrics,
        "test_metrics": test_metrics,
        "train_time": train_time,
        "val_time": val_time,
        "test_time": test_time
    }

    print(f"\nLSTM ({num_layers} layers) Metrics:")
    print(f"Training Time: {train_time:.4f} sec, Validation Time: {val_time:.4f} sec, Testing Time: {test_time:.4f} sec")

    print("Training set metrics:")
    print(f"MAE: {train_metrics[0]:.4f}, MSE: {train_metrics[1]:.4f}, RMSE: {train_metrics[2]:.4f}, R²: {train_metrics[3]:.4f}, MAPE: {train_metrics[4]:.2f}%")

    print("Validation set metrics:")
    print(f"MAE: {val_metrics[0]:.4f}, MSE: {val_metrics[1]:.4f}, RMSE: {val_metrics[2]:.4f}, R²: {val_metrics[3]:.4f}, MAPE: {val_metrics[4]:.2f}%")

    print("Test set metrics:")
    print(f"MAE: {test_metrics[0]:.4f}, MSE: {test_metrics[1]:.4f}, RMSE: {test_metrics[2]:.4f}, R²: {test_metrics[3]:.4f}, MAPE: {test_metrics[4]:.2f}%")

[I 2025-03-12 05:53:44,978] A new study created in memory with name: no-name-2430c4ad-7db9-49d5-b458-23bafb1194d3



Optimizing LSTM with 2 layers using Optuna...


[I 2025-03-12 05:54:02,400] Trial 0 finished with value: 0.04651753221181586 and parameters: {'hidden_dim': 122, 'lr': 0.003077529655765805}. Best is trial 0 with value: 0.04651753221181586.
[I 2025-03-12 05:54:21,958] Trial 1 finished with value: 0.037621788033646755 and parameters: {'hidden_dim': 118, 'lr': 0.0017894994745394272}. Best is trial 1 with value: 0.037621788033646755.
[I 2025-03-12 05:54:41,753] Trial 2 finished with value: 0.05117221209141317 and parameters: {'hidden_dim': 124, 'lr': 0.0011337036357827077}. Best is trial 1 with value: 0.037621788033646755.
[I 2025-03-12 05:54:53,618] Trial 3 finished with value: 0.06570410271790128 and parameters: {'hidden_dim': 114, 'lr': 0.0022621777804527816}. Best is trial 1 with value: 0.037621788033646755.
[I 2025-03-12 05:55:01,417] Trial 4 finished with value: 3.1210358708843216 and parameters: {'hidden_dim': 49, 'lr': 0.0002781186916902079}. Best is trial 1 with value: 0.037621788033646755.
[I 2025-03-12 05:55:06,152] Trial 5 fi

Best Params for 2 layers - Hidden Dim: 118, LR: 0.0017894994745394272
Early stopping at epoch 36


[I 2025-03-12 05:56:12,311] A new study created in memory with name: no-name-46c3af5d-1e7f-43ae-98dd-8e3d8c38d79b



LSTM (2 layers) Metrics:
Training Time: 17.9259 sec, Validation Time: 0.0294 sec, Testing Time: 0.0308 sec
Training set metrics:
MAE: 0.1350, MSE: 0.0256, RMSE: 0.1600, R²: 0.8537, MAPE: 58.04%
Validation set metrics:
MAE: 0.4124, MSE: 0.1705, RMSE: 0.4129, R²: -27.9823, MAPE: 23.61%
Test set metrics:
MAE: 0.4827, MSE: 0.2335, RMSE: 0.4832, R²: -37.6857, MAPE: 23.95%

Optimizing LSTM with 3 layers using Optuna...


[I 2025-03-12 05:56:25,297] Trial 0 finished with value: 2.711929630328987 and parameters: {'hidden_dim': 47, 'lr': 0.00045914268703724866}. Best is trial 0 with value: 2.711929630328987.
[I 2025-03-12 05:56:31,233] Trial 1 finished with value: 0.3832047882099273 and parameters: {'hidden_dim': 49, 'lr': 0.0027771895746656616}. Best is trial 1 with value: 0.3832047882099273.
[I 2025-03-12 05:56:44,218] Trial 2 finished with value: 0.28953137169245086 and parameters: {'hidden_dim': 83, 'lr': 0.00206639658042873}. Best is trial 2 with value: 0.28953137169245086.
[I 2025-03-12 05:57:18,496] Trial 3 finished with value: 2.4604542816649806 and parameters: {'hidden_dim': 110, 'lr': 0.00019417014039787003}. Best is trial 2 with value: 0.28953137169245086.
[I 2025-03-12 05:57:27,167] Trial 4 finished with value: 2.8758925260374126 and parameters: {'hidden_dim': 42, 'lr': 0.00034177012464908525}. Best is trial 2 with value: 0.28953137169245086.
[I 2025-03-12 05:57:54,040] Trial 5 finished with v

Best Params for 3 layers - Hidden Dim: 99, LR: 0.0013394787225778622
Early stopping at epoch 39


[I 2025-03-12 05:59:00,483] A new study created in memory with name: no-name-eddca303-4914-405f-a5cd-ca55259014fc



LSTM (3 layers) Metrics:
Training Time: 22.2688 sec, Validation Time: 0.0336 sec, Testing Time: 0.0330 sec
Training set metrics:
MAE: 0.1863, MSE: 0.0451, RMSE: 0.2125, R²: 0.7421, MAPE: 83.03%
Validation set metrics:
MAE: 0.5273, MSE: 0.2788, RMSE: 0.5280, R²: -46.4013, MAPE: 30.18%
Test set metrics:
MAE: 0.6257, MSE: 0.3923, RMSE: 0.6263, R²: -64.0004, MAPE: 31.03%

Optimizing LSTM with 5 layers using Optuna...


[I 2025-03-12 05:59:22,532] Trial 0 finished with value: 0.8711299455473696 and parameters: {'hidden_dim': 68, 'lr': 0.0012053178706390789}. Best is trial 0 with value: 0.8711299455473696.
[I 2025-03-12 06:00:15,466] Trial 1 finished with value: 2.800176766629129 and parameters: {'hidden_dim': 103, 'lr': 0.0001227770177631478}. Best is trial 0 with value: 0.8711299455473696.
[I 2025-03-12 06:00:24,443] Trial 2 finished with value: 0.7451103741238901 and parameters: {'hidden_dim': 92, 'lr': 0.008281463580527138}. Best is trial 2 with value: 0.7451103741238901.
[I 2025-03-12 06:01:23,919] Trial 3 finished with value: 2.4446187821917045 and parameters: {'hidden_dim': 120, 'lr': 0.00019087451666886644}. Best is trial 2 with value: 0.7451103741238901.
[I 2025-03-12 06:01:31,337] Trial 4 finished with value: 0.8489208349149941 and parameters: {'hidden_dim': 48, 'lr': 0.004481448413800887}. Best is trial 2 with value: 0.7451103741238901.
[I 2025-03-12 06:01:47,832] Trial 5 finished with value

Best Params for 5 layers - Hidden Dim: 92, LR: 0.008281463580527138
Early stopping at epoch 23

LSTM (5 layers) Metrics:
Training Time: 19.2698 sec, Validation Time: 0.0539 sec, Testing Time: 0.0534 sec
Training set metrics:
MAE: 0.1093, MSE: 0.0165, RMSE: 0.1283, R²: 0.9060, MAPE: 35.17%
Validation set metrics:
MAE: 0.3217, MSE: 0.1089, RMSE: 0.3299, R²: -17.5096, MAPE: 18.27%
Test set metrics:
MAE: 0.5825, MSE: 0.3450, RMSE: 0.5874, R²: -56.1737, MAPE: 28.79%


##BOHB

In [None]:
!pip install ConfigSpace hpbandster

Collecting ConfigSpace
  Downloading configspace-1.2.1.tar.gz (130 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/131.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m131.0/131.0 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting hpbandster
  Downloading hpbandster-0.7.4.tar.gz (51 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.3/51.3 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting Pyro4 (from hpbandster)
  Downloading Pyro4-4.82-py2.py3-none-any.whl.metadata (2.2 kB)
Collecting serpent (from hpbandster)
  Downloading serpent-1.41-py3-none-any.whl.metadata (5.8 kB)
Collecting netifaces (from hpbandster)
  Downloading netifaces-0.11.0.

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import ConfigSpace as CS
import ConfigSpace.hyperparameters as CSH
import hpbandster.core.nameserver as hpns
from hpbandster.optimizers import BOHB
from hpbandster.core.worker import Worker
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error
import time

# Check for GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Define LSTM Model
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])
        return out

# Function to calculate metrics
def calculate_metrics(y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    mape = mean_absolute_percentage_error(y_true, y_pred) * 100
    return mae, mse, rmse, r2, mape

# Convert datasets to PyTorch tensors and move to GPU
Y_train_torch = torch.tensor(Y_train.values, dtype=torch.float32).unsqueeze(1).to(device)
Y_val_torch = torch.tensor(Y_val.values, dtype=torch.float32).unsqueeze(1).to(device)
Y_test_torch = torch.tensor(Y_test.values, dtype=torch.float32).unsqueeze(1).to(device)

X_train_torch = torch.tensor(X_train.values, dtype=torch.float32).unsqueeze(1).to(device)
X_val_torch = torch.tensor(X_val.values, dtype=torch.float32).unsqueeze(1).to(device)
X_test_torch = torch.tensor(X_test.values, dtype=torch.float32).unsqueeze(1).to(device)

# LSTM Configuration
num_layers = 2
hidden_dim = 64
output_dim = 1
input_dim = X_train.shape[1]

print(f"Training LSTM with {num_layers} layers...")

lstm_model = LSTMModel(input_dim, hidden_dim, num_layers, output_dim).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(lstm_model.parameters(), lr=0.001)
num_epochs = 100

# Train LSTM
start_time = time.time()
for epoch in range(num_epochs):
    lstm_model.train()
    optimizer.zero_grad()
    outputs = lstm_model(X_train_torch)
    loss = criterion(outputs, Y_train_torch)
    loss.backward()
    optimizer.step()
train_time = time.time() - start_time

# Extract Feature Representations
lstm_model.eval()
with torch.no_grad():
    val_start = time.time()
    train_features = lstm_model(X_train_torch).cpu().numpy()
    val_features = lstm_model(X_val_torch).cpu().numpy()
    val_time = time.time() - val_start

    test_start = time.time()
    test_features = lstm_model(X_test_torch).cpu().numpy()
    test_time = time.time() - test_start

# Define ConfigSpace for BOHB
def get_config_space():
    cs = CS.ConfigurationSpace()
    cs.add_hyperparameter(CSH.UniformIntegerHyperparameter("hidden_dim", 32, 128, default_value=64))
    cs.add_hyperparameter(CSH.UniformIntegerHyperparameter("num_layers", 1, 5, default_value=2))
    cs.add_hyperparameter(CSH.UniformFloatHyperparameter("learning_rate", 0.0001, 0.01, default_value=0.001, log=True))
    return cs

# BOHB Worker for LSTM
class LSTMWorker(Worker):
    def compute(self, config, budget, **kwargs):
        model = LSTMModel(input_dim, config["hidden_dim"], config["num_layers"], output_dim).to(device)
        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=config["learning_rate"])
        num_epochs = 100

        for epoch in range(num_epochs):
            model.train()
            optimizer.zero_grad()
            outputs = model(X_train_torch)
            loss = criterion(outputs, Y_train_torch)
            loss.backward()
            optimizer.step()

        model.eval()
        with torch.no_grad():
            Y_val_pred = model(X_val_torch).cpu().numpy()
        mae = mean_absolute_error(Y_val, Y_val_pred)
        return {"loss": mae, "info": config}

# Run BOHB
NS = hpns.NameServer(run_id="lstm_bohb", host="127.0.0.2", port=None)
NS.start()

worker = LSTMWorker(nameserver="127.0.0.2", run_id="lstm_bohb")
worker.run(background=True)

bohb = BOHB(configspace=get_config_space(), run_id="lstm_bohb", nameserver="127.0.0.2", min_budget=1, max_budget=3)
res = bohb.run(n_iterations=25)
bohb.shutdown()
NS.shutdown()

# Train Best LSTM Model
best_config = res.get_incumbent_id()
best_params = res.get_id2config_mapping()[best_config]["config"]

best_lstm_model = LSTMModel(input_dim, best_params["hidden_dim"], best_params["num_layers"], output_dim).to(device)
optimizer = optim.Adam(best_lstm_model.parameters(), lr=best_params["learning_rate"])
criterion = nn.MSELoss()

for epoch in range(100):
    best_lstm_model.train()
    optimizer.zero_grad()
    outputs = best_lstm_model(X_train_torch)
    loss = criterion(outputs, Y_train_torch)
    loss.backward()
    optimizer.step()

# Predictions
best_lstm_model.eval()
with torch.no_grad():
    Y_train_pred = best_lstm_model(X_train_torch).cpu().numpy()
    Y_val_pred = best_lstm_model(X_val_torch).cpu().numpy()
    Y_test_pred = best_lstm_model(X_test_torch).cpu().numpy()

# Calculate Metrics
train_metrics = calculate_metrics(Y_train, Y_train_pred)
val_metrics = calculate_metrics(Y_val, Y_val_pred)
test_metrics = calculate_metrics(Y_test, Y_test_pred)

# Print Results
print("Train Metrics:", train_metrics, "Time:", train_time)
print("Validation Metrics:", val_metrics, "Time:", val_time)
print("Test Metrics:", test_metrics, "Time:", test_time)

Using device: cuda
Training LSTM with 2 layers...
Train Metrics: (0.010655230130874207, 0.00016445769845203624, 0.0128241061463182, 0.9990603544597149, 5.853473452873122) Time: 1.545008659362793
Validation Metrics: (0.0056614460681615195, 4.700666093324246e-05, 0.006856140381675572, 0.9920077662513312, 0.3324592692614566) Time: 0.3037400245666504
Test Metrics: (0.022358513718004992, 0.0005990847309494662, 0.024476207446201017, 0.9007327416491633, 1.0915918018252004) Time: 0.0021195411682128906


In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import ConfigSpace as CS
import ConfigSpace.hyperparameters as CSH
import hpbandster.core.nameserver as hpns
from hpbandster.optimizers import BOHB
from hpbandster.core.worker import Worker
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error
import time

# Check for GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Define LSTM Model
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])
        return out

# Function to calculate metrics
def calculate_metrics(y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    mape = mean_absolute_percentage_error(y_true, y_pred) * 100
    return mae, mse, rmse, r2, mape

# Convert datasets to PyTorch tensors and move to GPU
Y_train_torch = torch.tensor(Y_train.values, dtype=torch.float32).unsqueeze(1).to(device)
Y_val_torch = torch.tensor(Y_val.values, dtype=torch.float32).unsqueeze(1).to(device)
Y_test_torch = torch.tensor(Y_test.values, dtype=torch.float32).unsqueeze(1).to(device)

X_train_torch = torch.tensor(X_train.values, dtype=torch.float32).unsqueeze(1).to(device)
X_val_torch = torch.tensor(X_val.values, dtype=torch.float32).unsqueeze(1).to(device)
X_test_torch = torch.tensor(X_test.values, dtype=torch.float32).unsqueeze(1).to(device)

# LSTM Configuration
num_layers = 3
hidden_dim = 64
output_dim = 1
input_dim = X_train.shape[1]

print(f"Training LSTM with {num_layers} layers...")

lstm_model = LSTMModel(input_dim, hidden_dim, num_layers, output_dim).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(lstm_model.parameters(), lr=0.001)
num_epochs = 100

# Train LSTM
start_time = time.time()
for epoch in range(num_epochs):
    lstm_model.train()
    optimizer.zero_grad()
    outputs = lstm_model(X_train_torch)
    loss = criterion(outputs, Y_train_torch)
    loss.backward()
    optimizer.step()
train_time = time.time() - start_time

# Extract Feature Representations
lstm_model.eval()
with torch.no_grad():
    val_start = time.time()
    train_features = lstm_model(X_train_torch).cpu().numpy()
    val_features = lstm_model(X_val_torch).cpu().numpy()
    val_time = time.time() - val_start

    test_start = time.time()
    test_features = lstm_model(X_test_torch).cpu().numpy()
    test_time = time.time() - test_start

# Define ConfigSpace for BOHB
def get_config_space():
    cs = CS.ConfigurationSpace()
    cs.add_hyperparameter(CSH.UniformIntegerHyperparameter("hidden_dim", 32, 128, default_value=64))
    cs.add_hyperparameter(CSH.UniformIntegerHyperparameter("num_layers", 1, 5, default_value=2))
    cs.add_hyperparameter(CSH.UniformFloatHyperparameter("learning_rate", 0.0001, 0.01, default_value=0.001, log=True))
    return cs

# BOHB Worker for LSTM
class LSTMWorker(Worker):
    def compute(self, config, budget, **kwargs):
        model = LSTMModel(input_dim, config["hidden_dim"], config["num_layers"], output_dim).to(device)
        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=config["learning_rate"])
        num_epochs = 100

        for epoch in range(num_epochs):
            model.train()
            optimizer.zero_grad()
            outputs = model(X_train_torch)
            loss = criterion(outputs, Y_train_torch)
            loss.backward()
            optimizer.step()

        model.eval()
        with torch.no_grad():
            Y_val_pred = model(X_val_torch).cpu().numpy()
        mae = mean_absolute_error(Y_val, Y_val_pred)
        return {"loss": mae, "info": config}

# Run BOHB
NS = hpns.NameServer(run_id="lstm_bohb", host="127.0.0.2", port=None)
NS.start()

worker = LSTMWorker(nameserver="127.0.0.2", run_id="lstm_bohb")
worker.run(background=True)

bohb = BOHB(configspace=get_config_space(), run_id="lstm_bohb", nameserver="127.0.0.2", min_budget=1, max_budget=3)
res = bohb.run(n_iterations=25)
bohb.shutdown()
NS.shutdown()

# Train Best LSTM Model
best_config = res.get_incumbent_id()
best_params = res.get_id2config_mapping()[best_config]["config"]

best_lstm_model = LSTMModel(input_dim, best_params["hidden_dim"], best_params["num_layers"], output_dim).to(device)
optimizer = optim.Adam(best_lstm_model.parameters(), lr=best_params["learning_rate"])
criterion = nn.MSELoss()

for epoch in range(100):
    best_lstm_model.train()
    optimizer.zero_grad()
    outputs = best_lstm_model(X_train_torch)
    loss = criterion(outputs, Y_train_torch)
    loss.backward()
    optimizer.step()

# Predictions
best_lstm_model.eval()
with torch.no_grad():
    Y_train_pred = best_lstm_model(X_train_torch).cpu().numpy()
    Y_val_pred = best_lstm_model(X_val_torch).cpu().numpy()
    Y_test_pred = best_lstm_model(X_test_torch).cpu().numpy()

# Calculate Metrics
train_metrics = calculate_metrics(Y_train, Y_train_pred)
val_metrics = calculate_metrics(Y_val, Y_val_pred)
test_metrics = calculate_metrics(Y_test, Y_test_pred)

# Print Results
print("Train Metrics:", train_metrics, "Time:", train_time)
print("Validation Metrics:", val_metrics, "Time:", val_time)
print("Test Metrics:", test_metrics, "Time:", test_time)

Using device: cuda
Training LSTM with 3 layers...
Train Metrics: (0.03998856216987476, 0.002213173078862887, 0.047044373509091256, 0.9873548138335454, 15.303968598041118) Time: 1.267256259918213
Validation Metrics: (0.1547264064233051, 0.024172983542141546, 0.15547663342811854, -3.109973587485605, 8.838209398599703) Time: 0.43764328956604004
Test Metrics: (0.18964332011431392, 0.03600476257207245, 0.18974920967443434, -4.965924156401063, 9.411479859545574) Time: 0.003278017044067383


In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import ConfigSpace as CS
import ConfigSpace.hyperparameters as CSH
import hpbandster.core.nameserver as hpns
from hpbandster.optimizers import BOHB
from hpbandster.core.worker import Worker
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error
import time

# Check for GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Define LSTM Model
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])
        return out

# Function to calculate metrics
def calculate_metrics(y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    mape = mean_absolute_percentage_error(y_true, y_pred) * 100
    return mae, mse, rmse, r2, mape

# Convert datasets to PyTorch tensors and move to GPU
Y_train_torch = torch.tensor(Y_train.values, dtype=torch.float32).unsqueeze(1).to(device)
Y_val_torch = torch.tensor(Y_val.values, dtype=torch.float32).unsqueeze(1).to(device)
Y_test_torch = torch.tensor(Y_test.values, dtype=torch.float32).unsqueeze(1).to(device)

X_train_torch = torch.tensor(X_train.values, dtype=torch.float32).unsqueeze(1).to(device)
X_val_torch = torch.tensor(X_val.values, dtype=torch.float32).unsqueeze(1).to(device)
X_test_torch = torch.tensor(X_test.values, dtype=torch.float32).unsqueeze(1).to(device)

# LSTM Configuration
num_layers = 5
hidden_dim = 64
output_dim = 1
input_dim = X_train.shape[1]

print(f"Training LSTM with {num_layers} layers...")

lstm_model = LSTMModel(input_dim, hidden_dim, num_layers, output_dim).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(lstm_model.parameters(), lr=0.001)
num_epochs = 100

# Train LSTM
start_time = time.time()
for epoch in range(num_epochs):
    lstm_model.train()
    optimizer.zero_grad()
    outputs = lstm_model(X_train_torch)
    loss = criterion(outputs, Y_train_torch)
    loss.backward()
    optimizer.step()
train_time = time.time() - start_time

# Extract Feature Representations
lstm_model.eval()
with torch.no_grad():
    val_start = time.time()
    train_features = lstm_model(X_train_torch).cpu().numpy()
    val_features = lstm_model(X_val_torch).cpu().numpy()
    val_time = time.time() - val_start

    test_start = time.time()
    test_features = lstm_model(X_test_torch).cpu().numpy()
    test_time = time.time() - test_start

# Define ConfigSpace for BOHB
def get_config_space():
    cs = CS.ConfigurationSpace()
    cs.add_hyperparameter(CSH.UniformIntegerHyperparameter("hidden_dim", 32, 128, default_value=64))
    cs.add_hyperparameter(CSH.UniformIntegerHyperparameter("num_layers", 1, 5, default_value=2))
    cs.add_hyperparameter(CSH.UniformFloatHyperparameter("learning_rate", 0.0001, 0.01, default_value=0.001, log=True))
    return cs

# BOHB Worker for LSTM
class LSTMWorker(Worker):
    def compute(self, config, budget, **kwargs):
        model = LSTMModel(input_dim, config["hidden_dim"], config["num_layers"], output_dim).to(device)
        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=config["learning_rate"])
        num_epochs = 100

        for epoch in range(num_epochs):
            model.train()
            optimizer.zero_grad()
            outputs = model(X_train_torch)
            loss = criterion(outputs, Y_train_torch)
            loss.backward()
            optimizer.step()

        model.eval()
        with torch.no_grad():
            Y_val_pred = model(X_val_torch).cpu().numpy()
        mae = mean_absolute_error(Y_val, Y_val_pred)
        return {"loss": mae, "info": config}

# Run BOHB
NS = hpns.NameServer(run_id="lstm_bohb", host="127.0.0.2", port=None)
NS.start()

worker = LSTMWorker(nameserver="127.0.0.2", run_id="lstm_bohb")
worker.run(background=True)

bohb = BOHB(configspace=get_config_space(), run_id="lstm_bohb", nameserver="127.0.0.2", min_budget=1, max_budget=3)
res = bohb.run(n_iterations=25)
bohb.shutdown()
NS.shutdown()

# Train Best LSTM Model
best_config = res.get_incumbent_id()
best_params = res.get_id2config_mapping()[best_config]["config"]

best_lstm_model = LSTMModel(input_dim, best_params["hidden_dim"], best_params["num_layers"], output_dim).to(device)
optimizer = optim.Adam(best_lstm_model.parameters(), lr=best_params["learning_rate"])
criterion = nn.MSELoss()

for epoch in range(100):
    best_lstm_model.train()
    optimizer.zero_grad()
    outputs = best_lstm_model(X_train_torch)
    loss = criterion(outputs, Y_train_torch)
    loss.backward()
    optimizer.step()

# Predictions
best_lstm_model.eval()
with torch.no_grad():
    Y_train_pred = best_lstm_model(X_train_torch).cpu().numpy()
    Y_val_pred = best_lstm_model(X_val_torch).cpu().numpy()
    Y_test_pred = best_lstm_model(X_test_torch).cpu().numpy()

# Calculate Metrics
train_metrics = calculate_metrics(Y_train, Y_train_pred)
val_metrics = calculate_metrics(Y_val, Y_val_pred)
test_metrics = calculate_metrics(Y_test, Y_test_pred)

# Print Results
print("Train Metrics:", train_metrics, "Time:", train_time)
print("Validation Metrics:", val_metrics, "Time:", val_time)
print("Test Metrics:", test_metrics, "Time:", test_time)

Using device: cuda
Training LSTM with 5 layers...
Train Metrics: (0.010890893246101709, 0.0001801770319481916, 0.01342300383476782, 0.9989705404725623, 6.340996167077166) Time: 2.062328815460205
Validation Metrics: (0.004326330782998235, 2.7755266808108614e-05, 0.005268326756011686, 0.9952809543225778, 0.24744428256284226) Time: 0.7281322479248047
Test Metrics: (0.02698135570084039, 0.0008263490171925536, 0.028746287015761766, 0.8630754597140073, 1.3213878153713008) Time: 0.009002208709716797


#Stacked Model

## XGBoost

### Initial

In [None]:
import time
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Enable GPU for TensorFlow
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        tf.config.experimental.set_memory_growth(gpus[0], True)
        print("GPU activated for TensorFlow!")
    except RuntimeError as e:
        print(e)

# Function to define and train an LSTM model on GPU
def train_lstm(X_train, Y_train, X_val, Y_val, layers):
    with tf.device('/GPU:0'):
        model = Sequential()
        model.add(LSTM(64, return_sequences=(layers > 1), input_shape=(X_train.shape[1], 1)))
        for _ in range(layers - 1):
            model.add(LSTM(64, return_sequences=(_ < layers - 2)))
        model.add(Dense(1))

        model.compile(optimizer='adam', loss='mse')
        model.fit(X_train, Y_train, validation_data=(X_val, Y_val), epochs=20, batch_size=16, verbose=0)
        return model

# Reshaping input for LSTM
X_train_r = np.expand_dims(X_train, axis=-1)
X_val_r = np.expand_dims(X_val, axis=-1)
X_test_r = np.expand_dims(X_test, axis=-1)

# Train 2, 3, and 5-layer LSTM models
lstm_models = {}
lstm_predictions = {}

start_train_time = time.time()
for layers in [2, 3, 5]:
    model = train_lstm(X_train_r, Y_train, X_val_r, Y_val, layers)
    Y_train_pred = model.predict(X_train_r)
    Y_val_pred = model.predict(X_val_r)
    Y_test_pred = model.predict(X_test_r)

    lstm_models[layers] = model
    lstm_predictions[layers] = (Y_train_pred, Y_val_pred, Y_test_pred)
train_time_total = time.time() - start_train_time

# Prepare input for XGBoost
X_train_xgb = np.column_stack([lstm_predictions[layers][0] for layers in [2, 3, 5]])
X_val_xgb = np.column_stack([lstm_predictions[layers][1] for layers in [2, 3, 5]])
X_test_xgb = np.column_stack([lstm_predictions[layers][2] for layers in [2, 3, 5]])

# Train XGBoost model
xgb_model = XGBRegressor(n_estimators=100, learning_rate=0.05, max_depth=3, objective='reg:squarederror', tree_method='gpu_hist')
start_train_xgb = time.time()
xgb_model.fit(X_train_xgb, Y_train, eval_set=[(X_val_xgb, Y_val)], verbose=False)
train_time_total += time.time() - start_train_xgb

# Predictions from XGBoost
start_validate_time = time.time()
Y_train_pred_xgb = xgb_model.predict(X_train_xgb)
Y_val_pred_xgb = xgb_model.predict(X_val_xgb)
validate_time_total = time.time() - start_validate_time

start_test_time = time.time()
Y_test_pred_xgb = xgb_model.predict(X_test_xgb)
test_time_total = time.time() - start_test_time

# Function to calculate metrics
def compute_metrics(y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    return mae, mse, rmse, r2, mape

# Compute and print metrics
metrics_train = compute_metrics(Y_train, Y_train_pred_xgb)
metrics_val = compute_metrics(Y_val, Y_val_pred_xgb)
metrics_test = compute_metrics(Y_test, Y_test_pred_xgb)

print(f"Train Metrics: MAE={metrics_train[0]:.4f}, MSE={metrics_train[1]:.4f}, RMSE={metrics_train[2]:.4f}, R²={metrics_train[3]:.4f}, MAPE={metrics_train[4]:.2f}%")
print(f"Validation Metrics: MAE={metrics_val[0]:.4f}, MSE={metrics_val[1]:.4f}, RMSE={metrics_val[2]:.4f}, R²={metrics_val[3]:.4f}, MAPE={metrics_val[4]:.2f}%")
print(f"Test Metrics: MAE={metrics_test[0]:.4f}, MSE={metrics_test[1]:.4f}, RMSE={metrics_test[2]:.4f}, R²={metrics_test[3]:.4f}, MAPE={metrics_test[4]:.2f}%")

# Print overall training, validation, and test times
print("Execution Times:")
print(f"Total Training Time: {train_time_total:.2f} seconds")
print(f"Total Validation Time: {validate_time_total:.2f} seconds")
print(f"Total Testing Time: {test_time_total:.2f} seconds")

GPU activated for TensorFlow!
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
Train Metrics: MAE=0.0046, MSE=0.0000, RMSE=0.0065, R²=0.9998, MAPE=1.60%
Validation Metrics: MAE=0.1719, MSE=0.0354, RMSE=0.1882, R²=-5.0249, MAPE=9.67%
Test Metrics: MAE=0.4409, MSE=0.2005, RMSE=0.4477, R²=-32.2178, MAPE=21.76%
Execution Times:
Total Training Time: 296.99 seconds
Total Validation Time: 0

###Optuna

In [None]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.2.1-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.1-py3-none-any.whl.metadata (7.2 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.9-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.2.1-py3-none-any.whl (383 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m383.6/383.6 kB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.15.1-py3-none-any.whl (231 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m231.8/231.8 kB[0m [31m22.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Downloading Mako-1.3.9-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.5/78.5 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: M

In [None]:
import time
import numpy as np
import optuna
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Enable GPU for TensorFlow
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        tf.config.experimental.set_memory_growth(gpus[0], True)
        print("GPU activated for TensorFlow!")
    except RuntimeError as e:
        print(e)

# Function to define and train an LSTM model on GPU
def train_lstm(X_train, Y_train, X_val, Y_val, units, layers, batch_size):
    with tf.device('/GPU:0'):
        model = Sequential()
        model.add(LSTM(units, return_sequences=(layers > 1), input_shape=(X_train.shape[1], 1)))
        for _ in range(layers - 1):
            model.add(LSTM(units, return_sequences=(_ < layers - 2)))
        model.add(Dense(1))

        model.compile(optimizer='adam', loss='mse')
        model.fit(X_train, Y_train, validation_data=(X_val, Y_val), epochs=20, batch_size=batch_size, verbose=0)
        return model

# Function to compute metrics
def compute_metrics(y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    return mae, mse, rmse, r2, mape

# Start tracking total time
total_start_time = time.time()

# Optuna objective function
def objective(trial):
    # LSTM hyperparameters
    units = trial.suggest_int("units", 32, 128, step=16)
    layers = trial.suggest_int("layers", 2, 5)
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64])

    # XGBoost hyperparameters
    learning_rate = trial.suggest_float("learning_rate", 0.01, 0.1, log=True)
    max_depth = trial.suggest_int("max_depth", 2, 6)
    n_estimators = trial.suggest_int("n_estimators", 50, 200, step=50)

    # Reshaping input for LSTM
    X_train_r = np.expand_dims(X_train, axis=-1)
    X_val_r = np.expand_dims(X_val, axis=-1)
    X_test_r = np.expand_dims(X_test, axis=-1)

    # Start training timer
    train_start_time = time.time()

    # Train LSTM models
    lstm_models = {}
    lstm_predictions = {}
    for layer in [2, 3, 5]:
        lstm_model = train_lstm(X_train_r, Y_train, X_val_r, Y_val, units, layer, batch_size)
        Y_train_pred = lstm_model.predict(X_train_r)
        Y_val_pred = lstm_model.predict(X_val_r)
        Y_test_pred = lstm_model.predict(X_test_r)

        lstm_models[layer] = lstm_model
        lstm_predictions[layer] = (Y_train_pred, Y_val_pred, Y_test_pred)

    # Prepare input for XGBoost
    X_train_xgb = np.column_stack([lstm_predictions[layers][0] for layers in [2, 3, 5]])
    X_val_xgb = np.column_stack([lstm_predictions[layers][1] for layers in [2, 3, 5]])

    # Train XGBoost model
    xgb_model = XGBRegressor(
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        max_depth=max_depth,
        objective='reg:squarederror',
        tree_method='gpu_hist'
    )
    xgb_model.fit(X_train_xgb, Y_train, eval_set=[(X_val_xgb, Y_val)], verbose=False)

    train_end_time = time.time()
    train_time = train_end_time - train_start_time

    # Get validation score (optimize for RMSE)
    Y_val_pred_xgb = xgb_model.predict(X_val_xgb)
    _, _, rmse, _, _ = compute_metrics(Y_val, Y_val_pred_xgb)

    return rmse

# Run Optuna study
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=20)

# Best hyperparameters
best_params = study.best_params
print("Best Parameters:", best_params)

# Start training timer
train_start_time = time.time()

# Train final models with best parameters
final_lstm_models = {}
final_lstm_predictions = {}
for layer in [2, 3, 5]:
    model = train_lstm(X_train_r, Y_train, X_val_r, Y_val, best_params['units'], layer, best_params['batch_size'])
    final_lstm_models[layer] = model
    final_lstm_predictions[layer] = (
        model.predict(X_train_r),
        model.predict(X_val_r),
        model.predict(X_test_r)
    )

# Prepare final inputs for XGBoost
X_train_xgb = np.column_stack([final_lstm_predictions[layers][0] for layers in [2, 3, 5]])
X_val_xgb = np.column_stack([final_lstm_predictions[layers][1] for layers in [2, 3, 5]])
X_test_xgb = np.column_stack([final_lstm_predictions[layers][2] for layers in [2, 3, 5]])

# Train final XGBoost model
final_xgb_model = XGBRegressor(
    n_estimators=best_params['n_estimators'],
    learning_rate=best_params['learning_rate'],
    max_depth=best_params['max_depth'],
    objective='reg:squarederror',
    tree_method='gpu_hist'
)
final_xgb_model.fit(X_train_xgb, Y_train, eval_set=[(X_val_xgb, Y_val)], verbose=False)

train_end_time = time.time()
train_time = train_end_time - train_start_time

# Start validation timer
val_start_time = time.time()

# Validation predictions
Y_val_pred_xgb = final_xgb_model.predict(X_val_xgb)

val_end_time = time.time()
val_time = val_end_time - val_start_time

# Start test timer
test_start_time = time.time()

# Test predictions
Y_test_pred_xgb = final_xgb_model.predict(X_test_xgb)

test_end_time = time.time()
test_time = test_end_time - test_start_time

# Compute final metrics
metrics_train = compute_metrics(Y_train, Y_train_pred_xgb)
metrics_val = compute_metrics(Y_val, Y_val_pred_xgb)
metrics_test = compute_metrics(Y_test, Y_test_pred_xgb)

# Total execution time
total_end_time = time.time()
total_time = total_end_time - total_start_time

print(f"Train Metrics: MAE={metrics_train[0]:.4f}, MSE={metrics_train[1]:.4f}, RMSE={metrics_train[2]:.4f}, R²={metrics_train[3]:.4f}, MAPE={metrics_train[4]:.2f}%")
print(f"Validation Metrics: MAE={metrics_val[0]:.4f}, MSE={metrics_val[1]:.4f}, RMSE={metrics_val[2]:.4f}, R²={metrics_val[3]:.4f}, MAPE={metrics_val[4]:.2f}%")
print(f"Test Metrics: MAE={metrics_test[0]:.4f}, MSE={metrics_test[1]:.4f}, RMSE={metrics_test[2]:.4f}, R²={metrics_test[3]:.4f}, MAPE={metrics_test[4]:.2f}%")

print(f"\nOverall Timing:")
print(f"Total Training Time: {train_time:.2f} seconds")
print(f"Total Validation Time: {val_time:.2f} seconds")
print(f"Total Testing Time: {test_time:.2f} seconds")
print(f"Total Execution Time: {total_time:.2f} seconds")


[I 2025-03-12 06:48:12,725] A new study created in memory with name: no-name-637810bd-b5a3-4e6a-9762-c349365912ac


GPU activated for TensorFlow!
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


[I 2025-03-12 06:49:39,043] Trial 0 finished with value: 0.17889190653762826 and parameters: {'units': 112, 'layers': 4, 'batch_size': 64, 'learning_rate': 0.08800885048692345, 'max_depth': 2, 'n_estimators': 100}. Best is trial 0 with value: 0.17889190653762826.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


[I 2025-03-12 06:52:17,620] Trial 1 finished with value: 0.2679860093297166 and parameters: {'units': 80, 'layers': 3, 'batch_size': 32, 'learning_rate': 0.016375032223498908, 'max_depth': 2, 'n_estimators': 200}. Best is trial 0 with value: 0.17889190653762826.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


[I 2025-03-12 06:54:42,336] Trial 2 finished with value: 0.18689273594910502 and parameters: {'units': 64, 'layers': 4, 'batch_size': 32, 'learning_rate': 0.04734702011305001, 'max_depth': 6, 'n_estimators': 100}. Best is trial 0 with value: 0.17889190653762826.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


[I 2025-03-12 06:56:09,030] Trial 3 finished with value: 0.38869341426471016 and parameters: {'units': 32, 'layers': 2, 'batch_size': 64, 'learning_rate': 0.01397592045049318, 'max_depth': 2, 'n_estimators': 150}. Best is trial 0 with value: 0.17889190653762826.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


[I 2025-03-12 06:58:50,086] Trial 4 finished with value: 0.5731680800823078 and parameters: {'units': 48, 'layers': 3, 'batch_size': 32, 'learning_rate': 0.011166791985854307, 'max_depth': 3, 'n_estimators': 100}. Best is trial 0 with value: 0.17889190653762826.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


[I 2025-03-12 07:00:16,103] Trial 5 finished with value: 0.22903690842543675 and parameters: {'units': 48, 'layers': 5, 'batch_size': 64, 'learning_rate': 0.017161567578010725, 'max_depth': 3, 'n_estimators': 200}. Best is trial 0 with value: 0.17889190653762826.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


[I 2025-03-12 07:04:43,094] Trial 6 finished with value: 0.302287823926586 and parameters: {'units': 48, 'layers': 3, 'batch_size': 16, 'learning_rate': 0.015329713639797087, 'max_depth': 4, 'n_estimators': 150}. Best is trial 0 with value: 0.17889190653762826.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


[I 2025-03-12 07:06:10,493] Trial 7 finished with value: 0.24241766156808883 and parameters: {'units': 64, 'layers': 5, 'batch_size': 64, 'learning_rate': 0.014803123407768213, 'max_depth': 4, 'n_estimators': 200}. Best is trial 0 with value: 0.17889190653762826.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


[I 2025-03-12 07:08:43,673] Trial 8 finished with value: 0.20531508264431245 and parameters: {'units': 112, 'layers': 3, 'batch_size': 32, 'learning_rate': 0.025224119479849504, 'max_depth': 6, 'n_estimators': 150}. Best is trial 0 with value: 0.17889190653762826.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


[I 2025-03-12 07:10:07,430] Trial 9 finished with value: 0.25470951421475685 and parameters: {'units': 128, 'layers': 3, 'batch_size': 64, 'learning_rate': 0.023391710267954512, 'max_depth': 2, 'n_estimators': 150}. Best is trial 0 with value: 0.17889190653762826.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step


[I 2025-03-12 07:15:12,074] Trial 10 finished with value: 0.18493218780467005 and parameters: {'units': 112, 'layers': 4, 'batch_size': 16, 'learning_rate': 0.09589856375170491, 'max_depth': 5, 'n_estimators': 50}. Best is trial 0 with value: 0.17889190653762826.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


[I 2025-03-12 07:20:01,537] Trial 11 finished with value: 0.18549611299151095 and parameters: {'units': 112, 'layers': 4, 'batch_size': 16, 'learning_rate': 0.0947531298879789, 'max_depth': 5, 'n_estimators': 50}. Best is trial 0 with value: 0.17889190653762826.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


[I 2025-03-12 07:24:36,642] Trial 12 finished with value: 0.19236567080267497 and parameters: {'units': 96, 'layers': 4, 'batch_size': 16, 'learning_rate': 0.08509604134464822, 'max_depth': 5, 'n_estimators': 50}. Best is trial 0 with value: 0.17889190653762826.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


[I 2025-03-12 07:29:06,072] Trial 13 finished with value: 0.23776413432132124 and parameters: {'units': 128, 'layers': 5, 'batch_size': 16, 'learning_rate': 0.059593976128942315, 'max_depth': 5, 'n_estimators': 50}. Best is trial 0 with value: 0.17889190653762826.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step


[I 2025-03-12 07:30:33,087] Trial 14 finished with value: 0.17887844425376298 and parameters: {'units': 96, 'layers': 4, 'batch_size': 64, 'learning_rate': 0.059512746340798485, 'max_depth': 4, 'n_estimators': 100}. Best is trial 14 with value: 0.17887844425376298.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step


[I 2025-03-12 07:32:00,301] Trial 15 finished with value: 0.18976948278072467 and parameters: {'units': 96, 'layers': 2, 'batch_size': 64, 'learning_rate': 0.04863503271024656, 'max_depth': 3, 'n_estimators': 100}. Best is trial 14 with value: 0.17887844425376298.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


[I 2025-03-12 07:33:25,827] Trial 16 finished with value: 0.17840428707682254 and parameters: {'units': 96, 'layers': 4, 'batch_size': 64, 'learning_rate': 0.06559588131957612, 'max_depth': 3, 'n_estimators': 100}. Best is trial 16 with value: 0.17840428707682254.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


[I 2025-03-12 07:34:51,307] Trial 17 finished with value: 0.2037167715822176 and parameters: {'units': 80, 'layers': 5, 'batch_size': 64, 'learning_rate': 0.03813229068689874, 'max_depth': 4, 'n_estimators': 100}. Best is trial 16 with value: 0.17840428707682254.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


[I 2025-03-12 07:36:21,518] Trial 18 finished with value: 0.17974361476365114 and parameters: {'units': 96, 'layers': 4, 'batch_size': 64, 'learning_rate': 0.06122669416415635, 'max_depth': 3, 'n_estimators': 100}. Best is trial 16 with value: 0.17840428707682254.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


[I 2025-03-12 07:37:42,852] Trial 19 finished with value: 0.17577605821255066 and parameters: {'units': 96, 'layers': 5, 'batch_size': 64, 'learning_rate': 0.06936160364486853, 'max_depth': 4, 'n_estimators': 150}. Best is trial 19 with value: 0.17577605821255066.


Best Parameters: {'units': 96, 'layers': 5, 'batch_size': 64, 'learning_rate': 0.06936160364486853, 'max_depth': 4, 'n_estimators': 150}
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
Train Metrics: MAE=0.0046, MSE=0.0000, RMSE=0.0065, R²=0.9998, MAPE=1.60%
Validation Metrics: MAE=0.1582, MSE=0.0309, RMSE=0.1758, R²=-4.2540, MAPE=8.88%
Test Metrics: MAE=0.4272, MSE=0.1885, RMSE=0.4

### bohb

In [None]:
!pip install hpbandster configspace



In [None]:
import numpy as np
import xgboost as xgb
import torch
import torch.nn as nn
import torch.optim as optim
import ConfigSpace as CS
import ConfigSpace.hyperparameters as CSH
import hpbandster.core.nameserver as hpns
from hpbandster.optimizers import BOHB
from hpbandster.core.worker import Worker
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error
import time

# Define LSTM Model
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])
        return out

# Function to calculate metrics
def calculate_metrics(y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    mape = mean_absolute_percentage_error(y_true, y_pred) * 100
    return mae, mse, rmse, r2, mape

# Convert datasets to PyTorch tensors
Y_train_torch = torch.tensor(Y_train.values, dtype=torch.float32).unsqueeze(1)
Y_val_torch = torch.tensor(Y_val.values, dtype=torch.float32).unsqueeze(1)
Y_test_torch = torch.tensor(Y_test.values, dtype=torch.float32).unsqueeze(1)

X_train_torch = torch.tensor(X_train.values, dtype=torch.float32).unsqueeze(1)
X_val_torch = torch.tensor(X_val.values, dtype=torch.float32).unsqueeze(1)
X_test_torch = torch.tensor(X_test.values, dtype=torch.float32).unsqueeze(1)

# LSTM Configurations
lstm_layers = [2, 3, 5]
hidden_dim = 64
output_dim = 1
input_dim = X_train.shape[1]

# Dictionary to store LSTM feature representations
lstm_features = []

for num_layers in lstm_layers:
    print(f"Training LSTM with {num_layers} layers...")

    lstm_model = LSTMModel(input_dim, hidden_dim, num_layers, output_dim)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(lstm_model.parameters(), lr=0.001)
    num_epochs = 100

    start_time = time.time()
    for epoch in range(num_epochs):
        lstm_model.train()
        optimizer.zero_grad()
        outputs = lstm_model(X_train_torch)
        loss = criterion(outputs, Y_train_torch)
        loss.backward()
        optimizer.step()
    train_time = time.time() - start_time

    # Extract Feature Representations
    lstm_model.eval()
    with torch.no_grad():
        val_start = time.time()
        train_features = lstm_model(X_train_torch).numpy()
        val_features = lstm_model(X_val_torch).numpy()
        val_time = time.time() - val_start

        test_start = time.time()
        test_features = lstm_model(X_test_torch).numpy()
        test_time = time.time() - test_start

    lstm_features.append((train_features, val_features, test_features, train_time, val_time, test_time))

# Concatenate Features from All Layers
final_train_features = np.hstack([feat[0] for feat in lstm_features])
final_val_features = np.hstack([feat[1] for feat in lstm_features])
final_test_features = np.hstack([feat[2] for feat in lstm_features])

# Record Time for Each Stage
total_train_time = sum([feat[3] for feat in lstm_features])
total_val_time = sum([feat[4] for feat in lstm_features])
total_test_time = sum([feat[5] for feat in lstm_features])

# Define ConfigSpace for BOHB
def get_config_space():
    cs = CS.ConfigurationSpace()
    cs.add_hyperparameter(CSH.UniformIntegerHyperparameter("n_estimators", 50, 500, default_value=100))
    cs.add_hyperparameter(CSH.UniformFloatHyperparameter("learning_rate", 0.01, 0.3, default_value=0.1))
    cs.add_hyperparameter(CSH.UniformIntegerHyperparameter("max_depth", 3, 10, default_value=6))
    cs.add_hyperparameter(CSH.UniformFloatHyperparameter("subsample", 0.5, 1.0, default_value=0.8))
    cs.add_hyperparameter(CSH.UniformFloatHyperparameter("colsample_bytree", 0.5, 1.0, default_value=0.8))
    return cs

# BOHB Worker for XGBoost
class XGBoostWorker(Worker):
    def compute(self, config, budget, **kwargs):
        model = xgb.XGBRegressor(
            n_estimators=config["n_estimators"],
            learning_rate=config["learning_rate"],
            max_depth=config["max_depth"],
            subsample=config["subsample"],
            colsample_bytree=config["colsample_bytree"],
            random_state=42
        )
        model.fit(final_train_features, Y_train)
        Y_val_pred = model.predict(final_val_features)
        mae = mean_absolute_error(Y_val, Y_val_pred)
        return {"loss": mae, "info": config}

# Run BOHB
NS = hpns.NameServer(run_id="lstm_xgb_bohb", host="127.0.0.2", port=None)
NS.start()

worker = XGBoostWorker(nameserver="127.0.0.2", run_id="lstm_xgb_bohb")
worker.run(background=True)

bohb = BOHB(configspace=get_config_space(), run_id="lstm_xgb_bohb", nameserver="127.0.0.2", min_budget=1, max_budget=3)
res = bohb.run(n_iterations=50)
bohb.shutdown()
NS.shutdown()

# Train Best XGBoost Model
best_config = res.get_incumbent_id()
best_params = res.get_id2config_mapping()[best_config]["config"]

best_xgb_model = xgb.XGBRegressor(
    n_estimators=best_params["n_estimators"],
    learning_rate=best_params["learning_rate"],
    max_depth=best_params["max_depth"],
    subsample=best_params["subsample"],
    colsample_bytree=best_params["colsample_bytree"],
    random_state=42
)

best_xgb_model.fit(final_train_features, Y_train)

# Predictions
Y_train_pred = best_xgb_model.predict(final_train_features)
Y_val_pred = best_xgb_model.predict(final_val_features)
Y_test_pred = best_xgb_model.predict(final_test_features)

# Calculate Metrics
train_metrics = calculate_metrics(Y_train, Y_train_pred)
val_metrics = calculate_metrics(Y_val, Y_val_pred)
test_metrics = calculate_metrics(Y_test, Y_test_pred)

# Print Results
print("Train Metrics:", train_metrics, "Time:", total_train_time)
print("Validation Metrics:", val_metrics, "Time:", total_val_time)
print("Test Metrics:", test_metrics, "Time:", total_test_time)


Training LSTM with 2 layers...
Training LSTM with 3 layers...
Training LSTM with 5 layers...
Train Metrics: (0.003584944664315948, 3.0063306480354804e-05, 0.005483001594049996, 0.9998282302858037, 1.0322452722211097) Time: 54.99114274978638
Validation Metrics: (0.15581174276444293, 0.030132125789281858, 0.17358607602363116, -4.123167395238743, 8.743781299923103) Time: 0.16620230674743652
Test Metrics: (0.4247630431221444, 0.18645871157432706, 0.4318086515741979, -29.89587188155887, 20.953608295965513) Time: 0.016576290130615234


# Catboost

## Initial

In [None]:
!pip install catboost

In [None]:
import time
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from catboost import CatBoostRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Ensure TensorFlow runs on CPU
tf.config.set_visible_devices([], 'GPU')
print("Running on CPU")

# Function to define and train an LSTM model on CPU
def train_lstm(X_train, Y_train, X_val, Y_val, layers):
    model = Sequential()
    model.add(LSTM(64, return_sequences=(layers > 1), input_shape=(X_train.shape[1], 1)))
    for _ in range(layers - 1):
        model.add(LSTM(64, return_sequences=(_ < layers - 2)))
    model.add(Dense(1))

    model.compile(optimizer='adam', loss='mse')
    start_time = time.time()
    model.fit(X_train, Y_train, validation_data=(X_val, Y_val), epochs=20, batch_size=16, verbose=0)
    train_time = time.time() - start_time
    return model, train_time

# Reshaping input for LSTM
X_train_r = np.expand_dims(X_train, axis=-1)
X_val_r = np.expand_dims(X_val, axis=-1)
X_test_r = np.expand_dims(X_test, axis=-1)

# Train 2, 3, and 5-layer LSTM models
lstm_models = {}
lstm_predictions = {}
times = {}

for layers in [2, 3, 5]:
    model, train_time = train_lstm(X_train_r, Y_train, X_val_r, Y_val, layers)
    Y_train_pred = model.predict(X_train_r)
    Y_val_pred = model.predict(X_val_r)
    Y_test_pred = model.predict(X_test_r)

    lstm_models[layers] = model
    lstm_predictions[layers] = (Y_train_pred, Y_val_pred, Y_test_pred)
    times[f'LSTM-{layers}'] = train_time

# Prepare input for CatBoost
X_train_cat = np.column_stack([lstm_predictions[layers][0] for layers in [2, 3, 5]])
X_val_cat = np.column_stack([lstm_predictions[layers][1] for layers in [2, 3, 5]])
X_test_cat = np.column_stack([lstm_predictions[layers][2] for layers in [2, 3, 5]])

# Train CatBoost model
cat_model = CatBoostRegressor(iterations=100, learning_rate=0.05, depth=3, loss_function='RMSE', task_type='CPU', verbose=0)

start_time = time.time()
cat_model.fit(X_train_cat, Y_train, eval_set=(X_val_cat, Y_val), verbose=0)
times['CatBoost'] = time.time() - start_time

# Predictions from CatBoost
start_time = time.time()
Y_train_pred_cat = cat_model.predict(X_train_cat)
times['CatBoost Train'] = time.time() - start_time

start_time = time.time()
Y_val_pred_cat = cat_model.predict(X_val_cat)
times['CatBoost Validate'] = time.time() - start_time

start_time = time.time()
Y_test_pred_cat = cat_model.predict(X_test_cat)
times['CatBoost Test'] = time.time() - start_time

# Function to calculate metrics
def compute_metrics(y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    return mae, mse, rmse, r2, mape

# Compute and print metrics
metrics_train = compute_metrics(Y_train, Y_train_pred_cat)
metrics_val = compute_metrics(Y_val, Y_val_pred_cat)
metrics_test = compute_metrics(Y_test, Y_test_pred_cat)

print(f"Train Metrics: MAE={metrics_train[0]:.4f}, MSE={metrics_train[1]:.4f}, RMSE={metrics_train[2]:.4f}, R²={metrics_train[3]:.4f}, MAPE={metrics_train[4]:.2f}%")
print(f"Validation Metrics: MAE={metrics_val[0]:.4f}, MSE={metrics_val[1]:.4f}, RMSE={metrics_val[2]:.4f}, R²={metrics_val[3]:.4f}, MAPE={metrics_val[4]:.2f}%")
print(f"Test Metrics: MAE={metrics_test[0]:.4f}, MSE={metrics_test[1]:.4f}, RMSE={metrics_test[2]:.4f}, R²={metrics_test[3]:.4f}, MAPE={metrics_test[4]:.2f}%")

# Print training times
print("Training Times:")
for model, t in times.items():
    print(f"{model}: {t:.2f} seconds")


Running on CPU
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
Train Metrics: MAE=0.0079, MSE=0.0001, RMSE=0.0112, R²=0.9993, MAPE=2.94%
Validation Metrics: MAE=0.1994, MSE=0.0456, RMSE=0.2136, R²=-6.7588, MAPE=11.24%
Test Metrics: MAE=0.4684, MSE=0.2254, RMSE=0.4748, R²=-36.3564, MAPE=23.12%
Training Times:
LSTM-2: 107.05 seconds
LSTM-3: 127.05 seconds
LSTM-5: 184.52 seconds
CatBo

## Optuna

In [None]:
!pip install optuna



In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import LSTM, Dense
import catboost as cb
import optuna
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import time
from tensorflow.keras.callbacks import EarlyStopping

# Reshape input for LSTM
X_train_r = np.expand_dims(X_train.values, axis=-1)
X_val_r = np.expand_dims(X_val.values, axis=-1)
X_test_r = np.expand_dims(X_test.values, axis=-1)

def compute_metrics(y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    return mae, mse, rmse, r2, mape

training_times = []  # Store training times per trial

def objective(trial):
    # LSTM hyperparameters
    lstm_units = trial.suggest_int("lstm_units", 32, 128, step=16)
    lstm_learning_rate = trial.suggest_loguniform("lstm_learning_rate", 1e-4, 1e-2)
    lstm_batch_size = trial.suggest_categorical("lstm_batch_size", [16, 32, 64])
    lstm_epochs = trial.suggest_int("lstm_epochs", 10, 50, step=10)

    lstm_predictions = {}
    start_train_time = time.time()

    for layers in [2, 3, 5]:  # Train LSTM models with 2, 3, and 5 layers
        model = keras.Sequential()
        model.add(LSTM(lstm_units, return_sequences=(layers > 1), input_shape=(X_train_r.shape[1], 1)))

        for _ in range(layers - 1):
            model.add(LSTM(lstm_units, return_sequences=(_ < layers - 2)))

        model.add(Dense(1))  # Output layer

        model.compile(optimizer=keras.optimizers.Adam(learning_rate=lstm_learning_rate), loss="mse")

        early_stopping = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)

        model.fit(X_train_r, Y_train, validation_data=(X_val_r, Y_val), epochs=lstm_epochs, batch_size=lstm_batch_size, verbose=0, callbacks=[early_stopping])

        lstm_predictions[layers] = {
            "train": model.predict(X_train_r).flatten(),
            "val": model.predict(X_val_r).flatten(),
            "test": model.predict(X_test_r).flatten()
        }

    # Stack LSTM outputs as features for CatBoost
    X_train_cat = np.column_stack([lstm_predictions[2]["train"], lstm_predictions[3]["train"], lstm_predictions[5]["train"]])
    X_val_cat = np.column_stack([lstm_predictions[2]["val"], lstm_predictions[3]["val"], lstm_predictions[5]["val"]])

    # CatBoost hyperparameters
    cat_params = {
        "depth": trial.suggest_int("cat_depth", 3, 10),
        "learning_rate": trial.suggest_loguniform("cat_learning_rate", 0.01, 0.3),
        "iterations": trial.suggest_int("cat_iterations", 50, 200, step=50),
        "loss_function": "RMSE",
        "verbose": 0
    }

    cat_model = cb.CatBoostRegressor(**cat_params)
    cat_model.fit(X_train_cat, Y_train, eval_set=(X_val_cat, Y_val), verbose=0)

    training_times.append(time.time() - start_train_time)

    # Compute RMSE without 'squared' argument
    rmse = np.sqrt(mean_squared_error(Y_val, cat_model.predict(X_val_cat)))
    return rmse  # Return RMSE for validation set

# Run Optuna study
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=10)

# Extract best parameters separately
best_lstm_params = {key: study.best_params[key] for key in ["lstm_units", "lstm_learning_rate", "lstm_batch_size", "lstm_epochs"]}
best_cat_params = {key: study.best_params[key] for key in ["cat_depth", "cat_learning_rate", "cat_iterations"]}

final_lstm_predictions = {}

start_total_training_time = time.time()
for layers in [2, 3, 5]:
    model = keras.Sequential()
    model.add(LSTM(best_lstm_params["lstm_units"], return_sequences=(layers > 1), input_shape=(X_train_r.shape[1], 1)))

    for _ in range(layers - 1):
        model.add(LSTM(best_lstm_params["lstm_units"], return_sequences=(_ < layers - 2)))

    model.add(Dense(1))
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=best_lstm_params["lstm_learning_rate"]), loss="mse")

    early_stopping = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)
    model.fit(X_train_r, Y_train, validation_data=(X_val_r, Y_val), epochs=best_lstm_params["lstm_epochs"], batch_size=best_lstm_params["lstm_batch_size"], verbose=0, callbacks=[early_stopping])

    final_lstm_predictions[layers] = {
        "train": model.predict(X_train_r).flatten(),
        "val": model.predict(X_val_r).flatten(),
        "test": model.predict(X_test_r).flatten()
    }

# Stack LSTM outputs as features for CatBoost
X_train_cat = np.column_stack([final_lstm_predictions[2]["train"], final_lstm_predictions[3]["train"], final_lstm_predictions[5]["train"]])
X_val_cat = np.column_stack([final_lstm_predictions[2]["val"], final_lstm_predictions[3]["val"], final_lstm_predictions[5]["val"]])
X_test_cat = np.column_stack([final_lstm_predictions[2]["test"], final_lstm_predictions[3]["test"], final_lstm_predictions[5]["test"]])

# Train final CatBoost model with best parameters
final_cat_model = cb.CatBoostRegressor(
    depth=best_cat_params["cat_depth"],
    learning_rate=best_cat_params["cat_learning_rate"],
    iterations=best_cat_params["cat_iterations"],
    loss_function="RMSE",
    verbose=0
)
final_cat_model.fit(X_train_cat, Y_train, eval_set=(X_val_cat, Y_val), verbose=0)

total_training_time = time.time() - start_total_training_time

# Compute final metrics
metrics_train = compute_metrics(Y_train, final_cat_model.predict(X_train_cat))
metrics_val = compute_metrics(Y_val, final_cat_model.predict(X_val_cat))
metrics_test = compute_metrics(Y_test, final_cat_model.predict(X_test_cat))

print(f"Total Training Time: {total_training_time:.2f} seconds")
print(f"Train Metrics: {metrics_train}")
print(f"Validation Metrics: {metrics_val}")
print(f"Test Metrics: {metrics_test}")
print("Best hyperparameters:", study.best_params)


[I 2025-03-12 17:26:33,782] A new study created in memory with name: no-name-ebcda9ec-032e-43f1-8725-3498eb66c6fa


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step


[I 2025-03-12 17:28:31,258] Trial 0 finished with value: 0.25816994917365316 and parameters: {'lstm_units': 64, 'lstm_learning_rate': 0.0001705971958100671, 'lstm_batch_size': 64, 'lstm_epochs': 10, 'cat_depth': 7, 'cat_learning_rate': 0.020639603404140214, 'cat_iterations': 150}. Best is trial 0 with value: 0.25816994917365316.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step


[I 2025-03-12 17:31:27,419] Trial 1 finished with value: 0.1962773193804212 and parameters: {'lstm_units': 80, 'lstm_learning_rate': 0.0008710210231706901, 'lstm_batch_size': 32, 'lstm_epochs': 40, 'cat_depth': 3, 'cat_learning_rate': 0.030622147259098682, 'cat_iterations': 200}. Best is trial 1 with value: 0.1962773193804212.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 14ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step


[I 2025-03-12 17:34:33,719] Trial 2 finished with value: 0.26267095914943134 and parameters: {'lstm_units': 128, 'lstm_learning_rate': 0.00027669303492747937, 'lstm_batch_size': 64, 'lstm_epochs': 10, 'cat_depth': 4, 'cat_learning_rate': 0.06333834021957514, 'cat_iterations': 50}. Best is trial 1 with value: 0.1962773193804212.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step


[I 2025-03-12 17:38:13,343] Trial 3 finished with value: 0.6450866298658534 and parameters: {'lstm_units': 64, 'lstm_learning_rate': 0.004158754741398677, 'lstm_batch_size': 16, 'lstm_epochs': 40, 'cat_depth': 6, 'cat_learning_rate': 0.010104134101946076, 'cat_iterations': 100}. Best is trial 1 with value: 0.1962773193804212.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step


[I 2025-03-12 17:46:22,223] Trial 4 finished with value: 0.28885128765335555 and parameters: {'lstm_units': 128, 'lstm_learning_rate': 0.00031989849938777223, 'lstm_batch_size': 16, 'lstm_epochs': 50, 'cat_depth': 9, 'cat_learning_rate': 0.013190442535242788, 'cat_iterations': 200}. Best is trial 1 with value: 0.1962773193804212.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 20ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step


[I 2025-03-12 17:53:16,629] Trial 5 finished with value: 0.20152323943935466 and parameters: {'lstm_units': 112, 'lstm_learning_rate': 0.00020441323526829903, 'lstm_batch_size': 32, 'lstm_epochs': 30, 'cat_depth': 9, 'cat_learning_rate': 0.030867809759269654, 'cat_iterations': 150}. Best is trial 1 with value: 0.1962773193804212.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step


[I 2025-03-12 17:55:58,179] Trial 6 finished with value: 0.3183646992828983 and parameters: {'lstm_units': 128, 'lstm_learning_rate': 0.00605108383557619, 'lstm_batch_size': 64, 'lstm_epochs': 20, 'cat_depth': 9, 'cat_learning_rate': 0.015650285383408798, 'cat_iterations': 150}. Best is trial 1 with value: 0.1962773193804212.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 20ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 15ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step


[I 2025-03-12 18:03:01,220] Trial 7 finished with value: 0.17747179784786724 and parameters: {'lstm_units': 128, 'lstm_learning_rate': 0.00027484521818618376, 'lstm_batch_size': 16, 'lstm_epochs': 40, 'cat_depth': 3, 'cat_learning_rate': 0.1756009026226736, 'cat_iterations': 150}. Best is trial 7 with value: 0.17747179784786724.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step


[I 2025-03-12 18:06:17,214] Trial 8 finished with value: 0.18194091770413157 and parameters: {'lstm_units': 48, 'lstm_learning_rate': 0.005846682996782721, 'lstm_batch_size': 16, 'lstm_epochs': 20, 'cat_depth': 9, 'cat_learning_rate': 0.044980192488315356, 'cat_iterations': 150}. Best is trial 7 with value: 0.17747179784786724.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step


[I 2025-03-12 18:08:15,488] Trial 9 finished with value: 0.17552266867295574 and parameters: {'lstm_units': 64, 'lstm_learning_rate': 0.008530989433351942, 'lstm_batch_size': 64, 'lstm_epochs': 50, 'cat_depth': 5, 'cat_learning_rate': 0.2182292510555109, 'cat_iterations': 150}. Best is trial 9 with value: 0.17552266867295574.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
Total Training Time: 121.51 seconds
Train Metrics: (0.004257372915513549, 3.947103273340496e-05, 0.006282597610336425, 0.9997744783122446, 1.295182957032049)
Validation Metrics: (0.15793739230056988, 0.030807504712369788, 0.17552066747927375, -4.237996633008031, 8.86556731118651)
Test Metrics: (0.426916664431017, 0.18829290875990368, 0

## BOHB

In [None]:
!pip install ConfigSpace

Collecting ConfigSpace
  Downloading configspace-1.2.1.tar.gz (130 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/131.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m131.0/131.0 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: ConfigSpace
  Building wheel for ConfigSpace (pyproject.toml) ... [?25l[?25hdone
  Created wheel for ConfigSpace: filename=configspace-1.2.1-py3-none-any.whl size=115950 sha256=dbd60cdc5f756f07232079b0b07b7296f4f26bd79cce4fefc6be782088d0f828
  Stored in directory: /root/.cache/pip/wheels/11/0f/36/d5027c3eeb038827889830f7efbe6a1bad8956b3eb44ab2f44
Successfully built ConfigSpace
Installing collected packages: ConfigSpace
Successfully installed ConfigSpace-1.2.1


In [None]:
!pip install hpbandster

Collecting hpbandster
  Downloading hpbandster-0.7.4.tar.gz (51 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/51.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.3/51.3 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting Pyro4 (from hpbandster)
  Downloading Pyro4-4.82-py2.py3-none-any.whl.metadata (2.2 kB)
Collecting serpent (from hpbandster)
  Downloading serpent-1.41-py3-none-any.whl.metadata (5.8 kB)
Collecting netifaces (from hpbandster)
  Downloading netifaces-0.11.0.tar.gz (30 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading Pyro4-4.82-py2.py3-none-any.whl (89 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m90.0/90.0 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading serpent-1.41-py3-none-any.whl (9.6 kB)
Building wheels for collected packages: hpbandster, netifaces
  Building whe

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import ConfigSpace as CS
import ConfigSpace.hyperparameters as CSH
import hpbandster.core.nameserver as hpns
from hpbandster.optimizers import BOHB
from hpbandster.core.worker import Worker
from catboost import CatBoostRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error
import time

# Define LSTM Model
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])
        return out

# Function to calculate metrics
def calculate_metrics(y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    mape = mean_absolute_percentage_error(y_true, y_pred) * 100
    return mae, mse, rmse, r2, mape

# Convert datasets to PyTorch tensors
Y_train_torch = torch.tensor(Y_train.values, dtype=torch.float32).unsqueeze(1)
Y_val_torch = torch.tensor(Y_val.values, dtype=torch.float32).unsqueeze(1)
Y_test_torch = torch.tensor(Y_test.values, dtype=torch.float32).unsqueeze(1)

X_train_torch = torch.tensor(X_train.values, dtype=torch.float32).unsqueeze(1)
X_val_torch = torch.tensor(X_val.values, dtype=torch.float32).unsqueeze(1)
X_test_torch = torch.tensor(X_test.values, dtype=torch.float32).unsqueeze(1)

# LSTM Configurations
lstm_layers = [2, 3, 5]
hidden_dim = 64
output_dim = 1
input_dim = X_train.shape[1]

lstm_features = []

for num_layers in lstm_layers:
    print(f"Training LSTM with {num_layers} layers...")

    lstm_model = LSTMModel(input_dim, hidden_dim, num_layers, output_dim)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(lstm_model.parameters(), lr=0.001)
    num_epochs = 100

    start_time = time.time()
    for epoch in range(num_epochs):
        lstm_model.train()
        optimizer.zero_grad()
        outputs = lstm_model(X_train_torch)
        loss = criterion(outputs, Y_train_torch)
        loss.backward()
        optimizer.step()
    train_time = time.time() - start_time

    lstm_model.eval()
    with torch.no_grad():
        val_start = time.time()
        train_features = lstm_model(X_train_torch).numpy()
        val_features = lstm_model(X_val_torch).numpy()
        val_time = time.time() - val_start

        test_start = time.time()
        test_features = lstm_model(X_test_torch).numpy()
        test_time = time.time() - test_start

    lstm_features.append((train_features, val_features, test_features, train_time, val_time, test_time))

# Concatenate Features
final_train_features = np.hstack([feat[0] for feat in lstm_features])
final_val_features = np.hstack([feat[1] for feat in lstm_features])
final_test_features = np.hstack([feat[2] for feat in lstm_features])

total_train_time = sum([feat[3] for feat in lstm_features])
total_val_time = sum([feat[4] for feat in lstm_features])
total_test_time = sum([feat[5] for feat in lstm_features])

# Define ConfigSpace for BOHB
def get_config_space():
    cs = CS.ConfigurationSpace()
    cs.add_hyperparameter(CSH.UniformIntegerHyperparameter("n_estimators", 50, 500, default_value=100))
    cs.add_hyperparameter(CSH.UniformFloatHyperparameter("learning_rate", 0.01, 0.3, default_value=0.1))
    cs.add_hyperparameter(CSH.UniformIntegerHyperparameter("depth", 3, 10, default_value=6))
    cs.add_hyperparameter(CSH.UniformFloatHyperparameter("subsample", 0.5, 1.0, default_value=0.8))
    return cs

# BOHB Worker for CatBoost
class CatBoostWorker(Worker):
    def compute(self, config, budget, **kwargs):
        model = CatBoostRegressor(
            iterations=config["n_estimators"],
            learning_rate=config["learning_rate"],
            depth=config["depth"],
            subsample=config["subsample"],
            loss_function='RMSE',
            verbose=False,
            random_seed=42
        )
        model.fit(final_train_features, Y_train)
        Y_val_pred = model.predict(final_val_features)
        mae = mean_absolute_error(Y_val, Y_val_pred)
        return {"loss": mae, "info": config}

# Run BOHB
NS = hpns.NameServer(run_id="lstm_catboost_bohb", host="127.0.0.2", port=None)
NS.start()

worker = CatBoostWorker(nameserver="127.0.0.2", run_id="lstm_catboost_bohb")
worker.run(background=True)

bohb = BOHB(configspace=get_config_space(), run_id="lstm_catboost_bohb", nameserver="127.0.0.2", min_budget=1, max_budget=3)
res = bohb.run(n_iterations=50)
bohb.shutdown()
NS.shutdown()

# Train Best CatBoost Model
best_config = res.get_incumbent_id()
best_params = res.get_id2config_mapping()[best_config]["config"]

best_catboost_model = CatBoostRegressor(
    iterations=best_params["n_estimators"],
    learning_rate=best_params["learning_rate"],
    depth=best_params["depth"],
    subsample=best_params["subsample"],
    loss_function='RMSE',
    verbose=False,
    random_seed=42
)

best_catboost_model.fit(final_train_features, Y_train)

# Predictions
Y_train_pred = best_catboost_model.predict(final_train_features)
Y_val_pred = best_catboost_model.predict(final_val_features)
Y_test_pred = best_catboost_model.predict(final_test_features)

# Calculate Metrics
train_metrics = calculate_metrics(Y_train, Y_train_pred)
val_metrics = calculate_metrics(Y_val, Y_val_pred)
test_metrics = calculate_metrics(Y_test, Y_test_pred)

# Print Results
print("Train Metrics:", train_metrics, "Time:", total_train_time)
print("Validation Metrics:", val_metrics, "Time:", total_val_time)
print("Test Metrics:", test_metrics, "Time:", total_test_time)


Training LSTM with 2 layers...
Training LSTM with 3 layers...
Training LSTM with 5 layers...
Train Metrics: (0.0035267679216303507, 2.885922865108158e-05, 0.005372078615497132, 0.9998351099051409, 1.0097136704254701) Time: 53.528841972351074
Validation Metrics: (0.1574971157320392, 0.03066736914552056, 0.1751210128611657, -4.2141713068238875, 8.840333520845986) Time: 0.16505217552185059
Test Metrics: (0.42647220587716317, 0.1879136111576758, 0.43349003582282697, -30.136946115892822, 21.038537337252098) Time: 0.016920089721679688


#Lightboost

##Initial

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import lightgbm as lgb
import pandas as pd
import time
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define LSTM Model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        return out[:, -1, :]

# Set Parameters
input_size = 3
hidden_size = 64
num_layers_list = [2, 3, 5]
learning_rate = 0.001
num_epochs = 100

# MinMax Scaling
scaler = MinMaxScaler()
Y_train_scaled = scaler.fit_transform(Y_train.values.reshape(-1, 1))
Y_val_scaled = scaler.transform(Y_val.values.reshape(-1, 1))
Y_test_scaled = scaler.transform(Y_test.values.reshape(-1, 1))

# Convert data to PyTorch tensors
X_train_torch = torch.tensor(X_train.values, dtype=torch.float32).unsqueeze(1).to(device)
X_val_torch = torch.tensor(X_val.values, dtype=torch.float32).unsqueeze(1).to(device)
X_test_torch = torch.tensor(X_test.values, dtype=torch.float32).unsqueeze(1).to(device)

Y_train_torch = torch.tensor(Y_train_scaled, dtype=torch.float32).to(device)
Y_val_torch = torch.tensor(Y_val_scaled, dtype=torch.float32).to(device)
Y_test_torch = torch.tensor(Y_test_scaled, dtype=torch.float32).to(device)

# Store embeddings for LGBM
train_embeddings, val_embeddings, test_embeddings = [], [], []
train_time, val_time, test_time = 0, 0, 0

# Train multiple LSTM models
for num_layers in num_layers_list:
    print(f"\nTraining LSTM with {num_layers} layers...")
    model = LSTMModel(input_size, hidden_size, num_layers).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Train LSTM
    start_time = time.time()
    model.train()
    for epoch in range(num_epochs):
        optimizer.zero_grad()
        outputs = model(X_train_torch)
        loss = criterion(outputs, Y_train_torch)
        loss.backward()
        optimizer.step()

        if (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")
    train_time += time.time() - start_time

    # Extract embeddings
    model.eval()
    with torch.no_grad():
        start_time = time.time()
        train_embed = model(X_train_torch).cpu().numpy()
        val_embed = model(X_val_torch).cpu().numpy()
        test_embed = model(X_test_torch).cpu().numpy()
        val_time += time.time() - start_time
        test_time += time.time() - start_time

    train_embeddings.append(train_embed)
    val_embeddings.append(val_embed)
    test_embeddings.append(test_embed)

# Concatenate embeddings from all LSTM models
X_train_lgb = np.concatenate(train_embeddings, axis=1)
X_val_lgb = np.concatenate(val_embeddings, axis=1)
X_test_lgb = np.concatenate(test_embeddings, axis=1)

# Ensure correct label shape
Y_train_lgb = Y_train.values.flatten()
Y_val_lgb = Y_val.values.flatten()
Y_test_lgb = Y_test.values.flatten()

# Train LightGBM on LSTM embeddings
print("\nTraining LightGBM on Combined LSTM Embeddings...")
start_time = time.time()
lgb_train = lgb.Dataset(X_train_lgb, label=Y_train_lgb)
lgb_val = lgb.Dataset(X_val_lgb, label=Y_val_lgb, reference=lgb_train)

lgb_params = {
    "objective": "regression",
    "metric": "rmse",
    "boosting_type": "gbdt",
    "learning_rate": 0.05,
    "num_leaves": 31
}

lgb_model = lgb.train(lgb_params, lgb_train, valid_sets=[lgb_train, lgb_val], num_boost_round=200, callbacks=[lgb.log_evaluation(50)])
train_time += time.time() - start_time

# Predictions
start_time = time.time()
train_pred_lgb = lgb_model.predict(X_train_lgb)
val_pred_lgb = lgb_model.predict(X_val_lgb)
test_pred_lgb = lgb_model.predict(X_test_lgb)
test_time += time.time() - start_time

# Compute evaluation metrics
def compute_metrics(y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    mape = np.mean(np.abs((y_true - y_pred) / np.maximum(np.abs(y_true), 1e-8))) * 100  # Avoid divide-by-zero
    return mae, mse, rmse, r2, mape

# Store results
results_df = pd.DataFrame([
    ["LSTM(2,3,5) + LGBM", "Train", *compute_metrics(Y_train_lgb, train_pred_lgb), train_time],
    ["LSTM(2,3,5) + LGBM", "Validation", *compute_metrics(Y_val_lgb, val_pred_lgb), val_time],
    ["LSTM(2,3,5) + LGBM", "Test", *compute_metrics(Y_test_lgb, test_pred_lgb), test_time]
], columns=["Model", "Dataset", "MAE", "MSE", "RMSE", "R²", "MAPE", "Time (s)"])

print("\nFinal Model Performance\n")
print(results_df.to_string(index=False))


Training LSTM with 2 layers...
Epoch [10/100], Loss: 0.1365
Epoch [20/100], Loss: 0.1283
Epoch [30/100], Loss: 0.1168
Epoch [40/100], Loss: 0.1007
Epoch [50/100], Loss: 0.0797
Epoch [60/100], Loss: 0.0573
Epoch [70/100], Loss: 0.0398
Epoch [80/100], Loss: 0.0303
Epoch [90/100], Loss: 0.0251
Epoch [100/100], Loss: 0.0209

Training LSTM with 3 layers...
Epoch [10/100], Loss: 0.1326
Epoch [20/100], Loss: 0.1253
Epoch [30/100], Loss: 0.1130
Epoch [40/100], Loss: 0.0904
Epoch [50/100], Loss: 0.0599
Epoch [60/100], Loss: 0.0455
Epoch [70/100], Loss: 0.0359
Epoch [80/100], Loss: 0.0253
Epoch [90/100], Loss: 0.0173
Epoch [100/100], Loss: 0.0112

Training LSTM with 5 layers...
Epoch [10/100], Loss: 0.1347
Epoch [20/100], Loss: 0.1276
Epoch [30/100], Loss: 0.1145
Epoch [40/100], Loss: 0.0839
Epoch [50/100], Loss: 0.0619
Epoch [60/100], Loss: 0.0475
Epoch [70/100], Loss: 0.0327
Epoch [80/100], Loss: 0.0170
Epoch [90/100], Loss: 0.0082
Epoch [100/100], Loss: 0.0055

Training LightGBM on Combined 

## Optuna

In [None]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.2.1-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.1-py3-none-any.whl.metadata (7.2 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.9-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.2.1-py3-none-any.whl (383 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m383.6/383.6 kB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.15.1-py3-none-any.whl (231 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m231.8/231.8 kB[0m [31m21.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Downloading Mako-1.3.9-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.5/78.5 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: M

In [None]:
import time
import numpy as np
import optuna
import tensorflow as tf
import lightgbm as lgb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler

# Enable GPU for TensorFlow
if tf.config.list_physical_devices('GPU'):
    try:
        tf.config.experimental.set_memory_growth(tf.config.list_physical_devices('GPU')[0], True)
        print("GPU activated for TensorFlow!")
    except RuntimeError as e:
        print(e)

# Function to define and train an LSTM model on GPU
def train_lstm(X_train, Y_train, X_val, Y_val, units, layers, batch_size):
    with tf.device('/GPU:0'):
        model = Sequential()
        model.add(LSTM(units, return_sequences=(layers > 1), input_shape=(X_train.shape[1], 1)))
        for _ in range(layers - 1):
            model.add(LSTM(units, return_sequences=(_ < layers - 2)))
        model.add(Dense(1))

        model.compile(optimizer='adam', loss='mse')
        model.fit(X_train, Y_train, validation_data=(X_val, Y_val), epochs=20, batch_size=batch_size, verbose=0)
        return model

# Function to compute metrics
def compute_metrics(y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    return mae, mse, rmse, r2, mape

# Optuna objective function
def objective(trial):
    # LSTM hyperparameters
    units = trial.suggest_int("units", 32, 128, step=16)
    layers = trial.suggest_int("layers", 2, 5)
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64])

    # LightGBM hyperparameters
    learning_rate = trial.suggest_float("learning_rate", 0.01, 0.1, log=True)
    num_leaves = trial.suggest_int("num_leaves", 20, 50)
    max_depth = trial.suggest_int("max_depth", 3, 10)
    min_data_in_leaf = trial.suggest_int("min_data_in_leaf", 5, 30)

    # Reshape input for LSTM
    X_train_r = np.expand_dims(X_train, axis=-1)
    X_val_r = np.expand_dims(X_val, axis=-1)
    X_test_r = np.expand_dims(X_test, axis=-1)

    train_start_time = time.time()

    # Train LSTM
    lstm_model = train_lstm(X_train_r, Y_train, X_val_r, Y_val, units, layers, batch_size)
    train_time = time.time() - train_start_time

    val_start_time = time.time()

    # Extract features from LSTM
    Y_train_pred = lstm_model.predict(X_train_r)
    Y_val_pred = lstm_model.predict(X_val_r)
    Y_test_pred = lstm_model.predict(X_test_r)

    val_time = time.time() - val_start_time

    # Scale extracted features
    scaler = MinMaxScaler()
    X_train_lgb = scaler.fit_transform(Y_train_pred)
    X_val_lgb = scaler.transform(Y_val_pred)
    X_test_lgb = scaler.transform(Y_test_pred)

    # Train LightGBM
    lgb_train = lgb.Dataset(X_train_lgb, label=Y_train)
    lgb_val = lgb.Dataset(X_val_lgb, label=Y_val, reference=lgb_train)


    model = lgb.train({
    "objective": "regression", "metric": "rmse", "boosting_type": "gbdt",
    "num_leaves": num_leaves, "learning_rate": learning_rate,
    "max_depth": max_depth, "min_data_in_leaf": min_data_in_leaf
}, lgb_train, valid_sets=[lgb_train, lgb_val], num_boost_round=200, callbacks=[lgb.log_evaluation(0)])



    Y_val_pred_lgb = model.predict(X_val_lgb)
    _, _, rmse, _, _ = compute_metrics(Y_val, Y_val_pred_lgb)

    return rmse

# Run Optuna study
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=15)

# Train final models with best parameters
best_params = study.best_params

final_train_start = time.time()
final_lstm_model = train_lstm(np.expand_dims(X_train, axis=-1), Y_train, np.expand_dims(X_val, axis=-1), Y_val,
                              best_params['units'], best_params['layers'], best_params['batch_size'])
final_train_time = time.time() - final_train_start

final_val_start = time.time()
Y_train_pred = final_lstm_model.predict(np.expand_dims(X_train, axis=-1))
Y_val_pred = final_lstm_model.predict(np.expand_dims(X_val, axis=-1))
Y_test_pred = final_lstm_model.predict(np.expand_dims(X_test, axis=-1))
final_val_time = time.time() - final_val_start

scaler = MinMaxScaler()
X_train_lgb = scaler.fit_transform(Y_train_pred)
X_val_lgb = scaler.transform(Y_val_pred)
X_test_lgb = scaler.transform(Y_test_pred)

final_lgb_train = lgb.Dataset(X_train_lgb, label=Y_train)
final_lgb_val = lgb.Dataset(X_val_lgb, label=Y_val, reference=final_lgb_train)

final_lgb_model = lgb.train(best_params, final_lgb_train, valid_sets=[final_lgb_train, final_lgb_val], num_boost_round=200)

final_test_start = time.time()
Y_test_pred_lgb = final_lgb_model.predict(X_test_lgb)
final_test_time = time.time() - final_test_start

metrics_train = compute_metrics(Y_train.to_numpy().flatten(), Y_train_pred.flatten())
metrics_val = compute_metrics(Y_val.to_numpy().flatten(), Y_val_pred.flatten())
metrics_test = compute_metrics(Y_test.to_numpy().flatten(), Y_test_pred_lgb.flatten())


print(f"Train Metrics: MAE={metrics_train[0]:.4f}, MSE={metrics_train[1]:.4f}, RMSE={metrics_train[2]:.4f}, R²={metrics_train[3]:.4f}, MAPE={metrics_train[4]:.2f}%")
print(f"Validation Metrics: MAE={metrics_val[0]:.4f}, MSE={metrics_val[1]:.4f}, RMSE={metrics_val[2]:.4f}, R²={metrics_val[3]:.4f}, MAPE={metrics_val[4]:.2f}%")
print(f"Test Metrics: MAE={metrics_test[0]:.4f}, MSE={metrics_test[1]:.4f}, RMSE={metrics_test[2]:.4f}, R²={metrics_test[3]:.4f}, MAPE={metrics_test[4]:.2f}%")
print(f"Total Training Time: {final_train_time:.2f} sec | Validation Time: {final_val_time:.2f} sec | Testing Time: {final_test_time:.2f} sec")

[I 2025-03-12 10:16:20,688] A new study created in memory with name: no-name-bcf82d8d-d4a5-456d-b09c-fc8c989e4a7e


GPU activated for TensorFlow!
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000112 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 7736, number of used features: 1
[LightGBM] [Info] Start training from score 0.454038


[I 2025-03-12 10:17:06,291] Trial 0 finished with value: 0.17429273242439094 and parameters: {'units': 32, 'layers': 3, 'batch_size': 32, 'learning_rate': 0.056911248396717594, 'num_leaves': 25, 'max_depth': 8, 'min_data_in_leaf': 22}. Best is trial 0 with value: 0.17429273242439094.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000104 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 7736, number of used features: 1
[LightGBM] [Info] Start training from score 0.454038


[I 2025-03-12 10:18:12,846] Trial 1 finished with value: 0.1742805358248773 and parameters: {'units': 128, 'layers': 5, 'batch_size': 32, 'learning_rate': 0.0962007088198759, 'num_leaves': 29, 'max_depth': 10, 'min_data_in_leaf': 8}. Best is trial 1 with value: 0.1742805358248773.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000181 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 7736, number of used features: 1
[LightGBM] [Info] Start training from score 0.454038


[I 2025-03-12 10:18:39,879] Trial 2 finished with value: 0.1742767101719378 and parameters: {'units': 112, 'layers': 2, 'batch_size': 64, 'learning_rate': 0.08608171188110691, 'num_leaves': 48, 'max_depth': 10, 'min_data_in_leaf': 16}. Best is trial 2 with value: 0.1742767101719378.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000160 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 7736, number of used features: 1
[LightGBM] [Info] Start training from score 0.454038


[I 2025-03-12 10:20:12,045] Trial 3 finished with value: 0.20752555728636782 and parameters: {'units': 96, 'layers': 3, 'batch_size': 16, 'learning_rate': 0.017293731737602312, 'num_leaves': 48, 'max_depth': 5, 'min_data_in_leaf': 22}. Best is trial 2 with value: 0.1742767101719378.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000119 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 7736, number of used features: 1
[LightGBM] [Info] Start training from score 0.454038


[I 2025-03-12 10:20:56,354] Trial 4 finished with value: 0.2767937465671527 and parameters: {'units': 128, 'layers': 3, 'batch_size': 32, 'learning_rate': 0.011979063004636607, 'num_leaves': 26, 'max_depth': 10, 'min_data_in_leaf': 10}. Best is trial 2 with value: 0.1742767101719378.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000104 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 7736, number of used features: 1
[LightGBM] [Info] Start training from score 0.454038


[I 2025-03-12 10:21:31,589] Trial 5 finished with value: 0.17443020988475816 and parameters: {'units': 80, 'layers': 5, 'batch_size': 64, 'learning_rate': 0.06237673813838819, 'num_leaves': 43, 'max_depth': 4, 'min_data_in_leaf': 5}. Best is trial 2 with value: 0.1742767101719378.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000188 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 7736, number of used features: 1
[LightGBM] [Info] Start training from score 0.454038


[I 2025-03-12 10:22:15,562] Trial 6 finished with value: 0.17437736903676795 and parameters: {'units': 32, 'layers': 2, 'batch_size': 32, 'learning_rate': 0.047131569303028725, 'num_leaves': 22, 'max_depth': 8, 'min_data_in_leaf': 7}. Best is trial 2 with value: 0.1742767101719378.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000130 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 7736, number of used features: 1
[LightGBM] [Info] Start training from score 0.454038


[I 2025-03-12 10:23:03,289] Trial 7 finished with value: 0.17444283468950897 and parameters: {'units': 64, 'layers': 3, 'batch_size': 32, 'learning_rate': 0.07873362107737836, 'num_leaves': 41, 'max_depth': 3, 'min_data_in_leaf': 21}. Best is trial 2 with value: 0.1742767101719378.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000133 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 7736, number of used features: 1
[LightGBM] [Info] Start training from score 0.454038


[I 2025-03-12 10:24:31,216] Trial 8 finished with value: 0.1742771439509039 and parameters: {'units': 80, 'layers': 2, 'batch_size': 16, 'learning_rate': 0.0751161070599288, 'num_leaves': 37, 'max_depth': 8, 'min_data_in_leaf': 9}. Best is trial 2 with value: 0.1742767101719378.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000146 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 7736, number of used features: 1
[LightGBM] [Info] Start training from score 0.454038


[I 2025-03-12 10:26:05,919] Trial 9 finished with value: 0.18134520961882286 and parameters: {'units': 80, 'layers': 4, 'batch_size': 16, 'learning_rate': 0.025810517755316478, 'num_leaves': 38, 'max_depth': 4, 'min_data_in_leaf': 16}. Best is trial 2 with value: 0.1742767101719378.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000100 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 7736, number of used features: 1
[LightGBM] [Info] Start training from score 0.454038


[I 2025-03-12 10:26:29,268] Trial 10 finished with value: 0.17484687467108034 and parameters: {'units': 112, 'layers': 2, 'batch_size': 64, 'learning_rate': 0.036949833077404945, 'num_leaves': 50, 'max_depth': 7, 'min_data_in_leaf': 28}. Best is trial 2 with value: 0.1742767101719378.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000102 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 7736, number of used features: 1
[LightGBM] [Info] Start training from score 0.454038


[I 2025-03-12 10:27:44,137] Trial 11 finished with value: 0.17427751887206752 and parameters: {'units': 64, 'layers': 2, 'batch_size': 16, 'learning_rate': 0.09770932527223122, 'num_leaves': 33, 'max_depth': 9, 'min_data_in_leaf': 14}. Best is trial 2 with value: 0.1742767101719378.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000102 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 7736, number of used features: 1
[LightGBM] [Info] Start training from score 0.454038


[I 2025-03-12 10:28:07,749] Trial 12 finished with value: 0.1777818744635142 and parameters: {'units': 96, 'layers': 2, 'batch_size': 64, 'learning_rate': 0.028136147300306644, 'num_leaves': 45, 'max_depth': 6, 'min_data_in_leaf': 12}. Best is trial 2 with value: 0.1742767101719378.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000199 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 7736, number of used features: 1
[LightGBM] [Info] Start training from score 0.454038


[I 2025-03-12 10:28:38,162] Trial 13 finished with value: 0.1742791274996243 and parameters: {'units': 96, 'layers': 4, 'batch_size': 64, 'learning_rate': 0.06684431352843298, 'num_leaves': 34, 'max_depth': 9, 'min_data_in_leaf': 18}. Best is trial 2 with value: 0.1742767101719378.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000104 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 7736, number of used features: 1
[LightGBM] [Info] Start training from score 0.454038


[I 2025-03-12 10:29:48,603] Trial 14 finished with value: 0.17439040129346967 and parameters: {'units': 64, 'layers': 2, 'batch_size': 16, 'learning_rate': 0.04496966817326947, 'num_leaves': 38, 'max_depth': 8, 'min_data_in_leaf': 29}. Best is trial 2 with value: 0.1742767101719378.


[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000100 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 7736, number of used features: 1
[LightGBM] [Info] Start training from score 0.454038
Train Metrics: MAE=0.0040, MSE=0.0000, RMSE=0.0056, R²=0.9998, MAPE=1.17%
Validation Metrics: MAE=0.0185, MSE=0.0004, RMSE=0.0191, R²=0.9378, MAPE=1.05%
Test Metrics: MAE=0.4255, MSE=0.1871, RMSE=0.4326, R²=-30.0043, MAPE=20.99%
Total Training Time: 22.32 sec | Validation Time: 1.24 sec | Testing Time: 0.01 sec


##BOHB

In [None]:
!pip install hpbandster configSpace

Collecting hpbandster
  Downloading hpbandster-0.7.4.tar.gz (51 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/51.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.3/51.3 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting configSpace
  Downloading configspace-1.2.1.tar.gz (130 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m131.0/131.0 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting Pyro4 (from hpbandster)
  Downloading Pyro4-4.82-py2.py3-none-any.whl.metadata (2.2 kB)
Collecting serpent (from hpbandster)
  Downloading serpent-1.41-py3-none-any.whl.metadata (5.8 kB)
Collecting netifaces (from hpbandster)
  Downloading netifaces-0.11.0.t

In [None]:
import time
import numpy as np
import lightgbm as lgb
import torch
import torch.nn as nn
import torch.optim as optim
import ConfigSpace as CS
import ConfigSpace.hyperparameters as CSH
import hpbandster.core.nameserver as hpns
from hpbandster.optimizers import BOHB
from hpbandster.core.worker import Worker
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error

# Define LSTM Model
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])
        return out

# Function to calculate metrics
def calculate_metrics(y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    mape = mean_absolute_percentage_error(y_true, y_pred) * 100
    return mae, mse, rmse, r2, mape

# Convert datasets to PyTorch tensors
X_train_torch = torch.tensor(X_train.values, dtype=torch.float32).unsqueeze(1)
X_val_torch = torch.tensor(X_val.values, dtype=torch.float32).unsqueeze(1)
X_test_torch = torch.tensor(X_test.values, dtype=torch.float32).unsqueeze(1)

Y_train_torch = torch.tensor(Y_train.values, dtype=torch.float32).unsqueeze(1)
Y_val_torch = torch.tensor(Y_val.values, dtype=torch.float32).unsqueeze(1)
Y_test_torch = torch.tensor(Y_test.values, dtype=torch.float32).unsqueeze(1)

# LSTM Training
lstm_layers = [2, 3, 5]
hidden_dim = 64
output_dim = 1
input_dim = X_train.shape[1]
lstm_features = []
train_time_start = time.time()

for num_layers in lstm_layers:
    lstm_model = LSTMModel(input_dim, hidden_dim, num_layers, output_dim)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(lstm_model.parameters(), lr=0.001)
    num_epochs = 100

    for epoch in range(num_epochs):
        lstm_model.train()
        optimizer.zero_grad()
        outputs = lstm_model(X_train_torch)
        loss = criterion(outputs, Y_train_torch)
        loss.backward()
        optimizer.step()

    lstm_model.eval()
    with torch.no_grad():
        train_features = lstm_model(X_train_torch).numpy()
        val_features = lstm_model(X_val_torch).numpy()
        test_features = lstm_model(X_test_torch).numpy()

    lstm_features.append((train_features, val_features, test_features))

train_time = time.time() - train_time_start

# Stack extracted features
train_features_stacked = np.hstack([feat[0] for feat in lstm_features])
val_features_stacked = np.hstack([feat[1] for feat in lstm_features])
test_features_stacked = np.hstack([feat[2] for feat in lstm_features])

# Define ConfigSpace for BOHB (LightGBM)
def get_config_space():
    cs = CS.ConfigurationSpace()
    cs.add_hyperparameter(CSH.UniformIntegerHyperparameter("num_leaves", 20, 300, default_value=50))
    cs.add_hyperparameter(CSH.UniformIntegerHyperparameter("max_depth", 3, 12, default_value=6))
    cs.add_hyperparameter(CSH.UniformFloatHyperparameter("learning_rate", 0.01, 0.3, default_value=0.1))
    cs.add_hyperparameter(CSH.UniformFloatHyperparameter("feature_fraction", 0.5, 1.0, default_value=0.8))
    return cs

# BOHB Optimization
bohb_start_time = time.time()

NS = hpns.NameServer(run_id="stacked_lstm_lgb_bohb", host="127.0.0.1", port=None)
NS.start()

class LightGBMWorker(Worker):
    def __init__(self, train_features, val_features, **kwargs):
        super().__init__(**kwargs)
        self.train_features = train_features
        self.val_features = val_features

    def compute(self, config, budget, **kwargs):
        model = lgb.LGBMRegressor(
            num_leaves=config["num_leaves"],
            max_depth=config["max_depth"],
            learning_rate=config["learning_rate"],
            feature_fraction=config["feature_fraction"],
            random_state=42
        )
        model.fit(self.train_features, Y_train)
        val_pred = model.predict(self.val_features)
        mae = mean_absolute_error(Y_val, val_pred)
        return {"loss": mae, "info": config}

worker = LightGBMWorker(
    train_features=train_features_stacked,
    val_features=val_features_stacked,
    nameserver="127.0.0.1",
    run_id="stacked_lstm_lgb_bohb"
)
worker.run(background=True)

bohb = BOHB(
    configspace=get_config_space(),
    run_id="stacked_lstm_lgb_bohb",
    nameserver="127.0.0.1",
    min_budget=1,
    max_budget=3
)
res = bohb.run(n_iterations=50)
bohb.shutdown()
NS.shutdown()
bohb_time = time.time() - bohb_start_time

# Train final LightGBM model
best_config = res.get_incumbent_id()
best_params = res.get_id2config_mapping()[best_config]["config"]
best_lgb_model = lgb.LGBMRegressor(**best_params, random_state=42)
best_lgb_model.fit(train_features_stacked, Y_train)

# Predictions and metrics
val_time_start = time.time()
Y_val_pred = best_lgb_model.predict(val_features_stacked)
val_time = time.time() - val_time_start

test_time_start = time.time()
Y_test_pred = best_lgb_model.predict(test_features_stacked)
test_time = time.time() - test_time_start

train_metrics = calculate_metrics(Y_train, best_lgb_model.predict(train_features_stacked))
val_metrics = calculate_metrics(Y_val, Y_val_pred)
test_metrics = calculate_metrics(Y_test, Y_test_pred)

# Print results
print("Train Time:", train_time, "BOHB Optimization Time:", bohb_time, "Validation Time:", val_time, "Test Time:", test_time)
print("Train Metrics:", train_metrics)
print("Validation Metrics:", val_metrics)
print("Test Metrics:", test_metrics)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000353 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 7736, number of used features: 3
[LightGBM] [Info] Start training from score 0.454038
Train Time: 76.72150444984436 BOHB Optimization Time: 30.87048602104187 Validation Time: 0.005930423736572266 Test Time: 0.005907535552978516
Train Metrics: (0.0034154536185345124, 2.6371892757062174e-05, 0.005135357120693961, 0.9998493215567972, 0.9895300460277248)
Validation Metrics: (0.15634523120140575, 0.03030139991553575, 0.17407297296115715, -4.151946975740545, 8.774335135193532)
Test Metrics: (0.4253056256637224, 0.18691994561220834, 0.43234239395669766, -29.97228921986047, 20.980568418634792)
