#Initial

In [24]:
# Importing necessary libraries for data analysis and manipulation
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


# For handling warnings
import warnings
warnings.filterwarnings('ignore')

In [25]:
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [26]:
df_aapl = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/stocks/AAPL.csv')

In [27]:
import numpy as np
from scipy.stats import boxcox

df_aapl['Close_log'] = np.log(df_aapl['Close'] + 1)
df_aapl['Close_sqrt'] = np.sqrt(df_aapl['Close'])
df_aapl['Close_boxcox'], _ = boxcox(df_aapl['Close'] + 1)


In [28]:

skew_original = df_aapl['Close'].skew()
skew_log = df_aapl['Close_log'].skew()
skew_sqrt = df_aapl['Close_sqrt'].skew()
skew_boxcox = pd.Series(df_aapl['Close_boxcox']).skew()

print(f"Original Skewness: {skew_original}")
print(f"Log Transformation Skewness: {skew_log}")
print(f"Square Root Transformation Skewness: {skew_sqrt}")
print(f"Box-Cox Transformation Skewness: {skew_boxcox}")


Original Skewness: 2.5045276102319933
Log Transformation Skewness: 0.8535555176510303
Square Root Transformation Skewness: 1.6211545809555206
Box-Cox Transformation Skewness: 0.43527466713563334


In [29]:

df_aapl['Open_log'] = np.log(df_aapl['Open'])
df_aapl['High_log'] = np.log(df_aapl['High'])
df_aapl['Low_log'] = np.log(df_aapl['Low'])
df_aapl['Adj Close_log'] = np.log(df_aapl['Adj Close'])
df_aapl['Volume_log'] = np.log(df_aapl['Volume'])


df_aapl['Open_sqrt'] = np.sqrt(df_aapl['Open'])
df_aapl['High_sqrt'] = np.sqrt(df_aapl['High'])
df_aapl['Low_sqrt'] = np.sqrt(df_aapl['Low'])
df_aapl['Adj Close_sqrt'] = np.sqrt(df_aapl['Adj Close'])
df_aapl['Volume_sqrt'] = np.sqrt(df_aapl['Volume'])

from scipy.stats import boxcox
df_aapl['Open_boxcox'], _ = boxcox(df_aapl['Open'])
df_aapl['High_boxcox'], _ = boxcox(df_aapl['High'])
df_aapl['Low_boxcox'], _ = boxcox(df_aapl['Low'])
df_aapl['Adj Close_boxcox'], _ = boxcox(df_aapl['Adj Close'])

In [30]:

skewness_before = df_aapl[['Open', 'High', 'Low', 'Adj Close', 'Volume']].skew()
skewness_after = df_aapl[['Open_log', 'High_log', 'Low_log', 'Adj Close_log',
                          'Open_sqrt', 'High_sqrt', 'Low_sqrt', 'Adj Close_sqrt', 'Volume_sqrt',
                          'Open_boxcox', 'High_boxcox', 'Low_boxcox', 'Adj Close_boxcox']].skew()

print("Skewness Before Transformation:\n", skewness_before)
print("\nSkewness After Transformation:\n", skewness_after)


Skewness Before Transformation:
 Open         2.504632
High         2.502208
Low          2.506714
Adj Close    2.550677
Volume       3.565699
dtype: float64

Skewness After Transformation:
 Open_log            0.482872
High_log            0.481997
Low_log             0.484246
Adj Close_log       0.494009
Open_sqrt           1.620771
High_sqrt           1.621456
Low_sqrt            1.620661
Adj Close_sqrt      1.679402
Volume_sqrt         1.299776
Open_boxcox         0.181226
High_boxcox         0.179749
Low_boxcox          0.182882
Adj Close_boxcox    0.180085
dtype: float64


In [31]:
from scipy import stats

df_aapl['Open_boxcox'], _ = stats.boxcox(df_aapl['Open'] + 1)
df_aapl['High_boxcox'], _ = stats.boxcox(df_aapl['High'] + 1)
df_aapl['Low_boxcox'], _ = stats.boxcox(df_aapl['Low'] + 1)
df_aapl['Adj Close_boxcox'], _ = stats.boxcox(df_aapl['Adj Close'] + 1)
df_aapl['Close_boxcox'], _ = stats.boxcox(df_aapl['Close'] + 1)

skewness_after_boxcox = df_aapl[['Open_boxcox', 'High_boxcox', 'Low_boxcox', 'Adj Close_boxcox', 'Close_boxcox']].skew()

print("Skewness After Box-Cox Transformation:")
print(skewness_after_boxcox)


Skewness After Box-Cox Transformation:
Open_boxcox         0.435237
High_boxcox         0.433381
Low_boxcox          0.437331
Adj Close_boxcox    0.458762
Close_boxcox        0.435275
dtype: float64


In [32]:

df_aapl_cleaned = df_aapl[['Date', 'Open', 'High', 'Low', 'Adj Close', 'Close', 'Volume',
                           'Open_boxcox', 'High_boxcox', 'Low_boxcox', 'Adj Close_boxcox',
                           'Close_boxcox']]

print(df_aapl_cleaned.head())


         Date      Open      High       Low  Adj Close     Close     Volume  \
0  1980-12-12  0.128348  0.128906  0.128348   0.098943  0.128348  469033600   
1  1980-12-15  0.122210  0.122210  0.121652   0.093781  0.121652  175884800   
2  1980-12-16  0.113281  0.113281  0.112723   0.086898  0.112723  105728000   
3  1980-12-17  0.115513  0.116071  0.115513   0.089049  0.115513   86441600   
4  1980-12-18  0.118862  0.119420  0.118862   0.091630  0.118862   73449600   

   Open_boxcox  High_boxcox  Low_boxcox  Adj Close_boxcox  Close_boxcox  
0     0.117689     0.118173    0.117674          0.092374      0.117689  
1     0.112503     0.112516    0.112016          0.087857      0.112030  
2     0.104886     0.104897    0.104395          0.081785      0.104407  
3     0.106798     0.107287    0.106786          0.083688      0.106798  
4     0.109657     0.110145    0.109644          0.085966      0.109657  


#Train, Validation and Testing

In [33]:
from sklearn.model_selection import train_test_split

X = df_aapl_cleaned[['Open_boxcox', 'High_boxcox', 'Low_boxcox']]
Y = df_aapl_cleaned['Close_boxcox']

X_train, X_temp, Y_train, Y_temp = train_test_split(X, Y, test_size=0.3, shuffle=False)
X_val, X_test, Y_val, Y_test = train_test_split(X_temp, Y_temp, test_size=0.5, shuffle=False)

print(f"Training set: {X_train.shape}, Validation set: {X_val.shape}, Test set: {X_test.shape}")


Training set: (7736, 3), Validation set: (1658, 3), Test set: (1658, 3)


# Extra tree

## Initial

In [12]:
import time
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Enable GPU for TensorFlow
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        tf.config.experimental.set_memory_growth(gpus[0], True)
        print("GPU activated for TensorFlow!")
    except RuntimeError as e:
        print(e)

# Function to define and train an LSTM model on GPU
def train_lstm(X_train, Y_train, X_val, Y_val, layers):
    with tf.device('/GPU:0'):
        model = Sequential()
        model.add(LSTM(64, return_sequences=(layers > 1), input_shape=(X_train.shape[1], 1)))
        for _ in range(layers - 1):
            model.add(LSTM(64, return_sequences=(_ < layers - 2)))
        model.add(Dense(1))

        model.compile(optimizer='adam', loss='mse')
        model.fit(X_train, Y_train, validation_data=(X_val, Y_val), epochs=20, batch_size=16, verbose=0)
        return model

# Reshaping input for LSTM
X_train_r = np.expand_dims(X_train, axis=-1)
X_val_r = np.expand_dims(X_val, axis=-1)
X_test_r = np.expand_dims(X_test, axis=-1)

# Initialize timing dictionary
times = {}

# Train Phase (includes training all LSTMs and Extra Trees)
start_train_time = time.time()

# Train 2, 3, and 5-layer LSTM models
lstm_models = {}
lstm_predictions = {}

for layers in [2, 3, 5]:
    model = train_lstm(X_train_r, Y_train, X_val_r, Y_val, layers)
    lstm_models[layers] = model

# Generate predictions from all LSTM models
for layers in [2, 3, 5]:
    Y_train_pred = lstm_models[layers].predict(X_train_r)
    Y_val_pred = lstm_models[layers].predict(X_val_r)
    Y_test_pred = lstm_models[layers].predict(X_test_r)
    lstm_predictions[layers] = (Y_train_pred, Y_val_pred, Y_test_pred)

# Prepare input for Extra Trees
X_train_et = np.column_stack([lstm_predictions[layers][0] for layers in [2, 3, 5]])
X_val_et = np.column_stack([lstm_predictions[layers][1] for layers in [2, 3, 5]])
X_test_et = np.column_stack([lstm_predictions[layers][2] for layers in [2, 3, 5]])

# Train Extra Trees model
et_model = ExtraTreesRegressor(n_estimators=100, max_depth=None, min_samples_split=2,
                              min_samples_leaf=1, random_state=42, n_jobs=-1)
et_model.fit(X_train_et, Y_train)

times['Total Train Time'] = time.time() - start_train_time

# Validation Phase
start_val_time = time.time()
Y_val_pred_et = et_model.predict(X_val_et)
times['Total Validate Time'] = time.time() - start_val_time

# Test Phase
start_test_time = time.time()
Y_test_pred_et = et_model.predict(X_test_et)
times['Total Test Time'] = time.time() - start_test_time

# Function to calculate metrics
def compute_metrics(y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    return mae, mse, rmse, r2, mape

# Compute and print metrics
metrics_train = compute_metrics(Y_train, et_model.predict(X_train_et))
metrics_val = compute_metrics(Y_val, Y_val_pred_et)
metrics_test = compute_metrics(Y_test, Y_test_pred_et)

print("\nPerformance Metrics:")
print(f"Train Metrics: MAE={metrics_train[0]:.4f}, MSE={metrics_train[1]:.4f}, RMSE={metrics_train[2]:.4f}, R²={metrics_train[3]:.4f}, MAPE={metrics_train[4]:.2f}%")
print(f"Validation Metrics: MAE={metrics_val[0]:.4f}, MSE={metrics_val[1]:.4f}, RMSE={metrics_val[2]:.4f}, R²={metrics_val[3]:.4f}, MAPE={metrics_val[4]:.2f}%")
print(f"Test Metrics: MAE={metrics_test[0]:.4f}, MSE={metrics_test[1]:.4f}, RMSE={metrics_test[2]:.4f}, R²={metrics_test[3]:.4f}, MAPE={metrics_test[4]:.2f}%")

# Print timing information
print("\nTiming Information:")
for phase, t in times.items():
    print(f"{phase}: {t:.2f} seconds")


GPU activated for TensorFlow!
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step

Performance Metrics:
Train Metrics: MAE=0.0001, MSE=0.0000, RMSE=0.0003, R²=1.0000, MAPE=0.03%
Validation Metrics: MAE=0.1352, MSE=0.0239, RMSE=0.1546, R²=-3.0652, MAPE=7.56%
Test Metrics: MAE=0.4034, MSE=0.1687, RMSE=0.4108, R²=-26.9577, MAPE=19.89%

Timing Information:
Total Train Time: 281.51 seconds
T

## Optuna

In [34]:
!pip install optuna



In [35]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import LSTM, Dense
from sklearn.ensemble import ExtraTreesRegressor
import optuna
from sklearn.metrics import mean_absolute_error

# --- GPU Setup ---
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    tf.config.experimental.set_memory_growth(gpus[0], True)

# --- LSTM Training Function (supports 2/3/5 layers) ---
def train_lstm(X_train, Y_train, units, layers, lr, batch_size, epochs):
    model = tf.keras.Sequential()
    for _ in range(layers - 1):
        model.add(LSTM(units, return_sequences=True))
    model.add(LSTM(units))
    model.add(Dense(1))
    model.compile(optimizer=tf.keras.optimizers.Adam(lr), loss='mse')
    model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochs, verbose=0)
    return model

# --- Optuna Objective ---
def objective(trial):
    # LSTM Hyperparameters (optimized per layer count)
    lstm_params = {
        'units': trial.suggest_int('lstm_units', 32, 128),
        'lr': trial.suggest_float('lstm_lr', 1e-4, 1e-2, log=True),
        'batch_size': trial.suggest_categorical('batch_size', [16, 32, 64]),
        'epochs': trial.suggest_int('epochs', 10, 50)
    }

    # Train ALL LSTMs (2, 3, 5 layers)
    lstm_preds = []
    for layers in [2, 3, 5]:  # YOUR REQUIRED STACKING
        model = train_lstm(
            X_train=np.expand_dims(X_train, -1),
            Y_train=Y_train,
            layers=layers,
            **lstm_params
        )
        lstm_preds.append(model.predict(np.expand_dims(X_val, -1)).flatten())

    # Stack predictions
    X_val_et = np.column_stack(lstm_preds)

    # ExtraTrees Hyperparameters
    et_params = {
        'n_estimators': trial.suggest_int('et_n_estimators', 50, 200),
        'max_depth': trial.suggest_int('et_max_depth', 3, 10),
        'min_samples_split': trial.suggest_int('et_min_samples_split', 2, 10),
        'max_features': trial.suggest_categorical('et_max_features', ['sqrt', 'log2'])
    }

    # Train ExtraTrees
    et_model = ExtraTreesRegressor(**et_params, random_state=42, n_jobs=-1)
    et_model.fit(X_val_et, Y_val)
    return mean_absolute_error(Y_val, et_model.predict(X_val_et))

# --- Run Optimization ---
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=30)

# --- Train Final Model with Best Params ---
best_params = study.best_params
print("Best Params:", best_params)

# (Re-train with best params on full data and evaluate)

[I 2025-03-29 14:37:13,281] A new study created in memory with name: no-name-a307de8a-3239-4ead-b7a6-c9414b71fb0f


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step


[I 2025-03-29 14:41:00,629] Trial 0 finished with value: 0.0024811500242978264 and parameters: {'lstm_units': 121, 'lstm_lr': 0.007007548974473547, 'batch_size': 32, 'epochs': 35, 'et_n_estimators': 144, 'et_max_depth': 8, 'et_min_samples_split': 5, 'et_max_features': 'log2'}. Best is trial 0 with value: 0.0024811500242978264.


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step


[I 2025-03-29 14:45:09,275] Trial 1 finished with value: 0.006596162349422881 and parameters: {'lstm_units': 96, 'lstm_lr': 0.00346804116518289, 'batch_size': 16, 'epochs': 19, 'et_n_estimators': 108, 'et_max_depth': 4, 'et_min_samples_split': 10, 'et_max_features': 'sqrt'}. Best is trial 0 with value: 0.0024811500242978264.


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step


[I 2025-03-29 14:48:00,878] Trial 2 finished with value: 0.0025266940391710448 and parameters: {'lstm_units': 101, 'lstm_lr': 0.005985035395540961, 'batch_size': 64, 'epochs': 50, 'et_n_estimators': 156, 'et_max_depth': 6, 'et_min_samples_split': 10, 'et_max_features': 'log2'}. Best is trial 0 with value: 0.0024811500242978264.


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step


[I 2025-03-29 14:51:43,600] Trial 3 finished with value: 0.001858540757759991 and parameters: {'lstm_units': 50, 'lstm_lr': 0.00018410646844921956, 'batch_size': 32, 'epochs': 35, 'et_n_estimators': 157, 'et_max_depth': 8, 'et_min_samples_split': 3, 'et_max_features': 'log2'}. Best is trial 3 with value: 0.001858540757759991.


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step


[I 2025-03-29 14:59:07,691] Trial 4 finished with value: 0.011144615563957436 and parameters: {'lstm_units': 79, 'lstm_lr': 0.0005033634006682623, 'batch_size': 16, 'epochs': 37, 'et_n_estimators': 122, 'et_max_depth': 3, 'et_min_samples_split': 10, 'et_max_features': 'sqrt'}. Best is trial 3 with value: 0.001858540757759991.


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step


[I 2025-03-29 15:01:25,238] Trial 5 finished with value: 0.0027352477637309267 and parameters: {'lstm_units': 46, 'lstm_lr': 0.00023669500940889089, 'batch_size': 64, 'epochs': 41, 'et_n_estimators': 163, 'et_max_depth': 6, 'et_min_samples_split': 5, 'et_max_features': 'sqrt'}. Best is trial 3 with value: 0.001858540757759991.


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step


[I 2025-03-29 15:05:13,754] Trial 6 finished with value: 0.0025290962936505403 and parameters: {'lstm_units': 83, 'lstm_lr': 0.0037163884551604854, 'batch_size': 16, 'epochs': 18, 'et_n_estimators': 74, 'et_max_depth': 6, 'et_min_samples_split': 4, 'et_max_features': 'sqrt'}. Best is trial 3 with value: 0.001858540757759991.


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step


[I 2025-03-29 15:09:08,153] Trial 7 finished with value: 0.006788274535953749 and parameters: {'lstm_units': 37, 'lstm_lr': 0.002830671195654449, 'batch_size': 16, 'epochs': 20, 'et_n_estimators': 54, 'et_max_depth': 4, 'et_min_samples_split': 9, 'et_max_features': 'log2'}. Best is trial 3 with value: 0.001858540757759991.


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step


[I 2025-03-29 15:10:22,506] Trial 8 finished with value: 0.0016425890967256217 and parameters: {'lstm_units': 125, 'lstm_lr': 0.0037823195051217983, 'batch_size': 64, 'epochs': 22, 'et_n_estimators': 183, 'et_max_depth': 9, 'et_min_samples_split': 6, 'et_max_features': 'log2'}. Best is trial 8 with value: 0.0016425890967256217.


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step


[I 2025-03-29 15:11:47,734] Trial 9 finished with value: 0.010997257247735967 and parameters: {'lstm_units': 102, 'lstm_lr': 0.00013003764290600415, 'batch_size': 64, 'epochs': 24, 'et_n_estimators': 104, 'et_max_depth': 3, 'et_min_samples_split': 5, 'et_max_features': 'sqrt'}. Best is trial 8 with value: 0.0016425890967256217.


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step


[I 2025-03-29 15:12:35,466] Trial 10 finished with value: 0.0016444758773383591 and parameters: {'lstm_units': 123, 'lstm_lr': 0.001395676569699605, 'batch_size': 64, 'epochs': 12, 'et_n_estimators': 198, 'et_max_depth': 10, 'et_min_samples_split': 7, 'et_max_features': 'log2'}. Best is trial 8 with value: 0.0016425890967256217.


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step


[I 2025-03-29 15:13:21,775] Trial 11 finished with value: 0.0016610847529081258 and parameters: {'lstm_units': 128, 'lstm_lr': 0.0011010180140717208, 'batch_size': 64, 'epochs': 11, 'et_n_estimators': 195, 'et_max_depth': 10, 'et_min_samples_split': 7, 'et_max_features': 'log2'}. Best is trial 8 with value: 0.0016425890967256217.


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step


[I 2025-03-29 15:14:07,281] Trial 12 finished with value: 0.0016545220509906946 and parameters: {'lstm_units': 116, 'lstm_lr': 0.0012989357292726635, 'batch_size': 64, 'epochs': 11, 'et_n_estimators': 195, 'et_max_depth': 10, 'et_min_samples_split': 7, 'et_max_features': 'log2'}. Best is trial 8 with value: 0.0016425890967256217.


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step


[I 2025-03-29 15:15:38,035] Trial 13 finished with value: 0.0017083543382603258 and parameters: {'lstm_units': 112, 'lstm_lr': 0.0016291538747506696, 'batch_size': 64, 'epochs': 27, 'et_n_estimators': 180, 'et_max_depth': 9, 'et_min_samples_split': 7, 'et_max_features': 'log2'}. Best is trial 8 with value: 0.0016425890967256217.


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step


[I 2025-03-29 15:16:33,492] Trial 14 finished with value: 0.001744727534457605 and parameters: {'lstm_units': 72, 'lstm_lr': 0.009975528724033648, 'batch_size': 64, 'epochs': 15, 'et_n_estimators': 182, 'et_max_depth': 8, 'et_min_samples_split': 8, 'et_max_features': 'log2'}. Best is trial 8 with value: 0.0016425890967256217.


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step


[I 2025-03-29 15:18:02,863] Trial 15 finished with value: 0.0016472698838778106 and parameters: {'lstm_units': 126, 'lstm_lr': 0.0005146674443896945, 'batch_size': 64, 'epochs': 25, 'et_n_estimators': 200, 'et_max_depth': 9, 'et_min_samples_split': 2, 'et_max_features': 'log2'}. Best is trial 8 with value: 0.0016425890967256217.


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step


[I 2025-03-29 15:19:41,656] Trial 16 finished with value: 0.0015650232753677221 and parameters: {'lstm_units': 109, 'lstm_lr': 0.0020624883080611884, 'batch_size': 32, 'epochs': 15, 'et_n_estimators': 136, 'et_max_depth': 10, 'et_min_samples_split': 6, 'et_max_features': 'log2'}. Best is trial 16 with value: 0.0015650232753677221.


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step


[I 2025-03-29 15:22:52,949] Trial 17 finished with value: 0.0021473580985102527 and parameters: {'lstm_units': 91, 'lstm_lr': 0.002029070322664711, 'batch_size': 32, 'epochs': 30, 'et_n_estimators': 135, 'et_max_depth': 7, 'et_min_samples_split': 6, 'et_max_features': 'log2'}. Best is trial 16 with value: 0.0015650232753677221.


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step


[I 2025-03-29 15:25:07,780] Trial 18 finished with value: 0.001724239643491734 and parameters: {'lstm_units': 110, 'lstm_lr': 0.0005351069245463085, 'batch_size': 32, 'epochs': 21, 'et_n_estimators': 94, 'et_max_depth': 9, 'et_min_samples_split': 6, 'et_max_features': 'log2'}. Best is trial 16 with value: 0.0015650232753677221.


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step


[I 2025-03-29 15:28:16,872] Trial 19 finished with value: 0.0021793465420821825 and parameters: {'lstm_units': 108, 'lstm_lr': 0.005225431867051273, 'batch_size': 32, 'epochs': 29, 'et_n_estimators': 173, 'et_max_depth': 7, 'et_min_samples_split': 4, 'et_max_features': 'log2'}. Best is trial 16 with value: 0.0015650232753677221.


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step


[I 2025-03-29 15:29:58,610] Trial 20 finished with value: 0.0016661454239736777 and parameters: {'lstm_units': 68, 'lstm_lr': 0.002490805080480492, 'batch_size': 32, 'epochs': 15, 'et_n_estimators': 125, 'et_max_depth': 9, 'et_min_samples_split': 8, 'et_max_features': 'log2'}. Best is trial 16 with value: 0.0015650232753677221.


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step


[I 2025-03-29 15:30:59,609] Trial 21 finished with value: 0.0016432517066945571 and parameters: {'lstm_units': 116, 'lstm_lr': 0.0008050856123925385, 'batch_size': 64, 'epochs': 15, 'et_n_estimators': 177, 'et_max_depth': 10, 'et_min_samples_split': 6, 'et_max_features': 'log2'}. Best is trial 16 with value: 0.0015650232753677221.


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step


[I 2025-03-29 15:31:58,665] Trial 22 finished with value: 0.001647108554618663 and parameters: {'lstm_units': 117, 'lstm_lr': 0.0007626659811231377, 'batch_size': 64, 'epochs': 16, 'et_n_estimators': 142, 'et_max_depth': 10, 'et_min_samples_split': 6, 'et_max_features': 'log2'}. Best is trial 16 with value: 0.0015650232753677221.


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step


[I 2025-03-29 15:34:30,153] Trial 23 finished with value: 0.0015657263140411272 and parameters: {'lstm_units': 107, 'lstm_lr': 0.0007509069126576348, 'batch_size': 32, 'epochs': 23, 'et_n_estimators': 167, 'et_max_depth': 10, 'et_min_samples_split': 4, 'et_max_features': 'log2'}. Best is trial 16 with value: 0.0015650232753677221.


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step


[I 2025-03-29 15:37:00,752] Trial 24 finished with value: 0.0015329320526116107 and parameters: {'lstm_units': 90, 'lstm_lr': 0.004364772840669995, 'batch_size': 32, 'epochs': 23, 'et_n_estimators': 167, 'et_max_depth': 9, 'et_min_samples_split': 4, 'et_max_features': 'log2'}. Best is trial 24 with value: 0.0015329320526116107.


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step


[I 2025-03-29 15:40:09,224] Trial 25 finished with value: 0.0017117964868389533 and parameters: {'lstm_units': 85, 'lstm_lr': 0.002151868651881023, 'batch_size': 32, 'epochs': 31, 'et_n_estimators': 162, 'et_max_depth': 8, 'et_min_samples_split': 3, 'et_max_features': 'log2'}. Best is trial 24 with value: 0.0015329320526116107.


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step


[I 2025-03-29 15:42:38,384] Trial 26 finished with value: 0.0016009329463151218 and parameters: {'lstm_units': 93, 'lstm_lr': 0.00030967236118288214, 'batch_size': 32, 'epochs': 24, 'et_n_estimators': 147, 'et_max_depth': 10, 'et_min_samples_split': 4, 'et_max_features': 'sqrt'}. Best is trial 24 with value: 0.0015329320526116107.


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step


[I 2025-03-29 15:45:38,345] Trial 27 finished with value: 0.0015953229814201534 and parameters: {'lstm_units': 103, 'lstm_lr': 0.0008950592362671808, 'batch_size': 32, 'epochs': 27, 'et_n_estimators': 131, 'et_max_depth': 9, 'et_min_samples_split': 2, 'et_max_features': 'log2'}. Best is trial 24 with value: 0.0015329320526116107.


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step


[I 2025-03-29 15:47:34,536] Trial 28 finished with value: 0.001501043851207137 and parameters: {'lstm_units': 63, 'lstm_lr': 0.0017480917852129142, 'batch_size': 32, 'epochs': 18, 'et_n_estimators': 166, 'et_max_depth': 10, 'et_min_samples_split': 3, 'et_max_features': 'log2'}. Best is trial 28 with value: 0.001501043851207137.


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step


[I 2025-03-29 15:49:35,215] Trial 29 finished with value: 0.0020161468774083555 and parameters: {'lstm_units': 72, 'lstm_lr': 0.00865274341386862, 'batch_size': 32, 'epochs': 18, 'et_n_estimators': 149, 'et_max_depth': 7, 'et_min_samples_split': 3, 'et_max_features': 'log2'}. Best is trial 28 with value: 0.001501043851207137.


Best Params: {'lstm_units': 63, 'lstm_lr': 0.0017480917852129142, 'batch_size': 32, 'epochs': 18, 'et_n_estimators': 166, 'et_max_depth': 10, 'et_min_samples_split': 3, 'et_max_features': 'log2'}


In [37]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import LSTM, Dense
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import time

# Configure GPU
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        tf.config.experimental.set_memory_growth(gpus[0], True)
    except RuntimeError as e:
        print(e)

# Best parameters (from your Optuna study)
best_params = {
    'lstm_units': 63,
    'lstm_lr': 0.0017480917852129142,
    'batch_size': 32,
    'epochs': 18,
    'et_n_estimators': 166,
    'et_max_depth': 10,
    'et_min_samples_split': 3,
    'et_max_features': 'log2'
}

# Custom MAPE calculation
def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / (y_true + 1e-10))) * 100  # Added small constant to avoid division by zero

# LSTM training function with timing
def train_lstm(X_train, Y_train, units, layers, lr, batch_size, epochs):
    start_time = time.time()

    model = tf.keras.Sequential()
    for _ in range(layers - 1):
        model.add(LSTM(units, return_sequences=True))
    model.add(LSTM(units))
    model.add(Dense(1))
    model.compile(optimizer=tf.keras.optimizers.Adam(lr), loss='mse')

    history = model.fit(
        X_train, Y_train,
        batch_size=batch_size,
        epochs=epochs,
        verbose=1
    )

    training_time = time.time() - start_time
    return model, history, training_time

# --- PHASE 1: Train All LSTM Models ---
lstm_models = {}
lstm_train_times = {}
lstm_pred_times = {'train': [], 'val': [], 'test': []}

print("=== TRAINING LSTM MODELS ===")
for layers in [2, 3, 5]:
    print(f"\nTraining LSTM with {layers} layers...")
    model, _, train_time = train_lstm(
        X_train=np.expand_dims(X_train, -1),
        Y_train=Y_train,
        units=best_params['lstm_units'],
        layers=layers,
        lr=best_params['lstm_lr'],
        batch_size=best_params['batch_size'],
        epochs=best_params['epochs']
    )
    lstm_models[f'lstm_{layers}layers'] = model
    lstm_train_times[f'lstm_{layers}layers'] = train_time

    # Time predictions for each dataset
    for dataset, x_data in [('train', X_train), ('val', X_val), ('test', X_test)]:
        start_time = time.time()
        model.predict(np.expand_dims(x_data, -1), verbose=0)
        lstm_pred_times[dataset].append(time.time() - start_time)

# --- PHASE 2: Prepare Stacked Features ---
print("\n=== PREPARING STACKED FEATURES ===")
def get_stacked_features(X):
    start_time = time.time()
    features = np.column_stack([
        model.predict(np.expand_dims(X, -1), verbose=0).flatten()
        for model in lstm_models.values()
    ])
    return features, time.time() - start_time

X_train_et, train_stack_time = get_stacked_features(X_train)
X_val_et, val_stack_time = get_stacked_features(X_val)
X_test_et, test_stack_time = get_stacked_features(X_test)

# --- PHASE 3: Train Extra Trees ---
print("\n=== TRAINING EXTRA TREES ===")
start_time = time.time()
et_model = ExtraTreesRegressor(
    n_estimators=best_params['et_n_estimators'],
    max_depth=best_params['et_max_depth'],
    min_samples_split=best_params['et_min_samples_split'],
    max_features=best_params['et_max_features'],
    random_state=42,
    n_jobs=-1
)
et_model.fit(X_train_et, Y_train)
et_train_time = time.time() - start_time

# --- PHASE 4: Make Predictions ---
print("\n=== MAKING PREDICTIONS ===")
def timed_predict(model, X):
    start_time = time.time()
    preds = model.predict(X)
    return preds, time.time() - start_time

Y_train_pred, train_pred_time = timed_predict(et_model, X_train_et)
Y_val_pred, val_pred_time = timed_predict(et_model, X_val_et)
Y_test_pred, test_pred_time = timed_predict(et_model, X_test_et)

# --- PHASE 5: Calculate Metrics ---
def calculate_all_metrics(y_true, y_pred):
    return {
        'MAE': mean_absolute_error(y_true, y_pred),
        'MSE': mean_squared_error(y_true, y_pred),
        'RMSE': np.sqrt(mean_squared_error(y_true, y_pred)),
        'R2': r2_score(y_true, y_pred),
        'MAPE': mean_absolute_percentage_error(y_true, y_pred)
    }

metrics_train = calculate_all_metrics(Y_train, Y_train_pred)
metrics_val = calculate_all_metrics(Y_val, Y_val_pred)
metrics_test = calculate_all_metrics(Y_test, Y_test_pred)

# --- PHASE 6: Print Comprehensive Results ---
print("\n=== FINAL RESULTS ===")

# 1. Timing Metrics
print("\n=== TIMING METRICS (seconds) ===")
print(f"{'LSTM Training Times':<25} {'2L':<8} {'3L':<8} {'5L':<8}")
print(f"{'':<25} {lstm_train_times['lstm_2layers']:<8.2f} {lstm_train_times['lstm_3layers']:<8.2f} {lstm_train_times['lstm_5layers']:<8.2f}")

print("\nLSTM Prediction Times per Dataset:")
print(f"{'Dataset':<15} {'2L':<8} {'3L':<8} {'5L':<8}")
for dataset in ['train', 'val', 'test']:
    times = lstm_pred_times[dataset]
    print(f"{dataset:<15} {times[0]:<8.4f} {times[1]:<8.4f} {times[2]:<8.4f}")

print(f"\n{'Feature Stacking Time':<30} {train_stack_time:.4f} (train) | {val_stack_time:.4f} (val) | {test_stack_time:.4f} (test)")
print(f"{'Extra Trees Training Time':<30} {et_train_time:.2f}")
print(f"{'Extra Trees Prediction Time':<30} {train_pred_time:.4f} (train) | {val_pred_time:.4f} (val) | {test_pred_time:.4f} (test)")

# 2. Performance Metrics
print("\n=== PERFORMANCE METRICS ===")
def print_metrics(name, metrics):
    print(f"\n{name}:")
    print(f"{'MAE':<10} {metrics['MAE']:.4f}")
    print(f"{'MSE':<10} {metrics['MSE']:.4f}")
    print(f"{'RMSE':<10} {metrics['RMSE']:.4f}")
    print(f"{'R2':<10} {metrics['R2']:.4f}")
    print(f"{'MAPE':<10} {metrics['MAPE']:.2f}%")

print_metrics("Training Set", metrics_train)
print_metrics("Validation Set", metrics_val)
print_metrics("Test Set", metrics_test)

# 3. Feature Importance
print("\n=== FEATURE IMPORTANCES ===")
for i, (name, importance) in enumerate(zip(lstm_models.keys(), et_model.feature_importances_)):
    print(f"{name:<15} {importance:.4f}")

=== TRAINING LSTM MODELS ===

Training LSTM with 2 layers...
Epoch 1/18
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - loss: 0.0535
Epoch 2/18
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - loss: 4.5853e-05
Epoch 3/18
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 4.0391e-05
Epoch 4/18
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 3.7746e-05
Epoch 5/18
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 4.0562e-05
Epoch 6/18
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 4.0531e-05
Epoch 7/18
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 3.4114e-05
Epoch 8/18
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 2.9513e-05
Epoch 9/18
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 3.0090e-0

## BOHB

In [None]:
!pip install ConfigSpace

Collecting ConfigSpace
  Downloading configspace-1.2.1.tar.gz (130 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/131.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m131.0/131.0 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: ConfigSpace
  Building wheel for ConfigSpace (pyproject.toml) ... [?25l[?25hdone
  Created wheel for ConfigSpace: filename=configspace-1.2.1-py3-none-any.whl size=115990 sha256=2223b63ddd46f77450a71d7a4c4838e63b88159927e3a97695871e14fbaea187
  Stored in directory: /root/.cache/pip/wheels/11/0f/36/d5027c3eeb038827889830f7efbe6a1bad8956b3eb44ab2f44
Successfully built ConfigSpace
Installing collected packages: ConfigSpace
Successfully installed ConfigSpace-1.2.1


In [None]:
!pip install hpbandster

Collecting hpbandster
  Downloading hpbandster-0.7.4.tar.gz (51 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/51.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.3/51.3 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting Pyro4 (from hpbandster)
  Downloading Pyro4-4.82-py2.py3-none-any.whl.metadata (2.2 kB)
Collecting serpent (from hpbandster)
  Downloading serpent-1.41-py3-none-any.whl.metadata (5.8 kB)
Collecting netifaces (from hpbandster)
  Downloading netifaces-0.11.0.tar.gz (30 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading Pyro4-4.82-py2.py3-none-any.whl (89 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m90.0/90.0 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading serpent-1.41-py3-none-any.whl (9.6 kB)
Building wheels for collected packages: hpbandster, netifaces
  Building whe

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import ConfigSpace as CS
import ConfigSpace.hyperparameters as CSH
import hpbandster.core.nameserver as hpns
from hpbandster.optimizers import BOHB
from hpbandster.core.worker import Worker
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error
import time

# Check for GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define LSTM Model
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])
        return out

# Function to calculate metrics
def calculate_metrics(y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    mape = mean_absolute_percentage_error(y_true, y_pred) * 100
    return mae, mse, rmse, r2, mape

# Convert datasets to PyTorch tensors and move to GPU
Y_train_torch = torch.tensor(Y_train.values, dtype=torch.float32).unsqueeze(1).to(device)
Y_val_torch = torch.tensor(Y_val.values, dtype=torch.float32).unsqueeze(1).to(device)
Y_test_torch = torch.tensor(Y_test.values, dtype=torch.float32).unsqueeze(1).to(device)

X_train_torch = torch.tensor(X_train.values, dtype=torch.float32).unsqueeze(1).to(device)
X_val_torch = torch.tensor(X_val.values, dtype=torch.float32).unsqueeze(1).to(device)
X_test_torch = torch.tensor(X_test.values, dtype=torch.float32).unsqueeze(1).to(device)

# LSTM Configurations
lstm_layers = [2, 3, 5]
hidden_dim = 64
output_dim = 1
input_dim = X_train.shape[1]

# Dictionary to store LSTM feature representations
lstm_features = []

for num_layers in lstm_layers:
    print(f"Training LSTM with {num_layers} layers...")

    lstm_model = LSTMModel(input_dim, hidden_dim, num_layers, output_dim).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(lstm_model.parameters(), lr=0.001)
    num_epochs = 35

    start_time = time.time()
    for epoch in range(num_epochs):
        lstm_model.train()
        optimizer.zero_grad()
        outputs = lstm_model(X_train_torch)
        loss = criterion(outputs, Y_train_torch)
        loss.backward()
        optimizer.step()
    train_time = time.time() - start_time

    # Extract Feature Representations
    lstm_model.eval()
    with torch.no_grad():
        val_start = time.time()
        train_features = lstm_model(X_train_torch).cpu().numpy()
        val_features = lstm_model(X_val_torch).cpu().numpy()
        val_time = time.time() - val_start

        test_start = time.time()
        test_features = lstm_model(X_test_torch).cpu().numpy()
        test_time = time.time() - test_start

    lstm_features.append((train_features, val_features, test_features, train_time, val_time, test_time))

# Concatenate Features from All Layers
final_train_features = np.hstack([feat[0] for feat in lstm_features])
final_val_features = np.hstack([feat[1] for feat in lstm_features])
final_test_features = np.hstack([feat[2] for feat in lstm_features])

# Record Time for Each Stage
total_train_time = sum([feat[3] for feat in lstm_features])
total_val_time = sum([feat[4] for feat in lstm_features])
total_test_time = sum([feat[5] for feat in lstm_features])

# Define ConfigSpace for BOHB
def get_config_space():
    cs = CS.ConfigurationSpace()
    cs.add_hyperparameter(CSH.UniformIntegerHyperparameter("n_estimators", 50, 500, default_value=100))
    cs.add_hyperparameter(CSH.UniformIntegerHyperparameter("max_depth", 3, 15, default_value=6))
    cs.add_hyperparameter(CSH.UniformIntegerHyperparameter("min_samples_split", 2, 10, default_value=2))
    cs.add_hyperparameter(CSH.UniformIntegerHyperparameter("min_samples_leaf", 1, 5, default_value=1))
    return cs

# BOHB Worker for Extra Trees
class ETWorker(Worker):
    def compute(self, config, budget, **kwargs):
        model = ExtraTreesRegressor(
            n_estimators=config["n_estimators"],
            max_depth=config["max_depth"],
            min_samples_split=config["min_samples_split"],
            min_samples_leaf=config["min_samples_leaf"],
            random_state=42
        )
        model.fit(final_train_features, Y_train)
        Y_val_pred = model.predict(final_val_features)
        mae = mean_absolute_error(Y_val, Y_val_pred)
        return {"loss": mae, "info": config}

# Run BOHB
NS = hpns.NameServer(run_id="lstm_et_bohb", host="127.0.0.2", port=None)
NS.start()

worker = ETWorker(nameserver="127.0.0.2", run_id="lstm_et_bohb")
worker.run(background=True)

bohb = BOHB(configspace=get_config_space(), run_id="lstm_et_bohb", nameserver="127.0.0.2", min_budget=1, max_budget=3)
res = bohb.run(n_iterations=50)
bohb.shutdown()
NS.shutdown()

# Train Best Extra Trees Model
best_config = res.get_incumbent_id()
best_params = res.get_id2config_mapping()[best_config]["config"]

best_et_model = ExtraTreesRegressor(
    n_estimators=best_params["n_estimators"],
    max_depth=best_params["max_depth"],
    min_samples_split=best_params["min_samples_split"],
    min_samples_leaf=best_params["min_samples_leaf"],
    random_state=42
)

best_et_model.fit(final_train_features, Y_train)

# Predictions
Y_train_pred = best_et_model.predict(final_train_features)
Y_val_pred = best_et_model.predict(final_val_features)
Y_test_pred = best_et_model.predict(final_test_features)

# Calculate Metrics
train_metrics = calculate_metrics(Y_train, Y_train_pred)
val_metrics = calculate_metrics(Y_val, Y_val_pred)
test_metrics = calculate_metrics(Y_test, Y_test_pred)

# Print Results
print("Train Metrics:", train_metrics, "Time:", total_train_time)
print("Validation Metrics:", val_metrics, "Time:", total_val_time)
print("Test Metrics:", test_metrics, "Time:", total_test_time)

Training LSTM with 2 layers...
Training LSTM with 3 layers...
Training LSTM with 5 layers...
Train Metrics: (0.002565484193522361, 1.3337153721626793e-05, 0.0036520068074452974, 0.9999237968401418, 0.8055239074732535) Time: 36.16904854774475
Validation Metrics: (0.13687320290358435, 0.02440584209047576, 0.1562236924748476, -3.1495642016183316, 7.660723872899469) Time: 0.40578222274780273
Test Metrics: (0.4051772716636299, 0.17020369186378098, 0.41255750128167706, -27.20243689579124, 19.980381794560085) Time: 0.058008670806884766


# RandomForest

## Intial

In [13]:
import time
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.ensemble import RandomForestRegressor  # Changed from ExtraTreesRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Enable GPU for TensorFlow
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        tf.config.experimental.set_memory_growth(gpus[0], True)
        print("GPU activated for TensorFlow!")
    except RuntimeError as e:
        print(e)

# Function to define and train an LSTM model on GPU (unchanged)
def train_lstm(X_train, Y_train, X_val, Y_val, layers):
    with tf.device('/GPU:0'):
        model = Sequential()
        model.add(LSTM(64, return_sequences=(layers > 1), input_shape=(X_train.shape[1], 1)))
        for _ in range(layers - 1):
            model.add(LSTM(64, return_sequences=(_ < layers - 2)))
        model.add(Dense(1))

        model.compile(optimizer='adam', loss='mse')
        model.fit(X_train, Y_train, validation_data=(X_val, Y_val), epochs=20, batch_size=16, verbose=0)
        return model

# Reshaping input for LSTM (unchanged)
X_train_r = np.expand_dims(X_train, axis=-1)
X_val_r = np.expand_dims(X_val, axis=-1)
X_test_r = np.expand_dims(X_test, axis=-1)

# Initialize timing dictionary (unchanged)
times = {}

# Train Phase (includes training all LSTMs and Random Forest)
start_train_time = time.time()

# Train 2, 3, and 5-layer LSTM models (unchanged)
lstm_models = {}
lstm_predictions = {}

for layers in [2, 3, 5]:
    model = train_lstm(X_train_r, Y_train, X_val_r, Y_val, layers)
    lstm_models[layers] = model

# Generate predictions from all LSTM models (unchanged)
for layers in [2, 3, 5]:
    Y_train_pred = lstm_models[layers].predict(X_train_r)
    Y_val_pred = lstm_models[layers].predict(X_val_r)
    Y_test_pred = lstm_models[layers].predict(X_test_r)
    lstm_predictions[layers] = (Y_train_pred, Y_val_pred, Y_test_pred)

# Prepare input for Random Forest (unchanged except variable names)
X_train_rf = np.column_stack([lstm_predictions[layers][0] for layers in [2, 3, 5]])
X_val_rf = np.column_stack([lstm_predictions[layers][1] for layers in [2, 3, 5]])
X_test_rf = np.column_stack([lstm_predictions[layers][2] for layers in [2, 3, 5]])

# Train Random Forest model (modified)
rf_model = RandomForestRegressor(n_estimators=100,
                               max_depth=None,
                               min_samples_split=2,
                               min_samples_leaf=1,
                               random_state=42,
                               n_jobs=-1)  # Using all available cores
rf_model.fit(X_train_rf, Y_train)

times['Total Train Time'] = time.time() - start_train_time

# Validation Phase (unchanged except variable names)
start_val_time = time.time()
Y_val_pred_rf = rf_model.predict(X_val_rf)
times['Total Validate Time'] = time.time() - start_val_time

# Test Phase (unchanged except variable names)
start_test_time = time.time()
Y_test_pred_rf = rf_model.predict(X_test_rf)
times['Total Test Time'] = time.time() - start_test_time

# Function to calculate metrics (unchanged)
def compute_metrics(y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    return mae, mse, rmse, r2, mape

# Compute and print metrics (unchanged except variable names)
metrics_train = compute_metrics(Y_train, rf_model.predict(X_train_rf))
metrics_val = compute_metrics(Y_val, Y_val_pred_rf)
metrics_test = compute_metrics(Y_test, Y_test_pred_rf)

print("\nPerformance Metrics:")
print(f"Train Metrics: MAE={metrics_train[0]:.4f}, MSE={metrics_train[1]:.4f}, RMSE={metrics_train[2]:.4f}, R²={metrics_train[3]:.4f}, MAPE={metrics_train[4]:.2f}%")
print(f"Validation Metrics: MAE={metrics_val[0]:.4f}, MSE={metrics_val[1]:.4f}, RMSE={metrics_val[2]:.4f}, R²={metrics_val[3]:.4f}, MAPE={metrics_val[4]:.2f}%")
print(f"Test Metrics: MAE={metrics_test[0]:.4f}, MSE={metrics_test[1]:.4f}, RMSE={metrics_test[2]:.4f}, R²={metrics_test[3]:.4f}, MAPE={metrics_test[4]:.2f}%")

# Print timing information (unchanged)
print("\nTiming Information:")
for phase, t in times.items():
    print(f"{phase}: {t:.2f} seconds")


GPU activated for TensorFlow!
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step

Performance Metrics:
Train Metrics: MAE=0.0011, MSE=0.0000, RMSE=0.0017, R²=1.0000, MAPE=0.33%
Validation Metrics: MAE=0.1375, MSE=0.0246, RMSE=0.1568, R²=-3.1781, MAPE=7.70%
Test Metrics: MAE=0.4058, MSE=0.1707, RMSE=0.4132, R²=-27.2845, MAPE=20.01%

Timing Information:
Total Train Time: 267.27 seconds
T

## Optuna

In [None]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.2.1-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.2-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.2.1-py3-none-any.whl (383 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m383.6/383.6 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.15.2-py3-none-any.whl (231 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m231.9/231.9 kB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.15.2 colorlog-6.9.0 optuna-4.2.1


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import LSTM, Dense
from sklearn.ensemble import RandomForestRegressor
import optuna
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import time

# Configure GPU
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Restrict TensorFlow to only allocate required GPU memory
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(f"{len(gpus)} Physical GPUs, {len(logical_gpus)} Logical GPUs")
    except RuntimeError as e:
        print(e)

# Early stopping callback
from tensorflow.keras.callbacks import EarlyStopping

# Function to train LSTM model with GPU acceleration
def train_lstm(X_train, Y_train, X_val, Y_val, units, layers, learning_rate, batch_size, epochs):
    # Use strategy scope for multi-GPU support (if available)
    strategy = tf.distribute.MirroredStrategy()
    with strategy.scope():
        model = keras.Sequential()
        for _ in range(layers - 1):
            model.add(LSTM(units, return_sequences=True))
        model.add(LSTM(units))
        model.add(Dense(1))

        model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
                     loss="mse")

    early_stopping = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)

    # Convert data to TensorFlow tensors
    train_dataset = tf.data.Dataset.from_tensor_slices((X_train, Y_train))
    train_dataset = train_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

    val_dataset = tf.data.Dataset.from_tensor_slices((X_val, Y_val))
    val_dataset = val_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

    start_time = time.time()
    history = model.fit(train_dataset,
                       validation_data=val_dataset,
                       epochs=epochs,
                       verbose=0,
                       callbacks=[early_stopping])
    lstm_train_time = time.time() - start_time

    return model, history, lstm_train_time

# Updated objective function for Random Forest
def objective(trial):
    # LSTM parameters
    units = trial.suggest_int("lstm_units", 32, 128, step=16)
    layers = trial.suggest_categorical("lstm_layers", [2, 3, 5])
    learning_rate = trial.suggest_float("lstm_learning_rate", 1e-4, 1e-2, log=True)
    batch_size = trial.suggest_categorical("lstm_batch_size", [16, 32, 64])
    epochs = trial.suggest_int("lstm_epochs", 10, 50, step=10)

    # Reshape and train LSTM
    X_train_r = np.expand_dims(X_train, axis=-1)
    X_val_r = np.expand_dims(X_val, axis=-1)
    model, _, lstm_train_time = train_lstm(X_train_r, Y_train, X_val_r, Y_val,
                                         units, layers, learning_rate, batch_size, epochs)
    Y_val_pred_lstm = model.predict(X_val_r, verbose=0).flatten()

    # Prepare data for Random Forest
    X_val_rf = np.column_stack([Y_val_pred_lstm])

    # Random Forest hyperparameters
    rf_params = {
        "n_estimators": trial.suggest_int("rf_n_estimators", 50, 200, step=50),
        "max_depth": trial.suggest_int("rf_max_depth", 3, 10),
        "min_samples_split": trial.suggest_int("rf_min_samples_split", 2, 10),
        "min_samples_leaf": trial.suggest_int("rf_min_samples_leaf", 1, 5),
        "max_features": trial.suggest_categorical("rf_max_features", ['sqrt', 'log2']),
        "random_state": 42,
        "n_jobs": -1
    }

    # Train Random Forest
    start_time = time.time()
    rf_model = RandomForestRegressor(**rf_params)
    rf_model.fit(X_val_rf, Y_val)
    rf_train_time = time.time() - start_time

    # Predict and evaluate
    Y_val_pred_rf = rf_model.predict(X_val_rf)
    rmse = np.sqrt(mean_squared_error(Y_val, Y_val_pred_rf))

    print(f"LSTM Training Time: {lstm_train_time:.2f} seconds")
    print(f"Random Forest Training Time: {rf_train_time:.2f} seconds")

    return rmse

# Run Optuna study
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=25)
print("Best hyperparameters:", study.best_params)

# Final Model Implementation with Random Forest
best_params = study.best_params
units = best_params["lstm_units"]
layers = best_params["lstm_layers"]
learning_rate = best_params["lstm_learning_rate"]
batch_size = best_params["lstm_batch_size"]
epochs = best_params["lstm_epochs"]
n_estimators = best_params["rf_n_estimators"]
max_depth = best_params["rf_max_depth"]
min_samples_split = best_params["rf_min_samples_split"]
min_samples_leaf = best_params["rf_min_samples_leaf"]
max_features = best_params["rf_max_features"]

# Reshape input for LSTM
X_train_r = np.expand_dims(X_train, axis=-1)
X_val_r = np.expand_dims(X_val, axis=-1)
X_test_r = np.expand_dims(X_test, axis=-1)

# Train final LSTM model
final_lstm, lstm_train_time = train_lstm(X_train_r, Y_train, X_val_r, Y_val,
                                       units, layers, learning_rate, batch_size, epochs)

# Predictions with timing
start_time = time.time()
Y_train_pred_lstm = final_lstm.predict(X_train_r, verbose=0).flatten()
lstm_train_pred_time = time.time() - start_time

start_time = time.time()
Y_val_pred_lstm = final_lstm.predict(X_val_r, verbose=0).flatten()
lstm_val_pred_time = time.time() - start_time

start_time = time.time()
Y_test_pred_lstm = final_lstm.predict(X_test_r, verbose=0).flatten()
lstm_test_pred_time = time.time() - start_time

# Prepare data for Random Forest
X_train_rf = np.column_stack([Y_train_pred_lstm])
X_val_rf = np.column_stack([Y_val_pred_lstm])
X_test_rf = np.column_stack([Y_test_pred_lstm])

# Random Forest parameters
rf_params = {
    "n_estimators": n_estimators,
    "max_depth": max_depth,
    "min_samples_split": min_samples_split,
    "min_samples_leaf": min_samples_leaf,
    "max_features": max_features,
    "random_state": 42,
    "n_jobs": -1
}

# Train final Random Forest model
start_time = time.time()
final_rf = RandomForestRegressor(**rf_params)
final_rf.fit(X_train_rf, Y_train)
rf_train_time = time.time() - start_time

# Random Forest Predictions with timing
start_time = time.time()
Y_train_pred_rf = final_rf.predict(X_train_rf)
rf_train_pred_time = time.time() - start_time

start_time = time.time()
Y_val_pred_rf = final_rf.predict(X_val_rf)
rf_val_pred_time = time.time() - start_time

start_time = time.time()
Y_test_pred_rf = final_rf.predict(X_test_rf)
rf_test_pred_time = time.time() - start_time

# Compute Metrics
def compute_metrics(y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    return mae, mse, rmse, r2, mape

metrics_train = compute_metrics(Y_train, Y_train_pred_rf)
metrics_val = compute_metrics(Y_val, Y_val_pred_rf)
metrics_test = compute_metrics(Y_test, Y_test_pred_rf)

# Print results
print("\nFinal Model Performance:")
print(f"LSTM Training Time: {lstm_train_time:.2f} seconds")
print(f"Random Forest Training Time: {rf_train_time:.2f} seconds\n")

print(f"LSTM Train Prediction Time: {lstm_train_pred_time:.4f} seconds")
print(f"LSTM Validation Prediction Time: {lstm_val_pred_time:.4f} seconds")
print(f"LSTM Test Prediction Time: {lstm_test_pred_time:.4f} seconds\n")

print(f"Random Forest Train Prediction Time: {rf_train_pred_time:.4f} seconds")
print(f"Random Forest Validation Prediction Time: {rf_val_pred_time:.4f} seconds")
print(f"Random Forest Test Prediction Time: {rf_test_pred_time:.4f} seconds\n")

print("Train Set Metrics:")
print(f"MAE: {metrics_train[0]:.4f}, MSE: {metrics_train[1]:.4f}, RMSE: {metrics_train[2]:.4f}, R²: {metrics_train[3]:.4f}, MAPE: {metrics_train[4]:.2f}%")

print("\nValidation Set Metrics:")
print(f"MAE: {metrics_val[0]:.4f}, MSE: {metrics_val[1]:.4f}, RMSE: {metrics_val[2]:.4f}, R²: {metrics_val[3]:.4f}, MAPE: {metrics_val[4]:.2f}%")

print("\nTest Set Metrics:")
print(f"MAE: {metrics_test[0]:.4f}, MSE: {metrics_test[1]:.4f}, RMSE: {metrics_test[2]:.4f}, R²: {metrics_test[3]:.4f}, MAPE: {metrics_test[4]:.2f}%")

[I 2025-03-29 13:25:56,750] A new study created in memory with name: no-name-385ec998-b0d4-44d2-a840-adc6f4d6dffc


1 Physical GPUs, 1 Logical GPUs


[I 2025-03-29 13:26:07,548] Trial 0 finished with value: 0.001961492249323409 and parameters: {'lstm_units': 80, 'lstm_layers': 2, 'lstm_learning_rate': 0.0005490612684285567, 'lstm_batch_size': 64, 'lstm_epochs': 30, 'rf_n_estimators': 200, 'rf_max_depth': 9, 'rf_min_samples_split': 10, 'rf_min_samples_leaf': 2, 'rf_max_features': 'sqrt'}. Best is trial 0 with value: 0.001961492249323409.


LSTM Training Time: 9.65 seconds
Random Forest Training Time: 0.35 seconds


[I 2025-03-29 13:27:00,040] Trial 1 finished with value: 0.002532995510984966 and parameters: {'lstm_units': 128, 'lstm_layers': 2, 'lstm_learning_rate': 0.008649461900744712, 'lstm_batch_size': 16, 'lstm_epochs': 20, 'rf_n_estimators': 200, 'rf_max_depth': 10, 'rf_min_samples_split': 5, 'rf_min_samples_leaf': 3, 'rf_max_features': 'sqrt'}. Best is trial 0 with value: 0.001961492249323409.


LSTM Training Time: 51.36 seconds
Random Forest Training Time: 0.35 seconds


[I 2025-03-29 13:27:14,342] Trial 2 finished with value: 0.0018125109141041145 and parameters: {'lstm_units': 32, 'lstm_layers': 2, 'lstm_learning_rate': 0.00015907867683047526, 'lstm_batch_size': 64, 'lstm_epochs': 20, 'rf_n_estimators': 200, 'rf_max_depth': 9, 'rf_min_samples_split': 3, 'rf_min_samples_leaf': 2, 'rf_max_features': 'log2'}. Best is trial 2 with value: 0.0018125109141041145.


LSTM Training Time: 13.16 seconds
Random Forest Training Time: 0.38 seconds


[I 2025-03-29 13:27:27,176] Trial 3 finished with value: 0.0022004612651811044 and parameters: {'lstm_units': 128, 'lstm_layers': 3, 'lstm_learning_rate': 0.0004612902879129881, 'lstm_batch_size': 64, 'lstm_epochs': 50, 'rf_n_estimators': 150, 'rf_max_depth': 10, 'rf_min_samples_split': 3, 'rf_min_samples_leaf': 5, 'rf_max_features': 'log2'}. Best is trial 2 with value: 0.0018125109141041145.


LSTM Training Time: 11.69 seconds
Random Forest Training Time: 0.27 seconds


[I 2025-03-29 13:27:47,156] Trial 4 finished with value: 0.002314071819400903 and parameters: {'lstm_units': 80, 'lstm_layers': 3, 'lstm_learning_rate': 0.00026139234420368036, 'lstm_batch_size': 32, 'lstm_epochs': 20, 'rf_n_estimators': 150, 'rf_max_depth': 6, 'rf_min_samples_split': 5, 'rf_min_samples_leaf': 5, 'rf_max_features': 'log2'}. Best is trial 2 with value: 0.0018125109141041145.


LSTM Training Time: 18.83 seconds
Random Forest Training Time: 0.27 seconds


[I 2025-03-29 13:28:32,374] Trial 5 finished with value: 0.01012972899010514 and parameters: {'lstm_units': 80, 'lstm_layers': 3, 'lstm_learning_rate': 0.0031565945674339618, 'lstm_batch_size': 16, 'lstm_epochs': 50, 'rf_n_estimators': 150, 'rf_max_depth': 3, 'rf_min_samples_split': 3, 'rf_min_samples_leaf': 1, 'rf_max_features': 'log2'}. Best is trial 2 with value: 0.0018125109141041145.


LSTM Training Time: 44.08 seconds
Random Forest Training Time: 0.24 seconds


[I 2025-03-29 13:29:32,457] Trial 6 finished with value: 0.002083335544679415 and parameters: {'lstm_units': 48, 'lstm_layers': 2, 'lstm_learning_rate': 0.0007229560965324383, 'lstm_batch_size': 16, 'lstm_epochs': 50, 'rf_n_estimators': 100, 'rf_max_depth': 6, 'rf_min_samples_split': 8, 'rf_min_samples_leaf': 2, 'rf_max_features': 'sqrt'}. Best is trial 2 with value: 0.0018125109141041145.


LSTM Training Time: 59.18 seconds
Random Forest Training Time: 0.17 seconds


[I 2025-03-29 13:30:05,657] Trial 7 finished with value: 0.0041820275858125994 and parameters: {'lstm_units': 96, 'lstm_layers': 2, 'lstm_learning_rate': 0.00018593986136564812, 'lstm_batch_size': 16, 'lstm_epochs': 50, 'rf_n_estimators': 150, 'rf_max_depth': 4, 'rf_min_samples_split': 2, 'rf_min_samples_leaf': 2, 'rf_max_features': 'sqrt'}. Best is trial 2 with value: 0.0018125109141041145.


LSTM Training Time: 32.16 seconds
Random Forest Training Time: 0.24 seconds


[I 2025-03-29 13:30:32,392] Trial 8 finished with value: 0.002264996417367797 and parameters: {'lstm_units': 80, 'lstm_layers': 3, 'lstm_learning_rate': 0.0008498071391575237, 'lstm_batch_size': 32, 'lstm_epochs': 20, 'rf_n_estimators': 150, 'rf_max_depth': 6, 'rf_min_samples_split': 2, 'rf_min_samples_leaf': 2, 'rf_max_features': 'log2'}. Best is trial 2 with value: 0.0018125109141041145.


LSTM Training Time: 25.59 seconds
Random Forest Training Time: 0.26 seconds


[I 2025-03-29 13:31:36,360] Trial 9 finished with value: 0.0020671323691670053 and parameters: {'lstm_units': 32, 'lstm_layers': 3, 'lstm_learning_rate': 0.0003155721349271929, 'lstm_batch_size': 16, 'lstm_epochs': 40, 'rf_n_estimators': 200, 'rf_max_depth': 8, 'rf_min_samples_split': 7, 'rf_min_samples_leaf': 2, 'rf_max_features': 'log2'}. Best is trial 2 with value: 0.0018125109141041145.


LSTM Training Time: 62.71 seconds
Random Forest Training Time: 0.34 seconds


[I 2025-03-29 13:31:57,707] Trial 10 finished with value: 0.002350284861531761 and parameters: {'lstm_units': 48, 'lstm_layers': 5, 'lstm_learning_rate': 0.00010142900158164547, 'lstm_batch_size': 64, 'lstm_epochs': 10, 'rf_n_estimators': 50, 'rf_max_depth': 8, 'rf_min_samples_split': 4, 'rf_min_samples_leaf': 4, 'rf_max_features': 'log2'}. Best is trial 2 with value: 0.0018125109141041145.


LSTM Training Time: 20.08 seconds
Random Forest Training Time: 0.09 seconds


[I 2025-03-29 13:32:06,952] Trial 11 finished with value: 0.0019325352526864848 and parameters: {'lstm_units': 32, 'lstm_layers': 2, 'lstm_learning_rate': 0.0022222910913311526, 'lstm_batch_size': 64, 'lstm_epochs': 30, 'rf_n_estimators': 200, 'rf_max_depth': 8, 'rf_min_samples_split': 10, 'rf_min_samples_leaf': 1, 'rf_max_features': 'sqrt'}. Best is trial 2 with value: 0.0018125109141041145.


LSTM Training Time: 7.97 seconds
Random Forest Training Time: 0.48 seconds


[I 2025-03-29 13:32:25,978] Trial 12 finished with value: 0.001882248738015702 and parameters: {'lstm_units': 32, 'lstm_layers': 2, 'lstm_learning_rate': 0.0022592967657598145, 'lstm_batch_size': 64, 'lstm_epochs': 30, 'rf_n_estimators': 200, 'rf_max_depth': 8, 'rf_min_samples_split': 10, 'rf_min_samples_leaf': 1, 'rf_max_features': 'sqrt'}. Best is trial 2 with value: 0.0018125109141041145.


LSTM Training Time: 17.85 seconds
Random Forest Training Time: 0.36 seconds


[I 2025-03-29 13:32:41,762] Trial 13 finished with value: 0.002276863453899723 and parameters: {'lstm_units': 48, 'lstm_layers': 5, 'lstm_learning_rate': 0.0018409739216420158, 'lstm_batch_size': 64, 'lstm_epochs': 30, 'rf_n_estimators': 100, 'rf_max_depth': 7, 'rf_min_samples_split': 8, 'rf_min_samples_leaf': 1, 'rf_max_features': 'sqrt'}. Best is trial 2 with value: 0.0018125109141041145.


LSTM Training Time: 14.34 seconds
Random Forest Training Time: 0.18 seconds


[I 2025-03-29 13:32:54,172] Trial 14 finished with value: 0.0018424307301031478 and parameters: {'lstm_units': 32, 'lstm_layers': 2, 'lstm_learning_rate': 0.005493042737323932, 'lstm_batch_size': 64, 'lstm_epochs': 10, 'rf_n_estimators': 200, 'rf_max_depth': 9, 'rf_min_samples_split': 6, 'rf_min_samples_leaf': 3, 'rf_max_features': 'log2'}. Best is trial 2 with value: 0.0018125109141041145.


LSTM Training Time: 11.24 seconds
Random Forest Training Time: 0.38 seconds


[I 2025-03-29 13:33:07,542] Trial 15 finished with value: 0.002078040448420761 and parameters: {'lstm_units': 64, 'lstm_layers': 2, 'lstm_learning_rate': 0.00955171072514633, 'lstm_batch_size': 64, 'lstm_epochs': 10, 'rf_n_estimators': 50, 'rf_max_depth': 10, 'rf_min_samples_split': 6, 'rf_min_samples_leaf': 3, 'rf_max_features': 'log2'}. Best is trial 2 with value: 0.0018125109141041145.


LSTM Training Time: 12.52 seconds
Random Forest Training Time: 0.10 seconds


[I 2025-03-29 13:33:30,220] Trial 16 finished with value: 0.002627164556152886 and parameters: {'lstm_units': 64, 'lstm_layers': 5, 'lstm_learning_rate': 0.004791692380475712, 'lstm_batch_size': 64, 'lstm_epochs': 10, 'rf_n_estimators': 200, 'rf_max_depth': 9, 'rf_min_samples_split': 6, 'rf_min_samples_leaf': 4, 'rf_max_features': 'log2'}. Best is trial 2 with value: 0.0018125109141041145.


LSTM Training Time: 21.13 seconds
Random Forest Training Time: 0.37 seconds


[I 2025-03-29 13:33:46,216] Trial 17 finished with value: 0.0020554357369802793 and parameters: {'lstm_units': 112, 'lstm_layers': 2, 'lstm_learning_rate': 0.0013194086272617655, 'lstm_batch_size': 32, 'lstm_epochs': 20, 'rf_n_estimators': 100, 'rf_max_depth': 9, 'rf_min_samples_split': 4, 'rf_min_samples_leaf': 4, 'rf_max_features': 'log2'}. Best is trial 2 with value: 0.0018125109141041145.


LSTM Training Time: 15.04 seconds
Random Forest Training Time: 0.17 seconds


[I 2025-03-29 13:33:59,104] Trial 18 finished with value: 0.002503708857998682 and parameters: {'lstm_units': 64, 'lstm_layers': 2, 'lstm_learning_rate': 0.00010036240527355147, 'lstm_batch_size': 64, 'lstm_epochs': 10, 'rf_n_estimators': 200, 'rf_max_depth': 5, 'rf_min_samples_split': 8, 'rf_min_samples_leaf': 3, 'rf_max_features': 'log2'}. Best is trial 2 with value: 0.0018125109141041145.


LSTM Training Time: 11.80 seconds
Random Forest Training Time: 0.33 seconds


[I 2025-03-29 13:34:18,136] Trial 19 finished with value: 0.0019843956286892017 and parameters: {'lstm_units': 32, 'lstm_layers': 2, 'lstm_learning_rate': 0.005354301118049498, 'lstm_batch_size': 64, 'lstm_epochs': 20, 'rf_n_estimators': 150, 'rf_max_depth': 7, 'rf_min_samples_split': 5, 'rf_min_samples_leaf': 3, 'rf_max_features': 'log2'}. Best is trial 2 with value: 0.0018125109141041145.


LSTM Training Time: 17.99 seconds
Random Forest Training Time: 0.27 seconds


[I 2025-03-29 13:34:56,792] Trial 20 finished with value: 0.0023365571236856923 and parameters: {'lstm_units': 48, 'lstm_layers': 5, 'lstm_learning_rate': 0.00015694538561655054, 'lstm_batch_size': 32, 'lstm_epochs': 40, 'rf_n_estimators': 200, 'rf_max_depth': 9, 'rf_min_samples_split': 3, 'rf_min_samples_leaf': 4, 'rf_max_features': 'log2'}. Best is trial 2 with value: 0.0018125109141041145.


LSTM Training Time: 37.16 seconds
Random Forest Training Time: 0.36 seconds


[I 2025-03-29 13:35:11,873] Trial 21 finished with value: 0.0019049609423117445 and parameters: {'lstm_units': 32, 'lstm_layers': 2, 'lstm_learning_rate': 0.0037804131173137558, 'lstm_batch_size': 64, 'lstm_epochs': 40, 'rf_n_estimators': 200, 'rf_max_depth': 8, 'rf_min_samples_split': 9, 'rf_min_samples_leaf': 1, 'rf_max_features': 'sqrt'}. Best is trial 2 with value: 0.0018125109141041145.


LSTM Training Time: 13.91 seconds
Random Forest Training Time: 0.35 seconds


[I 2025-03-29 13:35:24,829] Trial 22 finished with value: 0.0020595749800439355 and parameters: {'lstm_units': 32, 'lstm_layers': 2, 'lstm_learning_rate': 0.0013959740677776796, 'lstm_batch_size': 64, 'lstm_epochs': 20, 'rf_n_estimators': 200, 'rf_max_depth': 7, 'rf_min_samples_split': 7, 'rf_min_samples_leaf': 2, 'rf_max_features': 'sqrt'}. Best is trial 2 with value: 0.0018125109141041145.


LSTM Training Time: 11.81 seconds
Random Forest Training Time: 0.35 seconds


[I 2025-03-29 13:35:35,667] Trial 23 finished with value: 0.0017643995138562482 and parameters: {'lstm_units': 48, 'lstm_layers': 2, 'lstm_learning_rate': 0.0024060989777901134, 'lstm_batch_size': 64, 'lstm_epochs': 30, 'rf_n_estimators': 200, 'rf_max_depth': 9, 'rf_min_samples_split': 9, 'rf_min_samples_leaf': 1, 'rf_max_features': 'sqrt'}. Best is trial 23 with value: 0.0017643995138562482.


LSTM Training Time: 9.66 seconds
Random Forest Training Time: 0.35 seconds


[I 2025-03-29 13:35:49,060] Trial 24 finished with value: 0.0016432612768511104 and parameters: {'lstm_units': 48, 'lstm_layers': 2, 'lstm_learning_rate': 0.006522190446147382, 'lstm_batch_size': 64, 'lstm_epochs': 10, 'rf_n_estimators': 150, 'rf_max_depth': 10, 'rf_min_samples_split': 7, 'rf_min_samples_leaf': 2, 'rf_max_features': 'log2'}. Best is trial 24 with value: 0.0016432612768511104.


LSTM Training Time: 12.33 seconds
Random Forest Training Time: 0.28 seconds
Best hyperparameters: {'lstm_units': 48, 'lstm_layers': 2, 'lstm_learning_rate': 0.006522190446147382, 'lstm_batch_size': 64, 'lstm_epochs': 10, 'rf_n_estimators': 150, 'rf_max_depth': 10, 'rf_min_samples_split': 7, 'rf_min_samples_leaf': 2, 'rf_max_features': 'log2'}


ValueError: too many values to unpack (expected 2)

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import LSTM, Dense
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import time
import joblib

# Configure GPU
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Restrict TensorFlow to only allocate required GPU memory
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(f"{len(gpus)} Physical GPUs, {len(logical_gpus)} Logical GPUs")
    except RuntimeError as e:
        print(e)

# Early stopping callback
from tensorflow.keras.callbacks import EarlyStopping

# Function to train LSTM model with GPU acceleration
def train_lstm(X_train, Y_train, X_val, Y_val, units, layers, learning_rate, batch_size, epochs):
    # Use strategy scope for multi-GPU support (if available)
    strategy = tf.distribute.MirroredStrategy()
    with strategy.scope():
        model = keras.Sequential()
        for _ in range(layers - 1):
            model.add(LSTM(units, return_sequences=True))
        model.add(LSTM(units))
        model.add(Dense(1))

        model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
                     loss="mse")

    early_stopping = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)

    # Convert data to TensorFlow tensors
    train_dataset = tf.data.Dataset.from_tensor_slices((X_train, Y_train))
    train_dataset = train_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

    val_dataset = tf.data.Dataset.from_tensor_slices((X_val, Y_val))
    val_dataset = val_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

    start_time = time.time()
    history = model.fit(train_dataset,
                       validation_data=val_dataset,
                       epochs=epochs,
                       verbose=1,  # Changed to verbose=1 to see training progress
                       callbacks=[early_stopping])
    train_time = time.time() - start_time

    return model, history, train_time

# Your best parameters from Optuna
best_params = {
    'lstm_units': 48,
    'lstm_layers': 2,
    'lstm_learning_rate': 0.006522190446147382,
    'lstm_batch_size': 64,
    'lstm_epochs': 10,
    'rf_n_estimators': 150,
    'rf_max_depth': 10,
    'rf_min_samples_split': 7,
    'rf_min_samples_leaf': 2,
    'rf_max_features': 'log2'
}

# Extract parameters
units = best_params["lstm_units"]
layers = best_params["lstm_layers"]
learning_rate = best_params["lstm_learning_rate"]
batch_size = best_params["lstm_batch_size"]
epochs = best_params["lstm_epochs"]
n_estimators = best_params["rf_n_estimators"]
max_depth = best_params["rf_max_depth"]
min_samples_split = best_params["rf_min_samples_split"]
min_samples_leaf = best_params["rf_min_samples_leaf"]
max_features = best_params["rf_max_features"]

# Reshape input for LSTM (assuming X_train, X_val, X_test are defined)
X_train_r = np.expand_dims(X_train, axis=-1)
X_val_r = np.expand_dims(X_val, axis=-1)
X_test_r = np.expand_dims(X_test, axis=-1)

# Initialize timing dictionary
timing_metrics = {}

# ========== LSTM Training ==========
print("\n=== Training LSTM Model with Best Parameters ===")
start_time = time.time()
final_lstm, _, lstm_train_time = train_lstm(
    X_train_r, Y_train, X_val_r, Y_val,
    units, layers, learning_rate, batch_size, epochs
)
timing_metrics['lstm_train'] = time.time() - start_time
print(f"LSTM Training Completed in {timing_metrics['lstm_train']:.2f} seconds")

# LSTM Predictions
print("\nGenerating LSTM predictions...")
start_time = time.time()
Y_train_pred_lstm = final_lstm.predict(X_train_r, verbose=0).flatten()
timing_metrics['lstm_train_pred'] = time.time() - start_time

start_time = time.time()
Y_val_pred_lstm = final_lstm.predict(X_val_r, verbose=0).flatten()
timing_metrics['lstm_val_pred'] = time.time() - start_time

start_time = time.time()
Y_test_pred_lstm = final_lstm.predict(X_test_r, verbose=0).flatten()
timing_metrics['lstm_test_pred'] = time.time() - start_time

# ========== Random Forest Preparation ==========
print("\nPreparing data for Random Forest...")
X_train_rf = np.column_stack([Y_train_pred_lstm])
X_val_rf = np.column_stack([Y_val_pred_lstm])
X_test_rf = np.column_stack([Y_test_pred_lstm])

rf_params = {
    "n_estimators": n_estimators,
    "max_depth": max_depth,
    "min_samples_split": min_samples_split,
    "min_samples_leaf": min_samples_leaf,
    "max_features": max_features,
    "random_state": 42,
    "n_jobs": -1
}

# ========== Random Forest Training ==========
print("\n=== Training Random Forest Model ===")
start_time = time.time()
final_rf = RandomForestRegressor(**rf_params)
final_rf.fit(X_train_rf, Y_train)
timing_metrics['rf_train'] = time.time() - start_time
print(f"Random Forest Training Completed in {timing_metrics['rf_train']:.2f} seconds")

# Random Forest Predictions
print("\nGenerating Random Forest predictions...")
start_time = time.time()
Y_train_pred_rf = final_rf.predict(X_train_rf)
timing_metrics['rf_train_pred'] = time.time() - start_time

start_time = time.time()
Y_val_pred_rf = final_rf.predict(X_val_rf)
timing_metrics['rf_val_pred'] = time.time() - start_time

start_time = time.time()
Y_test_pred_rf = final_rf.predict(X_test_rf)
timing_metrics['rf_test_pred'] = time.time() - start_time

# ========== Evaluation ==========
def compute_metrics(y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    mape = np.mean(np.abs((y_true - y_pred) / np.abs(y_true))) * 100  # Robust MAPE
    return mae, mse, rmse, r2, mape

print("\n=== Calculating Metrics ===")
metrics_train = compute_metrics(Y_train, Y_train_pred_rf)
metrics_val = compute_metrics(Y_val, Y_val_pred_rf)
metrics_test = compute_metrics(Y_test, Y_test_pred_rf)

# ========== Feature Importance ==========
print("\n=== Random Forest Feature Importance ===")
importances = final_rf.feature_importances_
for i, importance in enumerate(importances):
    print(f"Feature {i+1} (from LSTM): {importance:.4f}")

# ========== Results ==========
print("\n=== Final Results ===")
print("\nPerformance Metrics:")
print(f"{'Dataset':<15} {'MAE':<10} {'MSE':<10} {'RMSE':<10} {'R²':<10} {'MAPE':<10}")
print(f"{'Train':<15} {metrics_train[0]:<10.4f} {metrics_train[1]:<10.4f} {metrics_train[2]:<10.4f} {metrics_train[3]:<10.4f} {metrics_train[4]:<10.2f}%")
print(f"{'Validation':<15} {metrics_val[0]:<10.4f} {metrics_val[1]:<10.4f} {metrics_val[2]:<10.4f} {metrics_val[3]:<10.4f} {metrics_val[4]:<10.2f}%")
print(f"{'Test':<15} {metrics_test[0]:<10.4f} {metrics_test[1]:<10.4f} {metrics_test[2]:<10.4f} {metrics_test[3]:<10.4f} {metrics_test[4]:<10.2f}%")

print("\nTiming Metrics (seconds):")
print(f"{'Phase':<25} {'Time':<10}")
print(f"{'LSTM Training':<25} {timing_metrics['lstm_train']:<10.2f}")
print(f"{'LSTM Train Pred':<25} {timing_metrics['lstm_train_pred']:<10.4f}")
print(f"{'LSTM Val Pred':<25} {timing_metrics['lstm_val_pred']:<10.4f}")
print(f"{'LSTM Test Pred':<25} {timing_metrics['lstm_test_pred']:<10.4f}")
print(f"{'Random Forest Training':<25} {timing_metrics['rf_train']:<10.2f}")
print(f"{'RF Train Pred':<25} {timing_metrics['rf_train_pred']:<10.4f}")
print(f"{'RF Val Pred':<25} {timing_metrics['rf_val_pred']:<10.4f}")
print(f"{'RF Test Pred':<25} {timing_metrics['rf_test_pred']:<10.4f}")



1 Physical GPUs, 1 Logical GPUs

=== Training LSTM Model with Best Parameters ===
Epoch 1/10
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - loss: 0.0089 - val_loss: 0.0048
Epoch 2/10
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - loss: 0.1075 - val_loss: 0.0061
Epoch 3/10
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 0.0255 - val_loss: 0.0074
Epoch 4/10
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 0.0018 - val_loss: 0.0094
Epoch 5/10
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 0.0227 - val_loss: 0.0043
Epoch 6/10
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 0.0013 - val_loss: 0.0125
Epoch 7/10
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 0.0170 - val_loss: 0.0030
Epoch 8/10
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m

## BOHB

In [14]:
!pip install ConfigSpace

Collecting ConfigSpace
  Downloading configspace-1.2.1.tar.gz (130 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/131.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m131.0/131.0 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: ConfigSpace
  Building wheel for ConfigSpace (pyproject.toml) ... [?25l[?25hdone
  Created wheel for ConfigSpace: filename=configspace-1.2.1-py3-none-any.whl size=115990 sha256=fa9bfc6a7426fcc65565fdeb7ed195773710ff6db091e6f4c947a3df726e7956
  Stored in directory: /root/.cache/pip/wheels/11/0f/36/d5027c3eeb038827889830f7efbe6a1bad8956b3eb44ab2f44
Successfully built ConfigSpace
Installing collected packages: ConfigSpace
Successfully installed ConfigSpace-1.2.1


In [15]:
!pip install hpbandster

Collecting hpbandster
  Downloading hpbandster-0.7.4.tar.gz (51 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/51.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.3/51.3 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting Pyro4 (from hpbandster)
  Downloading Pyro4-4.82-py2.py3-none-any.whl.metadata (2.2 kB)
Collecting serpent (from hpbandster)
  Downloading serpent-1.41-py3-none-any.whl.metadata (5.8 kB)
Collecting netifaces (from hpbandster)
  Downloading netifaces-0.11.0.tar.gz (30 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading Pyro4-4.82-py2.py3-none-any.whl (89 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m90.0/90.0 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading serpent-1.41-py3-none-any.whl (9.6 kB)
Building wheels for collected packages: hpbandster, netifaces
  Building whe

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import ConfigSpace as CS
import ConfigSpace.hyperparameters as CSH
import hpbandster.core.nameserver as hpns
from hpbandster.optimizers import BOHB
from hpbandster.core.worker import Worker
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error
import time

# Check for GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define LSTM Model
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])
        return out

# Function to calculate metrics
def calculate_metrics(y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    mape = mean_absolute_percentage_error(y_true, y_pred) * 100
    return mae, mse, rmse, r2, mape

# Convert datasets to PyTorch tensors and move to GPU
Y_train_torch = torch.tensor(Y_train.values, dtype=torch.float32).unsqueeze(1).to(device)
Y_val_torch = torch.tensor(Y_val.values, dtype=torch.float32).unsqueeze(1).to(device)
Y_test_torch = torch.tensor(Y_test.values, dtype=torch.float32).unsqueeze(1).to(device)

X_train_torch = torch.tensor(X_train.values, dtype=torch.float32).unsqueeze(1).to(device)
X_val_torch = torch.tensor(X_val.values, dtype=torch.float32).unsqueeze(1).to(device)
X_test_torch = torch.tensor(X_test.values, dtype=torch.float32).unsqueeze(1).to(device)

# LSTM Configurations
lstm_layers = [2, 3, 5]
hidden_dim = 64
output_dim = 1
input_dim = X_train.shape[1]

# Dictionary to store LSTM feature representations
lstm_features = []

for num_layers in lstm_layers:
    print(f"Training LSTM with {num_layers} layers...")

    lstm_model = LSTMModel(input_dim, hidden_dim, num_layers, output_dim).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(lstm_model.parameters(), lr=0.001)
    num_epochs = 35

    start_time = time.time()
    for epoch in range(num_epochs):
        lstm_model.train()
        optimizer.zero_grad()
        outputs = lstm_model(X_train_torch)
        loss = criterion(outputs, Y_train_torch)
        loss.backward()
        optimizer.step()
    train_time = time.time() - start_time

    # Extract Feature Representations
    lstm_model.eval()
    with torch.no_grad():
        val_start = time.time()
        train_features = lstm_model(X_train_torch).cpu().numpy()
        val_features = lstm_model(X_val_torch).cpu().numpy()
        val_time = time.time() - val_start

        test_start = time.time()
        test_features = lstm_model(X_test_torch).cpu().numpy()
        test_time = time.time() - test_start

    lstm_features.append((train_features, val_features, test_features, train_time, val_time, test_time))

# Concatenate Features from All Layers
final_train_features = np.hstack([feat[0] for feat in lstm_features])
final_val_features = np.hstack([feat[1] for feat in lstm_features])
final_test_features = np.hstack([feat[2] for feat in lstm_features])

# Record Time for Each Stage
total_train_time = sum([feat[3] for feat in lstm_features])
total_val_time = sum([feat[4] for feat in lstm_features])
total_test_time = sum([feat[5] for feat in lstm_features])

# Define ConfigSpace for BOHB
def get_config_space():
    cs = CS.ConfigurationSpace()
    cs.add_hyperparameter(CSH.UniformIntegerHyperparameter("n_estimators", 50, 500, default_value=100))
    cs.add_hyperparameter(CSH.UniformIntegerHyperparameter("max_depth", 3, 15, default_value=6))
    cs.add_hyperparameter(CSH.UniformIntegerHyperparameter("min_samples_split", 2, 10, default_value=2))
    cs.add_hyperparameter(CSH.UniformIntegerHyperparameter("min_samples_leaf", 1, 5, default_value=1))
    return cs

# BOHB Worker for Random Forest
class RFWorker(Worker):
    def compute(self, config, budget, **kwargs):
        model = RandomForestRegressor(
            n_estimators=config["n_estimators"],
            max_depth=config["max_depth"],
            min_samples_split=config["min_samples_split"],
            min_samples_leaf=config["min_samples_leaf"],
            random_state=42
        )
        model.fit(final_train_features, Y_train)
        Y_val_pred = model.predict(final_val_features)
        mae = mean_absolute_error(Y_val, Y_val_pred)
        return {"loss": mae, "info": config}

# Run BOHB
NS = hpns.NameServer(run_id="lstm_rf_bohb", host="127.0.0.2", port=None)
NS.start()

worker = RFWorker(nameserver="127.0.0.2", run_id="lstm_rf_bohb")
worker.run(background=True)

bohb = BOHB(configspace=get_config_space(), run_id="lstm_rf_bohb", nameserver="127.0.0.2", min_budget=1, max_budget=3)
res = bohb.run(n_iterations=50)
bohb.shutdown()
NS.shutdown()

# Train Best Random Forest Model
best_config = res.get_incumbent_id()
best_params = res.get_id2config_mapping()[best_config]["config"]

best_rf_model = RandomForestRegressor(
    n_estimators=best_params["n_estimators"],
    max_depth=best_params["max_depth"],
    min_samples_split=best_params["min_samples_split"],
    min_samples_leaf=best_params["min_samples_leaf"],
    random_state=42
)

best_rf_model.fit(final_train_features, Y_train)

# Predictions
Y_train_pred = best_rf_model.predict(final_train_features)
Y_val_pred = best_rf_model.predict(final_val_features)
Y_test_pred = best_rf_model.predict(final_test_features)

# Calculate Metrics
train_metrics = calculate_metrics(Y_train, Y_train_pred)
val_metrics = calculate_metrics(Y_val, Y_val_pred)
test_metrics = calculate_metrics(Y_test, Y_test_pred)

# Print Results
print("Train Metrics:", train_metrics, "Time:", total_train_time)
print("Validation Metrics:", val_metrics, "Time:", total_val_time)
print("Test Metrics:", test_metrics, "Time:", total_test_time)


Training LSTM with 2 layers...
Training LSTM with 3 layers...
Training LSTM with 5 layers...
Train Metrics: (0.0019636370108342942, 7.231082957958517e-06, 0.0026890673026085674, 0.9999586844853037, 0.6290564566595542) Time: 32.42417860031128
Validation Metrics: (0.13746071182242878, 0.02457433881973903, 0.15676204521420048, -3.178212588068127, 7.694290811851231) Time: 0.47695016860961914
Test Metrics: (0.4057919015290884, 0.17070213773759144, 0.4131611522609446, -27.285028455046948, 20.01092301865549) Time: 0.07733273506164551
