## Initial Code

In [None]:
# Importing necessary libraries for data analysis and manipulation
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


# For handling warnings
import warnings
warnings.filterwarnings('ignore')

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive/ so files under MyDrive can be read by path.
drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
# Load the AAPL CSV from the mounted Drive folder into a DataFrame.
aapl_csv_path = '/content/drive/MyDrive/Colab Notebooks/stocks/AAPL.csv'
df_aapl = pd.read_csv(aapl_csv_path)

In [None]:
import numpy as np
from scipy.stats import boxcox

# Three candidate skew-reducing transforms of the 'Close' column.
close_prices = df_aapl['Close']
close_plus_one = close_prices + 1  # the same +1 shift feeds both the log and Box-Cox

df_aapl['Close_log'] = np.log(close_plus_one)
df_aapl['Close_sqrt'] = np.sqrt(close_prices)

# boxcox returns (transformed values, fitted lambda); only the values are stored.
close_boxcox_values, _ = boxcox(close_plus_one)
df_aapl['Close_boxcox'] = close_boxcox_values


This code calculates the skewness of the 'Close' column in the `df_aapl` DataFrame before and after applying various transformations:

1. **Original Skewness**: Calculates the skewness of the original 'Close' data.
2. **Log Transformation Skewness**: Calculates the skewness of the 'Close_log' column after applying the log transformation.
3. **Square Root Transformation Skewness**: Calculates the skewness of the 'Close_sqrt' column after applying the square root transformation.
4. **Box-Cox Transformation Skewness**: Calculates the skewness of the 'Close_boxcox' column after applying the Box-Cox transformation.

The printed results help assess how each transformation affects the distribution's symmetry and the success of skewness correction.







In [None]:

# Skewness of the raw 'Close' column and of each transformed variant,
# computed in the same order the results are printed below.
close_variant_columns = ['Close', 'Close_log', 'Close_sqrt', 'Close_boxcox']
skew_original, skew_log, skew_sqrt, skew_boxcox = [
    pd.Series(df_aapl[column_name]).skew() for column_name in close_variant_columns
]

print(f"Original Skewness: {skew_original}")
print(f"Log Transformation Skewness: {skew_log}")
print(f"Square Root Transformation Skewness: {skew_sqrt}")
print(f"Box-Cox Transformation Skewness: {skew_boxcox}")


Original Skewness: 2.5045276102319933
Log Transformation Skewness: 0.8535555176510308
Square Root Transformation Skewness: 1.6211545809555206
Box-Cox Transformation Skewness: 0.4352746472149233


In [None]:

from scipy.stats import boxcox

# Price columns that receive every transform; 'Volume' is handled separately below.
price_columns = ['Open', 'High', 'Low', 'Adj Close']

# Natural log of each price column.
for column in price_columns:
    df_aapl[f'{column}_log'] = np.log(df_aapl[column])

# np.log(0) is -inf, so a single zero-volume row would make this column
# (and its skewness) unusable. log1p(x) = log(1 + x) stays finite at zero.
# NOTE(review): zero-volume rows are presumably why Volume_log was left out of
# the skewness table in the next cell -- confirm against the data.
df_aapl['Volume_log'] = np.log1p(df_aapl['Volume'])

# Square root of each price column and of Volume.
for column in price_columns + ['Volume']:
    df_aapl[f'{column}_sqrt'] = np.sqrt(df_aapl[column])

# Box-Cox with a separately fitted lambda per column (the lambda is discarded).
# scipy's boxcox requires strictly positive input; Volume is left out, as before.
for column in price_columns:
    df_aapl[f'{column}_boxcox'], _ = boxcox(df_aapl[column])

This helps compare how the transformations reduce skewness in the data, aiming for a more normal distribution.

In [None]:

# Columns as loaded, before any transform.
raw_columns = ['Open', 'High', 'Low', 'Adj Close', 'Volume']

# Transformed columns, grouped as log, then square root, then Box-Cox.
transformed_columns = [
    'Open_log', 'High_log', 'Low_log', 'Adj Close_log',
    'Open_sqrt', 'High_sqrt', 'Low_sqrt', 'Adj Close_sqrt', 'Volume_sqrt',
    'Open_boxcox', 'High_boxcox', 'Low_boxcox', 'Adj Close_boxcox',
]

# Per-column skewness, one Series per group.
skewness_before = df_aapl[raw_columns].skew()
skewness_after = df_aapl[transformed_columns].skew()

print("Skewness Before Transformation:\n", skewness_before)
print("\nSkewness After Transformation:\n", skewness_after)


Skewness Before Transformation:
 Open         2.504632
High         2.502208
Low          2.506714
Adj Close    2.550677
Volume       3.565699
dtype: float64

Skewness After Transformation:
 Open_log            0.482872
High_log            0.481997
Low_log             0.484246
Adj Close_log       0.494009
Open_sqrt           1.620771
High_sqrt           1.621456
Low_sqrt            1.620661
Adj Close_sqrt      1.679402
Volume_sqrt         1.299776
Open_boxcox         0.181226
High_boxcox         0.179749
Low_boxcox          0.182882
Adj Close_boxcox    0.180085
dtype: float64


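- Re-applied the Box-Cox transformation, this time to values shifted by +1, for the 'Open', 'High', 'Low', 'Adj Close', and 'Close' columns; this overwrites the unshifted Box-Cox columns created earlier.
- Recalculated skewness after the transformation to check how far it reduces skew and normalizes the data for modeling.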

In [None]:
from scipy import stats

# Source column -> output column for the shifted Box-Cox fit.
# The four price outputs overwrite the unshifted versions written by the earlier cell.
boxcox_outputs = {
    'Open': 'Open_boxcox',
    'High': 'High_boxcox',
    'Low': 'Low_boxcox',
    'Adj Close': 'Adj Close_boxcox',
    'Close': 'Close_boxcox',
}

for source_column, output_column in boxcox_outputs.items():
    # Fit Box-Cox on (value + 1); the fitted lambda is discarded.
    shifted_values, _ = stats.boxcox(df_aapl[source_column] + 1)
    df_aapl[output_column] = shifted_values

# Skewness of the five refreshed columns.
skewness_after_boxcox = df_aapl[list(boxcox_outputs.values())].skew()

print("Skewness After Box-Cox Transformation:")
print(skewness_after_boxcox)


Skewness After Box-Cox Transformation:
Open_boxcox         0.435237
High_boxcox         0.433381
Low_boxcox          0.437331
Adj Close_boxcox    0.458762
Close_boxcox        0.435275
dtype: float64


## Feature Selection

In [None]:

# Keep the date, the raw OHLCV columns, and the Box-Cox columns;
# the log and square-root columns are dropped from this frame.
selected_columns = [
    'Date', 'Open', 'High', 'Low', 'Adj Close', 'Close', 'Volume',
    'Open_boxcox', 'High_boxcox', 'Low_boxcox', 'Adj Close_boxcox',
    'Close_boxcox',
]
df_aapl_cleaned = df_aapl[selected_columns]

print(df_aapl_cleaned.head())


         Date      Open      High       Low  Adj Close     Close     Volume  \
0  1980-12-12  0.128348  0.128906  0.128348   0.098943  0.128348  469033600   
1  1980-12-15  0.122210  0.122210  0.121652   0.093781  0.121652  175884800   
2  1980-12-16  0.113281  0.113281  0.112723   0.086898  0.112723  105728000   
3  1980-12-17  0.115513  0.116071  0.115513   0.089049  0.115513   86441600   
4  1980-12-18  0.118862  0.119420  0.118862   0.091630  0.118862   73449600   

   Open_boxcox  High_boxcox  Low_boxcox  Adj Close_boxcox  Close_boxcox  
0     0.117689     0.118173    0.117674          0.092374      0.117689  
1     0.112503     0.112516    0.112016          0.087857      0.112030  
2     0.104886     0.104897    0.104395          0.081785      0.104407  
3     0.106798     0.107287    0.106786          0.083688      0.106798  
4     0.109657     0.110145    0.109644          0.085966      0.109657  


### Train Validation Test Split

The code splits the data into training, validation, and test sets. The features `X` and target `Y` are split as follows:

- 70% for training (`X_train`, `Y_train`)
- 15% for validation (`X_val`, `Y_val`)
- 15% for testing (`X_test`, `Y_test`)

The data is first split 70/30 into a training set and a held-out set; the held-out 30% is then split in half into validation and test sets. Neither split shuffles the rows, so chronological order is preserved (time series data).

In [None]:
from sklearn.model_selection import train_test_split

# Inputs are the Box-Cox Open/High/Low columns; the target is Box-Cox Close.
feature_columns = ['Open_boxcox', 'High_boxcox', 'Low_boxcox']
X = df_aapl_cleaned[feature_columns]
Y = df_aapl_cleaned['Close_boxcox']

# Stage 1: first 70% of rows for training, last 30% held out (no shuffling).
X_train, X_temp, Y_train, Y_temp = train_test_split(
    X, Y, test_size=0.3, shuffle=False
)

# Stage 2: cut the held-out rows in half -- validation first, then test.
X_val, X_test, Y_val, Y_test = train_test_split(
    X_temp, Y_temp, test_size=0.5, shuffle=False
)

print(f"Training set: {X_train.shape}, Validation set: {X_val.shape}, Test set: {X_test.shape}")


Training set: (7736, 3), Validation set: (1658, 3), Test set: (1658, 3)


## GPU Activation

In [None]:
import torch

# Select the compute device once, then report which one was chosen.
gpu_present = torch.cuda.is_available()
device = torch.device("cuda" if gpu_present else "cpu")

if not gpu_present:
    print("No GPU found, using CPU.")
else:
    print("GPU is enabled:", torch.cuda.get_device_name(0))


GPU is enabled: Tesla T4


## ANN

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Prefer CUDA when PyTorch can see a GPU; otherwise fall back to the CPU.
cuda_available = torch.cuda.is_available()
device = torch.device("cuda" if cuda_available else "cpu")
print("Using device:", device)


Using device: cuda


In [None]:
# Requires X_train/X_val/X_test and Y_train/Y_val/Y_test from the split cell.
# Every split becomes a float32 tensor created directly on `device`;
# targets are reshaped to (N, 1) columns to line up with the model's single output.
X_train_torch = torch.tensor(X_train.to_numpy(), dtype=torch.float32, device=device)
X_val_torch = torch.tensor(X_val.to_numpy(), dtype=torch.float32, device=device)
X_test_torch = torch.tensor(X_test.to_numpy(), dtype=torch.float32, device=device)

Y_train_torch = torch.tensor(Y_train.to_numpy(), dtype=torch.float32, device=device).reshape(-1, 1)
Y_val_torch = torch.tensor(Y_val.to_numpy(), dtype=torch.float32, device=device).reshape(-1, 1)
Y_test_torch = torch.tensor(Y_test.to_numpy(), dtype=torch.float32, device=device).reshape(-1, 1)


In [None]:
class ANN(nn.Module):
    """Two-hidden-layer feed-forward regressor: input -> 64 -> 32 -> 1.

    ReLU follows each hidden layer; dropout (p=0.3) is applied after the
    first hidden layer only. The output layer is linear.
    """

    def __init__(self, input_size):
        """Build the layers; `input_size` is the number of input features."""
        super().__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.3)  # active in train mode only

    def forward(self, x):
        """Map a (batch, input_size) tensor to a (batch, 1) prediction."""
        hidden = self.dropout(self.relu(self.fc1(x)))
        hidden = self.relu(self.fc2(hidden))
        # Linear output: the regression target is unbounded.
        return self.fc3(hidden)

# One input unit per feature column; the network is placed on `device`.
n_features = X_train.shape[1]
model = ANN(input_size=n_features).to(device)


In [None]:
# Mean squared error is the training objective for this regression.
criterion = nn.MSELoss()

# Adam over all model parameters; weight_decay adds an L2 penalty.
optimizer = optim.Adam(
    model.parameters(),
    lr=1e-3,
    weight_decay=1e-4,
)


In [None]:
import copy

epochs = 200
patience = 10  # Stop training if no improvement after 10 epochs
best_val_loss = float("inf")
best_state = None  # snapshot of the weights at the best validation loss
counter = 0

for epoch in range(epochs):
    # Training: one full-batch gradient step per epoch.
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_torch)
    loss = criterion(outputs, Y_train_torch)
    loss.backward()
    optimizer.step()

    # Validation (dropout disabled, no gradients).
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val_torch)
        val_loss = criterion(val_outputs, Y_val_torch)

    # Early stopping: remember the best weights so they can be restored below.
    current_val_loss = val_loss.item()
    if current_val_loss < best_val_loss:
        best_val_loss = current_val_loss
        best_state = copy.deepcopy(model.state_dict())
        counter = 0
    else:
        counter += 1
        if counter >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

    print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.6f}, Val Loss: {val_loss.item():.6f}")

# Without this, `model` would keep the weights from `patience` epochs after the
# best validation loss (or from the last epoch), and later evaluation would
# score a worse model than the one early stopping selected.
if best_state is not None:
    model.load_state_dict(best_state)


Epoch 1/200, Loss: 0.193401, Val Loss: 1.567056
Epoch 2/200, Loss: 0.174322, Val Loss: 1.432983
Epoch 3/200, Loss: 0.154621, Val Loss: 1.306110
Epoch 4/200, Loss: 0.138142, Val Loss: 1.183969
Epoch 5/200, Loss: 0.123961, Val Loss: 1.063381
Epoch 6/200, Loss: 0.109427, Val Loss: 0.945019
Epoch 7/200, Loss: 0.099343, Val Loss: 0.827867
Epoch 8/200, Loss: 0.086156, Val Loss: 0.715117
Epoch 9/200, Loss: 0.077368, Val Loss: 0.610022
Epoch 10/200, Loss: 0.069780, Val Loss: 0.518160
Epoch 11/200, Loss: 0.062221, Val Loss: 0.434913
Epoch 12/200, Loss: 0.058163, Val Loss: 0.359932
Epoch 13/200, Loss: 0.053779, Val Loss: 0.294038
Epoch 14/200, Loss: 0.048779, Val Loss: 0.237188
Epoch 15/200, Loss: 0.047834, Val Loss: 0.189271
Epoch 16/200, Loss: 0.044631, Val Loss: 0.149880
Epoch 17/200, Loss: 0.043444, Val Loss: 0.118095
Epoch 18/200, Loss: 0.042534, Val Loss: 0.093086
Epoch 19/200, Loss: 0.040575, Val Loss: 0.074017
Epoch 20/200, Loss: 0.038927, Val Loss: 0.059875
Epoch 21/200, Loss: 0.037575,

In [None]:
def evaluate_model(model, X, Y, set_name):
    """Print regression metrics for `model` on one data split.

    Args:
        model: network to evaluate; it is switched to eval mode.
        X: input tensor passed to the model.
        Y: target tensor compared against the predictions.
        set_name: label printed in front of the metrics.

    Prints MAE, MSE, RMSE, R² and MAPE (in percent); returns nothing.
    """
    model.eval()
    with torch.no_grad():
        y_pred = model(X).cpu().numpy()
        y_true = Y.cpu().numpy()

    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    # Mean absolute error relative to the true value, as a percentage.
    relative_error = np.abs((y_true - y_pred) / y_true)
    mape = np.mean(relative_error) * 100

    print(f"{set_name} Metrics: MAE={mae:.4f}, MSE={mse:.6f}, RMSE={rmse:.4f}, R²={r2:.4f}, MAPE={mape:.2f}%")

# Report metrics for each split in turn.
for split_name, split_X, split_Y in (
    ("Train", X_train_torch, Y_train_torch),
    ("Validation", X_val_torch, Y_val_torch),
    ("Test", X_test_torch, Y_test_torch),
):
    evaluate_model(model, split_X, split_Y, split_name)


Train Metrics: MAE=0.0206, MSE=0.000493, RMSE=0.0222, R²=0.9972, MAPE=7.42%
Validation Metrics: MAE=0.0408, MSE=0.001672, RMSE=0.0409, R²=0.7157, MAPE=2.34%
Test Metrics: MAE=0.0442, MSE=0.001957, RMSE=0.0442, R²=0.6757, MAPE=2.19%


In [None]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.2.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.14.1-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.9-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.2.0-py3-none-any.whl (383 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m383.4/383.4 kB[0m [31m33.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.14.1-py3-none-any.whl (233 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.6/233.6 kB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Downloading Mako-1.3.9-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.5/78.5 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: M

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import optuna
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Pick the compute device: CUDA when available, otherwise CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)

# Training and validation splits as float32 tensors on `device`.
X_train_torch, X_val_torch = (
    torch.tensor(split.values, dtype=torch.float32).to(device)
    for split in (X_train, X_val)
)
# Targets are reshaped to (N, 1) columns to match the model's single output.
Y_train_torch, Y_val_torch = (
    torch.tensor(split.values, dtype=torch.float32).to(device).view(-1, 1)
    for split in (Y_train, Y_val)
)

# Define the model class
class ANN(nn.Module):
    """Feed-forward regressor with two hidden layers of configurable width.

    Layout: input -> hidden1 -> hidden2 -> 1, with ReLU after each hidden
    layer and dropout after the first hidden layer only.
    """

    def __init__(self, input_size, hidden1, hidden2, dropout_rate):
        """Create the layers.

        Args:
            input_size: number of input features.
            hidden1: width of the first hidden layer.
            hidden2: width of the second hidden layer.
            dropout_rate: drop probability used after the first hidden layer.
        """
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden1)
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.fc3 = nn.Linear(hidden2, 1)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        """Map a (batch, input_size) tensor to a (batch, 1) prediction."""
        first_hidden = self.dropout(self.relu(self.fc1(x)))
        second_hidden = self.relu(self.fc2(first_hidden))
        # Linear output layer: no activation for regression.
        return self.fc3(second_hidden)

# Define the objective function for Optuna
def objective(trial):
    """Optuna objective: train one ANN configuration, return its best validation MSE.

    Samples layer widths, dropout, learning rate and weight decay from `trial`,
    trains with full-batch Adam for up to 200 epochs, and stops once the
    validation loss has not improved for 10 consecutive epochs.

    Returns:
        float: the lowest validation loss seen. This is `inf` if no epoch ever
        produced a comparable loss (for example, the loss was NaN throughout).
    """
    # Hyperparameters to tune
    hidden1 = trial.suggest_int("hidden1", 32, 128, step=16)
    hidden2 = trial.suggest_int("hidden2", 16, 64, step=16)
    dropout_rate = trial.suggest_float("dropout", 0.1, 0.5, step=0.1)
    # suggest_float(..., log=True) replaces the deprecated suggest_loguniform.
    lr = trial.suggest_float("lr", 1e-4, 1e-2, log=True)
    weight_decay = trial.suggest_float("weight_decay", 1e-6, 1e-3, log=True)

    # Initialize model
    model = ANN(X_train.shape[1], hidden1, hidden2, dropout_rate).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    # Training loop
    best_val_loss = float("inf")
    patience = 10
    counter = 0

    for epoch in range(200):  # Max 200 epochs
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train_torch)
        loss = criterion(outputs, Y_train_torch)
        loss.backward()
        optimizer.step()

        # Validation
        model.eval()
        with torch.no_grad():
            val_outputs = model(X_val_torch)
            # Keep a plain float: best_val_loss then has one type on every path,
            # so the return below cannot fail when no epoch improves on `inf`.
            val_loss = criterion(val_outputs, Y_val_torch).item()

        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            counter = 0
        else:
            counter += 1
            if counter >= patience:
                break

    return best_val_loss  # Optuna minimizes this

import copy

# Run Optuna study
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)  # Run 50 trials

# Print best parameters
print("Best Hyperparameters:", study.best_params)

# Train final model with best hyperparameters
best_params = study.best_params
final_model = ANN(X_train.shape[1], best_params["hidden1"], best_params["hidden2"], best_params["dropout"]).to(device)
# Defined here explicitly: inside `objective` the criterion is a local variable,
# so this cell would otherwise depend on a global left over from an earlier cell.
criterion = nn.MSELoss()
optimizer = optim.Adam(final_model.parameters(), lr=best_params["lr"], weight_decay=best_params["weight_decay"])

# Train with best params, using the same stopping rule as `objective`
# (up to 200 epochs, patience 10) so the final model follows the procedure the
# hyperparameters were scored under. The best weights are restored afterwards.
patience = 10
counter = 0
best_val_loss = float("inf")
best_state = None

for epoch in range(200):
    final_model.train()
    optimizer.zero_grad()
    outputs = final_model(X_train_torch)
    loss = criterion(outputs, Y_train_torch)
    loss.backward()
    optimizer.step()

    # Validation
    final_model.eval()
    with torch.no_grad():
        val_outputs = final_model(X_val_torch)
        val_loss = criterion(val_outputs, Y_val_torch)

    current_val_loss = val_loss.item()
    if current_val_loss < best_val_loss:
        best_val_loss = current_val_loss
        best_state = copy.deepcopy(final_model.state_dict())
        counter = 0
    else:
        counter += 1
        if counter >= patience:
            break

# Roll back to the weights with the lowest validation loss.
if best_state is not None:
    final_model.load_state_dict(best_state)

# Evaluation function
def evaluate_model(model, X, Y, set_name):
    """Print MAE, MSE, RMSE, R² and MAPE of `model` on the split (X, Y).

    The model is put in eval mode and run without gradients; predictions and
    targets are moved to the CPU as NumPy arrays before scoring. `set_name`
    is the label printed in front of the metrics. Nothing is returned.
    """
    model.eval()
    with torch.no_grad():
        predicted = model(X).cpu().numpy()
        actual = Y.cpu().numpy()

    mse = mean_squared_error(actual, predicted)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, predicted)
    r2 = r2_score(actual, predicted)
    # MAPE in %: absolute error divided by the true value, averaged.
    abs_pct_error = np.abs((actual - predicted) / actual)
    mape = np.mean(abs_pct_error) * 100

    print(f"{set_name} Metrics: MAE={mae:.4f}, MSE={mse:.6f}, RMSE={rmse:.4f}, R²={r2:.4f}, MAPE={mape:.2f}%")

# Score the tuned model on every split.
for split_name, split_X, split_Y in (
    ("Train", X_train_torch, Y_train_torch),
    ("Validation", X_val_torch, Y_val_torch),
    ("Test", X_test_torch, Y_test_torch),
):
    evaluate_model(final_model, split_X, split_Y, split_name)


[I 2025-02-09 18:27:56,312] A new study created in memory with name: no-name-48266d59-3bed-406d-bcd3-02a79785f701


Using device: cuda


[I 2025-02-09 18:27:56,618] Trial 0 finished with value: 0.054957929998636246 and parameters: {'hidden1': 64, 'hidden2': 16, 'dropout': 0.2, 'lr': 0.0006240698846171853, 'weight_decay': 1.3840009879062286e-06}. Best is trial 0 with value: 0.054957929998636246.
[I 2025-02-09 18:27:57,437] Trial 1 finished with value: 0.4436366558074951 and parameters: {'hidden1': 48, 'hidden2': 16, 'dropout': 0.4, 'lr': 0.00012158855295734993, 'weight_decay': 0.00034447228971052127}. Best is trial 0 with value: 0.054957929998636246.
[I 2025-02-09 18:27:57,541] Trial 2 finished with value: 1.435631656931946e-05 and parameters: {'hidden1': 32, 'hidden2': 64, 'dropout': 0.30000000000000004, 'lr': 0.004215798965438718, 'weight_decay': 1.3604481381637065e-05}. Best is trial 2 with value: 1.435631656931946e-05.
[I 2025-02-09 18:27:57,879] Trial 3 finished with value: 0.0006602357607334852 and parameters: {'hidden1': 64, 'hidden2': 32, 'dropout': 0.1, 'lr': 0.00046195114171576157, 'weight_decay': 0.00030945186

Best Hyperparameters: {'hidden1': 128, 'hidden2': 64, 'dropout': 0.2, 'lr': 0.00030251631116183893, 'weight_decay': 1.6486940906465844e-06}
Train Metrics: MAE=0.0110, MSE=0.000155, RMSE=0.0124, R²=0.9991, MAPE=4.56%
Validation Metrics: MAE=0.0108, MSE=0.000123, RMSE=0.0111, R²=0.9791, MAPE=0.62%
Test Metrics: MAE=0.0127, MSE=0.000163, RMSE=0.0128, R²=0.9730, MAPE=0.63%


## DNN

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Use the GPU when one is visible to PyTorch, else the CPU.
has_cuda = torch.cuda.is_available()
device = torch.device("cuda" if has_cuda else "cpu")
print("Using device:", device)

# Feature frames -> float32 tensors on `device`.
X_train_torch, X_val_torch, X_test_torch = (
    torch.tensor(features.values, dtype=torch.float32).to(device)
    for features in (X_train, X_val, X_test)
)

# Target series -> float32 column vectors of shape (N, 1) on `device`.
Y_train_torch, Y_val_torch, Y_test_torch = (
    torch.tensor(targets.values, dtype=torch.float32).to(device).view(-1, 1)
    for targets in (Y_train, Y_val, Y_test)
)


# Define Deep Neural Network Model
class DNN(nn.Module):
    """Three-hidden-layer feed-forward regressor: input -> 128 -> 64 -> 32 -> 1.

    ReLU follows every hidden layer; dropout (p=0.2) follows the first two.
    The output layer is linear.
    """

    def __init__(self, input_size):
        """Build the layers; `input_size` is the number of input features."""
        super().__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 1)

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.2)  # shared by both dropout positions

    def forward(self, x):
        """Map a (batch, input_size) tensor to a (batch, 1) prediction."""
        hidden = self.dropout(self.relu(self.fc1(x)))
        hidden = self.dropout(self.relu(self.fc2(hidden)))
        hidden = self.relu(self.fc3(hidden))
        # Linear output: no activation for regression.
        return self.fc4(hidden)


import copy

# Initialize Model
model = DNN(input_size=X_train.shape[1]).to(device)

# Loss Function & Optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)

# Training Loop
epochs = 200
patience = 10  # Early stopping patience
best_val_loss = float("inf")
best_state = None  # weights at the best validation loss seen so far
counter = 0

for epoch in range(epochs):
    # One full-batch gradient step.
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_torch)
    loss = criterion(outputs, Y_train_torch)
    loss.backward()
    optimizer.step()

    # Validation loss with dropout disabled and no gradients.
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val_torch)
        val_loss = criterion(val_outputs, Y_val_torch)

    # Early stopping logic: snapshot the weights on every improvement.
    current_val_loss = val_loss.item()
    if current_val_loss < best_val_loss:
        best_val_loss = current_val_loss
        best_state = copy.deepcopy(model.state_dict())
        counter = 0
    else:
        counter += 1
        if counter >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

    print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.6f}, Val Loss: {val_loss.item():.6f}")

# Restore the best weights. Otherwise the model that gets evaluated is the one
# from `patience` epochs after the optimum (or from the last epoch).
if best_state is not None:
    model.load_state_dict(best_state)



Using device: cuda
Epoch 1/200, Loss: 0.552473, Val Loss: 3.336216
Epoch 2/200, Loss: 0.503372, Val Loss: 3.180955
Epoch 3/200, Loss: 0.463147, Val Loss: 3.012921
Epoch 4/200, Loss: 0.427143, Val Loss: 2.883939
Epoch 5/200, Loss: 0.399188, Val Loss: 2.767354
Epoch 6/200, Loss: 0.372178, Val Loss: 2.633403
Epoch 7/200, Loss: 0.349387, Val Loss: 2.489299
Epoch 8/200, Loss: 0.324809, Val Loss: 2.341400
Epoch 9/200, Loss: 0.300863, Val Loss: 2.184827
Epoch 10/200, Loss: 0.275835, Val Loss: 2.028053
Epoch 11/200, Loss: 0.252260, Val Loss: 1.867168
Epoch 12/200, Loss: 0.226817, Val Loss: 1.707970
Epoch 13/200, Loss: 0.203228, Val Loss: 1.544718
Epoch 14/200, Loss: 0.178215, Val Loss: 1.378437
Epoch 15/200, Loss: 0.154886, Val Loss: 1.203386
Epoch 16/200, Loss: 0.131977, Val Loss: 1.029333
Epoch 17/200, Loss: 0.109680, Val Loss: 0.859390
Epoch 18/200, Loss: 0.092941, Val Loss: 0.696841
Epoch 19/200, Loss: 0.074885, Val Loss: 0.544018
Epoch 20/200, Loss: 0.062107, Val Loss: 0.400039
Epoch 21/2

In [None]:
# Function to Evaluate Model
def evaluate_model(model, X, Y, set_name):
    """Score `model` on one split and print the metrics on a single line.

    Args:
        model: network to evaluate (switched to eval mode here).
        X: input tensor for the split.
        Y: matching target tensor.
        set_name: label that prefixes the printed line.

    Prints MAE, MSE, RMSE, R² and MAPE (percent). Returns None.
    """
    model.eval()
    with torch.no_grad():
        y_hat = model(X).cpu().numpy()
        y_ref = Y.cpu().numpy()

    mse = mean_squared_error(y_ref, y_hat)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_ref, y_hat)
    r2 = r2_score(y_ref, y_hat)
    # MAPE in %: mean of |error / true value|.
    pct_errors = np.abs((y_ref - y_hat) / y_ref)
    mape = np.mean(pct_errors) * 100

    print(f"{set_name} Metrics: MAE={mae:.4f}, MSE={mse:.6f}, RMSE={rmse:.4f}, R²={r2:.4f}, MAPE={mape:.2f}%")

# Evaluate on Train, Validation, and Test sets, in that order.
for split_name, split_X, split_Y in (
    ("Train", X_train_torch, Y_train_torch),
    ("Validation", X_val_torch, Y_val_torch),
    ("Test", X_test_torch, Y_test_torch),
):
    evaluate_model(model, split_X, split_Y, split_name)


Train Metrics: MAE=0.0867, MSE=0.012322, RMSE=0.1110, R²=0.9296, MAPE=36.21%
Validation Metrics: MAE=0.2946, MSE=0.086972, RMSE=0.2949, R²=-13.7872, MAPE=16.87%
Test Metrics: MAE=0.3399, MSE=0.115678, RMSE=0.3401, R²=-18.1677, MAPE=16.86%


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import optuna
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Compute device: CUDA if available, otherwise CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)

# Feature splits as float32 tensors on `device`.
X_train_torch = torch.tensor(X_train.values, dtype=torch.float32, device=device)
X_val_torch = torch.tensor(X_val.values, dtype=torch.float32, device=device)
X_test_torch = torch.tensor(X_test.values, dtype=torch.float32, device=device)

# Target splits as float32 (N, 1) column tensors on `device`.
Y_train_torch = torch.tensor(Y_train.values, dtype=torch.float32, device=device).reshape(-1, 1)
Y_val_torch = torch.tensor(Y_val.values, dtype=torch.float32, device=device).reshape(-1, 1)
Y_test_torch = torch.tensor(Y_test.values, dtype=torch.float32, device=device).reshape(-1, 1)


# Define DNN model with variable layers
class DNN(nn.Module):
    """Feed-forward regressor with a configurable number of equal-width hidden blocks.

    Each hidden block is Linear -> ReLU -> Dropout. The blocks are followed by
    a single linear output unit, and the whole stack is stored as `self.model`.
    """

    def __init__(self, input_size, hidden_layers, hidden_units, dropout_rate):
        """Assemble the layer stack.

        Args:
            input_size: number of input features.
            hidden_layers: number of hidden blocks (at least one is always built).
            hidden_units: width of every hidden block.
            dropout_rate: drop probability used in every block.
        """
        super().__init__()

        # Input width of each block: the raw features for the first block,
        # then `hidden_units` for each of the remaining hidden_layers - 1 blocks.
        block_inputs = [input_size] + [hidden_units] * (hidden_layers - 1)

        stack = []
        for in_features in block_inputs:
            stack += [
                nn.Linear(in_features, hidden_units),
                nn.ReLU(),
                nn.Dropout(dropout_rate),
            ]

        # Single linear output unit for regression.
        stack.append(nn.Linear(hidden_units, 1))

        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Run `x` through the sequential stack and return the prediction."""
        return self.model(x)


# Define the Optuna optimization function
def objective(trial):
    """Optuna objective: train one DNN configuration for a fixed 100 epochs.

    Samples depth, width, dropout, learning rate and weight decay from `trial`,
    runs 100 full-batch Adam steps, and returns the validation MSE (float) of
    the resulting model.
    """
    hidden_layers = trial.suggest_int("hidden_layers", 3, 6)  # Number of hidden layers
    hidden_units = trial.suggest_int("hidden_units", 64, 256)  # Neurons per layer
    dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.5)  # Dropout
    lr = trial.suggest_float("lr", 1e-5, 1e-2, log=True)  # Learning rate
    weight_decay = trial.suggest_float("weight_decay", 1e-6, 1e-3, log=True)  # L2 regularization

    model = DNN(input_size=X_train.shape[1], hidden_layers=hidden_layers, hidden_units=hidden_units, dropout_rate=dropout_rate).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    epochs = 100
    model.train()
    for epoch in range(epochs):
        optimizer.zero_grad()
        outputs = model(X_train_torch)
        loss = criterion(outputs, Y_train_torch)
        loss.backward()
        optimizer.step()

    # Only the final validation loss is returned, so it is computed once after
    # training instead of on every epoch. The eval-mode pass does not change
    # the weights, so the returned value is the same.
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val_torch)
        val_loss = criterion(val_outputs, Y_val_torch)

    return val_loss.item()


# Run Optuna optimization
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=30)

# Get best hyperparameters
best_params = study.best_params
print("Best Hyperparameters:", best_params)


# Train the best model
best_model = DNN(
    input_size=X_train.shape[1],
    hidden_layers=best_params["hidden_layers"],
    hidden_units=best_params["hidden_units"],
    dropout_rate=best_params["dropout_rate"]
).to(device)

criterion = nn.MSELoss()
optimizer = optim.Adam(best_model.parameters(), lr=best_params["lr"], weight_decay=best_params["weight_decay"])

# Each trial in `objective` is scored after exactly 100 full-batch steps, so the
# final model is trained with the same budget. Training for a different number
# of epochs yields a model that the tuned validation score does not describe.
epochs = 100
for epoch in range(epochs):
    best_model.train()
    optimizer.zero_grad()
    outputs = best_model(X_train_torch)
    loss = criterion(outputs, Y_train_torch)
    loss.backward()
    optimizer.step()


# Function to evaluate model
def evaluate_model(model, X, Y, set_name):
    """Print regression metrics of `model` for the split (X, Y).

    The model runs in eval mode without gradients. MAE, MSE, RMSE, R² and
    MAPE (percent) are printed on one line prefixed by `set_name`.
    Nothing is returned.
    """
    model.eval()
    with torch.no_grad():
        outputs_np = model(X).cpu().numpy()
        targets_np = Y.cpu().numpy()

    mse = mean_squared_error(targets_np, outputs_np)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(targets_np, outputs_np)
    r2 = r2_score(targets_np, outputs_np)
    # Mean absolute percentage error, relative to the true values.
    ratio = np.abs((targets_np - outputs_np) / targets_np)
    mape = np.mean(ratio) * 100

    print(f"{set_name} Metrics: MAE={mae:.4f}, MSE={mse:.6f}, RMSE={rmse:.4f}, R²={r2:.4f}, MAPE={mape:.2f}%")


# Evaluate the tuned model on train, validation, and test sets.
for split_name, split_X, split_Y in (
    ("Train", X_train_torch, Y_train_torch),
    ("Validation", X_val_torch, Y_val_torch),
    ("Test", X_test_torch, Y_test_torch),
):
    evaluate_model(best_model, split_X, split_Y, split_name)


[I 2025-02-09 18:50:06,802] A new study created in memory with name: no-name-d1881fe6-ce52-4e40-bf00-52b20042325b


Using device: cuda


[I 2025-02-09 18:50:07,343] Trial 0 finished with value: 0.0004791483806911856 and parameters: {'hidden_layers': 3, 'hidden_units': 172, 'dropout_rate': 0.11968700455527817, 'lr': 0.004115761476855755, 'weight_decay': 0.0009031311184265404}. Best is trial 0 with value: 0.0004791483806911856.
[I 2025-02-09 18:50:07,834] Trial 1 finished with value: 0.21277083456516266 and parameters: {'hidden_layers': 3, 'hidden_units': 134, 'dropout_rate': 0.4351985972833001, 'lr': 0.0016581273967353502, 'weight_decay': 1.583172324321955e-05}. Best is trial 0 with value: 0.0004791483806911856.
[I 2025-02-09 18:50:08,558] Trial 2 finished with value: 0.028164507821202278 and parameters: {'hidden_layers': 4, 'hidden_units': 256, 'dropout_rate': 0.2076410698400221, 'lr': 0.0005208788093962377, 'weight_decay': 1.2171625953605177e-06}. Best is trial 0 with value: 0.0004791483806911856.
[I 2025-02-09 18:50:09,295] Trial 3 finished with value: 2.530266284942627 and parameters: {'hidden_layers': 4, 'hidden_uni

Best Hyperparameters: {'hidden_layers': 3, 'hidden_units': 172, 'dropout_rate': 0.11968700455527817, 'lr': 0.004115761476855755, 'weight_decay': 0.0009031311184265404}
Train Metrics: MAE=0.0147, MSE=0.000329, RMSE=0.0181, R²=0.9981, MAPE=4.52%
Validation Metrics: MAE=0.0647, MSE=0.004235, RMSE=0.0651, R²=0.2799, MAPE=3.69%
Test Metrics: MAE=0.0888, MSE=0.007929, RMSE=0.0890, R²=-0.3138, MAPE=4.40%


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import optuna
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Compute device: CUDA when available, CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Feature frames -> float32 tensors on `device`.
X_train_torch, X_val_torch, X_test_torch = [
    torch.tensor(frame.values, dtype=torch.float32).to(device)
    for frame in (X_train, X_val, X_test)
]
# Target series -> float32 (N, 1) column tensors on `device`.
Y_train_torch, Y_val_torch, Y_test_torch = [
    torch.tensor(series.values, dtype=torch.float32).to(device).view(-1, 1)
    for series in (Y_train, Y_val, Y_test)
]

# Define DNN model with batch normalization
class DNN(nn.Module):
    """Feed-forward regressor whose hidden blocks include batch normalization.

    Each hidden block is Linear -> BatchNorm1d -> ReLU -> Dropout. The blocks
    are followed by one linear output unit. Modules are kept in `self.layers`
    (an nn.ModuleList) and applied in order.
    """

    def __init__(self, input_size, hidden_layers, hidden_units, dropout_rate):
        """Assemble the module list.

        Args:
            input_size: number of input features.
            hidden_layers: number of hidden blocks (at least one is always built).
            hidden_units: width of every hidden block.
            dropout_rate: drop probability used in every block.
        """
        super().__init__()

        # Input width per block: raw features first, then `hidden_units`
        # for each of the remaining hidden_layers - 1 blocks.
        block_inputs = [input_size] + [hidden_units] * (hidden_layers - 1)

        self.layers = nn.ModuleList()
        for in_features in block_inputs:
            self.layers.extend([
                nn.Linear(in_features, hidden_units),
                nn.BatchNorm1d(hidden_units),
                nn.ReLU(),
                nn.Dropout(dropout_rate),
            ])

        # Linear output unit for regression.
        self.layers.append(nn.Linear(hidden_units, 1))

    def forward(self, x):
        """Apply every module in `self.layers` in order and return the result."""
        out = x
        for module in self.layers:
            out = module(out)
        return out

# Define function to train and evaluate model
def objective(trial):
    """Optuna objective: train one DNN configuration and score it on the validation split.

    Samples the architecture (hidden_layers, hidden_units, dropout_rate) and
    the Adam settings (lr, weight_decay) from ``trial``. It then runs up to
    200 full-batch epochs on the module-level ``X_train_torch`` /
    ``Y_train_torch``. After every epoch the validation MSE drives a
    ReduceLROnPlateau scheduler and a patience-10 early stop.

    Args:
        trial: the ``optuna.trial.Trial`` supplying hyperparameter suggestions.

    Returns:
        The lowest validation MSE seen, as a Python float. If the validation
        loss never improves on infinity (e.g. it is NaN on every epoch) the
        result is ``float("inf")``.
    """
    hidden_layers = trial.suggest_int("hidden_layers", 2, 5)
    hidden_units = trial.suggest_int("hidden_units", 100, 150)
    dropout_rate = trial.suggest_float("dropout_rate", 0.2, 0.4)
    lr = trial.suggest_float("lr", 0.0005, 0.0015, log=True)
    weight_decay = trial.suggest_float("weight_decay", 0.001, 0.01, log=True)

    model = DNN(X_train.shape[1], hidden_layers, hidden_units, dropout_rate).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=5, factor=0.5)

    best_val_loss = float("inf")
    patience = 10
    counter = 0

    for epoch in range(200):
        # One full-batch gradient step on the training split
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train_torch)
        loss = criterion(outputs, Y_train_torch)
        loss.backward()
        optimizer.step()

        # Score on the validation split with dropout / batch-norm in eval mode.
        # The loss is converted to a plain float here so best_val_loss always
        # has the same type, whether or not an improvement was ever recorded.
        model.eval()
        with torch.no_grad():
            val_outputs = model(X_val_torch)
            val_loss = criterion(val_outputs, Y_val_torch).item()

        scheduler.step(val_loss)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            counter = 0
        else:
            # A NaN loss also lands here because NaN < x is always False
            counter += 1
            if counter >= patience:
                break

    return best_val_loss

# Run Optuna optimization
SEED = 42  # one seed for the sampler and for torch so the search and the final fit can be re-run
torch.manual_seed(SEED)
study = optuna.create_study(
    direction='minimize',
    # TPE is Optuna's default single-objective sampler; only the seed is new
    sampler=optuna.samplers.TPESampler(seed=SEED),
)
study.optimize(objective, n_trials=30)

# Get best parameters
best_params = study.best_params
print("Best Hyperparameters:", best_params)

# Train best model
torch.manual_seed(SEED)  # re-seed so the final weights do not depend on how many trials ran
best_model = DNN(X_train.shape[1], best_params['hidden_layers'], best_params['hidden_units'], best_params['dropout_rate']).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(best_model.parameters(), lr=best_params['lr'], weight_decay=best_params['weight_decay'])

# NOTE: this is a fixed 200 full-batch epochs. Unlike objective(), no LR
# scheduler or early stopping is applied, so the final model is trained under
# a different regime than the one the hyperparameters were scored with.
for epoch in range(200):
    best_model.train()
    optimizer.zero_grad()
    outputs = best_model(X_train_torch)
    loss = criterion(outputs, Y_train_torch)
    loss.backward()
    optimizer.step()

# Evaluate function
def evaluate_model(model, X, Y, set_name):
    """Print MAE, MSE, RMSE, R² and MAPE of ``model`` on one data split.

    Args:
        model: module called as ``model(X)``; it is switched to eval mode.
        X: feature tensor passed to the model.
        Y: target tensor compared against the predictions.
        set_name: label printed in front of the metrics.

    Returns:
        None; the metrics are only printed.

    Note:
        MAPE divides by the targets, so a zero in ``Y`` gives inf / nan.
    """
    model.eval()
    with torch.no_grad():
        y_pred = model(X).cpu().numpy()
    y_true = Y.cpu().numpy()

    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    relative_error = np.abs((y_true - y_pred) / y_true)
    mape = np.mean(relative_error) * 100

    print(f"{set_name} Metrics: MAE={mae:.4f}, MSE={mse:.6f}, RMSE={rmse:.4f}, R²={r2:.4f}, MAPE={mape:.2f}%")

# Report the tuned model on every split, in train -> validation -> test order
evaluate_model(model=best_model, X=X_train_torch, Y=Y_train_torch, set_name="Train")
evaluate_model(model=best_model, X=X_val_torch, Y=Y_val_torch, set_name="Validation")
evaluate_model(model=best_model, X=X_test_torch, Y=Y_test_torch, set_name="Test")


[I 2025-02-09 18:53:17,590] A new study created in memory with name: no-name-38eafa5c-a098-4996-a50e-fd8f2960c721
[I 2025-02-09 18:53:18,628] Trial 0 finished with value: 0.32991117238998413 and parameters: {'hidden_layers': 4, 'hidden_units': 148, 'dropout_rate': 0.3151924614093718, 'lr': 0.00099409384658947, 'weight_decay': 0.005109620347560256}. Best is trial 0 with value: 0.32991117238998413.
[I 2025-02-09 18:53:18,934] Trial 1 finished with value: 0.35571053624153137 and parameters: {'hidden_layers': 2, 'hidden_units': 144, 'dropout_rate': 0.2656312889359005, 'lr': 0.0014226719236575362, 'weight_decay': 0.0016503216087987405}. Best is trial 0 with value: 0.32991117238998413.
[I 2025-02-09 18:53:19,251] Trial 2 finished with value: 0.08823561668395996 and parameters: {'hidden_layers': 2, 'hidden_units': 108, 'dropout_rate': 0.28364276834374946, 'lr': 0.0005501589353261401, 'weight_decay': 0.005367521880837673}. Best is trial 2 with value: 0.08823561668395996.
[I 2025-02-09 18:53:19

Best Hyperparameters: {'hidden_layers': 2, 'hidden_units': 128, 'dropout_rate': 0.2199652595007233, 'lr': 0.000659222685305151, 'weight_decay': 0.008146400410283663}
Train Metrics: MAE=0.0388, MSE=0.002240, RMSE=0.0473, R²=0.9872, MAPE=13.62%
Validation Metrics: MAE=0.1375, MSE=0.018963, RMSE=0.1377, R²=-2.2241, MAPE=7.87%
Test Metrics: MAE=0.1614, MSE=0.026081, RMSE=0.1615, R²=-3.3216, MAPE=8.00%


## GRU

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Define GRU Model
class GRUModel(nn.Module):
    """Stacked GRU whose last time step feeds a single-output linear head.

    Args:
        input_size: number of features per time step.
        hidden_size: width of each GRU layer's hidden state.
        num_layers: number of stacked GRU layers.
    """

    def __init__(self, input_size, hidden_size, num_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, x):
        """Map a batch-first input (batch, seq_len, input_size) to a (batch, 1) prediction."""
        batch_size = x.size(0)
        # Explicit all-zero initial hidden state, one slice per stacked layer
        initial_hidden = torch.zeros(self.num_layers, batch_size, self.hidden_size, device=x.device)
        sequence_out, _ = self.gru(x, initial_hidden)
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)

# Set parameters
SEED = 42  # fixed seed so this run can be reproduced and compared with other depths
input_size = X_train.shape[1]  # one input feature per column of X_train (Open, High, Low)
hidden_size = 64
num_layers = 2
learning_rate = 0.001
num_epochs = 100

# Convert data to PyTorch tensors.
# unsqueeze(1) gives the features a length-1 sequence axis: (N, 1, input_size).
# On the targets it adds a column axis -> (N, 1), assuming Y_* are 1-D (TODO confirm).
X_train_torch = torch.tensor(X_train.values, dtype=torch.float32).unsqueeze(1).to(device)
Y_train_torch = torch.tensor(Y_train.values, dtype=torch.float32).unsqueeze(1).to(device)
X_val_torch = torch.tensor(X_val.values, dtype=torch.float32).unsqueeze(1).to(device)
Y_val_torch = torch.tensor(Y_val.values, dtype=torch.float32).unsqueeze(1).to(device)
X_test_torch = torch.tensor(X_test.values, dtype=torch.float32).unsqueeze(1).to(device)
Y_test_torch = torch.tensor(Y_test.values, dtype=torch.float32).unsqueeze(1).to(device)

# Initialize model, loss function, and optimizer
torch.manual_seed(SEED)  # seed before construction so the initial weights are reproducible
model = GRUModel(input_size, hidden_size, num_layers).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop: one full-batch update per epoch, loss logged every 10 epochs
for epoch in range(num_epochs):
    model.train()
    outputs = model(X_train_torch)
    loss = criterion(outputs, Y_train_torch)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

# Evaluation
model.eval()
with torch.no_grad():
    train_pred = model(X_train_torch).cpu().numpy()
    val_pred = model(X_val_torch).cpu().numpy()
    test_pred = model(X_test_torch).cpu().numpy()

# Convert back to NumPy for metric calculations
Y_train_np = Y_train_torch.cpu().numpy()
Y_val_np = Y_val_torch.cpu().numpy()
Y_test_np = Y_test_torch.cpu().numpy()

# Compute metrics
def compute_metrics(y_true, y_pred):
    """Return ``(mae, mse, rmse, r2, mape)`` for predictions against targets.

    Args:
        y_true: array of reference values.
        y_pred: array of predicted values, same shape as ``y_true``.

    Returns:
        Tuple of MAE, MSE, RMSE, R² and MAPE. MAPE is a percentage and
        divides by ``y_true``, so a zero target yields inf / nan.
    """
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    relative_error = np.abs((y_true - y_pred) / y_true)
    return mae, mse, np.sqrt(mse), r2, np.mean(relative_error) * 100

# Score every split with the same helper; each result is (mae, mse, rmse, r2, mape)
metrics_train, metrics_val, metrics_test = (
    compute_metrics(targets, predictions)
    for targets, predictions in (
        (Y_train_np, train_pred),
        (Y_val_np, val_pred),
        (Y_test_np, test_pred),
    )
)

# Print metrics
print("\nTraining Metrics: MAE={:.4f}, MSE={:.4f}, RMSE={:.4f}, R²={:.4f}, MAPE={:.2f}%".format(*metrics_train))
print("Validation Metrics: MAE={:.4f}, MSE={:.4f}, RMSE={:.4f}, R²={:.4f}, MAPE={:.2f}%".format(*metrics_val))
print("Test Metrics: MAE={:.4f}, MSE={:.4f}, RMSE={:.4f}, R²={:.4f}, MAPE={:.2f}%".format(*metrics_test))


Epoch [10/100], Loss: 0.3086
Epoch [20/100], Loss: 0.1553
Epoch [30/100], Loss: 0.0518
Epoch [40/100], Loss: 0.0430
Epoch [50/100], Loss: 0.0206
Epoch [60/100], Loss: 0.0098
Epoch [70/100], Loss: 0.0024
Epoch [80/100], Loss: 0.0005
Epoch [90/100], Loss: 0.0001
Epoch [100/100], Loss: 0.0002

Training Metrics: MAE=0.0114, MSE=0.0002, RMSE=0.0134, R²=0.9990, MAPE=3.74%
Validation Metrics: MAE=0.0172, MSE=0.0003, RMSE=0.0176, R²=0.9472, MAPE=0.99%
Test Metrics: MAE=0.0083, MSE=0.0001, RMSE=0.0089, R²=0.9869, MAPE=0.41%


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Define GRU Model
class GRUModel(nn.Module):
    """Stacked GRU whose last time step feeds a single-output linear head.

    Args:
        input_size: number of features per time step.
        hidden_size: width of each GRU layer's hidden state.
        num_layers: number of stacked GRU layers.
    """

    def __init__(self, input_size, hidden_size, num_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, x):
        """Map a batch-first input (batch, seq_len, input_size) to a (batch, 1) prediction."""
        batch_size = x.size(0)
        # Explicit all-zero initial hidden state, one slice per stacked layer
        initial_hidden = torch.zeros(self.num_layers, batch_size, self.hidden_size, device=x.device)
        sequence_out, _ = self.gru(x, initial_hidden)
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)

# Set parameters
SEED = 42  # fixed seed so this run can be reproduced and compared with other depths
input_size = X_train.shape[1]  # one input feature per column of X_train (Open, High, Low)
hidden_size = 64
num_layers = 3
learning_rate = 0.001
num_epochs = 100

# Convert data to PyTorch tensors.
# unsqueeze(1) gives the features a length-1 sequence axis: (N, 1, input_size).
# On the targets it adds a column axis -> (N, 1), assuming Y_* are 1-D (TODO confirm).
X_train_torch = torch.tensor(X_train.values, dtype=torch.float32).unsqueeze(1).to(device)
Y_train_torch = torch.tensor(Y_train.values, dtype=torch.float32).unsqueeze(1).to(device)
X_val_torch = torch.tensor(X_val.values, dtype=torch.float32).unsqueeze(1).to(device)
Y_val_torch = torch.tensor(Y_val.values, dtype=torch.float32).unsqueeze(1).to(device)
X_test_torch = torch.tensor(X_test.values, dtype=torch.float32).unsqueeze(1).to(device)
Y_test_torch = torch.tensor(Y_test.values, dtype=torch.float32).unsqueeze(1).to(device)

# Initialize model, loss function, and optimizer
torch.manual_seed(SEED)  # seed before construction so the initial weights are reproducible
model = GRUModel(input_size, hidden_size, num_layers).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop: one full-batch update per epoch, loss logged every 10 epochs
for epoch in range(num_epochs):
    model.train()
    outputs = model(X_train_torch)
    loss = criterion(outputs, Y_train_torch)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

# Evaluation
model.eval()
with torch.no_grad():
    train_pred = model(X_train_torch).cpu().numpy()
    val_pred = model(X_val_torch).cpu().numpy()
    test_pred = model(X_test_torch).cpu().numpy()

# Convert back to NumPy for metric calculations
Y_train_np = Y_train_torch.cpu().numpy()
Y_val_np = Y_val_torch.cpu().numpy()
Y_test_np = Y_test_torch.cpu().numpy()

# Compute metrics
def compute_metrics(y_true, y_pred):
    """Return ``(mae, mse, rmse, r2, mape)`` for predictions against targets.

    Args:
        y_true: array of reference values.
        y_pred: array of predicted values, same shape as ``y_true``.

    Returns:
        Tuple of MAE, MSE, RMSE, R² and MAPE. MAPE is a percentage and
        divides by ``y_true``, so a zero target yields inf / nan.
    """
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    relative_error = np.abs((y_true - y_pred) / y_true)
    return mae, mse, np.sqrt(mse), r2, np.mean(relative_error) * 100

# Score every split with the same helper; each result is (mae, mse, rmse, r2, mape)
metrics_train, metrics_val, metrics_test = (
    compute_metrics(targets, predictions)
    for targets, predictions in (
        (Y_train_np, train_pred),
        (Y_val_np, val_pred),
        (Y_test_np, test_pred),
    )
)

# Print metrics
print("\nTraining Metrics: MAE={:.4f}, MSE={:.4f}, RMSE={:.4f}, R²={:.4f}, MAPE={:.2f}%".format(*metrics_train))
print("Validation Metrics: MAE={:.4f}, MSE={:.4f}, RMSE={:.4f}, R²={:.4f}, MAPE={:.2f}%".format(*metrics_val))
print("Test Metrics: MAE={:.4f}, MSE={:.4f}, RMSE={:.4f}, R²={:.4f}, MAPE={:.2f}%".format(*metrics_test))


Epoch [10/100], Loss: 0.2116
Epoch [20/100], Loss: 0.1298
Epoch [30/100], Loss: 0.0997
Epoch [40/100], Loss: 0.0452
Epoch [50/100], Loss: 0.0067
Epoch [60/100], Loss: 0.0020
Epoch [70/100], Loss: 0.0023
Epoch [80/100], Loss: 0.0006
Epoch [90/100], Loss: 0.0007
Epoch [100/100], Loss: 0.0005

Training Metrics: MAE=0.0176, MSE=0.0005, RMSE=0.0220, R²=0.9972, MAPE=10.67%
Validation Metrics: MAE=0.0088, MSE=0.0001, RMSE=0.0103, R²=0.9820, MAPE=0.51%
Test Metrics: MAE=0.0313, MSE=0.0012, RMSE=0.0347, R²=0.8008, MAPE=1.53%


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Define GRU Model
class GRUModel(nn.Module):
    """Stacked GRU whose last time step feeds a single-output linear head.

    Args:
        input_size: number of features per time step.
        hidden_size: width of each GRU layer's hidden state.
        num_layers: number of stacked GRU layers.
    """

    def __init__(self, input_size, hidden_size, num_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, x):
        """Map a batch-first input (batch, seq_len, input_size) to a (batch, 1) prediction."""
        batch_size = x.size(0)
        # Explicit all-zero initial hidden state, one slice per stacked layer
        initial_hidden = torch.zeros(self.num_layers, batch_size, self.hidden_size, device=x.device)
        sequence_out, _ = self.gru(x, initial_hidden)
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)

# Set parameters
SEED = 42  # fixed seed so this run can be reproduced and compared with other depths
input_size = X_train.shape[1]  # one input feature per column of X_train (Open, High, Low)
hidden_size = 64
num_layers = 5
learning_rate = 0.001
num_epochs = 100

# Convert data to PyTorch tensors.
# unsqueeze(1) gives the features a length-1 sequence axis: (N, 1, input_size).
# On the targets it adds a column axis -> (N, 1), assuming Y_* are 1-D (TODO confirm).
X_train_torch = torch.tensor(X_train.values, dtype=torch.float32).unsqueeze(1).to(device)
Y_train_torch = torch.tensor(Y_train.values, dtype=torch.float32).unsqueeze(1).to(device)
X_val_torch = torch.tensor(X_val.values, dtype=torch.float32).unsqueeze(1).to(device)
Y_val_torch = torch.tensor(Y_val.values, dtype=torch.float32).unsqueeze(1).to(device)
X_test_torch = torch.tensor(X_test.values, dtype=torch.float32).unsqueeze(1).to(device)
Y_test_torch = torch.tensor(Y_test.values, dtype=torch.float32).unsqueeze(1).to(device)

# Initialize model, loss function, and optimizer
torch.manual_seed(SEED)  # seed before construction so the initial weights are reproducible
model = GRUModel(input_size, hidden_size, num_layers).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop: one full-batch update per epoch, loss logged every 10 epochs
for epoch in range(num_epochs):
    model.train()
    outputs = model(X_train_torch)
    loss = criterion(outputs, Y_train_torch)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

# Evaluation
model.eval()
with torch.no_grad():
    train_pred = model(X_train_torch).cpu().numpy()
    val_pred = model(X_val_torch).cpu().numpy()
    test_pred = model(X_test_torch).cpu().numpy()

# Convert back to NumPy for metric calculations
Y_train_np = Y_train_torch.cpu().numpy()
Y_val_np = Y_val_torch.cpu().numpy()
Y_test_np = Y_test_torch.cpu().numpy()

# Compute metrics
def compute_metrics(y_true, y_pred):
    """Return ``(mae, mse, rmse, r2, mape)`` for predictions against targets.

    Args:
        y_true: array of reference values.
        y_pred: array of predicted values, same shape as ``y_true``.

    Returns:
        Tuple of MAE, MSE, RMSE, R² and MAPE. MAPE is a percentage and
        divides by ``y_true``, so a zero target yields inf / nan.
    """
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    relative_error = np.abs((y_true - y_pred) / y_true)
    return mae, mse, np.sqrt(mse), r2, np.mean(relative_error) * 100

# Score every split with the same helper; each result is (mae, mse, rmse, r2, mape)
metrics_train, metrics_val, metrics_test = (
    compute_metrics(targets, predictions)
    for targets, predictions in (
        (Y_train_np, train_pred),
        (Y_val_np, val_pred),
        (Y_test_np, test_pred),
    )
)

# Print metrics
print("\nTraining Metrics: MAE={:.4f}, MSE={:.4f}, RMSE={:.4f}, R²={:.4f}, MAPE={:.2f}%".format(*metrics_train))
print("Validation Metrics: MAE={:.4f}, MSE={:.4f}, RMSE={:.4f}, R²={:.4f}, MAPE={:.2f}%".format(*metrics_val))
print("Test Metrics: MAE={:.4f}, MSE={:.4f}, RMSE={:.4f}, R²={:.4f}, MAPE={:.2f}%".format(*metrics_test))


Epoch [10/100], Loss: 0.2233
Epoch [20/100], Loss: 0.1596
Epoch [30/100], Loss: 0.1312
Epoch [40/100], Loss: 0.0553
Epoch [50/100], Loss: 0.0022
Epoch [60/100], Loss: 0.0065
Epoch [70/100], Loss: 0.0013
Epoch [80/100], Loss: 0.0017
Epoch [90/100], Loss: 0.0011
Epoch [100/100], Loss: 0.0011

Training Metrics: MAE=0.0296, MSE=0.0011, RMSE=0.0329, R²=0.9938, MAPE=13.75%
Validation Metrics: MAE=0.1457, MSE=0.0231, RMSE=0.1518, R²=-2.9197, MAPE=8.25%
Test Metrics: MAE=0.3171, MSE=0.1035, RMSE=0.3217, R²=-16.1438, MAPE=15.65%


## Bi-LSTM

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Define Bi-LSTM Model
class BiLSTMModel(nn.Module):
    """Stacked bidirectional LSTM whose last time step feeds a single-output linear head.

    Args:
        input_size: number of features per time step.
        hidden_size: hidden width of each direction of each LSTM layer.
        num_layers: number of stacked bidirectional layers.

    Note:
        The head reads ``out[:, -1, :]``. For sequences longer than one step,
        the backward half of that slice has only processed the final element.
    """

    def __init__(self, input_size, hidden_size, num_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
        )
        # Forward and backward features are concatenated -> 2 * hidden_size inputs
        self.fc = nn.Linear(in_features=2 * hidden_size, out_features=1)

    def forward(self, x):
        """Map a batch-first input (batch, seq_len, input_size) to a (batch, 1) prediction."""
        # One state slice per layer and per direction
        state_shape = (self.num_layers * 2, x.size(0), self.hidden_size)
        initial_hidden = torch.zeros(*state_shape, device=x.device)
        initial_cell = torch.zeros(*state_shape, device=x.device)
        sequence_out, _ = self.lstm(x, (initial_hidden, initial_cell))
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)

# Set Parameters
SEED = 42  # fixed seed so this run can be reproduced and compared with other depths
input_size = X_train.shape[1]  # one input feature per column of X_train
hidden_size = 64
num_layers = 2
learning_rate = 0.001
num_epochs = 100

# Convert data to PyTorch tensors.
# unsqueeze(1) gives the features a length-1 sequence axis: (N, 1, input_size).
# On the targets it adds a column axis -> (N, 1), assuming Y_* are 1-D (TODO confirm).
X_train_torch = torch.tensor(X_train.values, dtype=torch.float32).unsqueeze(1).to(device)
Y_train_torch = torch.tensor(Y_train.values, dtype=torch.float32).unsqueeze(1).to(device)
X_val_torch = torch.tensor(X_val.values, dtype=torch.float32).unsqueeze(1).to(device)
Y_val_torch = torch.tensor(Y_val.values, dtype=torch.float32).unsqueeze(1).to(device)
X_test_torch = torch.tensor(X_test.values, dtype=torch.float32).unsqueeze(1).to(device)
Y_test_torch = torch.tensor(Y_test.values, dtype=torch.float32).unsqueeze(1).to(device)

# Initialize model, loss function, and optimizer
torch.manual_seed(SEED)  # seed before construction so the initial weights are reproducible
model = BiLSTMModel(input_size, hidden_size, num_layers).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop: one full-batch update per epoch, loss logged every 10 epochs
for epoch in range(num_epochs):
    model.train()
    outputs = model(X_train_torch)
    loss = criterion(outputs, Y_train_torch)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

# Evaluation
model.eval()
with torch.no_grad():
    train_pred = model(X_train_torch).cpu().numpy()
    val_pred = model(X_val_torch).cpu().numpy()
    test_pred = model(X_test_torch).cpu().numpy()

# Convert back to NumPy for metric calculations
Y_train_np = Y_train_torch.cpu().numpy()
Y_val_np = Y_val_torch.cpu().numpy()
Y_test_np = Y_test_torch.cpu().numpy()

# Compute metrics
def compute_metrics(y_true, y_pred):
    """Return ``(mae, mse, rmse, r2, mape)`` for predictions against targets.

    Args:
        y_true: array of reference values.
        y_pred: array of predicted values, same shape as ``y_true``.

    Returns:
        Tuple of MAE, MSE, RMSE, R² and MAPE. MAPE is a percentage and
        divides by ``y_true``, so a zero target yields inf / nan.
    """
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    relative_error = np.abs((y_true - y_pred) / y_true)
    return mae, mse, np.sqrt(mse), r2, np.mean(relative_error) * 100

# Score every split with the same helper; each result is (mae, mse, rmse, r2, mape)
metrics_train, metrics_val, metrics_test = (
    compute_metrics(targets, predictions)
    for targets, predictions in (
        (Y_train_np, train_pred),
        (Y_val_np, val_pred),
        (Y_test_np, test_pred),
    )
)

# Print metrics
print("\nTraining Metrics: MAE={:.4f}, MSE={:.4f}, RMSE={:.4f}, R²={:.4f}, MAPE={:.2f}%".format(*metrics_train))
print("Validation Metrics: MAE={:.4f}, MSE={:.4f}, RMSE={:.4f}, R²={:.4f}, MAPE={:.2f}%".format(*metrics_val))
print("Test Metrics: MAE={:.4f}, MSE={:.4f}, RMSE={:.4f}, R²={:.4f}, MAPE={:.2f}%".format(*metrics_test))


Epoch [10/100], Loss: 0.2700
Epoch [20/100], Loss: 0.1525
Epoch [30/100], Loss: 0.0823
Epoch [40/100], Loss: 0.0605
Epoch [50/100], Loss: 0.0271
Epoch [60/100], Loss: 0.0074
Epoch [70/100], Loss: 0.0017
Epoch [80/100], Loss: 0.0017
Epoch [90/100], Loss: 0.0016
Epoch [100/100], Loss: 0.0012

Training Metrics: MAE=0.0289, MSE=0.0012, RMSE=0.0343, R²=0.9933, MAPE=13.05%
Validation Metrics: MAE=0.1051, MSE=0.0113, RMSE=0.1063, R²=-0.9207, MAPE=5.99%
Test Metrics: MAE=0.1533, MSE=0.0236, RMSE=0.1537, R²=-2.9167, MAPE=7.59%


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Define Bi-LSTM Model
class BiLSTMModel(nn.Module):
    """Stacked bidirectional LSTM whose last time step feeds a single-output linear head.

    Args:
        input_size: number of features per time step.
        hidden_size: hidden width of each direction of each LSTM layer.
        num_layers: number of stacked bidirectional layers.

    Note:
        The head reads ``out[:, -1, :]``. For sequences longer than one step,
        the backward half of that slice has only processed the final element.
    """

    def __init__(self, input_size, hidden_size, num_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
        )
        # Forward and backward features are concatenated -> 2 * hidden_size inputs
        self.fc = nn.Linear(in_features=2 * hidden_size, out_features=1)

    def forward(self, x):
        """Map a batch-first input (batch, seq_len, input_size) to a (batch, 1) prediction."""
        # One state slice per layer and per direction
        state_shape = (self.num_layers * 2, x.size(0), self.hidden_size)
        initial_hidden = torch.zeros(*state_shape, device=x.device)
        initial_cell = torch.zeros(*state_shape, device=x.device)
        sequence_out, _ = self.lstm(x, (initial_hidden, initial_cell))
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)

# Set Parameters
SEED = 42  # fixed seed so this run can be reproduced and compared with other depths
input_size = X_train.shape[1]  # one input feature per column of X_train
hidden_size = 64
num_layers = 3
learning_rate = 0.001
num_epochs = 100

# Convert data to PyTorch tensors.
# unsqueeze(1) gives the features a length-1 sequence axis: (N, 1, input_size).
# On the targets it adds a column axis -> (N, 1), assuming Y_* are 1-D (TODO confirm).
X_train_torch = torch.tensor(X_train.values, dtype=torch.float32).unsqueeze(1).to(device)
Y_train_torch = torch.tensor(Y_train.values, dtype=torch.float32).unsqueeze(1).to(device)
X_val_torch = torch.tensor(X_val.values, dtype=torch.float32).unsqueeze(1).to(device)
Y_val_torch = torch.tensor(Y_val.values, dtype=torch.float32).unsqueeze(1).to(device)
X_test_torch = torch.tensor(X_test.values, dtype=torch.float32).unsqueeze(1).to(device)
Y_test_torch = torch.tensor(Y_test.values, dtype=torch.float32).unsqueeze(1).to(device)

# Initialize model, loss function, and optimizer
torch.manual_seed(SEED)  # seed before construction so the initial weights are reproducible
model = BiLSTMModel(input_size, hidden_size, num_layers).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop: one full-batch update per epoch, loss logged every 10 epochs
for epoch in range(num_epochs):
    model.train()
    outputs = model(X_train_torch)
    loss = criterion(outputs, Y_train_torch)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

# Evaluation
model.eval()
with torch.no_grad():
    train_pred = model(X_train_torch).cpu().numpy()
    val_pred = model(X_val_torch).cpu().numpy()
    test_pred = model(X_test_torch).cpu().numpy()

# Convert back to NumPy for metric calculations
Y_train_np = Y_train_torch.cpu().numpy()
Y_val_np = Y_val_torch.cpu().numpy()
Y_test_np = Y_test_torch.cpu().numpy()

# Compute metrics
def compute_metrics(y_true, y_pred):
    """Return ``(mae, mse, rmse, r2, mape)`` for predictions against targets.

    Args:
        y_true: array of reference values.
        y_pred: array of predicted values, same shape as ``y_true``.

    Returns:
        Tuple of MAE, MSE, RMSE, R² and MAPE. MAPE is a percentage and
        divides by ``y_true``, so a zero target yields inf / nan.
    """
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    relative_error = np.abs((y_true - y_pred) / y_true)
    return mae, mse, np.sqrt(mse), r2, np.mean(relative_error) * 100

# Score every split with the same helper; each result is (mae, mse, rmse, r2, mape)
metrics_train, metrics_val, metrics_test = (
    compute_metrics(targets, predictions)
    for targets, predictions in (
        (Y_train_np, train_pred),
        (Y_val_np, val_pred),
        (Y_test_np, test_pred),
    )
)

# Print metrics
print("\nTraining Metrics: MAE={:.4f}, MSE={:.4f}, RMSE={:.4f}, R²={:.4f}, MAPE={:.2f}%".format(*metrics_train))
print("Validation Metrics: MAE={:.4f}, MSE={:.4f}, RMSE={:.4f}, R²={:.4f}, MAPE={:.2f}%".format(*metrics_val))
print("Test Metrics: MAE={:.4f}, MSE={:.4f}, RMSE={:.4f}, R²={:.4f}, MAPE={:.2f}%".format(*metrics_test))


Epoch [10/100], Loss: 0.3436
Epoch [20/100], Loss: 0.1889
Epoch [30/100], Loss: 0.1164
Epoch [40/100], Loss: 0.0615
Epoch [50/100], Loss: 0.0207
Epoch [60/100], Loss: 0.0036
Epoch [70/100], Loss: 0.0027
Epoch [80/100], Loss: 0.0028
Epoch [90/100], Loss: 0.0019
Epoch [100/100], Loss: 0.0016

Training Metrics: MAE=0.0321, MSE=0.0016, RMSE=0.0402, R²=0.9908, MAPE=18.71%
Validation Metrics: MAE=0.0434, MSE=0.0019, RMSE=0.0435, R²=0.6782, MAPE=2.49%
Test Metrics: MAE=0.0246, MSE=0.0007, RMSE=0.0266, R²=0.8830, MAPE=1.24%


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Define Bi-LSTM Model
class BiLSTMModel(nn.Module):
    """Stacked bidirectional LSTM whose last time step feeds a single-output linear head.

    Args:
        input_size: number of features per time step.
        hidden_size: hidden width of each direction of each LSTM layer.
        num_layers: number of stacked bidirectional layers.

    Note:
        The head reads ``out[:, -1, :]``. For sequences longer than one step,
        the backward half of that slice has only processed the final element.
    """

    def __init__(self, input_size, hidden_size, num_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
        )
        # Forward and backward features are concatenated -> 2 * hidden_size inputs
        self.fc = nn.Linear(in_features=2 * hidden_size, out_features=1)

    def forward(self, x):
        """Map a batch-first input (batch, seq_len, input_size) to a (batch, 1) prediction."""
        # One state slice per layer and per direction
        state_shape = (self.num_layers * 2, x.size(0), self.hidden_size)
        initial_hidden = torch.zeros(*state_shape, device=x.device)
        initial_cell = torch.zeros(*state_shape, device=x.device)
        sequence_out, _ = self.lstm(x, (initial_hidden, initial_cell))
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)

# Set Parameters
SEED = 42  # fixed seed so this run can be reproduced and compared with other depths
input_size = X_train.shape[1]  # one input feature per column of X_train
hidden_size = 64
num_layers = 5
learning_rate = 0.001
num_epochs = 100

# Convert data to PyTorch tensors.
# unsqueeze(1) gives the features a length-1 sequence axis: (N, 1, input_size).
# On the targets it adds a column axis -> (N, 1), assuming Y_* are 1-D (TODO confirm).
X_train_torch = torch.tensor(X_train.values, dtype=torch.float32).unsqueeze(1).to(device)
Y_train_torch = torch.tensor(Y_train.values, dtype=torch.float32).unsqueeze(1).to(device)
X_val_torch = torch.tensor(X_val.values, dtype=torch.float32).unsqueeze(1).to(device)
Y_val_torch = torch.tensor(Y_val.values, dtype=torch.float32).unsqueeze(1).to(device)
X_test_torch = torch.tensor(X_test.values, dtype=torch.float32).unsqueeze(1).to(device)
Y_test_torch = torch.tensor(Y_test.values, dtype=torch.float32).unsqueeze(1).to(device)

# Initialize model, loss function, and optimizer
torch.manual_seed(SEED)  # seed before construction so the initial weights are reproducible
model = BiLSTMModel(input_size, hidden_size, num_layers).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop: one full-batch update per epoch, loss logged every 10 epochs
for epoch in range(num_epochs):
    model.train()
    outputs = model(X_train_torch)
    loss = criterion(outputs, Y_train_torch)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

# Evaluation
model.eval()
with torch.no_grad():
    train_pred = model(X_train_torch).cpu().numpy()
    val_pred = model(X_val_torch).cpu().numpy()
    test_pred = model(X_test_torch).cpu().numpy()

# Convert back to NumPy for metric calculations
Y_train_np = Y_train_torch.cpu().numpy()
Y_val_np = Y_val_torch.cpu().numpy()
Y_test_np = Y_test_torch.cpu().numpy()

# Compute metrics
def compute_metrics(y_true, y_pred):
    """Return ``(mae, mse, rmse, r2, mape)`` for predictions against targets.

    Args:
        y_true: array of reference values.
        y_pred: array of predicted values, same shape as ``y_true``.

    Returns:
        Tuple of MAE, MSE, RMSE, R² and MAPE. MAPE is a percentage and
        divides by ``y_true``, so a zero target yields inf / nan.
    """
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    relative_error = np.abs((y_true - y_pred) / y_true)
    return mae, mse, np.sqrt(mse), r2, np.mean(relative_error) * 100

# Score every split with the same helper; each result is (mae, mse, rmse, r2, mape)
metrics_train, metrics_val, metrics_test = (
    compute_metrics(targets, predictions)
    for targets, predictions in (
        (Y_train_np, train_pred),
        (Y_val_np, val_pred),
        (Y_test_np, test_pred),
    )
)

# Print metrics
print("\nTraining Metrics: MAE={:.4f}, MSE={:.4f}, RMSE={:.4f}, R²={:.4f}, MAPE={:.2f}%".format(*metrics_train))
print("Validation Metrics: MAE={:.4f}, MSE={:.4f}, RMSE={:.4f}, R²={:.4f}, MAPE={:.2f}%".format(*metrics_val))
print("Test Metrics: MAE={:.4f}, MSE={:.4f}, RMSE={:.4f}, R²={:.4f}, MAPE={:.2f}%".format(*metrics_test))


Epoch [10/100], Loss: 0.2673
Epoch [20/100], Loss: 0.1615
Epoch [30/100], Loss: 0.1314
Epoch [40/100], Loss: 0.0521
Epoch [50/100], Loss: 0.0080
Epoch [60/100], Loss: 0.0079
Epoch [70/100], Loss: 0.0035
Epoch [80/100], Loss: 0.0028
Epoch [90/100], Loss: 0.0026
Epoch [100/100], Loss: 0.0024

Training Metrics: MAE=0.0420, MSE=0.0024, RMSE=0.0486, R²=0.9865, MAPE=22.11%
Validation Metrics: MAE=0.1931, MSE=0.0400, RMSE=0.2001, R²=-5.8052, MAPE=10.94%
Test Metrics: MAE=0.3950, MSE=0.1598, RMSE=0.3998, R²=-25.4853, MAPE=19.51%
