# HYPERPARAMETER TUNING

In [32]:
import os
# Make the Colab sample_data folder the working directory so that relative
# paths resolve there.
# NOTE(review): presumably this is also where the local `mamba` module imported
# by later cells lives — confirm.
os.chdir('/content/sample_data')

In [33]:
# IPython shell escape (not plain Python): installs Optuna, the hyperparameter
# search library used by every tuning cell below, into the notebook runtime.
!pip install optuna



# Apple Stock Data

In [34]:
import optuna
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import torch
import torch.nn as nn
import torch.nn.functional as F
from mamba import Mamba, MambaConfig

# Check for CUDA availability
use_cuda = torch.cuda.is_available()  # run on the GPU whenever one is visible
epochs = 100  # upper bound on optimisation steps per Optuna trial

# ---- Load the raw price history -------------------------------------------
data = pd.read_csv('/content/sample_data/AAPL_Apple_Inc..csv')
data['Date'] = pd.to_datetime(data['Date'], format='%Y-%m-%d')

# Day-over-day change of 'Close', in percent. The first row has no predecessor
# (pct_change yields NaN there), so it is discarded and the index rebuilt.
data['pct_chg'] = data['Close'].pct_change() * 100
data = data.iloc[1:].reset_index(drop=True)

# pop() removes each column from the frame in place while handing it back:
# `close` keeps the raw prices, `ratechg` is the change rescaled from percent
# to a fraction and is the regression target.
close = data.pop('Close').to_numpy()
ratechg = (data.pop('pct_chg') * 0.01).to_numpy()

# Feature matrix: every remaining column except the first one
# (assumed to be 'Date' — TODO confirm against the CSV layout).
dat = data.iloc[:, 1:].to_numpy()

# Columns to standardise, and their positions inside `dat`; the -1 accounts
# for the leading column that was sliced off above.
cols_to_normalize = ["Open", "High", "Low", "Adj Close", "Volume"]
norm_idx = [data.columns.get_loc(name) - 1 for name in cols_to_normalize]

# ---- Chronological split: train | validation | test -----------------------
n_test = 100  # The full test set remains constant
n_val = 200
holdout = n_test + n_val
train_data = dat[:-holdout]
val_data = dat[-holdout:-n_test]
test_data = dat[-n_test:]

# ---- Standardise -----------------------------------------------------------
# Statistics are fitted on train + validation rows only (never on test rows).
# NOTE(review): because validation rows contribute to mean/std, the validation
# loss used for tuning is not fully independent of the scaler — confirm this
# is intended.
fit_rows = np.vstack((train_data, val_data))[:, norm_idx]
mean = fit_rows.mean(axis=0)
std = fit_rows.std(axis=0)

# The three splits are basic slices (views) of `dat`, so these assignments
# rewrite `dat` in place as well.
for split in (train_data, val_data, test_data):
    split[:, norm_idx] = (split[:, norm_idx] - mean) / std

# ---- Features / targets per split -----------------------------------------
trainX, trainy = train_data, ratechg[:-holdout]
valX, valy = val_data, ratechg[-holdout:-n_test]
testX = test_data[-3:]  # only the last 3 test rows are used for prediction
testy = ratechg[-n_test:][-3:]  # matching last 3 rate changes

# Define evaluation metric
def evaluation_metric(y_test, y_hat):
    """Print MSE, RMSE, MAE and R^2 for a set of predictions.

    Args:
        y_test: Ground-truth target values.
        y_hat: Predicted values, aligned with ``y_test``.

    The four scores are written to stdout on one line, space-separated and
    formatted with six decimals. Nothing is returned.
    """
    mse = mean_squared_error(y_test, y_hat)
    scores = (
        mse,
        mse**0.5,  # RMSE is the square root of the MSE
        mean_absolute_error(y_test, y_hat),
        r2_score(y_test, y_hat),
    )
    print('%.6f %.6f %.6f %.6f' % scores)

# Define model
class Net(nn.Module):
    """Regression network: linear projection, Mamba model, linear head, tanh.

    Args:
        in_dim: Number of input features per time step.
        out_dim: Number of outputs per time step.
        hidden_dim: Width used for the projection and as the Mamba ``d_model``.
        num_layers: Passed to ``MambaConfig`` as ``n_layers``.
    """

    def __init__(self, in_dim, out_dim, hidden_dim, num_layers):
        super().__init__()
        self.config = MambaConfig(d_model=hidden_dim, n_layers=num_layers)
        stages = [
            nn.Linear(in_dim, hidden_dim),   # lift raw features to the model width
            Mamba(self.config),
            nn.Linear(hidden_dim, out_dim),  # project back down to the output size
            nn.Tanh(),                       # bound every output to (-1, 1)
        ]
        self.mamba = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the pipeline and return the result as a 1-D tensor."""
        return self.mamba(x).flatten()

# Define objective function for Optuna
def objective(trial):
    """Optuna objective: fit one ``Net`` configuration and score it on validation data.

    Samples the learning rate and weight decay (log-uniform) together with the
    hidden width and layer count, then trains with Adam on the whole training
    sequence as a single batch for at most ``epochs`` steps. Training stops
    early once the validation MSE has failed to improve for 10 consecutive
    steps.

    Relies on the module-level ``trainX``, ``trainy``, ``valX``, ``valy``,
    ``epochs`` and ``use_cuda``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        float: The lowest validation MSE observed during training.
    """
    # suggest_float(..., log=True) replaces the deprecated suggest_loguniform;
    # it samples the same log-uniform range under the same parameter names.
    lr = trial.suggest_float('lr', 1e-5, 1e-2, log=True)
    wd = trial.suggest_float('wd', 1e-6, 1e-3, log=True)
    hidden_dim = trial.suggest_int('hidden_dim', 8, 64)
    num_layers = trial.suggest_int('num_layers', 1, 4)

    device = torch.device('cuda' if use_cuda else 'cpu')

    # Put the model on its final device before the optimizer captures its
    # parameters, as the torch.optim documentation recommends.
    clf = Net(len(trainX[0]), 1, hidden_dim, num_layers).to(device)
    opt = torch.optim.Adam(clf.parameters(), lr=lr, weight_decay=wd)

    # unsqueeze(0) adds a leading batch dimension of size 1 to each feature matrix.
    xt = torch.from_numpy(trainX).float().unsqueeze(0).to(device)
    xv = torch.from_numpy(valX).float().unsqueeze(0).to(device)
    yt = torch.from_numpy(trainy).float().to(device)
    yv = torch.from_numpy(valy).float().to(device)

    best_val_loss = float("inf")
    patience, wait = 10, 0
    for _ in range(epochs):
        # One full-batch optimisation step.
        clf.train()
        opt.zero_grad()
        train_loss = F.mse_loss(clf(xt), yt)
        train_loss.backward()
        opt.step()

        # Validation pass without gradient tracking.
        clf.eval()
        with torch.no_grad():
            val_loss = F.mse_loss(clf(xv), yv).item()

        # Early stopping on the validation loss.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            wait = 0
        else:
            wait += 1
            if wait >= patience:
                break

    return best_val_loss

# Perform hyperparameter optimization
# Minimise the value returned by `objective` over 50 trials.
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)

# Report the winning configuration, then expose each value under its own name.
best_params = study.best_params
print("Best hyperparameters:", best_params)

best_lr, best_wd, best_hidden_dim, best_num_layers = (
    best_params[key] for key in ('lr', 'wd', 'hidden_dim', 'num_layers')
)

[I 2024-11-10 16:33:07,138] A new study created in memory with name: no-name-5d748fd1-6104-4007-b9ea-5038299a58c6
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  wd = trial.suggest_loguniform('wd', 1e-6, 1e-3)
[I 2024-11-10 16:33:09,721] Trial 0 finished with value: 0.0028028273954987526 and parameters: {'lr': 7.76598029029617e-05, 'wd': 0.00020462353463830172, 'hidden_dim': 45, 'num_layers': 3}. Best is trial 0 with value: 0.0028028273954987526.
[I 2024-11-10 16:33:10,904] Trial 1 finished with value: 0.016716856509447098 and parameters: {'lr': 0.0008746665774731694, 'wd': 0.00016920671299095434, 'hidden_dim': 51, 'num_layers': 2}. Best is trial 0 with value: 0.0028028273954987526.
[I 2024-11-10 16:33:14,044] Trial 2 finished with value: 0.0002657175064086914 and parameters: {'lr': 0.007001847336373218, 'wd': 1.704043566167442e-06, 'hidden_dim': 44, 'num_layers': 3}. Best is trial 2 with value: 0.0002657175064086914.
[I 2024-11-10 16:33:16,372] Trial 3 finished with value: 0.14408

Best hyperparameters: {'lr': 0.004242931752889983, 'wd': 9.418111311321393e-05, 'hidden_dim': 18, 'num_layers': 1}


# Reliance Industries Stock

In [35]:
import optuna
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import torch
import torch.nn as nn
import torch.nn.functional as F
from mamba import Mamba, MambaConfig

# Check for CUDA availability
use_cuda = torch.cuda.is_available()  # run on the GPU whenever one is visible
epochs = 100  # upper bound on optimisation steps per Optuna trial

# ---- Load the raw price history -------------------------------------------
data = pd.read_csv('/content/sample_data/RELIANCE.NS_Reliance_Industries_Limited.csv')
data['Date'] = pd.to_datetime(data['Date'], format='%Y-%m-%d')

# Day-over-day change of 'Close', in percent. The first row has no predecessor
# (pct_change yields NaN there), so it is discarded and the index rebuilt.
data['pct_chg'] = data['Close'].pct_change() * 100
data = data.iloc[1:].reset_index(drop=True)

# pop() removes each column from the frame in place while handing it back:
# `close` keeps the raw prices, `ratechg` is the change rescaled from percent
# to a fraction and is the regression target.
close = data.pop('Close').to_numpy()
ratechg = (data.pop('pct_chg') * 0.01).to_numpy()

# Feature matrix: every remaining column except the first one
# (assumed to be 'Date' — TODO confirm against the CSV layout).
dat = data.iloc[:, 1:].to_numpy()

# Columns to standardise, and their positions inside `dat`; the -1 accounts
# for the leading column that was sliced off above.
cols_to_normalize = ["Open", "High", "Low", "Adj Close", "Volume"]
norm_idx = [data.columns.get_loc(name) - 1 for name in cols_to_normalize]

# ---- Chronological split: train | validation | test -----------------------
n_test = 100  # The full test set remains constant
n_val = 200
holdout = n_test + n_val
train_data = dat[:-holdout]
val_data = dat[-holdout:-n_test]
test_data = dat[-n_test:]

# ---- Standardise -----------------------------------------------------------
# Statistics are fitted on train + validation rows only (never on test rows).
# NOTE(review): because validation rows contribute to mean/std, the validation
# loss used for tuning is not fully independent of the scaler — confirm this
# is intended.
fit_rows = np.vstack((train_data, val_data))[:, norm_idx]
mean = fit_rows.mean(axis=0)
std = fit_rows.std(axis=0)

# The three splits are basic slices (views) of `dat`, so these assignments
# rewrite `dat` in place as well.
for split in (train_data, val_data, test_data):
    split[:, norm_idx] = (split[:, norm_idx] - mean) / std

# ---- Features / targets per split -----------------------------------------
trainX, trainy = train_data, ratechg[:-holdout]
valX, valy = val_data, ratechg[-holdout:-n_test]
testX = test_data[-3:]  # only the last 3 test rows are used for prediction
testy = ratechg[-n_test:][-3:]  # matching last 3 rate changes

# Define evaluation metric
def evaluation_metric(y_test, y_hat):
    """Print MSE, RMSE, MAE and R^2 for a set of predictions.

    Args:
        y_test: Ground-truth target values.
        y_hat: Predicted values, aligned with ``y_test``.

    The four scores are written to stdout on one line, space-separated and
    formatted with six decimals. Nothing is returned.
    """
    mse = mean_squared_error(y_test, y_hat)
    scores = (
        mse,
        mse**0.5,  # RMSE is the square root of the MSE
        mean_absolute_error(y_test, y_hat),
        r2_score(y_test, y_hat),
    )
    print('%.6f %.6f %.6f %.6f' % scores)

# Define model
class Net(nn.Module):
    """Regression network: linear projection, Mamba model, linear head, tanh.

    Args:
        in_dim: Number of input features per time step.
        out_dim: Number of outputs per time step.
        hidden_dim: Width used for the projection and as the Mamba ``d_model``.
        num_layers: Passed to ``MambaConfig`` as ``n_layers``.
    """

    def __init__(self, in_dim, out_dim, hidden_dim, num_layers):
        super().__init__()
        self.config = MambaConfig(d_model=hidden_dim, n_layers=num_layers)
        stages = [
            nn.Linear(in_dim, hidden_dim),   # lift raw features to the model width
            Mamba(self.config),
            nn.Linear(hidden_dim, out_dim),  # project back down to the output size
            nn.Tanh(),                       # bound every output to (-1, 1)
        ]
        self.mamba = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the pipeline and return the result as a 1-D tensor."""
        return self.mamba(x).flatten()

# Define objective function for Optuna
def objective(trial):
    """Optuna objective: fit one ``Net`` configuration and score it on validation data.

    Samples the learning rate and weight decay (log-uniform) together with the
    hidden width and layer count, then trains with Adam on the whole training
    sequence as a single batch for at most ``epochs`` steps. Training stops
    early once the validation MSE has failed to improve for 10 consecutive
    steps.

    Relies on the module-level ``trainX``, ``trainy``, ``valX``, ``valy``,
    ``epochs`` and ``use_cuda``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        float: The lowest validation MSE observed during training.
    """
    # suggest_float(..., log=True) replaces the deprecated suggest_loguniform;
    # it samples the same log-uniform range under the same parameter names.
    lr = trial.suggest_float('lr', 1e-5, 1e-2, log=True)
    wd = trial.suggest_float('wd', 1e-6, 1e-3, log=True)
    hidden_dim = trial.suggest_int('hidden_dim', 8, 64)
    num_layers = trial.suggest_int('num_layers', 1, 4)

    device = torch.device('cuda' if use_cuda else 'cpu')

    # Put the model on its final device before the optimizer captures its
    # parameters, as the torch.optim documentation recommends.
    clf = Net(len(trainX[0]), 1, hidden_dim, num_layers).to(device)
    opt = torch.optim.Adam(clf.parameters(), lr=lr, weight_decay=wd)

    # unsqueeze(0) adds a leading batch dimension of size 1 to each feature matrix.
    xt = torch.from_numpy(trainX).float().unsqueeze(0).to(device)
    xv = torch.from_numpy(valX).float().unsqueeze(0).to(device)
    yt = torch.from_numpy(trainy).float().to(device)
    yv = torch.from_numpy(valy).float().to(device)

    best_val_loss = float("inf")
    patience, wait = 10, 0
    for _ in range(epochs):
        # One full-batch optimisation step.
        clf.train()
        opt.zero_grad()
        train_loss = F.mse_loss(clf(xt), yt)
        train_loss.backward()
        opt.step()

        # Validation pass without gradient tracking.
        clf.eval()
        with torch.no_grad():
            val_loss = F.mse_loss(clf(xv), yv).item()

        # Early stopping on the validation loss.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            wait = 0
        else:
            wait += 1
            if wait >= patience:
                break

    return best_val_loss

# Perform hyperparameter optimization
# Minimise the value returned by `objective` over 50 trials.
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)

# Report the winning configuration, then expose each value under its own name.
best_params = study.best_params
print("Best hyperparameters:", best_params)

best_lr, best_wd, best_hidden_dim, best_num_layers = (
    best_params[key] for key in ('lr', 'wd', 'hidden_dim', 'num_layers')
)

[I 2024-11-10 16:34:27,332] A new study created in memory with name: no-name-61127ad1-73b6-4c5f-acd4-c05831eb4701
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  wd = trial.suggest_loguniform('wd', 1e-6, 1e-3)
[I 2024-11-10 16:34:32,199] Trial 0 finished with value: 0.2639835774898529 and parameters: {'lr': 4.049805441427899e-05, 'wd': 4.877223503734596e-05, 'hidden_dim': 63, 'num_layers': 3}. Best is trial 0 with value: 0.2639835774898529.
[I 2024-11-10 16:34:36,404] Trial 1 finished with value: 0.0001970904995687306 and parameters: {'lr': 0.0011578412226535995, 'wd': 2.9872067736964168e-05, 'hidden_dim': 44, 'num_layers': 4}. Best is trial 1 with value: 0.0001970904995687306.
[I 2024-11-10 16:34:37,889] Trial 2 finished with value: 0.0011837149504572153 and parameters: {'lr': 0.0006783331720693029, 'wd': 1.516648717949992e-05, 'hidden_dim': 41, 'num_layers': 3}. Best is trial 1 with value: 0.0001970904995687306.
[I 2024-11-10 16:34:40,887] Trial 3 finished with value: 0.000309865

Best hyperparameters: {'lr': 0.0017353734689015063, 'wd': 6.917474982141434e-06, 'hidden_dim': 54, 'num_layers': 2}


# NSRGY Stock

In [36]:
import optuna
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import torch
import torch.nn as nn
import torch.nn.functional as F
from mamba import Mamba, MambaConfig

# Check for CUDA availability
use_cuda = torch.cuda.is_available()  # run on the GPU whenever one is visible
epochs = 100  # upper bound on optimisation steps per Optuna trial

# ---- Load the raw price history -------------------------------------------
data = pd.read_csv('/content/sample_data/NSRGY_Nestlé_S.A..csv')
data['Date'] = pd.to_datetime(data['Date'], format='%Y-%m-%d')

# Day-over-day change of 'Close', in percent. The first row has no predecessor
# (pct_change yields NaN there), so it is discarded and the index rebuilt.
data['pct_chg'] = data['Close'].pct_change() * 100
data = data.iloc[1:].reset_index(drop=True)

# pop() removes each column from the frame in place while handing it back:
# `close` keeps the raw prices, `ratechg` is the change rescaled from percent
# to a fraction and is the regression target.
close = data.pop('Close').to_numpy()
ratechg = (data.pop('pct_chg') * 0.01).to_numpy()

# Feature matrix: every remaining column except the first one
# (assumed to be 'Date' — TODO confirm against the CSV layout).
dat = data.iloc[:, 1:].to_numpy()

# Columns to standardise, and their positions inside `dat`; the -1 accounts
# for the leading column that was sliced off above.
cols_to_normalize = ["Open", "High", "Low", "Adj Close", "Volume"]
norm_idx = [data.columns.get_loc(name) - 1 for name in cols_to_normalize]

# ---- Chronological split: train | validation | test -----------------------
n_test = 100  # The full test set remains constant
n_val = 200
holdout = n_test + n_val
train_data = dat[:-holdout]
val_data = dat[-holdout:-n_test]
test_data = dat[-n_test:]

# ---- Standardise -----------------------------------------------------------
# Statistics are fitted on train + validation rows only (never on test rows).
# NOTE(review): because validation rows contribute to mean/std, the validation
# loss used for tuning is not fully independent of the scaler — confirm this
# is intended.
fit_rows = np.vstack((train_data, val_data))[:, norm_idx]
mean = fit_rows.mean(axis=0)
std = fit_rows.std(axis=0)

# The three splits are basic slices (views) of `dat`, so these assignments
# rewrite `dat` in place as well.
for split in (train_data, val_data, test_data):
    split[:, norm_idx] = (split[:, norm_idx] - mean) / std

# ---- Features / targets per split -----------------------------------------
trainX, trainy = train_data, ratechg[:-holdout]
valX, valy = val_data, ratechg[-holdout:-n_test]
testX = test_data[-3:]  # only the last 3 test rows are used for prediction
testy = ratechg[-n_test:][-3:]  # matching last 3 rate changes

# Define evaluation metric
def evaluation_metric(y_test, y_hat):
    """Print MSE, RMSE, MAE and R^2 for a set of predictions.

    Args:
        y_test: Ground-truth target values.
        y_hat: Predicted values, aligned with ``y_test``.

    The four scores are written to stdout on one line, space-separated and
    formatted with six decimals. Nothing is returned.
    """
    mse = mean_squared_error(y_test, y_hat)
    scores = (
        mse,
        mse**0.5,  # RMSE is the square root of the MSE
        mean_absolute_error(y_test, y_hat),
        r2_score(y_test, y_hat),
    )
    print('%.6f %.6f %.6f %.6f' % scores)

# Define model
class Net(nn.Module):
    """Regression network: linear projection, Mamba model, linear head, tanh.

    Args:
        in_dim: Number of input features per time step.
        out_dim: Number of outputs per time step.
        hidden_dim: Width used for the projection and as the Mamba ``d_model``.
        num_layers: Passed to ``MambaConfig`` as ``n_layers``.
    """

    def __init__(self, in_dim, out_dim, hidden_dim, num_layers):
        super().__init__()
        self.config = MambaConfig(d_model=hidden_dim, n_layers=num_layers)
        stages = [
            nn.Linear(in_dim, hidden_dim),   # lift raw features to the model width
            Mamba(self.config),
            nn.Linear(hidden_dim, out_dim),  # project back down to the output size
            nn.Tanh(),                       # bound every output to (-1, 1)
        ]
        self.mamba = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the pipeline and return the result as a 1-D tensor."""
        return self.mamba(x).flatten()

# Define objective function for Optuna
def objective(trial):
    """Optuna objective: fit one ``Net`` configuration and score it on validation data.

    Samples the learning rate and weight decay (log-uniform) together with the
    hidden width and layer count, then trains with Adam on the whole training
    sequence as a single batch for at most ``epochs`` steps. Training stops
    early once the validation MSE has failed to improve for 10 consecutive
    steps.

    Relies on the module-level ``trainX``, ``trainy``, ``valX``, ``valy``,
    ``epochs`` and ``use_cuda``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        float: The lowest validation MSE observed during training.
    """
    # suggest_float(..., log=True) replaces the deprecated suggest_loguniform;
    # it samples the same log-uniform range under the same parameter names.
    lr = trial.suggest_float('lr', 1e-5, 1e-2, log=True)
    wd = trial.suggest_float('wd', 1e-6, 1e-3, log=True)
    hidden_dim = trial.suggest_int('hidden_dim', 8, 64)
    num_layers = trial.suggest_int('num_layers', 1, 4)

    device = torch.device('cuda' if use_cuda else 'cpu')

    # Put the model on its final device before the optimizer captures its
    # parameters, as the torch.optim documentation recommends.
    clf = Net(len(trainX[0]), 1, hidden_dim, num_layers).to(device)
    opt = torch.optim.Adam(clf.parameters(), lr=lr, weight_decay=wd)

    # unsqueeze(0) adds a leading batch dimension of size 1 to each feature matrix.
    xt = torch.from_numpy(trainX).float().unsqueeze(0).to(device)
    xv = torch.from_numpy(valX).float().unsqueeze(0).to(device)
    yt = torch.from_numpy(trainy).float().to(device)
    yv = torch.from_numpy(valy).float().to(device)

    best_val_loss = float("inf")
    patience, wait = 10, 0
    for _ in range(epochs):
        # One full-batch optimisation step.
        clf.train()
        opt.zero_grad()
        train_loss = F.mse_loss(clf(xt), yt)
        train_loss.backward()
        opt.step()

        # Validation pass without gradient tracking.
        clf.eval()
        with torch.no_grad():
            val_loss = F.mse_loss(clf(xv), yv).item()

        # Early stopping on the validation loss.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            wait = 0
        else:
            wait += 1
            if wait >= patience:
                break

    return best_val_loss

# Perform hyperparameter optimization
# Minimise the value returned by `objective` over 50 trials.
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)

# Report the winning configuration, then expose each value under its own name.
best_params = study.best_params
print("Best hyperparameters:", best_params)

best_lr, best_wd, best_hidden_dim, best_num_layers = (
    best_params[key] for key in ('lr', 'wd', 'hidden_dim', 'num_layers')
)

[I 2024-11-10 16:35:24,307] A new study created in memory with name: no-name-b9348a2a-2b2d-4bcc-83c1-50cd44119144
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  wd = trial.suggest_loguniform('wd', 1e-6, 1e-3)
[I 2024-11-10 16:35:24,761] Trial 0 finished with value: 0.02550608292222023 and parameters: {'lr': 0.0011548456755387116, 'wd': 3.0689832420717197e-06, 'hidden_dim': 12, 'num_layers': 3}. Best is trial 0 with value: 0.02550608292222023.
[I 2024-11-10 16:35:25,597] Trial 1 finished with value: 0.0010583184193819761 and parameters: {'lr': 0.0002684944090774832, 'wd': 8.719789133358913e-05, 'hidden_dim': 50, 'num_layers': 3}. Best is trial 1 with value: 0.0010583184193819761.
[I 2024-11-10 16:35:26,263] Trial 2 finished with value: 0.0031845178455114365 and parameters: {'lr': 0.0016706029167794256, 'wd': 2.526585787519611e-05, 'hidden_dim': 24, 'num_layers': 3}. Best is trial 1 with value: 0.0010583184193819761.
[I 2024-11-10 16:35:30,760] Trial 3 finished with value: 0.0081536

Best hyperparameters: {'lr': 0.007003669931395202, 'wd': 0.0001393959207587204, 'hidden_dim': 62, 'num_layers': 4}


# Petrobras Stock

In [37]:
import optuna
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import torch
import torch.nn as nn
import torch.nn.functional as F
from mamba import Mamba, MambaConfig

# Check for CUDA availability
use_cuda = torch.cuda.is_available()  # run on the GPU whenever one is visible
epochs = 100  # upper bound on optimisation steps per Optuna trial

# ---- Load the raw price history -------------------------------------------
data = pd.read_csv('/content/sample_data/PBR_Petrobras.csv')
data['Date'] = pd.to_datetime(data['Date'], format='%Y-%m-%d')

# Day-over-day change of 'Close', in percent. The first row has no predecessor
# (pct_change yields NaN there), so it is discarded and the index rebuilt.
data['pct_chg'] = data['Close'].pct_change() * 100
data = data.iloc[1:].reset_index(drop=True)

# pop() removes each column from the frame in place while handing it back:
# `close` keeps the raw prices, `ratechg` is the change rescaled from percent
# to a fraction and is the regression target.
close = data.pop('Close').to_numpy()
ratechg = (data.pop('pct_chg') * 0.01).to_numpy()

# Feature matrix: every remaining column except the first one
# (assumed to be 'Date' — TODO confirm against the CSV layout).
dat = data.iloc[:, 1:].to_numpy()

# Columns to standardise, and their positions inside `dat`; the -1 accounts
# for the leading column that was sliced off above.
cols_to_normalize = ["Open", "High", "Low", "Adj Close", "Volume"]
norm_idx = [data.columns.get_loc(name) - 1 for name in cols_to_normalize]

# ---- Chronological split: train | validation | test -----------------------
n_test = 100  # The full test set remains constant
n_val = 200
holdout = n_test + n_val
train_data = dat[:-holdout]
val_data = dat[-holdout:-n_test]
test_data = dat[-n_test:]

# ---- Standardise -----------------------------------------------------------
# Statistics are fitted on train + validation rows only (never on test rows).
# NOTE(review): because validation rows contribute to mean/std, the validation
# loss used for tuning is not fully independent of the scaler — confirm this
# is intended.
fit_rows = np.vstack((train_data, val_data))[:, norm_idx]
mean = fit_rows.mean(axis=0)
std = fit_rows.std(axis=0)

# The three splits are basic slices (views) of `dat`, so these assignments
# rewrite `dat` in place as well.
for split in (train_data, val_data, test_data):
    split[:, norm_idx] = (split[:, norm_idx] - mean) / std

# ---- Features / targets per split -----------------------------------------
trainX, trainy = train_data, ratechg[:-holdout]
valX, valy = val_data, ratechg[-holdout:-n_test]
testX = test_data[-3:]  # only the last 3 test rows are used for prediction
testy = ratechg[-n_test:][-3:]  # matching last 3 rate changes

# Define evaluation metric
def evaluation_metric(y_test, y_hat):
    """Print MSE, RMSE, MAE and R^2 for a set of predictions.

    Args:
        y_test: Ground-truth target values.
        y_hat: Predicted values, aligned with ``y_test``.

    The four scores are written to stdout on one line, space-separated and
    formatted with six decimals. Nothing is returned.
    """
    mse = mean_squared_error(y_test, y_hat)
    scores = (
        mse,
        mse**0.5,  # RMSE is the square root of the MSE
        mean_absolute_error(y_test, y_hat),
        r2_score(y_test, y_hat),
    )
    print('%.6f %.6f %.6f %.6f' % scores)

# Define model
class Net(nn.Module):
    """Regression network: linear projection, Mamba model, linear head, tanh.

    Args:
        in_dim: Number of input features per time step.
        out_dim: Number of outputs per time step.
        hidden_dim: Width used for the projection and as the Mamba ``d_model``.
        num_layers: Passed to ``MambaConfig`` as ``n_layers``.
    """

    def __init__(self, in_dim, out_dim, hidden_dim, num_layers):
        super().__init__()
        self.config = MambaConfig(d_model=hidden_dim, n_layers=num_layers)
        stages = [
            nn.Linear(in_dim, hidden_dim),   # lift raw features to the model width
            Mamba(self.config),
            nn.Linear(hidden_dim, out_dim),  # project back down to the output size
            nn.Tanh(),                       # bound every output to (-1, 1)
        ]
        self.mamba = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the pipeline and return the result as a 1-D tensor."""
        return self.mamba(x).flatten()

# Define objective function for Optuna
def objective(trial):
    """Optuna objective: fit one ``Net`` configuration and score it on validation data.

    Samples the learning rate and weight decay (log-uniform) together with the
    hidden width and layer count, then trains with Adam on the whole training
    sequence as a single batch for at most ``epochs`` steps. Training stops
    early once the validation MSE has failed to improve for 10 consecutive
    steps.

    Relies on the module-level ``trainX``, ``trainy``, ``valX``, ``valy``,
    ``epochs`` and ``use_cuda``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        float: The lowest validation MSE observed during training.
    """
    # suggest_float(..., log=True) replaces the deprecated suggest_loguniform;
    # it samples the same log-uniform range under the same parameter names.
    lr = trial.suggest_float('lr', 1e-5, 1e-2, log=True)
    wd = trial.suggest_float('wd', 1e-6, 1e-3, log=True)
    hidden_dim = trial.suggest_int('hidden_dim', 8, 64)
    num_layers = trial.suggest_int('num_layers', 1, 4)

    device = torch.device('cuda' if use_cuda else 'cpu')

    # Put the model on its final device before the optimizer captures its
    # parameters, as the torch.optim documentation recommends.
    clf = Net(len(trainX[0]), 1, hidden_dim, num_layers).to(device)
    opt = torch.optim.Adam(clf.parameters(), lr=lr, weight_decay=wd)

    # unsqueeze(0) adds a leading batch dimension of size 1 to each feature matrix.
    xt = torch.from_numpy(trainX).float().unsqueeze(0).to(device)
    xv = torch.from_numpy(valX).float().unsqueeze(0).to(device)
    yt = torch.from_numpy(trainy).float().to(device)
    yv = torch.from_numpy(valy).float().to(device)

    best_val_loss = float("inf")
    patience, wait = 10, 0
    for _ in range(epochs):
        # One full-batch optimisation step.
        clf.train()
        opt.zero_grad()
        train_loss = F.mse_loss(clf(xt), yt)
        train_loss.backward()
        opt.step()

        # Validation pass without gradient tracking.
        clf.eval()
        with torch.no_grad():
            val_loss = F.mse_loss(clf(xv), yv).item()

        # Early stopping on the validation loss.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            wait = 0
        else:
            wait += 1
            if wait >= patience:
                break

    return best_val_loss

# Perform hyperparameter optimization
# Minimise the value returned by `objective` over 50 trials.
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)

# Report the winning configuration, then expose each value under its own name.
best_params = study.best_params
print("Best hyperparameters:", best_params)

best_lr, best_wd, best_hidden_dim, best_num_layers = (
    best_params[key] for key in ('lr', 'wd', 'hidden_dim', 'num_layers')
)

[I 2024-11-10 16:36:59,056] A new study created in memory with name: no-name-d5fcd60c-cea4-4435-a273-e5a1c8e4a832
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  wd = trial.suggest_loguniform('wd', 1e-6, 1e-3)
[I 2024-11-10 16:36:59,736] Trial 0 finished with value: 0.003276924369856715 and parameters: {'lr': 0.004538214880406722, 'wd': 4.387561213419411e-05, 'hidden_dim': 36, 'num_layers': 3}. Best is trial 0 with value: 0.003276924369856715.
[I 2024-11-10 16:37:02,847] Trial 1 finished with value: 0.035230446606874466 and parameters: {'lr': 2.1224076693675003e-05, 'wd': 0.0009108168425523938, 'hidden_dim': 28, 'num_layers': 3}. Best is trial 0 with value: 0.003276924369856715.
[I 2024-11-10 16:37:06,171] Trial 2 finished with value: 0.09157811850309372 and parameters: {'lr': 0.00011934604984932481, 'wd': 0.0005696030247464601, 'hidden_dim': 45, 'num_layers': 3}. Best is trial 0 with value: 0.003276924369856715.
[I 2024-11-10 16:37:07,505] Trial 3 finished with value: 0.0100268190

Best hyperparameters: {'lr': 0.0026290338491044476, 'wd': 1.624313800478834e-06, 'hidden_dim': 49, 'num_layers': 2}


# Sasol Stock

In [38]:
import optuna
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import torch
import torch.nn as nn
import torch.nn.functional as F
from mamba import Mamba, MambaConfig

# Check for CUDA availability
use_cuda = torch.cuda.is_available()  # run on the GPU whenever one is visible
epochs = 100  # upper bound on optimisation steps per Optuna trial

# ---- Load the raw price history -------------------------------------------
data = pd.read_csv('/content/sample_data/sasol_stock_data.csv')
data['Date'] = pd.to_datetime(data['Date'], format='%Y-%m-%d')

# Day-over-day change of 'Close', in percent. The first row has no predecessor
# (pct_change yields NaN there), so it is discarded and the index rebuilt.
data['pct_chg'] = data['Close'].pct_change() * 100
data = data.iloc[1:].reset_index(drop=True)

# pop() removes each column from the frame in place while handing it back:
# `close` keeps the raw prices, `ratechg` is the change rescaled from percent
# to a fraction and is the regression target.
close = data.pop('Close').to_numpy()
ratechg = (data.pop('pct_chg') * 0.01).to_numpy()

# Feature matrix: every remaining column except the first one
# (assumed to be 'Date' — TODO confirm against the CSV layout).
dat = data.iloc[:, 1:].to_numpy()

# Columns to standardise, and their positions inside `dat`; the -1 accounts
# for the leading column that was sliced off above.
cols_to_normalize = ["Open", "High", "Low", "Adj Close", "Volume"]
norm_idx = [data.columns.get_loc(name) - 1 for name in cols_to_normalize]

# ---- Chronological split: train | validation | test -----------------------
n_test = 100  # The full test set remains constant
n_val = 200
holdout = n_test + n_val
train_data = dat[:-holdout]
val_data = dat[-holdout:-n_test]
test_data = dat[-n_test:]

# ---- Standardise -----------------------------------------------------------
# Statistics are fitted on train + validation rows only (never on test rows).
# NOTE(review): because validation rows contribute to mean/std, the validation
# loss used for tuning is not fully independent of the scaler — confirm this
# is intended.
fit_rows = np.vstack((train_data, val_data))[:, norm_idx]
mean = fit_rows.mean(axis=0)
std = fit_rows.std(axis=0)

# The three splits are basic slices (views) of `dat`, so these assignments
# rewrite `dat` in place as well.
for split in (train_data, val_data, test_data):
    split[:, norm_idx] = (split[:, norm_idx] - mean) / std

# ---- Features / targets per split -----------------------------------------
trainX, trainy = train_data, ratechg[:-holdout]
valX, valy = val_data, ratechg[-holdout:-n_test]
testX = test_data[-3:]  # only the last 3 test rows are used for prediction
testy = ratechg[-n_test:][-3:]  # matching last 3 rate changes

# Define evaluation metric
def evaluation_metric(y_test, y_hat):
    """Print MSE, RMSE, MAE and R^2 for a set of predictions.

    Args:
        y_test: Ground-truth target values.
        y_hat: Predicted values, aligned with ``y_test``.

    The four scores are written to stdout on one line, space-separated and
    formatted with six decimals. Nothing is returned.
    """
    mse = mean_squared_error(y_test, y_hat)
    scores = (
        mse,
        mse**0.5,  # RMSE is the square root of the MSE
        mean_absolute_error(y_test, y_hat),
        r2_score(y_test, y_hat),
    )
    print('%.6f %.6f %.6f %.6f' % scores)

# Define model
class Net(nn.Module):
    """Regression network: linear projection, Mamba model, linear head, tanh.

    Args:
        in_dim: Number of input features per time step.
        out_dim: Number of outputs per time step.
        hidden_dim: Width used for the projection and as the Mamba ``d_model``.
        num_layers: Passed to ``MambaConfig`` as ``n_layers``.
    """

    def __init__(self, in_dim, out_dim, hidden_dim, num_layers):
        super().__init__()
        self.config = MambaConfig(d_model=hidden_dim, n_layers=num_layers)
        stages = [
            nn.Linear(in_dim, hidden_dim),   # lift raw features to the model width
            Mamba(self.config),
            nn.Linear(hidden_dim, out_dim),  # project back down to the output size
            nn.Tanh(),                       # bound every output to (-1, 1)
        ]
        self.mamba = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the pipeline and return the result as a 1-D tensor."""
        return self.mamba(x).flatten()

# Define objective function for Optuna
def objective(trial):
    """Optuna objective: fit one ``Net`` configuration and score it on validation data.

    Samples the learning rate and weight decay (log-uniform) together with the
    hidden width and layer count, then trains with Adam on the whole training
    sequence as a single batch for at most ``epochs`` steps. Training stops
    early once the validation MSE has failed to improve for 10 consecutive
    steps.

    Relies on the module-level ``trainX``, ``trainy``, ``valX``, ``valy``,
    ``epochs`` and ``use_cuda``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        float: The lowest validation MSE observed during training.
    """
    # suggest_float(..., log=True) replaces the deprecated suggest_loguniform;
    # it samples the same log-uniform range under the same parameter names.
    lr = trial.suggest_float('lr', 1e-5, 1e-2, log=True)
    wd = trial.suggest_float('wd', 1e-6, 1e-3, log=True)
    hidden_dim = trial.suggest_int('hidden_dim', 8, 64)
    num_layers = trial.suggest_int('num_layers', 1, 4)

    device = torch.device('cuda' if use_cuda else 'cpu')

    # Put the model on its final device before the optimizer captures its
    # parameters, as the torch.optim documentation recommends.
    clf = Net(len(trainX[0]), 1, hidden_dim, num_layers).to(device)
    opt = torch.optim.Adam(clf.parameters(), lr=lr, weight_decay=wd)

    # unsqueeze(0) adds a leading batch dimension of size 1 to each feature matrix.
    xt = torch.from_numpy(trainX).float().unsqueeze(0).to(device)
    xv = torch.from_numpy(valX).float().unsqueeze(0).to(device)
    yt = torch.from_numpy(trainy).float().to(device)
    yv = torch.from_numpy(valy).float().to(device)

    best_val_loss = float("inf")
    patience, wait = 10, 0
    for _ in range(epochs):
        # One full-batch optimisation step.
        clf.train()
        opt.zero_grad()
        train_loss = F.mse_loss(clf(xt), yt)
        train_loss.backward()
        opt.step()

        # Validation pass without gradient tracking.
        clf.eval()
        with torch.no_grad():
            val_loss = F.mse_loss(clf(xv), yv).item()

        # Early stopping on the validation loss.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            wait = 0
        else:
            wait += 1
            if wait >= patience:
                break

    return best_val_loss

# Perform hyperparameter optimization
# Minimise the value returned by `objective` over 50 trials.
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)

# Report the winning configuration, then expose each value under its own name.
best_params = study.best_params
print("Best hyperparameters:", best_params)

best_lr, best_wd, best_hidden_dim, best_num_layers = (
    best_params[key] for key in ('lr', 'wd', 'hidden_dim', 'num_layers')
)

[I 2024-11-10 16:38:10,262] A new study created in memory with name: no-name-4518bb95-ade5-4185-97c5-170a02e68414
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  wd = trial.suggest_loguniform('wd', 1e-6, 1e-3)
[I 2024-11-10 16:38:11,712] Trial 0 finished with value: 0.001225325046107173 and parameters: {'lr': 0.008727629959834534, 'wd': 7.631805031535136e-05, 'hidden_dim': 24, 'num_layers': 4}. Best is trial 0 with value: 0.001225325046107173.
[I 2024-11-10 16:38:15,690] Trial 1 finished with value: 0.3857773244380951 and parameters: {'lr': 3.981210291627973e-05, 'wd': 7.3727117025545135e-06, 'hidden_dim': 18, 'num_layers': 4}. Best is trial 0 with value: 0.001225325046107173.
[I 2024-11-10 16:38:17,298] Trial 2 finished with value: 0.0012666514376178384 and parameters: {'lr': 0.00600339897668853, 'wd': 0.00021810957243484578, 'hidden_dim': 44, 'num_layers': 4}. Best is trial 0 with value: 0.001225325046107173.
[I 2024-11-10 16:38:17,629] Trial 3 finished with value: 0.000956577714

Best hyperparameters: {'lr': 0.00984800702937031, 'wd': 0.00021927054805574915, 'hidden_dim': 39, 'num_layers': 2}


# MTN Stock Data

In [39]:
# Load the MTN file as-is and display its first rows to inspect the layout
# (column names and date format) before preprocessing.
df = pd.read_csv('/content/sample_data/MTN_stock_data.csv')
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2014-01-02 00:00:00+00:00,75.169998,75.419998,74.07,74.989998,57.500946,210000
1,2014-01-03 00:00:00+00:00,74.980003,75.309998,74.260002,74.769997,57.332237,105600
2,2014-01-06 00:00:00+00:00,75.010002,75.040001,74.089996,74.169998,56.872173,119900
3,2014-01-07 00:00:00+00:00,74.440002,75.089996,73.519997,73.940002,56.695801,107100
4,2014-01-08 00:00:00+00:00,73.879997,73.879997,72.129997,72.57,55.645329,181800


In [41]:
import optuna
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import torch
import torch.nn as nn
import torch.nn.functional as F
from mamba import Mamba, MambaConfig

# Run-wide settings: epoch budget per trial and whether a CUDA device is usable
epochs = 100
use_cuda = torch.cuda.is_available()

# Read the MTN price history; timestamps carry a UTC offset, and any entry
# that does not match the format becomes NaT because of errors='coerce'
data = pd.read_csv('/content/sample_data/MTN_stock_data.csv')
data['Date'] = pd.to_datetime(
    data['Date'],
    format='%Y-%m-%d %H:%M:%S%z',
    errors='coerce',
)

# Day-over-day change of 'Close', expressed in percent
data['pct_chg'] = 100 * data['Close'].pct_change()

# The first row has no previous close (its pct_chg is NaN), so discard it
data = data.iloc[1:].reset_index(drop=True)

# Pull the raw close prices and the target out of the frame.
# pop() removes each column from `data`, so neither ends up in the features.
close = data.pop('Close').to_numpy()
ratechg = (data.pop('pct_chg') * 0.01).to_numpy()  # percent -> fraction

# Feature matrix: every remaining column except the first one
dat = data.iloc[:, 1:].values

# Feature columns that receive z-score normalisation
cols_to_normalize = ["Open", "High", "Low", "Adj Close", "Volume"]

# Chronological split: the last n_test rows are the test set, the n_val rows
# before them are the validation set, everything earlier is training data
n_test = 100  # The full test set remains constant
n_val = 200
n_holdout = n_test + n_val
train_data, val_data, test_data = (
    dat[:-n_holdout],
    dat[-n_holdout:-n_test],
    dat[-n_test:],
)

# Positions of the normalised columns inside `dat`; shifted by one because
# `dat` was built without the first column of `data`
norm_idx = [data.columns.get_loc(col) - 1 for col in cols_to_normalize]

# Per-column mean and std are estimated on training + validation rows
# NOTE(review): validation rows contribute to these statistics — confirm
# this is intended for the hyperparameter search.
fit_rows = np.vstack((train_data, val_data))[:, norm_idx]
mean = np.mean(fit_rows, axis=0)
std = np.std(fit_rows, axis=0)

# Standardise each split in place with the shared statistics
for split in (train_data, val_data, test_data):
    split[:, norm_idx] = (split[:, norm_idx] - mean) / std

# Pair each feature split with the matching slice of the target
trainX, trainy = train_data, ratechg[:-n_holdout]
valX, valy = val_data, ratechg[-n_holdout:-n_test]
testX = test_data[-3:]  # only the final 3 test rows are used for prediction
testy = ratechg[-n_test:][-3:]  # targets for those same 3 rows
# Define evaluation metric
def evaluation_metric(y_test, y_hat):
    """Print MSE, RMSE, MAE and R2 of ``y_hat`` against ``y_test``.

    The four scores are written on one line, space separated, each with six
    decimal places. Nothing is returned.
    """
    mse = mean_squared_error(y_test, y_hat)
    scores = (
        mse,
        mse**0.5,  # RMSE is the square root of the MSE
        mean_absolute_error(y_test, y_hat),
        r2_score(y_test, y_hat),
    )
    print('%.6f %.6f %.6f %.6f' % scores)

# Define model
class Net(nn.Module):
    """Regressor made of Linear -> Mamba -> Linear -> Tanh.

    Args:
        in_dim: Size of the last dimension of the input.
        out_dim: Size of the last dimension produced by the output projection.
        hidden_dim: Width fed to the Mamba stack (its ``d_model``).
        num_layers: Number of Mamba layers (its ``n_layers``).
    """

    def __init__(self, in_dim, out_dim, hidden_dim, num_layers):
        super().__init__()
        self.config = MambaConfig(d_model=hidden_dim, n_layers=num_layers)

        # Sub-modules are created in pipeline order and then chained
        input_proj = nn.Linear(in_dim, hidden_dim)
        backbone = Mamba(self.config)
        output_proj = nn.Linear(hidden_dim, out_dim)
        squash = nn.Tanh()  # bounds every prediction to (-1, 1)
        self.mamba = nn.Sequential(input_proj, backbone, output_proj, squash)

    def forward(self, x):
        """Run ``x`` through the pipeline and return the result as a 1-D tensor."""
        return self.mamba(x).flatten()

# Define objective function for Optuna
def objective(trial):
    """Train one model with trial-suggested hyperparameters and score it.

    Reads the module-level ``trainX``/``trainy`` and ``valX``/``valy`` arrays
    plus the ``use_cuda`` and ``epochs`` settings. Training is full-batch:
    each epoch performs a single optimizer step on the whole training array.

    Args:
        trial: The Optuna trial used to sample ``lr``, ``wd``, ``hidden_dim``
            and ``num_layers``.

    Returns:
        The lowest validation MSE observed before training stopped, as a
        Python float.
    """
    # Suggest hyperparameters.
    # suggest_float(..., log=True) is the supported replacement for the
    # deprecated suggest_loguniform and samples from the same distribution.
    lr = trial.suggest_float('lr', 1e-5, 1e-2, log=True)
    wd = trial.suggest_float('wd', 1e-6, 1e-3, log=True)
    hidden_dim = trial.suggest_int('hidden_dim', 8, 64)
    num_layers = trial.suggest_int('num_layers', 1, 4)

    # Build the model and move it to the GPU *before* creating the optimizer,
    # so the optimizer is constructed from the parameters on their final device.
    clf = Net(len(trainX[0]), 1, hidden_dim, num_layers)
    if use_cuda:
        clf = clf.cuda()
    opt = torch.optim.Adam(clf.parameters(), lr=lr, weight_decay=wd)

    # unsqueeze(0) adds a leading dimension of size 1 to the 2-D feature arrays
    # (presumably so the whole split is treated as one sequence — confirm
    # against the Mamba implementation).
    xt = torch.from_numpy(trainX).float().unsqueeze(0)
    xv = torch.from_numpy(valX).float().unsqueeze(0)
    yt = torch.from_numpy(trainy).float()
    yv = torch.from_numpy(valy).float()

    if use_cuda:
        xt, xv, yt, yv = xt.cuda(), xv.cuda(), yt.cuda(), yv.cuda()

    # Training loop with early stopping: give up after `patience` consecutive
    # epochs without a new best validation loss.
    best_val_loss = float("inf")
    patience, wait = 10, 0
    for e in range(epochs):
        clf.train()
        opt.zero_grad()
        z = clf(xt)
        train_loss = F.mse_loss(z, yt)
        train_loss.backward()
        opt.step()

        # Validation (no gradients needed)
        clf.eval()
        with torch.no_grad():
            val_pred = clf(xv)
            val_loss = F.mse_loss(val_pred, yv).item()

        # Early stopping bookkeeping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            wait = 0
        else:
            wait += 1
            if wait >= patience:
                break

    return best_val_loss

# Run the Optuna search: 50 trials, minimising the value returned by objective
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)

# Fetch the winning configuration and report it
best_params = study.best_params
print("Best hyperparameters:", best_params)

# Expose each tuned value under its own name
best_lr, best_wd, best_hidden_dim, best_num_layers = (
    best_params[key] for key in ('lr', 'wd', 'hidden_dim', 'num_layers')
)

[I 2024-11-10 16:42:10,789] A new study created in memory with name: no-name-7254ba55-c1ae-4b7d-9dfe-20cd57b40bd6
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  wd = trial.suggest_loguniform('wd', 1e-6, 1e-3)
[I 2024-11-10 16:42:15,744] Trial 0 finished with value: 0.1878051459789276 and parameters: {'lr': 2.646210770228286e-05, 'wd': 1.7123416354785758e-06, 'hidden_dim': 30, 'num_layers': 3}. Best is trial 0 with value: 0.1878051459789276.
[I 2024-11-10 16:42:21,311] Trial 1 finished with value: 0.03195078670978546 and parameters: {'lr': 1.2877168462231178e-05, 'wd': 0.0005264613420627403, 'hidden_dim': 47, 'num_layers': 4}. Best is trial 1 with value: 0.03195078670978546.
[I 2024-11-10 16:42:23,432] Trial 2 finished with value: 0.0003097000590059906 and parameters: {'lr': 0.004205533606291724, 'wd': 1.8748145925650504e-05, 'hidden_dim': 46, 'num_layers': 2}. Best is trial 2 with value: 0.0003097000590059906.
[I 2024-11-10 16:42:24,784] Trial 3 finished with value: 0.000263049325

Best hyperparameters: {'lr': 0.009807515737823923, 'wd': 3.051959676985035e-05, 'hidden_dim': 48, 'num_layers': 2}
