In [1]:
import optuna
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, TensorDataset

# Set seed for reproducibility
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

# Load and preprocess data
data = pd.read_csv("Book1.csv")
data["Date"] = pd.to_datetime(data["Date"])
data = data.sort_values(by="Date")

# Normalize the data
scaler = MinMaxScaler()
numeric_data = data.drop(columns=["Date"])
scaled_data = scaler.fit_transform(numeric_data)
data[numeric_data.columns] = scaled_data

# Split data into train, validation, and test sets
train_data = data[(data["Date"].dt.year >= 2017) & (data["Date"].dt.year <= 2022)].drop(columns=["Date"]).values
val_data = data[data["Date"].dt.year == 2023].drop(columns=["Date"]).values
test_data = data[data["Date"].dt.year == 2024].drop(columns=["Date"]).values

# Convert data to sequences for time series
def create_sequences(data, sequence_length=30):
    sequences, targets = [], []
    for i in range(len(data) - sequence_length):
        sequences.append(data[i:i + sequence_length])
        targets.append(data[i + sequence_length, 0])  # Assuming target is the first column
    return np.array(sequences), np.array(targets)

sequence_length = 30
X_train, y_train = create_sequences(train_data, sequence_length)
X_val, y_val = create_sequences(val_data, sequence_length)
X_test, y_test = create_sequences(test_data, sequence_length)

# Prepare data loaders
batch_size = 32
train_loader = DataLoader(TensorDataset(torch.tensor(X_train, dtype=torch.float32),
                                        torch.tensor(y_train, dtype=torch.float32)), batch_size=batch_size, shuffle=True)
val_loader = DataLoader(TensorDataset(torch.tensor(X_val, dtype=torch.float32),
                                      torch.tensor(y_val, dtype=torch.float32)), batch_size=batch_size, shuffle=False)

def evaluate_model(model, data_loader, device):
    model.eval()
    criterion = nn.MSELoss()
    total_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in data_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            predictions = model(X_batch).squeeze()
            loss = criterion(predictions, y_batch)
            total_loss += loss.item()
    return total_loss / len(data_loader)

# Define the LSTM with Attention model
class LSTMWithAttention(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, dropout):
        super(LSTMWithAttention, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, dropout=dropout)
        self.attention = nn.Linear(hidden_dim, 1)
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        lstm_out, _ = self.lstm(x)  # Output shape: (batch_size, seq_length, hidden_dim)
        attention_weights = torch.softmax(self.attention(lstm_out), dim=1)  # Shape: (batch_size, seq_length, 1)
        context_vector = torch.sum(attention_weights * lstm_out, dim=1)  # Weighted sum: (batch_size, hidden_dim)
        output = self.fc(context_vector)  # Shape: (batch_size, 1)
        return output

def objective(trial):
    # Suggest hyperparameters
    hidden_dim = trial.suggest_int("hidden_dim", 32, 256, step=32)
    num_layers = trial.suggest_int("num_layers", 1, 4)
    dropout = trial.suggest_float("dropout", 0.1, 0.5)
    learning_rate = trial.suggest_loguniform("learning_rate", 1e-4, 1e-2)
    num_epochs = 20

    # Initialize model
    model = LSTMWithAttention(X_train.shape[2], hidden_dim, num_layers, dropout)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Training setup
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Training loop
    for epoch in range(num_epochs):
        model.train()
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            predictions = model(X_batch).squeeze()
            loss = criterion(predictions, y_batch)
            loss.backward()
            optimizer.step()

    # Evaluate on validation data
    val_loss = evaluate_model(model, val_loader, device)
    return val_loss

# Run Optuna study
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)

# Output the best hyperparameters
print("Best hyperparameters:", study.best_params)


  from .autonotebook import tqdm as notebook_tqdm
[I 2025-01-13 21:07:00,099] A new study created in memory with name: no-name-10a6bbff-abc7-4bc1-a0fd-ca1c271fc8b1
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-4, 1e-2)
[I 2025-01-13 21:08:09,951] Trial 0 finished with value: 0.00038584197252269155 and parameters: {'hidden_dim': 224, 'num_layers': 3, 'dropout': 0.47543014519211935, 'learning_rate': 0.0004550604683151981}. Best is trial 0 with value: 0.00038584197252269155.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-4, 1e-2)
[I 2025-01-13 21:09:07,449] Trial 1 finished with value: 0.0003534032472419891 and parameters: {'hidden_dim': 256, 'num_layers': 2, 'dropout': 0.3173461625807401, 'learning_rate': 0.00030857233593809647}. Best is trial 1 with value: 0.0003534032472419891.
[I 2025-01-13 21:10:19,148] Trial 2 finished with value: 0.0003907226002245972 and parameters: {'hidden_dim': 224, 'num_layers': 3, 'dropout': 0.2675322426388319, 'learning_rate'

Best hyperparameters: {'hidden_dim': 224, 'num_layers': 3, 'dropout': 0.4332642026284568, 'learning_rate': 0.0048610015241686315}
