In [None]:
from sklearn.model_selection import KFold
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np

In [None]:
# Read the saved dictionary and bind each of its entries to a notebook-level name
data = torch.load('/content/preprocessed_data.pt')
X_train, y_train = data['X_train'], data['y_train']
X_val, y_val = data['X_val'], data['y_val']
X_test, y_test = data['X_test'], data['y_test']
sample_weights = data['sample_weights']

In [None]:

# Cross-validation works on the whole training split; the K-fold loop below
# carves its own train/validation folds out of X and y.
X, y = X_train, y_train

# Dataset wrapper pairing inputs with targets
class SequenceDataset(Dataset):
    """Index-aligned view over an input container ``X`` and a target container ``y``.

    Item ``i`` is the pair ``(X[i], y[i])``; the dataset length is ``len(X)``.
    """

    def __init__(self, X, y):
        """Keep references to ``X`` and ``y`` (no copy is made)."""
        self.X, self.y = X, y

    def __len__(self):
        """Return the number of samples, taken from ``X``."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(input, target)`` pair stored at position ``idx``."""
        sample = self.X[idx]
        target = self.y[idx]
        return sample, target

# Model definition
class LSTMModel(nn.Module):
    """LSTM regressor producing one scalar prediction per input sequence.

    Args:
        input_size: Number of features at each time step.
        hidden_size: Width of the LSTM hidden state (per direction).
        num_layers: Number of stacked LSTM layers.
        bidirectional: If True, the LSTM runs in both directions and the final
            hidden states of both directions are concatenated before the
            output layer.
        dropout_p: Dropout probability. Applied between stacked LSTM layers
            (only when ``num_layers > 1``) and to the features fed into the
            output layer.

    Note:
        ``bidirectional`` and ``dropout_p`` are forwarded to the layers; they
        are not decorative. Pass ``bidirectional=False, dropout_p=0.0`` for a
        plain unidirectional LSTM followed by a linear head.
    """

    def __init__(self, input_size, hidden_size=64, num_layers=1, bidirectional=True, dropout_p=0.2):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            bidirectional=bidirectional,
            # nn.LSTM only applies dropout between layers and warns when it is
            # non-zero for a single-layer network.
            dropout=dropout_p if num_layers > 1 else 0.0,
        )
        self.dropout = nn.Dropout(dropout_p)
        num_directions = 2 if bidirectional else 1
        self.fc = nn.Linear(hidden_size * num_directions, 1)

    def forward(self, x):
        """Return predictions of shape ``(batch,)`` for ``x`` of shape ``(batch, seq, input_size)``."""
        _, (h_n, _) = self.lstm(x)
        if self.lstm.bidirectional:
            # The last two entries of h_n are the top layer's forward and
            # backward final states; each has consumed the whole sequence.
            features = torch.cat((h_n[-2], h_n[-1]), dim=-1)
        else:
            # Top layer's final state == output at the last time step.
            features = h_n[-1]
        out = self.fc(self.dropout(features))
        # Drop only the trailing size-1 axis so a batch of one stays 1-D.
        return out.squeeze(-1)

# Hyper-parameter grid: every combination of these values is evaluated
param_grid = dict(
    hidden_size=[32, 64],
    num_layers=[1, 2],
    lr=[0.01, 0.001, 0.0001],
)

# K-fold splitter (shuffled, fixed random_state so the folds are repeatable)
k_folds = 3
kf = KFold(n_splits=k_folds, shuffle=True, random_state=42)

# Use the GPU when one is available, otherwise the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Axis 2 of X; passed to LSTMModel as its input_size
input_size = X.shape[2]

# Best result seen so far; updated by the search loop
best_loss = float('inf')
best_params = None

# Grid Search loop
# Each (hidden_size, num_layers, lr) combination is scored by its mean K-fold
# validation MSE; the combination with the lowest mean is kept.
cv_seed = 42   # base seed for weight initialisation and batch shuffling
n_epochs = 50  # training epochs per fold
criterion = nn.MSELoss()  # holds no state, so one instance serves every run

for hidden_size in param_grid['hidden_size']:
    for num_layers in param_grid['num_layers']:
        for lr in param_grid['lr']:
            val_losses = []

            for fold, (train_idx, val_idx) in enumerate(kf.split(X)):
                # Re-seed the global torch RNG for every run: model init and
                # DataLoader shuffling (no generator passed) both draw from it.
                # The same fold then starts identically for every setting, so
                # settings are compared fairly and re-runs are repeatable.
                torch.manual_seed(cv_seed + fold)

                model = LSTMModel(input_size, hidden_size, num_layers).to(device)
                optimizer = torch.optim.Adam(model.parameters(), lr=lr)

                X_train_fold = X[train_idx]
                y_train_fold = y[train_idx]
                X_val_fold = X[val_idx]
                y_val_fold = y[val_idx]

                train_loader = DataLoader(SequenceDataset(X_train_fold, y_train_fold), batch_size=32, shuffle=True)
                val_loader = DataLoader(SequenceDataset(X_val_fold, y_val_fold), batch_size=32)

                # Training loop
                for epoch in range(n_epochs):
                    model.train()
                    for X_batch, y_batch in train_loader:
                        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                        optimizer.zero_grad()
                        # Flatten both sides to 1-D so MSELoss never broadcasts:
                        # a final batch of one sample may come back as a 0-d
                        # tensor, and a column-shaped target would otherwise be
                        # expanded against the predictions.
                        outputs = model(X_batch).reshape(-1)
                        loss = criterion(outputs, y_batch.reshape(-1))
                        loss.backward()
                        optimizer.step()

                # Evaluate on the held-out fold
                model.eval()
                fold_val_loss = 0.0
                with torch.no_grad():
                    for X_batch, y_batch in val_loader:
                        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                        outputs = model(X_batch).reshape(-1)
                        loss = criterion(outputs, y_batch.reshape(-1))
                        # criterion returns the batch mean; weight it by the
                        # batch size so the total is a per-sample average.
                        fold_val_loss += loss.item() * X_batch.size(0)
                fold_val_loss /= len(val_loader.dataset)
                val_losses.append(fold_val_loss)

            avg_loss = np.mean(val_losses)
            print(f"Params: hidden={hidden_size}, layers={num_layers}, lr={lr} -> Val Loss: {avg_loss:.4f}")

            if avg_loss < best_loss:
                best_loss = avg_loss
                best_params = {
                    'hidden_size': hidden_size,
                    'num_layers': num_layers,
                    'lr': lr
                }

print(f"\nBest Params: {best_params} with average CV loss {best_loss:.4f}")
