In [None]:
# Per-fold metric accumulators, filled in by the cross-validation loop.
mse_scores, mape_scores = [], []

# Time-ordered cross-validation splitter with five folds.
n_splits = 5
tscv = TimeSeriesSplit(n_splits=n_splits)

# Standardise every column of train_df except the label ("target") and the
# row identifier ("ID"). The scaler is fitted on all rows of train_df.
scaler = StandardScaler()
scaled_data = scaler.fit_transform(train_df.drop(["target", "ID"], axis=1))

def mape(y_true, y_pred):
    """Return the mean absolute percentage error, expressed in percent.

    Computes ``mean(|y_true - y_pred| / |y_true|) * 100`` over all elements.

    The percentage error is undefined where the true value is exactly zero;
    those entries are left out of the mean so that a single zero target does
    not turn the whole score into ``inf`` or ``nan``.

    Args:
        y_true: Ground-truth values (any array-like, e.g. list or ndarray).
        y_pred: Predicted values, broadcastable against ``y_true``.

    Returns:
        The MAPE as a percentage, or ``nan`` when no entry can be scored
        (empty input, or every true value is zero).
    """
    # Accept lists/tuples as well as arrays, and do the arithmetic in float64.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    y_true, y_pred = np.broadcast_arrays(y_true, y_pred)

    scorable = y_true != 0
    if not scorable.any():
        return np.nan

    relative_error = (y_true[scorable] - y_pred[scorable]) / y_true[scorable]
    return np.mean(np.abs(relative_error)) * 100

# Define the PyTorch LSTM model
class BiLSTMModel(nn.Module):
    """Bidirectional LSTM followed by a two-layer fully connected head.

    Args:
        input_size: Number of features per time step.
        hidden_size: Hidden units per LSTM direction.
        output_size: Number of values produced per sequence.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, bidirectional=True, batch_first=True)
        # The two directions are concatenated, hence 2 * hidden_size inputs.
        self.fc1 = nn.Linear(hidden_size * 2, 50)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(50, output_size)

    def forward(self, x):
        """Map a batch of sequences to one prediction per sequence.

        Args:
            x: Tensor laid out as (batch, timesteps, input_size), matching
                ``batch_first=True``.

        Returns:
            Tensor of shape (batch, output_size).
        """
        # Summarise each sequence with the *final* hidden state of each
        # direction. Indexing the output at the last time step would pair the
        # forward direction's final state with a backward state that has read
        # only a single step. For one-step sequences both choices coincide.
        _, (h_n, _) = self.lstm(x)
        # h_n[-2] is the forward direction, h_n[-1] the backward direction.
        summary = torch.cat((h_n[-2], h_n[-1]), dim=-1)
        out = self.fc1(summary)
        out = self.relu(out)
        out = self.fc2(out)
        return out

# Cross-validation loop.
#
# For every split produced by tscv, a fresh BiLSTMModel is trained on the
# training indices and scored on the validation indices. Validation MSE and
# validation MAPE are collected per fold and averaged at the end.

# Raw (unscaled) features and targets; scaling is fitted inside each fold so
# that validation rows never influence the statistics used for training.
cv_features = train_df.drop(["target", "ID"], axis=1).values
cv_targets = train_df["target"].values

# Hyper-parameters shared by every fold.
hidden_size = 50
output_size = 1
num_epochs = 30
batch_size = 32

for train_index, val_index in tscv.split(cv_features):
    # Fit the scaler on this fold's training rows only, then apply it to both parts.
    fold_scaler = StandardScaler()
    x_train_fold = fold_scaler.fit_transform(cv_features[train_index])
    x_valid_fold = fold_scaler.transform(cv_features[val_index])
    y_train_fold, y_valid_fold = cv_targets[train_index], cv_targets[val_index]

    # Reshaping data for LSTM (samples, timesteps, features) with a single time step
    x_train_fold = np.reshape(x_train_fold, (x_train_fold.shape[0], 1, x_train_fold.shape[1]))
    x_valid_fold = np.reshape(x_valid_fold, (x_valid_fold.shape[0], 1, x_valid_fold.shape[1]))

    # Convert to PyTorch tensors; targets become column vectors to match the model output.
    x_train_fold = torch.tensor(x_train_fold, dtype=torch.float32)
    x_valid_fold = torch.tensor(x_valid_fold, dtype=torch.float32)
    y_train_fold = torch.tensor(y_train_fold, dtype=torch.float32).unsqueeze(1)
    y_valid_fold = torch.tensor(y_valid_fold, dtype=torch.float32).unsqueeze(1)

    # Initialize model, loss function, and optimizer
    input_size = x_train_fold.shape[2]
    model = BiLSTMModel(input_size, hidden_size, output_size)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Train the model with mini-batches of batch_size rows, taken in their
    # original order (no shuffling); one optimizer step per batch.
    n_train = x_train_fold.shape[0]
    model.train()
    for epoch in range(num_epochs):
        for start in range(0, n_train, batch_size):
            stop = start + batch_size
            optimizer.zero_grad()
            batch_pred = model(x_train_fold[start:stop])
            loss = criterion(batch_pred, y_train_fold[start:stop])
            loss.backward()
            optimizer.step()

    # Predictions for both parts of the fold, without tracking gradients
    model.eval()
    with torch.no_grad():
        y_valid_pred_fold = model(x_valid_fold).numpy()
        y_train_pred_fold = model(x_train_fold).numpy()

    # Score the fold on its validation rows; training MAPE is reported only
    # as a diagnostic and is not part of the cross-validation average.
    y_valid_true = y_valid_fold.numpy()
    mse_fold = mean_squared_error(y_valid_true, y_valid_pred_fold)
    mape_fold = mape(y_valid_true, y_valid_pred_fold)
    train_mape_fold = mape(y_train_fold.numpy(), y_train_pred_fold)
    mse_scores.append(mse_fold)
    mape_scores.append(mape_fold)

    print(f"Fold train MAPE: {train_mape_fold}")
    print(f"Fold validation MAPE: {mape_fold}")
    print(f"Fold MSE: {mse_fold}")

# Average validation metrics across all folds
average_mse = np.mean(mse_scores)
average_mape = np.mean(mape_scores)
print(f"Average Bidirectional LSTM Mean Squared Error: {average_mse}")
print(f"Average Bidirectional LSTM MAPE: {average_mape}")