In [None]:
# pandas is imported here as well: this is the first cell of the notebook, so on a
# fresh kernel (Restart & Run All) the import cell below has not run yet and `pd`
# would otherwise be undefined.
import pandas as pd

# Load the raw series used throughout the notebook and preview the first rows.
data = pd.read_csv('../data/multistep_regression.csv')
data.head()

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import torch
from torch.utils.data import Dataset, DataLoader

# Rescale the 'Value' column into [0, 1] and keep the fitted scaler so that model
# outputs can be mapped back to the original units later on.
# NOTE(review): the scaler is fitted on every row, including rows that are later
# used for evaluation — consider fitting on the training portion only.
scaler = MinMaxScaler(feature_range=(0, 1)).fit(data[['Value']])
data['inflow_scaled'] = scaler.transform(data[['Value']])

# Create sequences for LSTM
class TimeSeriesDataset(Dataset):
    """Sliding-window view over one column of a DataFrame.

    Item ``i`` is the pair ``(x, y)`` where ``x`` holds the ``seq_length``
    consecutive values of ``target_column`` starting at row ``i`` and ``y`` is
    the value in the row immediately after that window.

    Args:
        data: DataFrame-like object supporting ``len()``, column lookup by
            name and ``.iloc`` positional indexing.
        target_column: Name of the column the windows are cut from.
        seq_length: Number of consecutive rows in each input window.
    """

    def __init__(self, data, target_column, seq_length):
        self.data = data
        self.target_column = target_column
        self.seq_length = seq_length

    def __len__(self):
        """Return the number of complete (window, next value) pairs."""
        # Clamp at zero: a frame shorter than the window would otherwise yield
        # a negative length, which makes ``len()`` raise ValueError.
        return max(0, len(self.data) - self.seq_length)

    def __getitem__(self, index):
        """Return ``(window, target)`` as float32 tensors for ``index``.

        Raises:
            IndexError: If ``index`` is outside ``[-len(self), len(self))``.
        """
        size = len(self)
        # Support negative indices explicitly; passed straight to ``iloc`` they
        # would produce a window that does not match its target.
        position = index + size if index < 0 else index
        if not 0 <= position < size:
            raise IndexError(f"index {index} is out of range for dataset of length {size}")
        column = self.data[self.target_column]
        window = column.iloc[position:position + self.seq_length].to_numpy()
        target = column.iloc[position + self.seq_length]
        return (
            torch.tensor(window, dtype=torch.float32),
            torch.tensor(target, dtype=torch.float32),
        )

# Create dataset
seq_length = 12
dataset = TimeSeriesDataset(data, target_column='inflow_scaled', seq_length=seq_length)

# Split chronologically: the first 80% of the windows train the model and the most
# recent 20% are held out. A random split is unsuitable here because neighbouring
# windows overlap (test values would also appear inside training windows) and
# because it scrambles the test order, which the evaluation plot draws against time.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
print("Train size:", train_size)
print("Test size:", test_size)
train_dataset = torch.utils.data.Subset(dataset, range(train_size))
test_dataset = torch.utils.data.Subset(dataset, range(train_size, len(dataset)))

# Create DataLoaders
# Training batches are reshuffled every epoch with a seeded generator so runs are
# repeatable; the test loader keeps chronological order.
train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
    generator=torch.Generator().manual_seed(42),
)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [None]:
import torch.nn as nn

class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear layer applied to the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of each LSTM layer's hidden state.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values produced per sequence.
        dropout: Dropout probability between stacked LSTM layers. Ignored when
            ``num_layers`` is 1, because there is no inter-layer connection to
            apply it to.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout=0.2):
        super().__init__()
        # nn.LSTM only applies dropout between layers and warns when a non-zero
        # value is combined with a single layer, so drop it in that case.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=inter_layer_dropout,
        )
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a ``(batch, seq_len, input_size)`` tensor to ``(batch, output_size)``."""
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])  # Use the last output of the sequence
        return out

# Seed PyTorch's global RNG before the model is built so that weight initialisation
# (and later draws from the same RNG, such as dropout) are repeatable across
# Restart & Run All.
torch.manual_seed(42)

# Initialize model
input_size = 1    # one feature per time step (the scaled series value)
hidden_size = 50
num_layers = 2
output_size = 1   # single next-step prediction

model = LSTMModel(input_size, hidden_size, num_layers, output_size)
print(model)



In [None]:
import torch.optim as optim

# Loss and optimiser: mean squared error minimised with Adam.
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Fit the model, reporting the mean per-batch loss after each pass over the data.
num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for window_batch, target_batch in train_loader:
        # Append a trailing axis so inputs carry a feature dimension and the
        # targets line up with the model's output.
        features = window_batch.unsqueeze(-1)
        targets = target_batch.unsqueeze(-1)

        optimizer.zero_grad()
        batch_loss = criterion(model(features), targets)
        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item()

    mean_loss = running_loss / len(train_loader)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {mean_loss:.4f}")



In [None]:
# Run the trained model over the held-out loader, collecting targets and predictions.
model.eval()
y_test_actual, y_test_predicted = [], []

with torch.no_grad():  # inference only, no gradients needed
    for window_batch, target_batch in test_loader:
        batch_predictions = model(window_batch.unsqueeze(-1))
        y_test_actual.extend(target_batch.unsqueeze(-1).numpy())
        y_test_predicted.extend(batch_predictions.numpy())

# Map both series back from the scaled range to the original units.
actual_column = np.array(y_test_actual).reshape(-1, 1)
predicted_column = np.array(y_test_predicted).reshape(-1, 1)
y_test_actual_rescaled = scaler.inverse_transform(actual_column)
y_test_predicted_rescaled = scaler.inverse_transform(predicted_column)

# Error metrics, computed in the original units.
mae = mean_absolute_error(y_test_actual_rescaled, y_test_predicted_rescaled)
mse = mean_squared_error(y_test_actual_rescaled, y_test_predicted_rescaled)
rmse = np.sqrt(mse)

print("\nModel Evaluation Metrics:")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")

# Forecast the next `forecast_horizon` months by feeding each prediction back in
# as the newest input value.
forecast_horizon = 5

model.eval()
# Start the rolling window from the last `seq_length` scaled observations,
# shaped (1, seq_length, 1) = (batch, time, feature).
last_sequence = torch.tensor(
    data['inflow_scaled'].values[-seq_length:], dtype=torch.float32
).unsqueeze(0).unsqueeze(-1)

future_predictions = []
with torch.no_grad():  # inference only; entered once rather than on every step
    for _ in range(forecast_horizon):
        next_prediction = model(last_sequence).item()
        future_predictions.append(next_prediction)

        # Slide the window: drop the oldest value and append the new prediction
        next_step = torch.tensor([[[next_prediction]]], dtype=torch.float32)
        last_sequence = torch.cat((last_sequence[:, 1:, :], next_step), dim=1)

# Rescale future predictions back to original values
future_predictions_rescaled = scaler.inverse_transform(np.array(future_predictions).reshape(-1, 1))

print(f"\nForecast for the Next {forecast_horizon} Months:")
print(future_predictions_rescaled.flatten())


In [None]:
import matplotlib.pyplot as plt

# Plot actual vs. predicted on the test set
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(y_test_actual_rescaled, label='Actual', color='green')
ax.plot(y_test_predicted_rescaled, label='Predicted', color='red', linestyle='--')
ax.set_title('PyTorch LSTM: Actual vs Predicted')
ax.set_xlabel('Time')
ax.set_ylabel('Water Inflow')
ax.legend()
ax.grid()
plt.show()

# Plot forecast
# Take the number of steps from the forecast array itself so the x-axis and title
# always match the data instead of assuming exactly five values.
n_forecast = len(future_predictions_rescaled)
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(range(1, n_forecast + 1), future_predictions_rescaled, label='Forecast', color='blue', linestyle='--')
ax.set_title(f'PyTorch LSTM Forecast for Next {n_forecast} Months')
ax.set_xlabel('Future Months')
ax.set_ylabel('Water Inflow')
ax.legend()
ax.grid()
plt.show()
