In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Set a random seed for reproducibility
torch.manual_seed(42)
np.random.seed(42)

# Create a synthetic multivariate time series dataset
def generate_multivariate_time_series(num_samples, input_dim, output_dim):
    np.random.seed(42)
    data = np.random.randn(num_samples, input_dim + output_dim)
    return data

# Define the dataset class
class TimeSeriesDataset(Dataset):
    def __init__(self, data, input_length, target_length):
        self.data = data
        self.input_length = input_length
        self.target_length = target_length

    def __len__(self):
        return len(self.data) - self.input_length - self.target_length + 1

    def __getitem__(self, idx):
        input_data = self.data[idx:idx + self.input_length, :]
        target_data = self.data[idx + self.input_length:idx + self.input_length + self.target_length, :]
        return torch.FloatTensor(input_data), torch.FloatTensor(target_data)

# Define the PositionalEncoding module
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=1000):
        super(PositionalEncoding, self).__init__()
        position = torch.arange(0, max_len).unsqueeze(1).float()
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * -(np.log(10000.0) / d_model))
        self.positional_encoding = nn.Parameter(torch.zeros(1, max_len, d_model))
        self.positional_encoding[:, :, 0::2] = torch.sin(position * div_term)
        self.positional_encoding[:, :, 1::2] = torch.cos(position * div_term)

    def forward(self, x):
        return x + self.positional_encoding[:, :x.size(1)].detach()

# Define the Transformer model with positional encoding
class TransformerForecasting(nn.Module):
    def __init__(self, input_dim, output_dim, d_model=64, nhead=2, num_encoder_layers=2):
        super(TransformerForecasting, self).__init__()
        self.embedding = nn.Linear(input_dim, d_model)
        self.positional_encoding = PositionalEncoding(d_model)
        self.transformer = nn.Transformer(d_model, nhead, num_encoder_layers)
        self.linear = nn.Linear(d_model, output_dim)

    def forward(self, x):
        x = self.embedding(x)
        x = self.positional_encoding(x.permute(1, 0, 2))
        x = self.transformer(x)
        x = self.linear(x[-1, :, :])  # Use the last transformer layer output for forecasting
        return x


# Hyperparameters
input_dim = 5  # Number of input features
output_dim = 1  # Number of output features (for univariate time series, set this to 1)
input_length = 10  # Length of input sequence
target_length = 1  # Length of output sequence
batch_size = 32
num_epochs = 50
lr = 0.001

# Generate synthetic data
data = generate_multivariate_time_series(1000, input_dim, output_dim)

# Split the data into training and testing sets
train_size = int(0.8 * len(data))
train_data, test_data = data[:train_size], data[train_size:]

# Normalize the data
scaler = StandardScaler()
train_data = scaler.fit_transform(train_data)
test_data = scaler.transform(test_data)

# Create datasets and dataloaders
train_dataset = TimeSeriesDataset(train_data, input_length, target_length)
test_dataset = TimeSeriesDataset(test_data, input_length, target_length)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Initialize the model, loss function, and optimizer
model = TransformerForecasting(input_dim, output_dim)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

# Training loop
for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs.permute(1, 0, 2))  # Permute to (sequence_length, batch_size, input_dim)
        loss = criterion(outputs, targets[:, 0, :])  # Assume univariate output, adjust if needed
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    average_loss = total_loss / len(train_loader)
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {average_loss:.4f}')

# Evaluation
model.eval()
predictions = []
true_values = []
with torch.no_grad():
    for inputs, targets in test_loader:
        outputs = model(inputs.permute(1, 0, 2))
        predictions.append(outputs.cpu().numpy())
        true_values.append(targets[:, 0, :].cpu().numpy())

predictions = np.concatenate(predictions, axis=0)
true_values = np.concatenate(true_values, axis=0)

# Inverse transform the data
predictions = scaler.inverse_transform(predictions)
true_values = scaler.inverse_transform(true_values)

# Calculate and print the mean squared error
mse = mean_squared_error(true_values, predictions)
print(f'Mean Squared Error on Test Data: {mse:.4f}')

# Plot the results
plt.plot(true_values, label='True Values')
plt.plot(predictions, label='Predictions')
plt.legend()
plt.show()

RuntimeError: a view of a leaf Variable that requires grad is being used in an in-place operation.