In [1]:
%pip install torch numpy pandas scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from xlstm import xLSTM

# Load and preprocess the Air Passengers dataset
data = pd.read_csv('AirPassengers.csv')
passengers = data['#Passengers'].values.reshape(-1, 1)

scaler = MinMaxScaler(feature_range=(-1, 1))
passengers_norm = scaler.fit_transform(passengers)

# Create sequences
seq_length = 12
x = []
y = []
for i in range(len(passengers_norm) - seq_length):
    x.append(passengers_norm[i:i+seq_length])
    y.append(passengers_norm[i+seq_length])
x = np.array(x)
y = np.array(y)

# Split the data into train and test sets
train_size = int(len(x) * 0.8)
x_train, y_train = x[:train_size], y[:train_size]
x_test, y_test = x[train_size:], y[train_size:]

# Convert data to PyTorch tensors
x_train_tensor = torch.FloatTensor(x_train)
y_train_tensor = torch.FloatTensor(y_train)
x_test_tensor = torch.FloatTensor(x_test)
y_test_tensor = torch.FloatTensor(y_test)

# Create DataLoader
batch_size = 32
train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Initialize the model
input_size = 1
hidden_size = 64
num_heads = 1
layer_order = ['m', 's', 'm']
num_copies = 1

model = xLSTM(input_size, hidden_size, num_heads, layer_order, num_copies)

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 100
for epoch in range(num_epochs):
    for batch_x, batch_y in train_loader:
        batch_x = batch_x.unsqueeze(-1)
        batch_y = batch_y.unsqueeze(-1)

        h_prev = torch.zeros(batch_size, hidden_size)
        c_prev = torch.zeros(batch_size, num_heads, hidden_size // num_heads, hidden_size // num_heads)
        n_prev = torch.ones(batch_size, num_heads, hidden_size // num_heads)
        m_prev = torch.zeros(batch_size, num_heads)

        optimizer.zero_grad()
        outputs = []
        for t in range(seq_length):
            output, h_prev, c_prev, n_prev, m_prev = model(batch_x[:, t], h_prev, c_prev, n_prev, m_prev)
            outputs.append(output)

        outputs = torch.stack(outputs, dim=1).squeeze(-1)
        loss = criterion(outputs[:, -1], batch_y.squeeze(-1))
        loss.backward()
        optimizer.step()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

# Evaluation
model.eval()
with torch.no_grad():
    h_prev = torch.zeros(x_test_tensor.size(0), hidden_size)
    c_prev = torch.zeros(x_test_tensor.size(0), num_heads, hidden_size // num_heads, hidden_size // num_heads)
    n_prev = torch.ones(x_test_tensor.size(0), num_heads, hidden_size // num_heads)
    m_prev = torch.zeros(x_test_tensor.size(0), num_heads)

    outputs = []
    for t in range(seq_length):
        output, h_prev, c_prev, n_prev, m_prev = model(x_test_tensor[:, t].unsqueeze(-1), h_prev, c_prev, n_prev, m_prev)
        outputs.append(output)

    outputs = torch.stack(outputs, dim=1).squeeze(-1)
    test_loss = criterion(outputs[:, -1], y_test_tensor)
    print(f"Test Loss: {test_loss.item():.4f}")

# Inverse transform the predictions and actual values
predicted = scaler.inverse_transform(outputs[:, -1].detach().numpy().reshape(-1, 1))
actual = scaler.inverse_transform(y_test_tensor.numpy().reshape(-1, 1))

# Print the first few predictions and actual values
print("Predictions:", predicted[:5])
print("Actual:", actual[:5])

RuntimeError: Expected 2D (unbatched) or 3D (batched) input to conv1d, but got input of size: [32, 1, 1, 2]