In [1]:
# -----------------------------
# 📦 Import
# -----------------------------
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader

In [2]:
# -----------------------------
# ⚙️ Config
# -----------------------------
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
WINDOW_SIZE = 30
FORECAST_HORIZON = 7
BATCH_SIZE = 64
EPOCHS = 20
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

In [3]:
# -----------------------------
# 📥 Caricamento dati
# -----------------------------
df = pd.read_csv("train.csv", parse_dates=["date"])

In [4]:
# -----------------------------
# 🧹 Feature engineering
# -----------------------------
df["day"] = df["date"].dt.day
df["month"] = df["date"].dt.month
df["weekday"] = df["date"].dt.weekday
df["is_weekend"] = df["weekday"].isin([5, 6]).astype(int)

# Encode categoriali
for col in ["store_nbr", "family"]:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])

# Scaling delle feature continue
scaler = StandardScaler()
df[["dcoilwtico", "sales"]] = scaler.fit_transform(df[["dcoilwtico", "sales"]])

In [5]:
# -----------------------------
# 🪄 Creazione sliding windows
# -----------------------------
features = [
    "store_nbr", "family", "onpromotion",
    "dcoilwtico", "is_holiday", "day", "month",
    "weekday", "is_weekend"
]
target_col = "sales"
INPUT_SIZE = len(features)

series_data = []
for _, group in df.groupby(["store_nbr", "family"]):
    group = group.sort_values("date")
    values = group[features + [target_col]].values.astype(np.float32)
    for i in range(WINDOW_SIZE, len(values) - FORECAST_HORIZON):
        x = values[i - WINDOW_SIZE:i, :-1]
        y = values[i:i + FORECAST_HORIZON, -1]
        series_data.append((x, y))

In [6]:
# -----------------------------
# 🎒 Dataset e Dataloader
# -----------------------------
class SalesDataset(Dataset):
    def __init__(self, data):
        self.data = data
    def __len__(self):
        return len(self.data)
    def __getitem__(self, idx):
        x, y = self.data[idx]
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)

train_data, val_data = train_test_split(series_data, test_size=0.1, random_state=SEED)
train_loader = DataLoader(SalesDataset(train_data), batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(SalesDataset(val_data), batch_size=BATCH_SIZE)

In [7]:
# -----------------------------
# 🤖 Modello Transformer
# -----------------------------
class TransformerForecaster(nn.Module):
    def __init__(self, input_size, window_size, forecast_horizon, d_model=64, num_layers=2, nhead=4, dim_feedforward=128, dropout=0.1):
        super().__init__()
        self.input_projection = nn.Linear(input_size, d_model)
        self.positional_encoding = nn.Parameter(torch.randn(1, window_size, d_model))
        encoder_layer = nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout, batch_first=True)
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.decoder = nn.Linear(d_model, forecast_horizon)
    def forward(self, x):
        x = self.input_projection(x) + self.positional_encoding[:, :x.size(1), :]
        x = self.encoder(x)
        x = x.mean(dim=1)
        return self.decoder(x)

model = TransformerForecaster(INPUT_SIZE, WINDOW_SIZE, FORECAST_HORIZON).to(DEVICE)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [8]:
# -----------------------------
# 🔁 Training loop
# -----------------------------
for epoch in range(EPOCHS):
    model.train()
    train_loss = 0
    for x_batch, y_batch in train_loader:
        x_batch, y_batch = x_batch.to(DEVICE), y_batch.to(DEVICE)
        optimizer.zero_grad()
        output = model(x_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for x_val, y_val in val_loader:
            x_val, y_val = x_val.to(DEVICE), y_val.to(DEVICE)
            pred = model(x_val)
            loss = criterion(pred, y_val)
            val_loss += loss.item()

    print(f"Epoch {epoch+1}/{EPOCHS} - Train Loss: {train_loss:.4f} - Val Loss: {val_loss:.4f}")

Epoch 1/20 - Train Loss: 17457.0551 - Val Loss: 1762.3602
Epoch 2/20 - Train Loss: 14165.1288 - Val Loss: 1924.5076
Epoch 3/20 - Train Loss: 13196.1762 - Val Loss: 1647.1730
Epoch 4/20 - Train Loss: 13184.3209 - Val Loss: 1602.4302
Epoch 5/20 - Train Loss: 12397.5121 - Val Loss: 1489.7200
Epoch 6/20 - Train Loss: 11872.3400 - Val Loss: 1496.3551
Epoch 7/20 - Train Loss: 11496.3238 - Val Loss: 1538.3543
Epoch 8/20 - Train Loss: 11539.5436 - Val Loss: 1771.2255
Epoch 9/20 - Train Loss: 11077.3895 - Val Loss: 1988.9432
Epoch 10/20 - Train Loss: 10798.9615 - Val Loss: 1831.0463
Epoch 11/20 - Train Loss: 10850.6419 - Val Loss: 1826.7535
Epoch 12/20 - Train Loss: 10562.2819 - Val Loss: 1900.8062


KeyboardInterrupt: 