In [1]:
!pip uninstall -y lightning lightning-pytorch lightning-fabric pytorch-lightning pytorch-forecasting 

!pip install --no-cache-dir torch 

!pip install --no-cache-dir "pytorch-lightning<2.0" 

!pip install --no-cache-dir pytorch-forecasting==1.4.0 

[0mFound existing installation: pytorch-lightning 2.5.1.post0
Uninstalling pytorch-lightning-2.5.1.post0:
  Successfully uninstalled pytorch-lightning-2.5.1.post0
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch)
  Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cusparse-cu12==12.3.1.170 (from torch)
  Downloading nvidia_cusparse_cu12-12.3.

In [2]:
import torch
import pandas as pd
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_forecasting import TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import RMSE

In [3]:
DAILY_PATH = "/kaggle/input/freshretailnet-50k-daily-imputed/daily_df_imputed.parquet"
df = pd.read_parquet(DAILY_PATH)
df["dt"] = pd.to_datetime(df["dt"])
df["third_category_id"]   = df["third_category_id"].astype(str)
df["store_id"]            = df["store_id"].astype(str)
df["management_group_id"] = df["management_group_id"].astype(str)
df["time_idx"] = (df["dt"] - df["dt"].min()).dt.days

In [4]:
# ❶ Convert category to string so TFT treats it as categorical
df["third_category_id"] = df["third_category_id"].astype(str)

# ❷ Sort and rebuild time_idx per group so no series has a huge time index
df = df.sort_values(["third_category_id", "dt"]).reset_index(drop=True)
df["time_idx"] = (
    df
    .groupby("third_category_id")
    .cumcount()    # 0,1,2,...
    .add(1)        # → 1,2,3,... per category
)

# ❸ Now cut off the last 7 days for validation
max_encoder_length    = 28
max_prediction_length = 7
training_cutoff       = df["time_idx"].max() - max_prediction_length

# ❹ Build the dataset
from pytorch_forecasting import TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer

tft_dataset = TimeSeriesDataSet(
    df[df["time_idx"] <= training_cutoff],
    time_idx="time_idx",
    target="daily_sale_imputed",
    group_ids=["third_category_id"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=["third_category_id"],
    time_varying_known_reals=["time_idx", "discount", "oos_hours_total", "holiday_flag"],
    time_varying_unknown_reals=["daily_sale_imputed"],
    target_normalizer=GroupNormalizer(groups=["third_category_id"], transformation="softplus"),
    allow_missing_timesteps=True
)

print("✅ TFT dataset created:")
print(f"  • Number of series: {len(tft_dataset.group_ids)}")
print(f"  • Encoder length: {max_encoder_length}")
print(f"  • Prediction length: {max_prediction_length}")

✅ TFT dataset created:
  • Number of series: 1
  • Encoder length: 28
  • Prediction length: 7


In [5]:
# # from torch.utils.data import DataLoader

# # batch_size = 32

# # Cell 5 — small batch, no workers, no pinning
# train_dataloader = tft_dataset.to_dataloader(
#     train=True, batch_size=8, num_workers=0, pin_memory=False
# )
# val_dataloader = tft_dataset.to_dataloader(
#     train=False, batch_size=8, num_workers=0, pin_memory=False
# )

In [6]:
# import torch
from lightning.pytorch import Trainer
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_forecasting import TemporalFusionTransformer
from pytorch_forecasting.metrics import RMSE
# # callbacks
# early_stop = EarlyStopping(monitor="val_loss", patience=5, mode="min")
# lr_logger  = LearningRateMonitor(logging_interval="step")

# # model — tiny TFT
tft = TemporalFusionTransformer.from_dataset(
    tft_dataset,
    learning_rate=3e-3,
    hidden_size=4,             # half again
    attention_head_size=1,     # single head
    hidden_continuous_size=2,  # minimal
    output_size=1,
    loss=RMSE(),
    log_interval=10,
    reduce_on_plateau_patience=3
)

# trainer = Trainer(
#     max_epochs=30,
#     accelerator="cpu",
#     devices=1,
#     callbacks=[early_stop, lr_logger],
#     log_every_n_steps=10,
# )
# trainer.fit(tft, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader)

/usr/local/lib/python3.11/dist-packages/lightning/pytorch/utilities/parsing.py:209: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
/usr/local/lib/python3.11/dist-packages/lightning/pytorch/utilities/parsing.py:209: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.


In [None]:
from lightning.pytorch import Trainer
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor

# 1️⃣ Create dataloaders
train_dataloader = tft_dataset.to_dataloader(train=True,  batch_size=64, num_workers=12)
val_dataloader   = tft_dataset.to_dataloader(train=False, batch_size=64, num_workers=12)

# 2️⃣ Callbacks
early_stop = EarlyStopping(monitor="val_loss", patience=5, mode="min")
lr_logger  = LearningRateMonitor(logging_interval="step")

# 3️⃣ Trainer on GPU
trainer = Trainer(
    max_epochs=30,
    accelerator="gpu",    # force GPU
    devices=1,            # number of GPUs
    callbacks=[early_stop, lr_logger],
    log_every_n_steps=10,
    accumulate_grad_batches=2
)

# 4️⃣ Fit
trainer.fit(
    tft,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader
)

INFO: 💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO: GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO: HPU available: False, using: 0 HPUs
2025-07-06 00:39:15.023142: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1751762355.250854      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1751762355.318680      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
   | Name                            

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]