<a href="https://colab.research.google.com/github/ekvirika/WalmartRecruiting/blob/main/notebooks/model_experiment_tft.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [55]:
# Install required packages
!pip install -q wandb torch torchvision pandas numpy matplotlib seaborn scikit-learn mlflow pytorch_lightning pytorch_forecasting mlflow

# Set up Kaggle API
!pip install -q kaggle pytorch_forecasting pytorch_lightning dagshub

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/261.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m261.0/261.0 kB[0m [31m19.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.9/139.9 kB[0m [31m12.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m76.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.9/50.9 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.2/85.2 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m74.3/74.3 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [4]:
# Upload your kaggle.json to Colab and run:
!mkdir -p ~/.kaggle
!cp /content/drive/MyDrive/ColabNotebooks/kaggle_API_credentials/kaggle.json ~/.kaggle/kaggle.json
! chmod 600 ~/.kaggle/kaggle.json

In [5]:
# Download the dataset
!kaggle competitions download -c walmart-recruiting-store-sales-forecasting
!unzip -q walmart-recruiting-store-sales-forecasting.zip

walmart-recruiting-store-sales-forecasting.zip: Skipping, found more recently modified local copy (use --force to force download)
replace features.csv.zip? [y]es, [n]o, [A]ll, [N]one, [r]ename: A


In [6]:
!unzip -q train.csv.zip
!unzip -q stores.csv.zip
!unzip -q test.csv.zip
!unzip -q features.csv.zip

replace train.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: A
unzip:  cannot find or open stores.csv.zip, stores.csv.zip.zip or stores.csv.zip.ZIP.
replace test.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: A
replace features.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: A


In [7]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split, TimeSeriesSplit
from sklearn.metrics import mean_absolute_error, mean_squared_error
import warnings
warnings.filterwarnings('ignore')

# Deep Learning Libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from pytorch_lightning.loggers import MLFlowLogger

# Time Series Libraries
from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import SMAPE, MAE, RMSE
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

# MLflow for experiment tracking
import mlflow
import mlflow.pytorch
import mlflow.sklearn
from mlflow.models.signature import infer_signature
import joblib
import os

In [8]:
# Set random seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)
pl.seed_everything(42)

print("All libraries imported successfully!")


INFO:lightning_fabric.utilities.seed:Global seed set to 42


All libraries imported successfully!


In [9]:
# MLflow Experiment Setup
mlflow.set_tracking_uri("sqlite:///mlflow.db")
experiment_name = "TFT_Training"
mlflow.set_experiment(experiment_name)

print(f"MLflow experiment '{experiment_name}' is ready!")

2025/07/07 19:11:35 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2025/07/07 19:11:35 INFO mlflow.store.db.utils: Updating database tables
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.


MLflow experiment 'TFT_Training' is ready!


In [10]:
# Data Loading and Initial Exploration
def load_data():
    """Load and explore the Walmart dataset"""
    train_df = pd.read_csv('train.csv')
    test_df = pd.read_csv('test.csv')
    stores_df = pd.read_csv('stores.csv')
    features_df = pd.read_csv('features.csv')

    print("Dataset shapes:")
    print(f"Train: {train_df.shape}")
    print(f"Test: {test_df.shape}")
    print(f"Stores: {stores_df.shape}")
    print(f"Features: {features_df.shape}")

    return train_df, test_df, stores_df, features_df

# Load Data

In [11]:
# Load data
train_df, test_df, stores_df, features_df = load_data()
# Display basic info about the datasets
print("\nTrain dataset info:")
print(train_df.info())
print(f"\nTrain dataset head:\n{train_df.head()}")

print("\nTest dataset info:")
print(test_df.info())
print(f"\nTest dataset head:\n{test_df.head()}")

Dataset shapes:
Train: (421570, 5)
Test: (115064, 4)
Stores: (45, 3)
Features: (8190, 12)

Train dataset info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 421570 entries, 0 to 421569
Data columns (total 5 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   Store         421570 non-null  int64  
 1   Dept          421570 non-null  int64  
 2   Date          421570 non-null  object 
 3   Weekly_Sales  421570 non-null  float64
 4   IsHoliday     421570 non-null  bool   
dtypes: bool(1), float64(1), int64(2), object(1)
memory usage: 13.3+ MB
None

Train dataset head:
   Store  Dept        Date  Weekly_Sales  IsHoliday
0      1     1  2010-02-05      24924.50      False
1      1     1  2010-02-12      46039.49       True
2      1     1  2010-02-19      41595.55      False
3      1     1  2010-02-26      19403.54      False
4      1     1  2010-03-05      21827.90      False

Test dataset info:
<class 'pandas.core.frame.DataFrame'>
Ran

# MLflow Run: Data Cleaning and Preprocessing


In [12]:
with mlflow.start_run(run_name="TFT_Data_Cleaning"):
    print("Starting data cleaning and preprocessing...")

    # Log parameters
    mlflow.log_param("train_shape", train_df.shape)
    mlflow.log_param("test_shape", test_df.shape)

    # Data cleaning function
    def clean_data(df):
        """Clean the dataset"""
        # Convert Date to datetime
        df['Date'] = pd.to_datetime(df['Date'])

        # Handle missing values
        missing_before = df.isnull().sum().sum()

        # Fill missing values with appropriate methods
        if 'Weekly_Sales' in df.columns:
            # For training data
            df['Weekly_Sales'].fillna(df['Weekly_Sales'].median(), inplace=True)

        missing_after = df.isnull().sum().sum()

        print(f"Missing values before cleaning: {missing_before}")
        print(f"Missing values after cleaning: {missing_after}")

        return df, missing_before, missing_after

    # Clean training data
    train_df, missing_before_train, missing_after_train = clean_data(train_df)

    # Clean test data
    test_df, missing_before_test, missing_after_test = clean_data(test_df)

    # Log cleaning metrics
    mlflow.log_metric("missing_before_train", missing_before_train)
    mlflow.log_metric("missing_after_train", missing_after_train)
    mlflow.log_metric("missing_before_test", missing_before_test)
    mlflow.log_metric("missing_after_test", missing_after_test)

    print("Data cleaning completed!")



Starting data cleaning and preprocessing...
Missing values before cleaning: 0
Missing values after cleaning: 0
Missing values before cleaning: 0
Missing values after cleaning: 0
Data cleaning completed!


# Feature Engineering


In [13]:
import pandas as pd
import numpy as np
import mlflow
from sklearn.preprocessing import LabelEncoder

with mlflow.start_run(run_name="TFT_Feature_Engineering"):
    print("Starting feature engineering...")

    # --- Ensure Date columns are datetime ---
    train_df['Date'] = pd.to_datetime(train_df['Date'])
    test_df['Date'] = pd.to_datetime(test_df['Date'])
    features_df['Date'] = pd.to_datetime(features_df['Date'])
    stores_df = stores_df.copy()  # in case you want to modify safely

    features_df = features_df.drop(columns=['IsHoliday'], errors='ignore')

    # --- Feature engineering function ---
    def engineer_features(df):
        df['Year'] = df['Date'].dt.year
        df['Month'] = df['Date'].dt.month
        df['Week'] = df['Date'].dt.isocalendar().week
        df['Day'] = df['Date'].dt.day
        df['DayOfWeek'] = df['Date'].dt.dayofweek
        df['Quarter'] = df['Date'].dt.quarter
        df['IsHoliday'] = df['IsHoliday'].astype(int)
        df = df.sort_values(['Store', 'Date']).reset_index(drop=True)
        return df

    # --- Apply feature engineering ---
    train_df = engineer_features(train_df)
    test_df = engineer_features(test_df)

    # --- Handle overlapping columns more carefully ---
    # First, identify what columns exist in each dataframe
    print("Available columns:")
    print(f"  train_df: {list(train_df.columns)}")
    print(f"  stores_df: {list(stores_df.columns)}")
    print(f"  features_df: {list(features_df.columns)}")

    # Check what columns overlap between train_df and stores_df
    stores_overlap = [col for col in stores_df.columns if col in train_df.columns and col != 'Store']
    print(f"Overlapping columns with stores_df: {stores_overlap}")

    # Only drop columns that actually exist in both and cause conflicts
    # Keep Type and Size from stores_df by dropping them from train_df if they exist
    if 'Type' in train_df.columns and 'Type' in stores_df.columns:
        train_df = train_df.drop(columns=['Type'], errors='ignore')
        test_df = test_df.drop(columns=['Type'], errors='ignore')
    if 'Size' in train_df.columns and 'Size' in stores_df.columns:
        train_df = train_df.drop(columns=['Size'], errors='ignore')
        test_df = test_df.drop(columns=['Size'], errors='ignore')

    # --- Merge with stores data ---
    train_df = train_df.merge(stores_df, on='Store', how='left')
    test_df = test_df.merge(stores_df, on='Store', how='left')

    # Check for overlapping columns with features_df and drop them from train/test
    features_overlap = [col for col in features_df.columns if col in train_df.columns and col not in ['Store', 'Date']]
    if features_overlap:
        print(f"Dropping overlapping columns before features merge: {features_overlap}")
        train_df = train_df.drop(columns=features_overlap, errors='ignore')
        test_df = test_df.drop(columns=features_overlap, errors='ignore')

    # --- Merge with features data ---
    train_df = train_df.merge(features_df, on=['Store', 'Date'], how='left')
    test_df = test_df.merge(features_df, on=['Store', 'Date'], how='left')

    # --- Encode categorical variables ---
    # Check if Type column exists before encoding
    if 'Type' in train_df.columns:
        le_type = LabelEncoder()
        train_df['Type_encoded'] = le_type.fit_transform(train_df['Type'])
        test_df['Type_encoded'] = le_type.transform(test_df['Type'])
    else:
        print("Warning: 'Type' column not found in dataframes after merging")

    # --- Fill missing values in numerical columns (handle train and test separately) ---
    # Get numeric columns for each dataset separately
    train_numeric_cols = train_df.select_dtypes(include=[np.number]).columns
    test_numeric_cols = test_df.select_dtypes(include=[np.number]).columns

    # Fill missing values using medians from training data
    train_df[train_numeric_cols] = train_df[train_numeric_cols].fillna(train_df[train_numeric_cols].median())

    # For test data, use training data medians for common columns, test data medians for test-only columns
    for col in test_numeric_cols:
        if col in train_numeric_cols:
            # Use training data median for consistency
            test_df[col] = test_df[col].fillna(train_df[col].median())
        else:
            # Use test data median for columns not in training data
            test_df[col] = test_df[col].fillna(test_df[col].median())

    # --- Log to MLflow ---
    mlflow.log_param("features_after_engineering", len(train_df.columns))
    mlflow.log_param("time_features_added", 6)
    mlflow.log_param("train_numeric_cols", len(train_numeric_cols))
    mlflow.log_param("test_numeric_cols", len(test_numeric_cols))

    print(f"Feature engineering completed!")
    print(f"Train shape: {train_df.shape}")
    print(f"Test shape: {test_df.shape}")
    print(f"Train columns: {list(train_df.columns)}")
    print(f"Test columns: {list(test_df.columns)}")

Starting feature engineering...
Available columns:
  train_df: ['Store', 'Dept', 'Date', 'Weekly_Sales', 'IsHoliday', 'Year', 'Month', 'Week', 'Day', 'DayOfWeek', 'Quarter']
  stores_df: ['Store', 'Type', 'Size']
  features_df: ['Store', 'Date', 'Temperature', 'Fuel_Price', 'MarkDown1', 'MarkDown2', 'MarkDown3', 'MarkDown4', 'MarkDown5', 'CPI', 'Unemployment']
Overlapping columns with stores_df: []
Feature engineering completed!
Train shape: (421570, 23)
Test shape: (115064, 22)
Train columns: ['Store', 'Dept', 'Date', 'Weekly_Sales', 'IsHoliday', 'Year', 'Month', 'Week', 'Day', 'DayOfWeek', 'Quarter', 'Type', 'Size', 'Temperature', 'Fuel_Price', 'MarkDown1', 'MarkDown2', 'MarkDown3', 'MarkDown4', 'MarkDown5', 'CPI', 'Unemployment', 'Type_encoded']
Test columns: ['Store', 'Dept', 'Date', 'IsHoliday', 'Year', 'Month', 'Week', 'Day', 'DayOfWeek', 'Quarter', 'Type', 'Size', 'Temperature', 'Fuel_Price', 'MarkDown1', 'MarkDown2', 'MarkDown3', 'MarkDown4', 'MarkDown5', 'CPI', 'Unemployment',

In [14]:
# Prepare data for TFT
with mlflow.start_run(run_name="TFT_Data_Preparation"):
    print("Preparing data for TFT...")

    # Create time index
    train_df['time_idx'] = (train_df['Date'] - train_df['Date'].min()).dt.days
    test_df['time_idx'] = (test_df['Date'] - train_df['Date'].min()).dt.days

    # Define the features for TFT
    static_categoricals = ['Store', 'Type_encoded']
    static_reals = ['Size']
    time_varying_known_categoricals = ['IsHoliday', 'Month', 'Quarter', 'DayOfWeek']
    time_varying_known_reals = ['time_idx']
    time_varying_unknown_reals = ['Weekly_Sales']

    # Add external features if available
    external_features = ['Temperature', 'Fuel_Price', 'CPI', 'Unemployment']
    available_external = [col for col in external_features if col in train_df.columns]
    time_varying_known_reals.extend(available_external)

    # Create target variable
    target = 'Weekly_Sales'

    # Split data for validation
    max_prediction_length = 12  # 12 weeks ahead
    max_encoder_length = 52     # Use 52 weeks of history

    # Calculate cutoff for validation
    cutoff = train_df['time_idx'].max() - max_prediction_length

    # Create training and validation sets
    training_data = train_df[train_df['time_idx'] <= cutoff]
    validation_data = train_df[train_df['time_idx'] > cutoff]

    print(f"Training data shape: {training_data.shape}")
    print(f"Validation data shape: {validation_data.shape}")

    # Log data preparation parameters
    mlflow.log_param("max_prediction_length", max_prediction_length)
    mlflow.log_param("max_encoder_length", max_encoder_length)
    mlflow.log_param("training_samples", len(training_data))
    mlflow.log_param("validation_samples", len(validation_data))

    print("Data preparation completed!")

Preparing data for TFT...
Training data shape: (415661, 24)
Validation data shape: (5909, 24)
Data preparation completed!


# Create TFT Dataset


In [15]:
with mlflow.start_run(run_name="TFT_Dataset_Creation"):
    print("Creating TFT dataset...")

    # Convert Store to string type for categorical handling
    train_df['Store'] = train_df['Store'].astype(str)

    # Also convert any other categorical columns that might be numeric
    for col in static_categoricals + time_varying_known_categoricals:
        if col in train_df.columns:
            train_df[col] = train_df[col].astype(str)

    # Handle missing values in target variable
    print(f"Missing values in {target} before handling: {train_df[target].isna().sum()}")

    # Option 1: Fill missing target values with forward fill then backward fill
    train_df[target] = train_df.groupby('Store')[target].fillna(method='ffill').fillna(method='bfill')

    # Option 2: If still missing, fill with store-specific median
    train_df[target] = train_df.groupby('Store')[target].fillna(train_df.groupby('Store')[target].transform('median'))

    # Option 3: If still missing, fill with overall median
    train_df[target] = train_df[target].fillna(train_df[target].median())

    print(f"Missing values in {target} after handling: {train_df[target].isna().sum()}")

    # Check for infinite values and handle them
    inf_mask = np.isinf(train_df[target])
    if inf_mask.any():
        print(f"Found {inf_mask.sum()} infinite values in {target}, replacing with median")
        train_df.loc[inf_mask, target] = train_df[target].median()

    # Final check for any remaining problematic values
    print(f"Final check - NaN: {train_df[target].isna().sum()}, Inf: {np.isinf(train_df[target]).sum()}")

    # Create the filtered dataset for training
    train_subset = train_df[train_df['time_idx'] <= cutoff].copy()
    print(f"Training subset shape: {train_subset.shape}")
    print(f"Missing values in {target} in training subset: {train_subset[target].isna().sum()}")

    # Handle missing values in the training subset
    if train_subset[target].isna().sum() > 0:
        print("Handling missing values in training subset...")
        # Fill missing values in the training subset
        train_subset[target] = train_subset.groupby('Store')[target].fillna(method='ffill').fillna(method='bfill')
        train_subset[target] = train_subset.groupby('Store')[target].fillna(train_subset.groupby('Store')[target].transform('median'))
        train_subset[target] = train_subset[target].fillna(train_subset[target].median())

        # Handle infinite values
        inf_mask = np.isinf(train_subset[target])
        if inf_mask.any():
            print(f"Found {inf_mask.sum()} infinite values in training subset, replacing with median")
            train_subset.loc[inf_mask, target] = train_subset[target].median()

    print(f"Final training subset check - NaN: {train_subset[target].isna().sum()}, Inf: {np.isinf(train_subset[target]).sum()}")

    # Additional debugging - check all columns for missing values
    print("Checking all columns for missing values:")
    for col in train_subset.columns:
        missing_count = train_subset[col].isna().sum()
        if missing_count > 0:
            print(f"  {col}: {missing_count} missing values")

    # Check for any problematic values in all numeric columns
    numeric_cols = train_subset.select_dtypes(include=[np.number]).columns
    for col in numeric_cols:
        inf_count = np.isinf(train_subset[col]).sum()
        if inf_count > 0:
            print(f"  {col}: {inf_count} infinite values")
            train_subset[col] = train_subset[col].replace([np.inf, -np.inf], train_subset[col].median())

    # Fill any remaining missing values in all columns
    print("Filling any remaining missing values in all columns...")
    for col in train_subset.columns:
        if train_subset[col].isna().sum() > 0:
            if train_subset[col].dtype == 'object':
                # For categorical columns, fill with mode
                train_subset[col] = train_subset[col].fillna(train_subset[col].mode()[0] if len(train_subset[col].mode()) > 0 else 'Unknown')
            else:
                # For numeric columns, fill with median
                train_subset[col] = train_subset[col].fillna(train_subset[col].median())

    print("Final check of all columns after comprehensive cleaning:")
    total_missing = train_subset.isna().sum().sum()
    print(f"Total missing values across all columns: {total_missing}")

    # Debug: Check the actual values in Weekly_Sales
    print(f"Weekly_Sales statistics:")
    print(f"  Min: {train_subset[target].min()}")
    print(f"  Max: {train_subset[target].max()}")
    print(f"  Mean: {train_subset[target].mean()}")
    print(f"  Unique values with potential issues: {train_subset[target][train_subset[target] <= 0].count()}")

    # Handle edge cases that might cause issues with GroupNormalizer
    if (train_subset[target] <= 0).any():
        print("Found non-positive values in Weekly_Sales, adjusting for GroupNormalizer...")
        # Add a small constant to ensure all values are positive for softplus transformation
        min_val = train_subset[target].min()
        if min_val <= 0:
            train_subset[target] = train_subset[target] + abs(min_val) + 1

    # Try with a simpler normalizer first
    from pytorch_forecasting.data.encoders import EncoderNormalizer

    # Create the dataset with a simpler normalizer
    training_dataset = TimeSeriesDataSet(
        train_subset,
        time_idx='time_idx',
        target=target,
        group_ids=['Store'],
        min_encoder_length=max_encoder_length // 2,
        max_encoder_length=max_encoder_length,
        min_prediction_length=1,
        max_prediction_length=max_prediction_length,
        static_categoricals=static_categoricals,
        static_reals=static_reals,
        time_varying_known_categoricals=time_varying_known_categoricals,
        time_varying_known_reals=time_varying_known_reals,
        time_varying_unknown_reals=time_varying_unknown_reals,
        target_normalizer=EncoderNormalizer(),  # Use simpler normalizer
        add_relative_time_idx=True,
        add_target_scales=True,
        add_encoder_length=True,
        allow_missing_timesteps=True,
    )

    # Create validation dataset
    validation_dataset = TimeSeriesDataSet.from_dataset(
        training_dataset,
        train_df,
        predict=True,
        stop_randomization=True
    )

    # Create dataloaders
    batch_size = 128
    train_dataloader = training_dataset.to_dataloader(
        train=True,
        batch_size=batch_size,
        num_workers=0
    )
    val_dataloader = validation_dataset.to_dataloader(
        train=False,
        batch_size=batch_size,
        num_workers=0
    )

    print(f"Training dataset size: {len(training_dataset)}")
    print(f"Validation dataset size: {len(validation_dataset)}")

    # Log dataset parameters
    mlflow.log_param("batch_size", batch_size)
    mlflow.log_param("train_dataset_size", len(training_dataset))
    mlflow.log_param("val_dataset_size", len(validation_dataset))

    print("Dataset creation completed!")

Creating TFT dataset...
Missing values in Weekly_Sales before handling: 0
Missing values in Weekly_Sales after handling: 0
Final check - NaN: 0, Inf: 0
Training subset shape: (415661, 24)
Missing values in Weekly_Sales in training subset: 0
Final training subset check - NaN: 0, Inf: 0
Checking all columns for missing values:
Filling any remaining missing values in all columns...
Final check of all columns after comprehensive cleaning:
Total missing values across all columns: 0
Weekly_Sales statistics:
  Min: -4988.94
  Max: 693099.36
  Mean: 15990.320141293021
  Unique values with potential issues: 1347
Found non-positive values in Weekly_Sales, adjusting for GroupNormalizer...
Training dataset size: 798711
Validation dataset size: 45
Dataset creation completed!


# Model Training


In [54]:
import lightning.pytorch as pl  # Fixed import
from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint
from lightning.pytorch.loggers import MLFlowLogger

with mlflow.start_run(run_name="TFT_Model_Training"):
    print("Starting TFT model training...")

    # Enable MLflow auto-logging for PyTorch Lightning
    mlflow.pytorch.autolog()

    # Create MLflow logger
    mlflow_logger = MLFlowLogger(
        experiment_name=experiment_name,
        tracking_uri=mlflow.get_tracking_uri()
    )

    # Model configuration
    model_config = {
        "hidden_size": 64,
        "lstm_layers": 2,
        "dropout": 0.1,
        "attention_head_size": 4,
        "learning_rate": 0.001,
        "reduce_on_plateau_patience": 3,
        "optimizer": "Adam"
    }

    # Create the model
    tft = TemporalFusionTransformer.from_dataset(
        training_dataset,
        hidden_size=model_config["hidden_size"],
        lstm_layers=model_config["lstm_layers"],
        dropout=model_config["dropout"],
        attention_head_size=model_config["attention_head_size"],
        output_size=1,  # Fixed for SMAPE loss
        loss=SMAPE(),
        learning_rate=model_config["learning_rate"],
        reduce_on_plateau_patience=model_config["reduce_on_plateau_patience"],
        optimizer=model_config["optimizer"],
    )

    # Log model configuration
    for key, value in model_config.items():
        mlflow.log_param(key, value)

    # Setup callbacks
    early_stopping = EarlyStopping(
        monitor='val_loss',
        patience=10,
        verbose=True,
        mode='min'
    )

    checkpoint_callback = ModelCheckpoint(
        monitor='val_loss',
        mode='min',
        save_top_k=1,
        filename='best_tft_model'
    )

    # Create trainer - FIXED: Removed deterministic=True
    trainer = pl.Trainer(
        max_epochs=50,
        accelerator='gpu',
        devices=1,
        callbacks=[early_stopping, checkpoint_callback],
        logger=mlflow_logger,
        enable_progress_bar=True,
        # Removed: deterministic=True
    )

    # Train the model
    trainer.fit(
        tft,
        train_dataloaders=train_dataloader,
        val_dataloaders=val_dataloader
    )

    # Load best model
    best_model = TemporalFusionTransformer.load_from_checkpoint(
        checkpoint_callback.best_model_path
    )

    print("Model training completed!")

Starting TFT model training...


INFO: GPU available: True (cuda), used: True
INFO  [lightning.pytorch.utilities.rank_zero] GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO  [lightning.pytorch.utilities.rank_zero] TPU available: False, using: 0 TPU cores
INFO: HPU available: False, using: 0 HPUs
INFO  [lightning.pytorch.utilities.rank_zero] HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO  [lightning.pytorch.accelerators.cuda] LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 681    | train
3  | presc

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

RuntimeError: upsample_linear1d_backward_out_cuda does not have a deterministic implementation, but you set 'torch.use_deterministic_algorithms(True)'. You can turn off determinism just for this operation, or you can use the 'warn_only=True' option, if that's acceptable for your application. You can also file an issue at https://github.com/pytorch/pytorch/issues to help us prioritize adding deterministic support for this operation.

In [50]:
print(f"Model type: {type(tft)}")
print(f"Is LightningModule: {isinstance(tft, pl.LightningModule)}")
print(f"Model MRO: {type(tft).__mro__}")

Model type: <class 'pytorch_forecasting.models.temporal_fusion_transformer._tft.TemporalFusionTransformer'>
Is LightningModule: False
Model MRO: (<class 'pytorch_forecasting.models.temporal_fusion_transformer._tft.TemporalFusionTransformer'>, <class 'pytorch_forecasting.models.base._base_model.BaseModelWithCovariates'>, <class 'pytorch_forecasting.models.base._base_model.BaseModel'>, <class 'pytorch_forecasting.utils._utils.InitialParameterRepresenterMixIn'>, <class 'lightning.pytorch.core.module.LightningModule'>, <class 'lightning.fabric.utilities.device_dtype_mixin._DeviceDtypeModuleMixin'>, <class 'lightning.pytorch.core.mixins.hparams_mixin.HyperparametersMixin'>, <class 'lightning.pytorch.core.hooks.ModelHooks'>, <class 'lightning.pytorch.core.hooks.DataHooks'>, <class 'lightning.pytorch.core.hooks.CheckpointHooks'>, <class 'torch.nn.modules.module.Module'>, <class 'pytorch_forecasting.utils._utils.TupleOutputMixIn'>, <class 'object'>)


# Model Evaluation

In [43]:
with mlflow.start_run(run_name="TFT_Model_Evaluation"):
    print("Starting model evaluation...")

    # Make predictions on validation set
    predictions = best_model.predict(val_dataloader, return_y=True)

    # Calculate metrics
    mae = MAE()(predictions.output, predictions.y).item()
    smape = SMAPE()(predictions.output, predictions.y).item()
    rmse = RMSE()(predictions.output, predictions.y).item()

    # Log evaluation metrics
    mlflow.log_metric("val_mae", mae)
    mlflow.log_metric("val_smape", smape)
    mlflow.log_metric("val_rmse", rmse)

    print(f"Validation MAE: {mae:.4f}")
    print(f"Validation SMAPE: {smape:.4f}")
    print(f"Validation RMSE: {rmse:.4f}")

    # Create prediction plots
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))

    # Plot 1: Actual vs Predicted
    actual = predictions.y.cpu().numpy().flatten()
    predicted = predictions.output.cpu().numpy().flatten()

    axes[0, 0].scatter(actual, predicted, alpha=0.5)
    axes[0, 0].plot([actual.min(), actual.max()], [actual.min(), actual.max()], 'r--', lw=2)
    axes[0, 0].set_xlabel('Actual')
    axes[0, 0].set_ylabel('Predicted')
    axes[0, 0].set_title('Actual vs Predicted')

    # Plot 2: Residuals
    residuals = actual - predicted
    axes[0, 1].scatter(predicted, residuals, alpha=0.5)
    axes[0, 1].axhline(y=0, color='r', linestyle='--')
    axes[0, 1].set_xlabel('Predicted')
    axes[0, 1].set_ylabel('Residuals')
    axes[0, 1].set_title('Residual Plot')

    # Plot 3: Residuals histogram
    axes[1, 0].hist(residuals, bins=50, alpha=0.7)
    axes[1, 0].set_xlabel('Residuals')
    axes[1, 0].set_ylabel('Frequency')
    axes[1, 0].set_title('Residuals Distribution')

    # Plot 4: Time series example
    example_idx = 0
    example_prediction = predictions.output[example_idx].cpu().numpy()
    example_actual = predictions.y[example_idx].cpu().numpy()

    axes[1, 1].plot(range(len(example_actual)), example_actual, 'b-', label='Actual', linewidth=2)
    axes[1, 1].plot(range(len(example_prediction)), example_prediction, 'r--', label='Predicted', linewidth=2)
    axes[1, 1].set_xlabel('Time Steps')
    axes[1, 1].set_ylabel('Weekly Sales')
    axes[1, 1].set_title('Example Prediction')
    axes[1, 1].legend()

    plt.tight_layout()
    plt.savefig('tft_evaluation_plots.png', dpi=300, bbox_inches='tight')
    mlflow.log_artifact('tft_evaluation_plots.png')
    plt.show()

    print("Model evaluation completed!")



Starting model evaluation...


AttributeError: 'NoneType' object has no attribute 'predict'

In [31]:
pip install pytorch-lightning==1.9.5 pytorch-forecasting==1.0.0


Collecting pytorch-lightning==1.9.5
  Using cached pytorch_lightning-1.9.5-py3-none-any.whl.metadata (23 kB)
[31mERROR: Ignored the following versions that require a different python version: 0.10.2 Requires-Python >=3.8,<3.11; 0.10.3 Requires-Python >=3.8,<3.11; 1.0.0 Requires-Python >=3.8,<3.11[0m[31m
[0m[31mERROR: Could not find a version that satisfies the requirement pytorch-forecasting==1.0.0 (from versions: 0.1.0, 0.1.1, 0.1.2, 0.2.0, 0.2.1, 0.2.2, 0.2.3, 0.2.4, 0.3.0, 0.3.1, 0.4.0, 0.4.1, 0.5.0, 0.5.1, 0.5.2, 0.5.3, 0.6.0, 0.6.1, 0.7.0, 0.7.1, 0.8.0, 0.8.1, 0.8.2, 0.8.3, 0.8.4, 0.8.5, 0.9.0, 0.9.1, 0.9.2, 0.10.0, 0.10.1, 1.1.0, 1.1.1, 1.2.0, 1.3.0, 1.4.0)[0m[31m
[0m[31mERROR: No matching distribution found for pytorch-forecasting==1.0.0[0m[31m
[0m

In [None]:
# Hyperparameter Tuning (Optional)
with mlflow.start_run(run_name="TFT_Hyperparameter_Tuning"):
    print("Starting hyperparameter tuning...")

    # Define hyperparameter ranges
    study = optimize_hyperparameters(
        train_dataloader,
        val_dataloader,
        model_path="optuna_test",
        n_trials=10,  # Reduce for faster execution
        max_epochs=20,
        gradient_clip_val_range=(0.01, 1.0),
        hidden_size_range=(32, 128),
        lstm_layers_range=(1, 4),
        dropout_range=(0.1, 0.3),
        attention_head_size_range=(1, 8),
        learning_rate_range=(0.001, 0.1),
        use_learning_rate_finder=False,
    )

    # Log best parameters
    best_params = study.best_params
    for key, value in best_params.items():
        mlflow.log_param(f"best_{key}", value)

    mlflow.log_metric("best_trial_value", study.best_value)

    print(f"Best trial value: {study.best_value}")
    print(f"Best parameters: {best_params}")

# Final Model Training with Best Parameters
with mlflow.start_run(run_name="TFT_Final_Model_Training"):
    print("Training final model with best parameters...")

    # Create final model with best parameters (use default if tuning was skipped)
    final_tft = TemporalFusionTransformer.from_dataset(
        training_dataset,
        hidden_size=64,  # Use best params if available
        lstm_layers=2,
        dropout=0.1,
        attention_head_size=4,
        output_size=7,
        loss=SMAPE(),
        learning_rate=0.001,
        reduce_on_plateau_patience=3,
        optimizer="Adam",
    )

    # Create final trainer
    final_trainer = pl.Trainer(
        max_epochs=100,
        accelerator='cpu',
        callbacks=[early_stopping, checkpoint_callback],
        logger=mlflow_logger,
        enable_progress_bar=True,
        deterministic=True
    )

    # Train final model
    final_trainer.fit(
        final_tft,
        train_dataloaders=train_dataloader,
        val_dataloaders=val_dataloader
    )

    # Load best final model
    final_best_model = TemporalFusionTransformer.load_from_checkpoint(
        checkpoint_callback.best_model_path
    )

    print("Final model training completed!")

# Create Pipeline and Save Model


In [None]:
with mlflow.start_run(run_name="TFT_Pipeline_Creation"):
    print("Creating TFT pipeline...")

    # Create a pipeline class for TFT
    class TFTPipeline:
        def __init__(self, model, dataset_config, preprocessing_params):
            self.model = model
            self.dataset_config = dataset_config
            self.preprocessing_params = preprocessing_params
            self.label_encoders = {}

        def preprocess(self, data):
            """Preprocess raw data for TFT"""
            # Apply the same preprocessing as training
            data = data.copy()

            # Convert Date to datetime
            data['Date'] = pd.to_datetime(data['Date'])

            # Engineer features
            data['Year'] = data['Date'].dt.year
            data['Month'] = data['Date'].dt.month
            data['Week'] = data['Date'].dt.week
            data['Day'] = data['Date'].dt.day
            data['DayOfWeek'] = data['Date'].dt.dayofweek
            data['Quarter'] = data['Date'].dt.quarter
            data['IsHoliday'] = data['IsHoliday'].astype(int)

            # Create time index
            data['time_idx'] = (data['Date'] - self.preprocessing_params['min_date']).dt.days

            # Handle categorical encoding
            if 'Type' in data.columns:
                data['Type_encoded'] = le_type.transform(data['Type'])

            # Fill missing values
            numeric_cols = data.select_dtypes(include=[np.number]).columns
            data[numeric_cols] = data[numeric_cols].fillna(data[numeric_cols].median())

            return data

        def predict(self, data):
            """Make predictions on new data"""
            # Preprocess data
            processed_data = self.preprocess(data)

            # Create dataset for prediction
            prediction_dataset = TimeSeriesDataSet.from_dataset(
                self.dataset_config,
                processed_data,
                predict=True,
                stop_randomization=True
            )

            # Create dataloader
            prediction_dataloader = prediction_dataset.to_dataloader(
                train=False,
                batch_size=128,
                num_workers=0
            )

            # Make predictions
            predictions = self.model.predict(prediction_dataloader)

            return predictions

    # Create pipeline
    preprocessing_params = {
        'min_date': train_df['Date'].min(),
        'max_date': train_df['Date'].max(),
        'features': list(train_df.columns)
    }

    tft_pipeline = TFTPipeline(
        model=final_best_model,
        dataset_config=training_dataset,
        preprocessing_params=preprocessing_params
    )

    # Save pipeline
    pipeline_path = "tft_pipeline.pkl"
    joblib.dump(tft_pipeline, pipeline_path)

    # Log pipeline
    mlflow.log_artifact(pipeline_path)

    # Save additional components
    joblib.dump(le_type, "label_encoder_type.pkl")
    mlflow.log_artifact("label_encoder_type.pkl")

    print("Pipeline creation completed!")

# Model Registration


In [None]:
with mlflow.start_run(run_name="TFT_Model_Registration"):
    print("Registering model...")

    # Create model signature
    sample_input = train_df.head(100)
    sample_output = np.random.randn(100, max_prediction_length)
    signature = infer_signature(sample_input, sample_output)

    # Register model
    model_name = "TFT_Walmart_Sales_Forecast"

    mlflow.sklearn.log_model(
        sk_model=tft_pipeline,
        artifact_path="tft_model",
        signature=signature,
        registered_model_name=model_name
    )

    print(f"Model registered as '{model_name}'")

In [None]:
print("TFT experiment completed successfully!")
print("All artifacts and models have been logged to MLflow")
print("Check your MLflow UI to view the experiments and model registry")