In [None]:
# ISA 444 Final Project
# Daniel Woodward, Olivia Pisano

In [None]:
# ==========================================
# 1. Installation & Imports
# ==========================================
!pip install pandas numpy matplotlib seaborn statsforecast mlforecast neuralforecast lightgbm nixtla

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error

# StatsForecast
from statsforecast import StatsForecast
from statsforecast.models import Naive, SeasonalNaive, AutoETS, AutoARIMA

# MLForecast
from mlforecast import MLForecast
from mlforecast.target_transforms import Differences
from lightgbm import LGBMRegressor

# NeuralForecast
from neuralforecast import NeuralForecast
from neuralforecast.auto import AutoNBEATS, AutoNHITS

# TimeGPT
from nixtla import NixtlaClient




[notice] A new release of pip is available: 23.2.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
# ==========================================
# 2. Data Loading & Preprocessing
# ==========================================
import pandas as pd
import numpy as np

def load_and_prep_data(data_dir='.', top_n=20):
    """Load the Walmart sales CSVs and build model-ready train/test frames.

    Reads ``train.csv``, ``features.csv``, ``test.csv`` and ``stores.csv`` from
    ``data_dir``, merges the store metadata and weekly features onto the
    training and test rows, fills missing values, keeps the ``top_n`` series
    (one series per ``Store``/``Dept`` pair) with the largest total sales and
    re-indexes every training series onto a gap-free ``W-FRI`` calendar.

    Parameters
    ----------
    data_dir : str or path-like, default '.'
        Directory that contains the four CSV files.
    top_n : int, default 20
        Number of highest-volume series to keep.

    Returns
    -------
    tuple
        ``(train_df, test_df)``. ``train_df`` has ``unique_id``, ``ds``, ``y``
        plus the feature columns; ``test_df`` has the same layout without
        ``y``. ``(None, None)`` is returned when a file is missing, the target
        column is absent, no series is selected, or all sales are zero.
    """
    from pathlib import Path

    print("Loading data...")
    data_dir = Path(data_dir)

    # 1. Load CSVs
    try:
        train = pd.read_csv(data_dir / 'train.csv')
        features = pd.read_csv(data_dir / 'features.csv')
        test = pd.read_csv(data_dir / 'test.csv')
        stores = pd.read_csv(data_dir / 'stores.csv')
    except FileNotFoundError as e:
        print(f"Error finding file: {e}")
        return None, None

    # 2. Merge store metadata and weekly features onto train and test rows
    feature_keys = ['Store', 'Date', 'IsHoliday']
    df = train.merge(stores, on='Store', how='left')
    df = df.merge(features, on=feature_keys, how='left')
    test_df = test.merge(stores, on='Store', how='left')
    test_df = test_df.merge(features, on=feature_keys, how='left')

    # 3. Handle Missing Values
    # A missing markdown is treated as "no markdown that week".
    md_cols = ['MarkDown1', 'MarkDown2', 'MarkDown3', 'MarkDown4', 'MarkDown5']
    df[md_cols] = df[md_cols].fillna(0)
    test_df[md_cols] = test_df[md_cols].fillna(0)

    # Economic indicators: fill forward, then backward, within each store
    exog_cols_to_fill = ['Temperature', 'Fuel_Price', 'CPI', 'Unemployment']
    for col in exog_cols_to_fill:
        df[col] = df.groupby('Store')[col].transform(lambda x: x.ffill().bfill())
        test_df[col] = test_df.groupby('Store')[col].transform(lambda x: x.ffill().bfill())

    # 4. Formatting for Time Series Models
    df['ds'] = pd.to_datetime(df['Date'])

    # DIAGNOSTIC 1: Check target before renaming
    if 'Weekly_Sales' not in df.columns:
        print(f"CRITICAL ERROR: 'Weekly_Sales' column not found. Available columns: {df.columns.tolist()}")
        return None, None

    df = df.rename(columns={'Weekly_Sales': 'y'})
    df['unique_id'] = df['Store'].astype(str) + '_' + df['Dept'].astype(str)

    test_df['ds'] = pd.to_datetime(test_df['Date'])
    test_df['unique_id'] = test_df['Store'].astype(str) + '_' + test_df['Dept'].astype(str)

    # 5. Downsample: keep the top_n series by total sales volume
    top_series = df.groupby('unique_id')['y'].sum().nlargest(top_n).index
    df_subset = df[df['unique_id'].isin(top_series)].reset_index(drop=True)
    test_subset = test_df[test_df['unique_id'].isin(top_series)].reset_index(drop=True)

    if df_subset.empty:
        # pd.concat below would raise on an empty list of series.
        print("CRITICAL ERROR: No training series were selected.")
        return None, None

    # 6. Feature Engineering (Encodings)
    type_codes = {'A': 1, 'B': 2, 'C': 3}
    df_subset['IsHoliday'] = df_subset['IsHoliday'].astype(int)
    df_subset['Type'] = df_subset['Type'].map(type_codes)
    test_subset['IsHoliday'] = test_subset['IsHoliday'].astype(int)
    test_subset['Type'] = test_subset['Type'].map(type_codes)

    # Remember the dtypes of the per-series constants: the re-index below
    # introduces NaNs (and therefore floats) on any inserted week.
    static_cols = ['Store', 'Dept', 'Type', 'Size']
    static_dtypes = df_subset[static_cols].dtypes.to_dict()

    # Ensure continuous date ranges for each unique_id in training data
    all_series_dfs_train = []
    for uid in df_subset['unique_id'].unique():
        series_df = df_subset[df_subset['unique_id'] == uid].copy()

        # 'W-FRI' aligns the generated timeline with the Friday-dated rows
        full_date_range = pd.date_range(
            start=series_df['ds'].min(), end=series_df['ds'].max(), freq='W-FRI'
        )
        full_series_df = pd.DataFrame({'ds': full_date_range, 'unique_id': uid})

        # Left-merge so missing weeks appear as rows full of NaNs
        series_df = full_series_df.merge(series_df, on=['unique_id', 'ds'], how='left')
        all_series_dfs_train.append(series_df)

    df_subset = pd.concat(all_series_dfs_train).reset_index(drop=True)

    # Inserted weeks must inherit the series' own store/department constants
    # and nearest economic indicators; filling them with 0 would make the
    # "static" features change mid-series and feed bogus values to the models.
    carry_cols = static_cols + exog_cols_to_fill
    df_subset[carry_cols] = df_subset.groupby('unique_id')[carry_cols].ffill()
    df_subset[carry_cols] = df_subset.groupby('unique_id')[carry_cols].bfill()
    for col, dtype in static_dtypes.items():
        if df_subset[col].notna().all():
            df_subset[col] = df_subset[col].astype(dtype)

    # Final Safety Check: Ensure no NaNs remain in features used for ML
    exogenous_ml_cols = ['Temperature', 'Fuel_Price', 'CPI', 'Unemployment',
                         'MarkDown1', 'MarkDown2', 'MarkDown3', 'MarkDown4', 'MarkDown5',
                         'IsHoliday', 'Type', 'Size']

    # Inserted weeks get zero sales; any feature still missing falls back to 0
    df_subset['y'] = df_subset['y'].fillna(0)
    df_subset[exogenous_ml_cols] = df_subset[exogenous_ml_cols].fillna(0)
    df_subset['IsHoliday'] = df_subset['IsHoliday'].astype(int)
    test_subset[exogenous_ml_cols] = test_subset[exogenous_ml_cols].fillna(0)

    # Drop the original 'Date' column ('ds' replaces it)
    df_subset = df_subset.drop(columns=['Date'], errors='ignore')
    test_subset = test_subset.drop(columns=['Date'], errors='ignore')

    # DIAGNOSTIC 2: Verify Sales Sum
    print("\n--- DIAGNOSTIC CHECK ---")
    if 'y' in df_subset.columns:
        total_y = df_subset['y'].sum()
        print(f"Total Sum of 'y' (Sales): {total_y:,.2f}")

        if total_y == 0:
            print("CRITICAL ERROR: 'y' still contains only 0s.")
            return None, None
        else:
            print("STATUS: Data verified. Sales column 'y' is populated.")
    else:
        print("CRITICAL ERROR: 'y' column disappeared.")
        return None, None
    print("------------------------\n")

    print(f"Data Prepared. Modeling {df_subset['unique_id'].nunique()} series.")
    return df_subset, test_subset

# Run the loader once and keep both frames as notebook-level variables.
train_subset, test_subset = load_and_prep_data()

# Report the outcome: the loader signals failure by returning None values.
if train_subset is None or test_subset is None:
    print("Error: Data loading and preparation failed.")
else:
    print("\nSuccess! First 5 rows of testing data:")
    print(test_subset.head())

Loading data...

--- DIAGNOSTIC CHECK ---
Total Sum of 'y' (Sales): 402,568,823.37
STATUS: Data verified. Sales column 'y' is populated.
------------------------

Data Prepared. Modeling 20 series.

Success! First 5 rows of testing data:
   Store  Dept  IsHoliday  Type    Size  Temperature  Fuel_Price  MarkDown1  \
0      1    92          0     1  151315        55.32       3.386    6766.44   
1      1    92          0     1  151315        61.24       3.314   11421.32   
2      1    92          0     1  151315        52.92       3.252    9696.28   
3      1    92          1     1  151315        56.23       3.211     883.59   
4      1    92          0     1  151315        52.34       3.207    2460.03   

   MarkDown2  MarkDown3  MarkDown4  MarkDown5         CPI  Unemployment  \
0    5147.70      50.82    3639.90    2737.42  223.462779         6.573   
1    3370.89      40.28    4646.79    6154.16  223.481307         6.573   
2     292.10     103.78    1133.15    6612.69  223.512911     

In [None]:
# ==========================================
# 3. TimeGPT Pipeline
# ==========================================
from nixtla import NixtlaClient
import pandas as pd

def run_timegpt_pipeline(df_train, df_test, horizon, n_windows, api_key=None):
    """Run TimeGPT cross-validation and a future forecast through the Nixtla API.

    Parameters
    ----------
    df_train : pandas.DataFrame
        Training frame with ``unique_id``, ``ds``, ``y`` and exogenous columns.
    df_test : pandas.DataFrame
        Future rows (``unique_id``, ``ds`` and exogenous columns). The number
        of rows of the longest series is used as the forecast horizon.
    horizon : int
        Cross-validation horizon; also used as the step between windows.
    n_windows : int
        Number of cross-validation windows.
    api_key : str, optional
        Nixtla API key. When omitted, the ``NIXTLA_API_KEY`` environment
        variable is used.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(cross_validation_df, forecast_df)``. Two empty DataFrames are
        returned when no key is available or when any API call fails; the
        error is printed rather than raised.
    """
    import os

    print("\n--- Running TimeGPT ---")

    # SECURITY: the key must never be stored in the notebook. A key that was
    # previously committed in this cell should be revoked and regenerated.
    resolved_key = api_key or os.environ.get('NIXTLA_API_KEY')
    if not resolved_key:
        print("Error running TimeGPT: no API key provided.")
        print("TIP: Set the NIXTLA_API_KEY environment variable or pass api_key=... to this function.")
        return pd.DataFrame(), pd.DataFrame()

    try:
        # Initialize the client
        nixtla_client = NixtlaClient(api_key=resolved_key)

        # Validate the key before running heavy tasks
        if nixtla_client.validate_api_key() is False:
            raise ValueError("the Nixtla API key was rejected")

        # A. Cross-Validation
        # 'W-FRI' is used because the data ends on Fridays
        print("  Running TimeGPT Cross-Validation...")
        timegpt_cv = nixtla_client.cross_validation(
            df=df_train,
            h=horizon,
            n_windows=n_windows,
            step_size=horizon,
            freq='W-FRI'
        )

        # B. Future Forecast
        # Future exogenous variables: the test set without any target column
        future_exog = df_test.drop(columns=['Date', 'Weekly_Sales', 'y'], errors='ignore')

        # Horizon = number of future rows of the longest test series
        test_horizon = int(df_test.groupby('unique_id')['ds'].count().max())

        print(f"  Running TimeGPT Future Forecast ({test_horizon} weeks)...")
        timegpt_fcst = nixtla_client.forecast(
            df=df_train,
            h=test_horizon,
            X_df=future_exog,
            freq='W-FRI'
        )

        return timegpt_cv, timegpt_fcst

    except Exception as e:
        # Best-effort step: the rest of the notebook can run without TimeGPT.
        print(f"Error running TimeGPT: {e}")
        print("TIP: Check that the NIXTLA_API_KEY environment variable holds a valid key.")
        return pd.DataFrame(), pd.DataFrame()

# EXECUTE THE FUNCTION
# The loader leaves both names set to None when it fails, so checking that the
# names merely exist is not enough: require actual frames before calling the
# remote API.
if globals().get('train_subset') is not None and globals().get('test_subset') is not None:
    # Run pipeline
    tgpt_cv, tgpt_fcst = run_timegpt_pipeline(train_subset, test_subset, horizon=4, n_windows=5)

    # Print results if successful
    if not tgpt_cv.empty:
        print("\nSuccess! TimeGPT Cross-Validation Head:")
        print(tgpt_cv.head())
else:
    print("Error: 'train_subset' or 'test_subset' not found. Please run Step 2 (Data Loading) first.")


--- Running TimeGPT ---


INFO:nixtla.nixtla_client:Happy Forecasting! :)
INFO:nixtla.nixtla_client:Validating inputs...
INFO:nixtla.nixtla_client:Preprocessing dataframes...
INFO:nixtla.nixtla_client:Querying model metadata...


  Running TimeGPT Cross-Validation...


INFO:nixtla.nixtla_client:Using future exogenous features: ['Store', 'Dept', 'IsHoliday', 'Type', 'Size', 'Temperature', 'Fuel_Price', 'MarkDown1', 'MarkDown2', 'MarkDown3', 'MarkDown4', 'MarkDown5', 'CPI', 'Unemployment']
INFO:nixtla.nixtla_client:Calling Cross Validation Endpoint...
INFO:nixtla.nixtla_client:Validating inputs...
INFO:nixtla.nixtla_client:Preprocessing dataframes...
INFO:nixtla.nixtla_client:Using future exogenous features: ['Store', 'Dept', 'IsHoliday', 'Type', 'Size', 'Temperature', 'Fuel_Price', 'MarkDown1', 'MarkDown2', 'MarkDown3', 'MarkDown4', 'MarkDown5', 'CPI', 'Unemployment']
INFO:nixtla.nixtla_client:Calling Forecast Endpoint...


  Running TimeGPT Future Forecast (39 weeks)...

Success! TimeGPT Cross-Validation Head:
  unique_id         ds     cutoff          y     TimeGPT
0     10_72 2012-06-15 2012-06-08  105499.39  115114.920
1     10_72 2012-06-22 2012-06-08  107949.41  181358.220
2     10_72 2012-06-29 2012-06-08   96579.10  133881.170
3     10_72 2012-07-06 2012-06-08  100464.25  148005.270
4     10_72 2012-07-13 2012-07-06   92923.05  113516.984


In [None]:
# ==========================================
# 4. Standard Models Pipeline
# ==========================================
import pandas as pd
import numpy as np
from statsforecast import StatsForecast
from statsforecast.models import Naive, SeasonalNaive, AutoETS, AutoARIMA
from mlforecast import MLForecast
from mlforecast.target_transforms import Differences
from lightgbm import LGBMRegressor
from neuralforecast import NeuralForecast
from neuralforecast.auto import AutoNBEATS, AutoNHITS

# Widen the pandas console display so every model column of the results
# tables is printed instead of being elided.
for _option_name, _option_value in (('display.max_columns', None), ('display.width', 1000)):
    pd.set_option(_option_name, _option_value)

def run_standard_pipeline(df, horizon=4, n_windows=5, freq='W-FRI'):
    """Cross-validate the statistical, ML and neural models on ``df``.

    Runs rolling-origin cross-validation with StatsForecast (Naive,
    SeasonalNaive, AutoETS, AutoARIMA), MLForecast (LightGBM with exogenous
    features) and NeuralForecast (AutoNBEATS, AutoNHITS), then joins all
    predictions into one frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Training frame with ``unique_id``, ``ds``, ``y`` and the feature
        columns listed in ``mlforecast_cols`` below.
    horizon : int, default 4
        Forecast horizon per window; also used as the step between windows
        and as the neural validation size.
    n_windows : int, default 5
        Number of cross-validation windows.
    freq : str, default 'W-FRI'
        Pandas frequency alias of the series.

    Returns
    -------
    pandas.DataFrame
        One row per ``unique_id``/``ds``/``cutoff`` with ``y`` and one
        prediction column per model. The StatsForecast result is the left side
        of the joins, so rows missing from another library appear as NaN.
    """
    # A. StatsForecast (Naive, ARIMA, ETS)
    print(f"[{'StatsForecast':<15}] Training: Naive, SeasonalNaive, AutoETS, AutoARIMA")
    sf = StatsForecast(
        models=[
            Naive(),
            SeasonalNaive(season_length=52),
            AutoETS(season_length=52),
            AutoARIMA(season_length=52)
        ],
        freq=freq,
        n_jobs=-1
    )
    stats_cv = sf.cross_validation(
        df=df[['unique_id', 'ds', 'y']],
        h=horizon, step_size=horizon, n_windows=n_windows
    )

    # B. MLForecast (LightGBM)
    print(f"[{'MLForecast':<15}] Training: LightGBM")
    lgbm = LGBMRegressor(verbosity=-1, random_state=42)

    # Columns handed to MLForecast: keys, target, static and dynamic features
    mlforecast_cols = [
        'unique_id', 'ds', 'y', 'Type', 'Size',
        'Temperature', 'Fuel_Price', 'CPI', 'Unemployment',
        'MarkDown1', 'MarkDown2', 'MarkDown3', 'MarkDown4', 'MarkDown5',
        'IsHoliday'
    ]
    df_for_mlforecast = df[mlforecast_cols].copy()

    mlf = MLForecast(
        models=[lgbm], freq=freq, lags=[1, 4, 52],
        target_transforms=[Differences([1])],
        date_features=['month', 'week'], num_threads=4
    )

    ml_cv = mlf.cross_validation(
        df=df_for_mlforecast,
        h=horizon, step_size=horizon, n_windows=n_windows,
        static_features=['Type', 'Size']
    )
    ml_cv = ml_cv.rename(columns={'LGBMRegressor': 'LightGBM'})

    # C. NeuralForecast (AutoNBEATS, AutoNHITS)
    print(f"[{'NeuralForecast':<15}] Training: AutoNBEATS, AutoNHITS")

    nf = NeuralForecast(
        models=[
            AutoNBEATS(h=horizon, num_samples=2),
            AutoNHITS(h=horizon, num_samples=2)
        ],
        freq=freq
    )

    neural_cv = nf.cross_validation(
        df=df[['unique_id', 'ds', 'y']],
        val_size=horizon,
        n_windows=n_windows,
        step_size=horizon
    )

    # D. MERGING ALL RESULTS
    print("Merging all model predictions...")

    # Start from StatsForecast and left-join the other libraries on the CV
    # keys; their own 'y' column is dropped to avoid duplicated targets.
    merge_keys = ['unique_id', 'ds', 'cutoff']
    all_results = stats_cv.copy()
    for extra_cv in (ml_cv, neural_cv):
        all_results = all_results.merge(
            extra_cv.drop(columns=['y'], errors='ignore'),
            on=merge_keys,
            how='left'
        )

    return all_results

# EXECUTE
# The loader sets train_subset to None on failure, so test the value rather
# than only the presence of the name (None has no .groupby).
if globals().get('train_subset') is not None:
    # Keep only series with some non-zero sales
    active_ids = train_subset.groupby('unique_id')['y'].sum()
    active_ids = active_ids[active_ids > 0].index.tolist()

    if len(active_ids) > 0:
        train_subset_clean = train_subset[train_subset['unique_id'].isin(active_ids)].copy()

        # Run Pipeline
        combined_results = run_standard_pipeline(train_subset_clean)

        print("\n" + "="*50)
        print("SUCCESS! PREDICTIONS FROM ALL MODELS:")
        print("="*50)
        print(combined_results.head())
    else:
        print("Error: No series with non-zero sales to model.")
else:
    print("Error: 'train_subset' not found. Please run Step 2 (Data Loading) first.")

[36m(_train_tune pid=32564)[0m c:\Users\woodw\AppData\Local\Programs\Python\Python312\Lib\site-packages\ray\tune\integration\pytorch_lightning.py:198: `ray.tune.integration.pytorch_lightning.TuneReportCallback` is deprecated. Use `ray.tune.integration.pytorch_lightning.TuneReportCheckpointCallback` instead.
[36m(_train_tune pid=32564)[0m Seed set to 6
[36m(_train_tune pid=32564)[0m GPU available: False, used: False
[36m(_train_tune pid=32564)[0m TPU available: False, using: 0 TPU cores
[36m(_train_tune pid=32564)[0m 
[36m(_train_tune pid=32564)[0m   | Name         | Type          | Params | Mode  | FLOPs
[36m(_train_tune pid=32564)[0m ---------------------------------------------------------------
[36m(_train_tune pid=32564)[0m 0 | loss         | MAE           | 0      | train | 0    
[36m(_train_tune pid=32564)[0m 1 | padder_train | ConstantPad1d | 0      | train | 0    
[36m(_train_tune pid=32564)[0m 2 | scaler       | TemporalNorm  | 0      | train | 0    
[36m(

Epoch 2:   0%|          | 0/1 [00:00<?, ?it/s, v_num=0, train_loss_step=2.83e+16, train_loss_epoch=2.83e+16]        
Epoch 2: 100%|██████████| 1/1 [00:00<00:00, 71.41it/s, v_num=0, train_loss_step=3.83e+15, train_loss_epoch=3.83e+15]
Epoch 3:   0%|          | 0/1 [00:00<?, ?it/s, v_num=0, train_loss_step=3.83e+15, train_loss_epoch=3.83e+15]        
Epoch 3: 100%|██████████| 1/1 [00:00<00:00, 62.46it/s, v_num=0, train_loss_step=5.6e+15, train_loss_epoch=5.6e+15]  
Epoch 4:   0%|          | 0/1 [00:00<?, ?it/s, v_num=0, train_loss_step=5.6e+15, train_loss_epoch=5.6e+15]        
Epoch 11:   0%|          | 0/1 [00:00<?, ?it/s, v_num=0, train_loss_step=4.45e+13, train_loss_epoch=4.45e+13]        
Epoch 18:   0%|          | 0/1 [00:00<?, ?it/s, v_num=0, train_loss_step=8.18e+14, train_loss_epoch=8.18e+14]        
Epoch 19:   0%|          | 0/1 [00:00<?, ?it/s, v_num=0, train_loss_step=1.12e+16, train_loss_epoch=1.12e+16]        
Epoch 26:   0%|          | 0/1 [00:00<?, ?it/s, v_num=0, train_

[36m(_train_tune pid=32564)[0m `Trainer.fit` stopped: `max_steps=1400.0` reached.
[36m(_train_tune pid=51864)[0m c:\Users\woodw\AppData\Local\Programs\Python\Python312\Lib\site-packages\ray\tune\integration\pytorch_lightning.py:198: `ray.tune.integration.pytorch_lightning.TuneReportCallback` is deprecated. Use `ray.tune.integration.pytorch_lightning.TuneReportCheckpointCallback` instead.
[36m(_train_tune pid=51864)[0m Seed set to 15
[36m(_train_tune pid=51864)[0m GPU available: False, used: False
[36m(_train_tune pid=51864)[0m TPU available: False, using: 0 TPU cores
[36m(_train_tune pid=51864)[0m 
[36m(_train_tune pid=51864)[0m   | Name         | Type          | Params | Mode  | FLOPs
[36m(_train_tune pid=51864)[0m ---------------------------------------------------------------
[36m(_train_tune pid=51864)[0m 0 | loss         | MAE           | 0      | train | 0    
[36m(_train_tune pid=51864)[0m 1 | padder_train | ConstantPad1d | 0      | train | 0    
[36m(_train

Epoch 0:   0%|          | 0/1 [00:00<?, ?it/s]                             
Epoch 1:   0%|          | 0/1 [00:00<?, ?it/s, v_num=0, train_loss_step=2.070, train_loss_epoch=2.070]        
Epoch 2:   0%|          | 0/1 [00:00<?, ?it/s, v_num=0, train_loss_step=2.120, train_loss_epoch=2.120]        
Epoch 3:   0%|          | 0/1 [00:00<?, ?it/s, v_num=0, train_loss_step=2.210, train_loss_epoch=2.210]        
Epoch 5:   0%|          | 0/1 [00:00<?, ?it/s, v_num=0, train_loss_step=1.680, train_loss_epoch=1.680]        
Epoch 7:   0%|          | 0/1 [00:00<?, ?it/s, v_num=0, train_loss_step=1.430, train_loss_epoch=1.430]        
Epoch 9:   0%|          | 0/1 [00:00<?, ?it/s, v_num=0, train_loss_step=1.430, train_loss_epoch=1.430]        
Epoch 11:   0%|          | 0/1 [00:00<?, ?it/s, v_num=0, train_loss_step=1.250, train_loss_epoch=1.250]        
Epoch 13:   0%|          | 0/1 [00:00<?, ?it/s, v_num=0, train_loss_step=1.090, train_loss_epoch=1.090]        
Epoch 15:   0%|          | 0/1 [00

2025-12-04 14:39:39,326	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to 'C:/Users/woodw/ray_results/_train_tune_2025-12-04_14-38-41' in 0.0071s.
Seed set to 15
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

  | Name         | Type          | Params | Mode  | FLOPs
---------------------------------------------------------------
0 | loss         | MAE           | 0      | eval  | 0    
1 | padder_train | ConstantPad1d | 0      | train | 0    
2 | scaler       | TemporalNorm  | 0      | train | 0    
3 | blocks       | ModuleList    | 2.4 M  | train | 0    
---------------------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.720     Total estimated model params size (MB)
33        Modules in train mode
1         Modules in eval mode
0         Total Flops
[36m(_train_tune pid=51864)[0m `Trainer.fit` stopped: `max_steps=500.0` reached.


Epoch 499:   0%|          | 0/1 [00:00<?, ?it/s, v_num=0, train_loss_step=0.0298, train_loss_epoch=0.0298, valid_loss=1.17e+4]        
Epoch 499: 100%|██████████| 1/1 [00:00<00:00, 19.19it/s, v_num=0, train_loss_step=0.0263, train_loss_epoch=0.0298, valid_loss=1.17e+4]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 249.93it/s][A
Epoch 499: 100%|██████████| 1/1 [00:00<00:00, 15.97it/s, v_num=0, train_loss_step=0.0263, train_loss_epoch=0.0263, valid_loss=1.18e+4]
Epoch 499: 100%|██████████| 1/1 [00:00<00:00, 15.75it/s, v_num=4, train_loss_step=0.0246, train_loss_epoch=0.0246]

`Trainer.fit` stopped: `max_steps=500.0` reached.


Epoch 499: 100%|██████████| 1/1 [00:00<00:00, 15.27it/s, v_num=4, train_loss_step=0.0246, train_loss_epoch=0.0246]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 153.48it/s]
Merging all model predictions...

SUCCESS! PREDICTIONS FROM ALL MODELS:
  unique_id         ds     cutoff          y      Naive  SeasonalNaive        AutoETS      AutoARIMA       LightGBM     AutoNBEATS      AutoNHITS
0     10_72 2012-06-15 2012-06-08  105499.39  125434.23      127450.66  125453.077037  135382.011204  145607.035537  131838.000000  129803.835938
1     10_72 2012-06-22 2012-06-08  107949.41  125434.23      117948.54  114657.507531  117358.605031  141443.112113  126753.062500  115893.484375
2     10_72 2012-06-29 2012-06-08   96579.10  125434.23      114398.47  116075.626160  109980.247169  148534.664866  100515.156250  109323.000000
3     10_72 2012-07-06 2012-06-08  100464.25  125434.23      108519.93  109074.835739  102381.803899  149283.028227  109166.476562  110246.250000
4     10_72 2012-07-13 2012-07-06   92923.05  100464.25      115004.83  110645.290441  107966.684059  105233.901058  111366.52

In [None]:
# ==========================================
# 5. Generate Testing Outputs 
# ==========================================
import pandas as pd
from statsforecast import StatsForecast
from statsforecast.models import Naive, SeasonalNaive, AutoETS, AutoARIMA
from mlforecast import MLForecast
from mlforecast.target_transforms import Differences
from lightgbm import LGBMRegressor
from neuralforecast import NeuralForecast
from neuralforecast.auto import AutoNBEATS, AutoNHITS
from neuralforecast.losses.pytorch import MAE 

def generate_future_forecasts(df_train, df_test):
    """Fit every model on the full training data and forecast the test period.

    Parameters
    ----------
    df_train : pandas.DataFrame
        Training frame with ``unique_id``, ``ds``, ``y`` and the feature
        columns listed in ``ml_train_cols`` below.
    df_test : pandas.DataFrame
        Future rows with ``unique_id``, ``ds`` and the dynamic feature columns.
        The row count of the longest series defines the horizon.

    Returns
    -------
    pandas.DataFrame
        One row per ``unique_id``/``ds`` with one forecast column per model.
        The StatsForecast result is the left side of the joins.
    """
    print("\n--- Generating Future Forecasts (Testing Outputs) ---")

    # 1. Configuration
    FREQ = 'W-FRI'
    # Horizon = number of future rows of the longest test series. .max()
    # yields a NumPy integer; convert it to a built-in int before using it
    # as ``h`` in the forecasting libraries.
    HORIZON = int(df_test.groupby('unique_id')['ds'].count().max())
    print(f"Forecasting horizon: {HORIZON} weeks")

    # 2. StatsForecast
    print("1. Generating StatsForecast predictions...")
    sf = StatsForecast(
        models=[
            Naive(),
            SeasonalNaive(season_length=52),
            AutoETS(season_length=52),
            AutoARIMA(season_length=52)
        ],
        freq=FREQ,
        n_jobs=-1
    )
    sf_fut = sf.forecast(df=df_train[['unique_id', 'ds', 'y']], h=HORIZON)
    # Only lift 'unique_id' out of the index when it actually lives there;
    # an unconditional reset_index() would add a stray 'index' column when
    # the library already returns 'unique_id' as a regular column.
    if 'unique_id' not in sf_fut.columns:
        sf_fut = sf_fut.reset_index()

    # 3. MLForecast (LightGBM)
    print("2. Generating MLForecast (LightGBM) predictions...")
    lgbm = LGBMRegressor(verbosity=-1, random_state=42)

    mlf = MLForecast(
        models=[lgbm],
        freq=FREQ,
        lags=[1, 4, 52],
        target_transforms=[Differences([1])],
        date_features=['month', 'week'],
        num_threads=4
    )

    # Columns used for training: keys, target, static and dynamic features
    static_features = ['Type', 'Size']
    ml_train_cols = ['unique_id', 'ds', 'y', 'Type', 'Size', 'Temperature',
                     'Fuel_Price', 'CPI', 'Unemployment', 'IsHoliday',
                     'MarkDown1', 'MarkDown2', 'MarkDown3', 'MarkDown4', 'MarkDown5']

    # Fit (Type and Size are constant per series)
    mlf.fit(df=df_train[ml_train_cols], static_features=static_features)

    # Future exogenous frame: keys plus exactly the dynamic features seen in
    # training. Static features are stored by the fitted model and must not be
    # supplied again; unrelated columns (e.g. Store, Dept) are left out.
    non_dynamic = ['unique_id', 'ds', 'y'] + static_features
    dynamic_features = [c for c in ml_train_cols if c not in non_dynamic]
    X_df_future = df_test[['unique_id', 'ds'] + dynamic_features]

    ml_fut = mlf.predict(h=HORIZON, X_df=X_df_future)
    ml_fut = ml_fut.rename(columns={'LGBMRegressor': 'LightGBM'})

    # 4. NeuralForecast (Auto models with an explicit MAE loss)
    print("3. Generating NeuralForecast predictions...")

    nf = NeuralForecast(
        models=[
            AutoNBEATS(
                h=HORIZON,
                loss=MAE(),
                num_samples=5
            ),
            AutoNHITS(
                h=HORIZON,
                loss=MAE(),
                num_samples=5
            )
        ],
        freq=FREQ
    )

    nf.fit(df=df_train[['unique_id', 'ds', 'y']])
    nf_fut = nf.predict(futr_df=df_test[['unique_id', 'ds']])
    # NOTE(review): depending on the installed version, 'unique_id' may come
    # back as the index here as well - normalise it the same way.
    if 'unique_id' not in nf_fut.columns:
        nf_fut = nf_fut.reset_index()

    # 5. Merge All Results
    print("Merging future forecasts...")
    final_fut = sf_fut.copy()
    final_fut = final_fut.merge(ml_fut, on=['unique_id', 'ds'], how='left')
    final_fut = final_fut.merge(nf_fut, on=['unique_id', 'ds'], how='left')

    return final_fut

# EXECUTE
# The loader sets both names to None on failure, so test the values rather
# than only the presence of the names (None has no .groupby).
if globals().get('train_subset') is not None and globals().get('test_subset') is not None:

    # Keep only series with some non-zero sales, in both train and test
    active_ids = train_subset.groupby('unique_id')['y'].sum()
    active_ids = active_ids[active_ids > 0].index.tolist()
    train_clean = train_subset[train_subset['unique_id'].isin(active_ids)].copy()
    test_clean = test_subset[test_subset['unique_id'].isin(active_ids)].copy()

    # RUN FUNCTION
    future_forecasts = generate_future_forecasts(train_clean, test_clean)

    # Merge the TimeGPT forecast from Step 3 when it exists and is non-empty
    if 'tgpt_fcst' in globals() and not tgpt_fcst.empty:
        print("Merging TimeGPT results...")
        cols_to_merge = ['unique_id', 'ds', 'TimeGPT']
        if all(col in tgpt_fcst.columns for col in cols_to_merge):
            future_forecasts = future_forecasts.merge(
                tgpt_fcst[cols_to_merge],
                on=['unique_id', 'ds'],
                how='left'
            )

    print("\nSuccess! Future forecasts generated.")
    print(future_forecasts.head())

else:
    print("Error: 'train_subset' or 'test_subset' not found. Please run Step 2 (Data Loading) first.")

TypeError: 'int' object is not subscriptable

In [None]:
# ==========================================
# 6. Evaluation Metrics
# ==========================================
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error

def calculate_metrics(cv_df):
    """Compute global accuracy metrics for every model column in ``cv_df``.

    Parameters
    ----------
    cv_df : pandas.DataFrame
        Cross-validation results with a ``y`` column holding the actuals.
        Every column other than ``unique_id``, ``ds``, ``cutoff`` and ``y`` is
        treated as a model's predictions.

    Returns
    -------
    pandas.DataFrame
        Columns ``Model``, ``ME``, ``MAE``, ``RMSE``, ``MAPE`` sorted by
        ascending RMSE. Errors are ``y - prediction``. MAPE is a percentage
        computed only over rows whose actual is non-zero (NaN when there are
        none). Models without any usable row are omitted; the frame is empty,
        but still has the five columns, when no model qualifies.
    """
    key_cols = ['unique_id', 'ds', 'cutoff', 'y']
    metric_cols = ['Model', 'ME', 'MAE', 'RMSE', 'MAPE']

    # Identify model columns (exclude ID/Date/Target columns)
    models = [c for c in cv_df.columns if c not in key_cols]
    y_true = cv_df['y']
    results = []

    for model in models:
        y_pred = cv_df[model]

        # Use only rows where both the prediction and the actual are present
        # (left joins between libraries can leave gaps).
        valid_mask = y_pred.notna() & y_true.notna()
        if not valid_mask.any():
            continue

        y_true_clean = y_true[valid_mask]
        err = y_true_clean - y_pred[valid_mask]

        # Rubric metrics, all derived from the same error vector
        me = err.mean()
        mae = err.abs().mean()
        rmse = np.sqrt((err ** 2).mean())

        # MAPE: skip zero actuals to avoid division by zero
        nonzero = y_true_clean != 0
        if nonzero.any():
            mape = (err[nonzero] / y_true_clean[nonzero]).abs().mean() * 100
        else:
            mape = np.nan

        results.append({'Model': model, 'ME': me, 'MAE': mae, 'RMSE': rmse, 'MAPE': mape})

    # Passing the column list keeps sort_values valid for an empty result.
    return pd.DataFrame(results, columns=metric_cols).sort_values(by='RMSE')

def count_winners(cv_df):
    """Count, per model, how many series it wins on mean absolute error.

    Every column of ``cv_df`` other than ``unique_id``, ``ds``, ``cutoff`` and
    ``y`` is treated as a model. For each ``unique_id`` the model with the
    smallest mean absolute error is the winner; the result is the winner
    tally as a two-column frame (model label, number of series won).
    """
    key_cols = ('unique_id', 'ds', 'cutoff', 'y')
    model_cols = [name for name in cv_df.columns if name not in key_cols]

    # Row-wise absolute error of every model against the actuals
    abs_err = cv_df[model_cols].sub(cv_df['y'], axis=0).abs()

    # Mean absolute error per series, then the best model of each series
    per_series = abs_err.groupby(cv_df['unique_id']).mean()
    per_series['Winner'] = per_series.idxmin(axis=1)

    tally = per_series['Winner'].value_counts()
    return tally.reset_index()

# EXECUTE THE FUNCTIONS
# 'combined_results' is the cross-validation frame assigned by the Step 4
# execution cell; this block is skipped silently when that cell has not run.
if 'combined_results' in globals():
    # Global metrics: one row per model, sorted by RMSE
    print("\n--- Calculating Global Accuracy Metrics ---")
    metrics_df = calculate_metrics(combined_results)
    print(metrics_df)

    # Leaderboard: number of series on which each model has the lowest MAE
    print("\n--- Model Leaderboard (Wins per Series) ---")
    winners_df = count_winners(combined_results)
    print(winners_df)


--- Calculating Global Accuracy Metrics ---
           Model           ME           MAE          RMSE      MAPE
5     AutoNBEATS  -850.619566   7107.947170   9782.697450  5.175090
2        AutoETS  -857.830317   7845.232619  10662.438821  5.672372
3      AutoARIMA -2206.756063   7418.543313  11411.103296  5.473035
6      AutoNHITS -1811.165523   9425.748159  12897.903357  6.778289
4       LightGBM -5034.945115  10399.368105  14645.473281  7.725298
1  SeasonalNaive  1012.752825  11298.365025  15861.602483  8.184313
0          Naive -2025.840950  12065.408550  15934.427679  8.689676

--- Model Leaderboard (Wins per Series) ---
          Winner  count
0     AutoNBEATS      9
1      AutoARIMA      4
2        AutoETS      3
3  SeasonalNaive      3
4      AutoNHITS      1


In [None]:
# ==========================================
# 7. Main Execution 
# ==========================================
import matplotlib.pyplot as plt
import pandas as pd

# End-to-end run: reload the data, cross-validate all models, add TimeGPT,
# forecast the test period, score the models, save CSVs and plot one series.
# Every name assigned here is a notebook-level variable and overwrites the
# results of the earlier step-by-step cells.

# Check if functions from previous cells are defined
required_funcs = ['load_and_prep_data', 'run_standard_pipeline', 'run_timegpt_pipeline',
                  'generate_future_forecasts', 'calculate_metrics', 'count_winners']

if not all(func in globals() for func in required_funcs):
    print("Error: Required functions are missing.")
    print("Please make sure you have run ALL previous cells (Steps 2-6) before running this one.")
else:
    print("Starting Main Execution Pipeline...\n")

    # 1. Load Data
    train_subset, test_subset = load_and_prep_data()

    # The loader returns (None, None) on failure; nothing below runs then.
    if train_subset is not None:
        # 2. Run Standard Model
        # Cross-validation of the statistical, ML and neural models.
        final_eval = run_standard_pipeline(train_subset)

        # 3. Run TimeGPT (Remote)
        # Returns two empty DataFrames when the remote call fails.
        tgpt_cv, tgpt_fcst = run_timegpt_pipeline(train_subset, test_subset, horizon=4, n_windows=5)

        # 4. Merge TimeGPT into Evaluation Results
        print("Merging TimeGPT results...")
        merge_keys = ['unique_id', 'ds', 'cutoff']

        if not tgpt_cv.empty and 'TimeGPT' in tgpt_cv.columns:
            # Drop 'y' from TimeGPT CV to avoid duplicates if it exists
            tgpt_clean = tgpt_cv.drop(columns=['y'], errors='ignore')
            final_eval = final_eval.merge(tgpt_clean, on=merge_keys, how='left')

        # 5. Generate Future Forecasts
        future_forecasts = generate_future_forecasts(train_subset, test_subset)

        # Merge TimeGPT Future Forecasts if available
        if not tgpt_fcst.empty and 'TimeGPT' in tgpt_fcst.columns:
             future_forecasts = future_forecasts.merge(
                 tgpt_fcst[['unique_id', 'ds', 'TimeGPT']],
                 on=['unique_id', 'ds'],
                 how='left'
             )

        # 6. Metrics & Winners
        metrics_df = calculate_metrics(final_eval)
        winners_df = count_winners(final_eval)

        print("\n--- Final Metrics ---")
        print(metrics_df)
        print("\n--- Model Winners (Count by Series) ---")
        print(winners_df)

        # 7. Save CSVs
        # Written to the current working directory; existing files are replaced.
        final_eval.to_csv('final_evaluation_output.csv', index=False)
        metrics_df.to_csv('final_metrics_summary.csv', index=False)
        future_forecasts.to_csv('testing_outputs.csv', index=False)
        print("\n Files Saved: final_evaluation_output.csv, final_metrics_summary.csv, testing_outputs.csv")

        # 8. Plotting
        # Check if we have data to plot
        if not final_eval.empty:
            # Only the first series in the evaluation frame is plotted.
            u_id = final_eval['unique_id'].unique()[0]
            subset = final_eval[final_eval['unique_id'] == u_id]

            plt.figure(figsize=(14, 6))

            # Plot Actuals
            if 'y' in subset.columns:
                plt.plot(subset['ds'], subset['y'], label='Actual', color='black', linewidth=2)

            # Plot models dynamically
            # Every column that is not a key or the target is treated as a model.
            plot_models = [c for c in subset.columns if c not in ['unique_id', 'ds', 'cutoff', 'y']]
            for m in plot_models:
                # Plot only if column is numeric
                if pd.api.types.is_numeric_dtype(subset[m]):
                    plt.plot(subset['ds'], subset[m], label=m, alpha=0.7)

            plt.title(f"Forecast Models vs Actual: {u_id}")
            plt.legend()
            plt.show()

Error: Required functions are missing.
Please make sure you have run ALL previous cells (Steps 2-6) before running this one.
