# Advanced Time Series Forecasting for Retail: A Comparative Study

# Author: Amina Abacon

# Part 5: Neural Networks WITH Exogenous Variables

In [1]:
# Import data and packages

import pandas as pd
import numpy as np
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

from neuralforecast import NeuralForecast
from neuralforecast.models import PatchTST
from neuralforecast.losses.pytorch import MAE
from neuralforecast.models import (
    NHITS,
    TFT,           
    TimesNet,     
)
import torch

print("=" * 70)
print("PART 5: NEURAL NETWORKS WITH EXOGENOUS VARIABLES")
print("=" * 70)

# ============================================================================
# STEP 1: Load Data with Features
# ============================================================================
print("\n[1] Loading Data with Features...")
print("-" * 70)

df = pd.read_csv('data/processed/train_with_features.csv', parse_dates=['ds'])

print(f"✓ Loaded data: {df.shape}")
print(f"Time Series: {df['unique_id'].nunique()}")
print(f"Date Range: {df['ds'].min().date()} to {df['ds'].max().date()}")

# Load feature configuration
import json
with open('data/processed/feature_config.json', 'r') as f:
    feature_config = json.load(f)

EXOG_FEATURES = feature_config['exogenous_features']
print(f"\nExogenous Features ({len(EXOG_FEATURES)}):")
for i, feat in enumerate(EXOG_FEATURES, 1):
    print(f"  {i}. {feat}")

PART 5: NEURAL NETWORKS WITH EXOGENOUS VARIABLES

[1] Loading Data with Features...
----------------------------------------------------------------------
✓ Loaded data: (109650, 17)
Time Series: 150
Date Range: 2015-08-16 to 2017-08-15

Exogenous Features (9):
  1. has_promotion
  2. dcoilwtico
  3. oil_ma7
  4. is_holiday
  5. is_weekend
  6. month
  7. dayofweek
  8. is_month_start
  9. is_month_end


In [2]:
# ============================================================================
# STEP 2: Train/Test Split
# ============================================================================
print("\n[2] Creating Train/Test Split...")
print("-" * 70)

test_days = 7
max_date = df['ds'].max()
test_start = max_date - pd.Timedelta(days=test_days - 1)

train_df = df[df['ds'] < test_start].copy()
test_df = df[df['ds'] >= test_start].copy()

print(f"Train Set: {train_df['ds'].min().date()} to {train_df['ds'].max().date()}")
print(f"  Records: {len(train_df):,}")

print(f"\nTest Set: {test_df['ds'].min().date()} to {test_df['ds'].max().date()}")
print(f"  Records: {len(test_df):,}")


[2] Creating Train/Test Split...
----------------------------------------------------------------------
Train Set: 2015-08-16 to 2017-08-08
  Records: 108,600

Test Set: 2017-08-09 to 2017-08-15
  Records: 1,050


In [3]:
# ============================================================================
# STEP 3: Prepare Data with Exogenous Features
# ============================================================================
print("\n[3] Preparing Data with Exogenous Features...")
print("-" * 70)

# Training data with exogenous
train_cols = ['unique_id', 'ds', 'y'] + EXOG_FEATURES
neural_train = train_df[train_cols].copy()
neural_train = neural_train.sort_values(['unique_id', 'ds']).reset_index(drop=True)

# Future exogenous (test period features)
future_exog = test_df[['unique_id', 'ds'] + EXOG_FEATURES].copy()
future_exog = future_exog.sort_values(['unique_id', 'ds']).reset_index(drop=True)

print(f"✓ Training data: {neural_train.shape}")
print(f"✓ Future exogenous: {future_exog.shape}")

# Check for missing values
missing_train = neural_train[EXOG_FEATURES].isnull().sum().sum()
missing_future = future_exog[EXOG_FEATURES].isnull().sum().sum()

if missing_train > 0 or missing_future > 0:
    print(f"\n  Missing values: train={missing_train}, future={missing_future}")
else:
    print("✓ No missing values in exogenous features")


[3] Preparing Data with Exogenous Features...
----------------------------------------------------------------------
✓ Training data: (108600, 12)
✓ Future exogenous: (1050, 11)
✓ No missing values in exogenous features


In [4]:
# ============================================================================
# STEP 4: Initialize Models with Exogenous Features
# ============================================================================

# Define forecast configuration
forecast_horizon = 7  # 7-day forecast
input_size = 28       # Use 28 days of history

print("[4] Initializing Models with Exogenous Features...")
print("-" * 70)
print("\nModel Configuration:")
print(f"  Forecast Horizon: {forecast_horizon} days")
print(f"  Input Size: {input_size} days")
print(f"  Exogenous Features: {len(EXOG_FEATURES)}")
print(f"  Loss Function: MAE")
print(f"  Training Steps: 500")

# Use NHITS with different configurations
nhits_base = NHITS(
    h=forecast_horizon,
    input_size=input_size,
    futr_exog_list=EXOG_FEATURES,
    max_steps=500,
    early_stop_patience_steps=5,
    scaler_type='robust',
    random_seed=42,
    alias='NHITS_base'
)

nhits_deep = NHITS(
    h=forecast_horizon,
    input_size=input_size,
    futr_exog_list=EXOG_FEATURES,
    max_steps=500,
    early_stop_patience_steps=5,
    scaler_type='robust',
    random_seed=42,
    n_pool_kernel_size=[2, 2, 2],
    alias='NHITS_deep'
)

models = [nhits_base, nhits_deep]

Seed set to 42


[4] Initializing Models with Exogenous Features...
----------------------------------------------------------------------

Model Configuration:
  Forecast Horizon: 7 days
  Input Size: 28 days
  Exogenous Features: 9
  Loss Function: MAE
  Training Steps: 500


Seed set to 42


In [5]:
# ============================================================================
# STEP 5: Train Models
# ============================================================================
print("\n[5] Training Models with Exogenous Variables...")
print("-" * 70)
print(" Training 2 models on 150 time series with 9 features...")
print("   Estimated time: 8-12 minutes (CPU) or 3-5 minutes (GPU)\n")

nf = NeuralForecast(models=models, freq='D')

# Train with validation set
nf.fit(df=neural_train, val_size=7)

print("\n✓ Training complete!")


[5] Training Models with Exogenous Variables...
----------------------------------------------------------------------
 Training 2 models on 150 time series with 9 features...
   Estimated time: 8-12 minutes (CPU) or 3-5 minutes (GPU)



GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name         | Type          | Params | Mode 
-------------------------------------------------------
0 | loss         | MAE           | 0      | train
1 | padder_train | ConstantPad1d | 0      | train
2 | scaler       | TemporalNorm  | 0      | train
3 | blocks       | ModuleList    | 2.8 M  | train
-------------------------------------------------------
2.8 M     Trainable params
0         Non-trainable params
2.8 M     Total params
11.080    Total estimated model params size (MB)
34        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=500` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name         | Type          | Params | Mode 
-------------------------------------------------------
0 | loss         | MAE           | 0      | train
1 | padder_train | ConstantPad1d | 0      | train
2 | scaler       | TemporalNorm  | 0      | train
3 | blocks       | ModuleList    | 2.7 M  | train
-------------------------------------------------------
2.7 M     Trainable params
0         Non-trainable params
2.7 M     Total params
10.738    Total estimated model params size (MB)
34        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=500` reached.



✓ Training complete!


In [6]:
# ============================================================================
# STEP 6: Generate Forecasts with Future Exogenous
# ============================================================================
print("\n[6] Generating Forecasts with Future Exogenous...")
print("-" * 70)

# CRITICAL: Pass future exogenous features
forecasts = nf.predict(futr_df=future_exog)

print(f"✓ Generated forecasts: {forecasts.shape}")
print(f"\nForecast columns: {forecasts.columns.tolist()}")
print(f"\nSample forecasts:")
print(forecasts.head(10))

Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.



[6] Generating Forecasts with Future Exogenous...
----------------------------------------------------------------------


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

✓ Generated forecasts: (1050, 4)

Forecast columns: ['unique_id', 'ds', 'NHITS_base', 'NHITS_deep']

Sample forecasts:
               unique_id         ds   NHITS_base   NHITS_deep
0     store_10_BEVERAGES 2017-08-09  1111.021606  1154.508301
1     store_10_BEVERAGES 2017-08-10  1253.534180  1176.073486
2     store_10_BEVERAGES 2017-08-11  1324.482300  1355.609619
3     store_10_BEVERAGES 2017-08-12  1562.751343  1566.406372
4     store_10_BEVERAGES 2017-08-13  1486.057251  1478.462280
5     store_10_BEVERAGES 2017-08-14  1224.601929  1246.021118
6     store_10_BEVERAGES 2017-08-15  1464.591431  1400.602905
7  store_10_BREAD/BAKERY 2017-08-09    95.368370   102.760956
8  store_10_BREAD/BAKERY 2017-08-10    93.047325    96.740891
9  store_10_BREAD/BAKERY 2017-08-11    87.632378    91.555168


In [11]:
# ============================================================================
# STEP 7: Evaluate Performance
# ============================================================================
print("\n[7] Evaluating Model Performance...")
print("-" * 70)

# Merge forecasts with actuals
forecasts_df = forecasts.reset_index()

eval_df = test_df[['unique_id', 'ds', 'y']].merge(
    forecasts_df,
    on=['unique_id', 'ds'],
    how='inner'
)

print(f"✓ Merged evaluation data: {eval_df.shape}")

def calculate_metrics(actual, predicted):
    """Calculate forecasting error metrics"""
    actual = np.array(actual)
    predicted = np.array(predicted)
    
    mask = ~(np.isnan(actual) | np.isnan(predicted))
    actual = actual[mask]
    predicted = predicted[mask]
    
    if len(actual) == 0:
        return {'MAE': np.nan, 'RMSE': np.nan, 'MAPE': np.nan}
    
    mae = np.mean(np.abs(actual - predicted))
    rmse = np.sqrt(np.mean((actual - predicted) ** 2))
    mape = np.mean(np.abs((actual - predicted) / (actual + 1e-10))) * 100
    
    return {'MAE': mae, 'RMSE': rmse, 'MAPE': mape}

# Calculate metrics for each model
model_cols = [col for col in eval_df.columns if col in ['NHITS_base', 'NHITS_deep']]
results = []

for model in model_cols:
    metrics = calculate_metrics(eval_df['y'], eval_df[model])
    metrics['Model'] = model + ' + Exog'
    metrics['Type'] = 'Neural Network'
    results.append(metrics)

results_df = pd.DataFrame(results)
results_df = results_df[['Model', 'Type', 'MAE', 'RMSE', 'MAPE']].sort_values('MAE')

print("\n Neural Network + Exogenous Performance:")
print(results_df.to_string(index=False))

best_with_exog = results_df.iloc[0]
print(f"\n Best Model with Exogenous: {best_with_exog['Model']}")
print(f"   MAE: {best_with_exog['MAE']:.2f}")
print(f"   RMSE: {best_with_exog['RMSE']:.2f}")
print(f"   MAPE: {best_with_exog['MAPE']:.2f}%")


[7] Evaluating Model Performance...
----------------------------------------------------------------------
✓ Merged evaluation data: (1050, 6)

 Neural Network + Exogenous Performance:
            Model           Type        MAE       RMSE      MAPE
NHITS_base + Exog Neural Network 204.074263 442.010958 15.874679
NHITS_deep + Exog Neural Network 212.775158 444.583458 16.571112

 Best Model with Exogenous: NHITS_base + Exog
   MAE: 204.07
   RMSE: 442.01
   MAPE: 15.87%


In [12]:
# ============================================================================
# STEP 8: Compare with Previous Results
# ============================================================================
print("\n[8] Comparing with Previous Results...")
print("-" * 70)

# Load Part 4 results (neural without exogenous)
neural_no_exog = pd.read_csv('output/neural_results.csv')
best_no_exog = neural_no_exog.iloc[0]

# Load baseline (statistical)
baseline = pd.read_csv('output/baseline_results.csv')
best_statistical = baseline.iloc[0]

print(f"\nBest Statistical: {best_statistical['Model']}")
print(f"  MAE: {best_statistical['MAE']:.2f}")

print(f"\nBest Neural (no exog): {best_no_exog['Model']}")
print(f"  MAE: {best_no_exog['MAE']:.2f}")

print(f"\nBest Neural (with exog): {best_with_exog['Model']}")
print(f"  MAE: {best_with_exog['MAE']:.2f}")

improvement_vs_statistical = ((best_statistical['MAE'] - best_with_exog['MAE']) / best_statistical['MAE']) * 100
improvement_vs_neural = ((best_no_exog['MAE'] - best_with_exog['MAE']) / best_no_exog['MAE']) * 100

print(f"\n Improvements:")
print(f"  vs. Statistical Baseline: {improvement_vs_statistical:+.1f}%")
print(f"  vs. Neural (no exog): {improvement_vs_neural:+.1f}%")

if improvement_vs_neural > 2:
    print("\n Exogenous features significantly improved neural networks!")
elif improvement_vs_neural > 0:
    print("\n✓ Exogenous features provided modest improvement")
else:
    print("\n  Exogenous features did not improve neural networks")
    print("   Simpler model (no exog) may be preferred for deployment")


[8] Comparing with Previous Results...
----------------------------------------------------------------------

Best Statistical: AutoTheta
  MAE: 276.44

Best Neural (no exog): PatchTST
  MAE: 213.83

Best Neural (with exog): NHITS_base + Exog
  MAE: 204.07

 Improvements:
  vs. Statistical Baseline: +26.2%
  vs. Neural (no exog): +4.6%

 Exogenous features significantly improved neural networks!


In [9]:
# ============================================================================
# STEP 9: Save Results (Checkpoint)
# ============================================================================
print("\n[9] Saving Results...")
print("-" * 70)

output_path = Path('output')

# Save evaluation data
eval_df.to_csv(output_path / 'neural_exog_eval_checkpoint.csv', index=False)
print(f"✓ Saved: {output_path / 'neural_exog_eval_checkpoint.csv'}")

# Save results
results_df.to_csv(output_path / 'neural_exog_results.csv', index=False)
print(f"✓ Saved: {output_path / 'neural_exog_results.csv'}")

# Save forecasts
forecasts.to_csv(output_path / 'neural_exog_forecasts.csv')
print(f"✓ Saved: {output_path / 'neural_exog_forecasts.csv'}")

print("\n  RECOMMENDED: Restart kernel now to free memory")
print("   Then run Part 5B (final analysis & decision)")


[9] Saving Results...
----------------------------------------------------------------------
✓ Saved: output\neural_exog_eval_checkpoint.csv
✓ Saved: output\neural_exog_results.csv
✓ Saved: output\neural_exog_forecasts.csv

  RECOMMENDED: Restart kernel now to free memory
   Then run Part 5B (final analysis & decision)


In [13]:
# ============================================================================
# Summary
# ============================================================================
print("\n" + "=" * 70)
print("NEURAL NETWORKS + EXOGENOUS TRAINING COMPLETE")
print("=" * 70)

print(f"\n Results Summary:")
print(f"  Best Model: {best_with_exog['Model']}")
print(f"  MAE: {best_with_exog['MAE']:.2f}")
print(f"  Improvement vs Statistical: {improvement_vs_statistical:+.1f}%")
print(f"  Improvement vs Neural (no exog): {improvement_vs_neural:+.1f}%")

print(f"\n Checkpoint Files Saved:")
print(f"  - neural_exog_eval_checkpoint.csv")
print(f"  - neural_exog_results.csv")
print(f"  - neural_exog_forecasts.csv")

print(f"\n Next Steps:")
print(f"  1. Restart kernel (Kernel → Restart)")
print(f"  2. Run Part 5B - Final Analysis & Model Decision")
print(f"  3. Get final recommendation for capstone")

print("\n" + "=" * 70)


NEURAL NETWORKS + EXOGENOUS TRAINING COMPLETE

 Results Summary:
  Best Model: NHITS_base + Exog
  MAE: 204.07
  Improvement vs Statistical: +26.2%
  Improvement vs Neural (no exog): +4.6%

 Checkpoint Files Saved:
  - neural_exog_eval_checkpoint.csv
  - neural_exog_results.csv
  - neural_exog_forecasts.csv

 Next Steps:
  1. Restart kernel (Kernel → Restart)
  2. Run Part 5B - Final Analysis & Model Decision
  3. Get final recommendation for capstone



---