# 2.04 Forecasting Metrics. (This may be better suited as 2.05)

In [40]:
import pandas as pd 
import numpy as np
import os
import sys 
from pathlib import Path
import matplotlib.pyplot as plt

import tsforge as tsf

import warnings

* fetch metadata

In [41]:
# Set working directory to project root
ROOT_DIR = Path("../..").resolve()
os.chdir(ROOT_DIR)
sys.path.insert(0, str(ROOT_DIR))


warnings.filterwarnings("ignore")
plt.style.use("seaborn-v0_8-whitegrid")

DATA_DIR = Path("data")
DATA_DIR.mkdir(exist_ok=True)

OUTPUT_DIR = DATA_DIR / "output"
OUTPUT_DIR.mkdir(exist_ok=True)

data = tsf.load_m5(data_dir=DATA_DIR, create_unique_id=True, verbose=False,include_hierarchy=True)


OSError: [Errno 30] Read-only file system: 'data'

In [42]:
meta_data = data.select_dtypes(include=['category','object']).drop_duplicates(subset='unique_id').reset_index(drop=True)

In [43]:
df = pd.read_parquet(
    "/Users/jackrodenberg/Desktop/real-world-forecasting-foundations/notebooks/module_02_baselines/statsforecast_backtest.parquet",
)

* make bespoke metrics out of the baseline dataframe, here we can be more creative with custom error measures

In [None]:
e_df = df.melt(
    id_vars=["unique_id", "ds", "cutoff", "y"], var_name="model", value_name="y_pred"
).assign(
    error=lambda x: x["y"] - x["y_pred"],
    abs_error=lambda x: np.abs(x["error"])
)  # add granular error/abs error columns.. these will be used for bespoke metrics...

e_df = e_df.assign(
    timestep=e_df.groupby(["unique_id", "model", "cutoff"]).cumcount().add(1),
    horizon_group=lambda x: np.where(x["timestep"] > 4, "5-13", "1-4"),
)

e_df.head()


Unnamed: 0,unique_id,ds,cutoff,y,model,y_pred,error,abs_error,timestep,horizon_group
0,FOODS_1_001_CA_1,2015-07-04,2015-06-27,2.0,Naive,2.0,0.0,0.0,1,1-4
1,FOODS_1_001_CA_1,2015-07-11,2015-06-27,2.0,Naive,2.0,0.0,0.0,2,1-4
2,FOODS_1_001_CA_1,2015-07-18,2015-06-27,7.0,Naive,2.0,5.0,5.0,3,1-4
3,FOODS_1_001_CA_1,2015-07-25,2015-06-27,4.0,Naive,2.0,2.0,2.0,4,1-4
4,FOODS_1_001_CA_1,2015-08-01,2015-06-27,2.0,Naive,2.0,0.0,0.0,5,5-13


In [45]:
# join in metadata 
e_df = e_df.merge(
    meta_data[['unique_id','item_id']],
    on=['unique_id'],
    how='inner',
    validate="m:1"
)

In [None]:
sku_lvl = e_df.groupby(["item_id","ds","cutoff","model"])[['y','y_pred','error']].transform("sum")

# add absolute SKU level error to table 
sku_lvl['abs_error'] = sku_lvl['error'].abs()


# assign sku level totals to original table..
e_df = e_df.assign(
    sku_error=sku_lvl["error"],
    sku_abs_error=sku_lvl["abs_error"],
    sku_y=sku_lvl["y"],
    sku_pred=sku_lvl["y_pred"],
)

Unnamed: 0,y,y_pred,error,abs_error
0,38.0,26.0,12.0,12.0
1,41.0,26.0,15.0,15.0
2,38.0,26.0,12.0,12.0
3,30.0,26.0,4.0,4.0
4,31.0,26.0,5.0,5.0


In [98]:
e_df.head()

Unnamed: 0,unique_id,ds,cutoff,y,model,y_pred,error,abs_error,timestep,horizon_group,item_id,sku_error,sku_abs_error,sku_y,sku_pred
0,FOODS_1_001_CA_1,2015-07-04,2015-06-27,2.0,Naive,2.0,0.0,0.0,1,1-4,FOODS_1_001,12.0,12.0,38.0,26.0
1,FOODS_1_001_CA_1,2015-07-11,2015-06-27,2.0,Naive,2.0,0.0,0.0,2,1-4,FOODS_1_001,15.0,15.0,41.0,26.0
2,FOODS_1_001_CA_1,2015-07-18,2015-06-27,7.0,Naive,2.0,5.0,5.0,3,1-4,FOODS_1_001,12.0,12.0,38.0,26.0
3,FOODS_1_001_CA_1,2015-07-25,2015-06-27,4.0,Naive,2.0,2.0,2.0,4,1-4,FOODS_1_001,4.0,4.0,30.0,26.0
4,FOODS_1_001_CA_1,2015-08-01,2015-06-27,2.0,Naive,2.0,0.0,0.0,5,5-13,FOODS_1_001,5.0,5.0,31.0,26.0


In [106]:
# ============================================================================
# STEP 3: Aggregate Errors by Model/Horizon/Series/Cutoff
# ============================================================================

groupby_keys = ["model", "horizon_group", "unique_id","item_id", "cutoff"]

base_metric_specs = [
    ("sum_ae", "abs_error", "sum"),
    ("sum_demand", "y", "sum"),
    ("bias", "error", "mean"),
]

sku_metric_specs = [
    ("sku_sum_ae", "sku_abs_error", "sum"),
    ("sku_sum_demand", "sku_y", "sum"),
    ("sku_bias", "sku_error", "mean"),
]

agg_dict = {name: (column, func) for name, column, func in base_metric_specs + sku_metric_specs}

aggregated_errors = e_df.groupby(groupby_keys, sort=False, observed=True, as_index=False).agg(
    **agg_dict
)

print("\u2713 Aggregated base- and SKU-level errors")
print(f"  Number of unique combinations: {len(aggregated_errors):,}")
print(f"  Groupby keys: {groupby_keys}")


âœ“ Aggregated base- and SKU-level errors
  Number of unique combinations: 1,463,520
  Groupby keys: ['model', 'horizon_group', 'unique_id', 'item_id', 'cutoff']


* jitter computation, just the stddev of wMAPE. We can make wMAPE Combined to keep the table simpler... 

In [110]:
# ============================================================================
# STEP 4: Calculate Derived Metrics
# ============================================================================

aggregated_errors = aggregated_errors.assign(
    # Weighted Mean Absolute Percentage Error
    wMAPE=lambda df: df["sum_ae"] / df["sum_demand"],
    sku_wMAPE=lambda df: df["sku_sum_ae"] / df["sku_sum_demand"]
)

In [125]:
tmp = aggregated_errors.assign(wMAPE_jitter = aggregated_errors.groupby(['unique_id',"model","horizon_group"])['wMAPE'].transform('std'),
                        sku_wMAPE_jitter = aggregated_errors.groupby(['item_id',"model","horizon_group"])['sku_wMAPE'].transform('std'))


tmp = tmp.merge(
    tmp.query("model == 'SN52'")[['unique_id','cutoff','sku_wMAPE']],
    on=['unique_id','cutoff'],
    how='left',
    suffixes=("","_SN52")
).assign(
    beat_ind = lambda df: np.where(df['sku_wMAPE'] < df['sku_wMAPE_SN52'], 1, 0),
    beat_rate = lambda df: df.groupby("model")['beat_ind'].transform('mean')
) 


sb = tmp[['model','horizon_group','unique_id','cutoff','wMAPE','sku_wMAPE','sku_wMAPE_jitter','bias','sku_bias']]

In [126]:
sb.head()

Unnamed: 0,model,horizon_group,unique_id,cutoff,wMAPE,sku_wMAPE,sku_wMAPE_jitter,bias,sku_bias
0,Naive,1-4,FOODS_1_001_CA_1,2015-06-27,0.466667,0.292517,0.148181,1.75,10.75
1,Naive,1-4,FOODS_1_001_CA_1,2015-06-27,0.466667,0.292517,0.148181,1.75,10.75
2,Naive,5-13,FOODS_1_001_CA_1,2015-06-27,0.588235,0.335227,0.071615,1.777778,13.111111
3,Naive,5-13,FOODS_1_001_CA_1,2015-06-27,0.588235,0.335227,0.071615,1.777778,13.111111
4,Naive,1-4,FOODS_1_001_CA_1,2015-09-26,0.6,0.175676,0.148181,0.75,-2.0


In [130]:
sb = sb.query("wMAPE != inf and sku_wMAPE != inf")

In [132]:
sb.groupby(["model","horizon_group"]).mean(numeric_only=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,wMAPE,sku_wMAPE,sku_wMAPE_jitter,bias,sku_bias
model,horizon_group,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
CrostonOptimized,1-4,0.841353,0.630765,2.752405,0.554411,4.498957
CrostonOptimized,5-13,1.065158,0.887451,4.955669,0.204007,1.622176
HW52,1-4,1.05267,0.664239,2.320267,0.917479,8.328614
HW52,5-13,1.174016,0.721758,2.734021,0.710093,6.728833
MA4,1-4,0.794379,0.378758,0.584428,0.252898,2.317363
MA4,5-13,1.030017,0.840467,4.338474,0.185488,1.782581
Naive,1-4,0.865039,0.376425,0.330834,0.744257,7.280411
Naive,5-13,1.086278,0.898628,4.033078,0.711071,7.003988
SN52,1-4,1.110136,0.886145,2.532973,1.393733,12.83995
SN52,5-13,1.206192,0.92718,2.766839,1.017791,9.721648
