# 03 â€” Baseline Models

Establish naive and seasonal benchmarks.

## Load features

In [1]:
import pandas as pd
from sklearn.metrics import mean_absolute_error

df = pd.read_parquet("../data/processed/features.parquet")


## Train/validation split

In [4]:
split_date = df["date"].max() - pd.Timedelta(days=28)


train = df[df["date"] <= split_date]
val = df[df["date"] > split_date]


## Naive baseline

In [7]:
# Make an explicit copy (kills the warning)
val = val.copy()

# Naive forecast
val["naive"] = val.groupby("id")["sales"].shift(1)

# Only drop rows where naive is NaN
mask = val["naive"].notna()

mae_naive = mean_absolute_error(
    val.loc[mask, "sales"],
    val.loc[mask, "naive"]
)

mae_naive


1.0948518518518517

## Seasonal naive (7-day)

In [8]:
val["seasonal"] = val.groupby("id")["sales"].shift(7)

mask = val["seasonal"].notna()

mae_seasonal = mean_absolute_error(
    val.loc[mask, "sales"],
    val.loc[mask, "seasonal"]
)

mae_seasonal


1.0972857142857142

In [9]:
mask.sum()


np.int64(21000)

## Baseline comparison

In [10]:
pd.DataFrame({
    "Model": ["Naive", "Seasonal Naive"],
    "MAE": [mae_naive, mae_seasonal]
})


Unnamed: 0,Model,MAE
0,Naive,1.094852
1,Seasonal Naive,1.097286
