In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Read the three raw tables that live in the baseline module's data folder.
train = pd.read_csv("../baseline_prophet_forecast/data/train.csv")
features = pd.read_csv("../baseline_prophet_forecast/data/features.csv")
stores = pd.read_csv("../baseline_prophet_forecast/data/stores.csv")

# Attach the per-store/per-date features to each sales row, then the
# per-store attributes. Both joins use pandas' default inner join.
df = pd.merge(
    pd.merge(train, features, on=["Store", "Date", "IsHoliday"]),
    stores,
    on="Store",
)

# Parse the date strings and order rows chronologically within each
# store/department pair.
df = (
    df
    .assign(Date=pd.to_datetime(df["Date"]))
    .sort_values(by=["Store", "Dept", "Date"])
)
df.head()


In [None]:
# Store/department pair whose weekly sales series is analysed below.
store = 1
dept = 1

# Slice out the chosen series, rename to the ds/y convention and index it by
# date so it can be placed on a regular weekly grid.
ts = (
    df[(df["Store"] == store) & (df["Dept"] == dept)]
    [["Date", "Weekly_Sales", "IsHoliday"]]
    .rename(columns={"Date": "ds", "Weekly_Sales": "y"})
    .sort_values("ds")
    .set_index("ds")
)

# Fail early with a readable message instead of an IndexError further down.
if ts.empty:
    raise ValueError(f"No rows found for Store {store}, Dept {dept}")

# Regularise to a weekly grid so missing weeks show up as explicit NaN rows.
# The plain "W" alias means "W-SUN": it reindexes onto Sundays, and if the
# observations fall on any other weekday none of them match, leaving a frame
# of all-NaN rows. Anchoring the grid on the weekday of the first observation
# keeps the real data (and is identical to "W" when the data is on Sundays).
# NOTE(review): presumably this dataset reports weeks on Fridays - confirm.
weekly_anchor = pd.offsets.Week(weekday=ts.index[0].weekday())
ts = ts.asfreq(weekly_anchor)
ts.head()


In [None]:
# Tunable parameters: length of the trailing window (in weeks) and the
# |z-score| cut-off above which a week is flagged.
window = 8
threshold = 2.0

# Trailing statistics; each window ends at (and includes) the current week.
ts["rolling_mean"] = ts["y"].rolling(window).mean()
ts["rolling_std"] = ts["y"].rolling(window).std()

# Distance of each week from its trailing mean, in trailing standard deviations.
ts["z_score"] = ts["y"].sub(ts["rolling_mean"]).div(ts["rolling_std"])

# Flag anomalies in either direction (spikes or dips).
ts["anomaly"] = ts["z_score"].abs().gt(threshold)

# Discard rows with any missing value, e.g. the warm-up rows at the start
# where the rolling window is not yet full.
ts = ts.dropna()
ts.head()


In [None]:
# Plot the weekly sales series and overlay the flagged weeks in red.
fig, ax = plt.subplots(figsize=(14, 6))

ax.plot(ts.index, ts["y"], label="Weekly Sales", linewidth=1.5)

# Rows whose |z-score| exceeded the threshold; reused by the summary table.
anoms = ts[ts["anomaly"]]

ax.scatter(
    anoms.index,
    anoms["y"],
    color="red",
    label="Anomaly",
    s=50,
    zorder=3,  # draw the markers on top of the line
)

ax.set_title(f"Sales Anomaly Detection — Store {store}, Dept {dept}")
ax.set_xlabel("Date")
ax.set_ylabel("Weekly Sales")
ax.legend()
fig.tight_layout()

# Derive the file name from the selected store/dept so it always matches the
# title (for store=1, dept=1 this is the same path as before), and create the
# output folder first so savefig does not fail on a fresh checkout.
image_path = Path("images") / f"anomaly_series_store{store}_dept{dept}.png"
image_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(image_path)
plt.show()


In [None]:
# Compact summary of the flagged weeks: the sales value, its z-score and
# whether the week is marked as a holiday, with `y` restored to its
# original column name.
anomaly_table = (
    anoms[["y", "z_score", "IsHoliday"]]
    .rename(columns={"y": "Weekly_Sales"})
)

anomaly_table.head()


Interpretation
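- Points flagged with |z-score| > 2 are considered statistically unusual: their weekly sales lie more than two standard deviations from the mean of the trailing 8-week rolling window (which includes the current week).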
- Positive anomalies often correspond to holiday or promotion periods, where demand spikes above typical levels.
- Negative anomalies may indicate stockouts, supply issues, or data/reporting problems, and should be investigated by operations or replenishment teams.
- This logic can be extended across all store–department combinations as part of a monitoring layer in production.