# AI Forecasting for Retail Demand — Answer Key
Compares a RandomForest regressor trained on lag features against a naive previous-day baseline, with an optional LSTM if TensorFlow is installed.

In [None]:
import pandas as pd, numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt

# Read the daily sales table, parsing the "date" column into datetimes.
df = pd.read_csv("../data/daily_store_category_sales.csv", parse_dates=["date"])

# Keep a single store/category series (S1 / A) and order it chronologically.
in_series = (df["store"] == "S1") & (df["category"] == "A")
sub = df[in_series].sort_values("date")

# Sales array captured before any rows are dropped below, so it is longer
# than the feature table; it is not used by the code visible in this section.
y = sub["sales"].values

# Lag features: sales shifted by 1, 7 and 14 rows of the sorted series.
for lag in (1, 7, 14):
    sub[f"lag{lag}"] = sub["sales"].shift(lag)

# Rows with any missing value (including the leading rows that lack a full
# 14-row lag history) are removed, then the index is renumbered from zero.
rows_with_full_history = sub.dropna()
sub = rows_with_full_history.reset_index(drop=True)

# Positional hold-out: the first 80% of rows train the model and the
# remaining rows are kept for evaluation (no shuffling).
train_ratio = 0.8
split = int(train_ratio * len(sub))
train = sub.iloc[:split]
test = sub.iloc[split:]

# Both partitions use the same lag columns as inputs and "sales" as target.
lag_columns = ["lag1", "lag7", "lag14"]
X_train, y_train = train[lag_columns].values, train["sales"].values
X_test, y_test = test[lag_columns].values, test["sales"].values

# Random forest on the lag features; the fixed seed makes runs repeatable.
rf = RandomForestRegressor(random_state=42)
rf.fit(X_train, y_train)

# Forecast the held-out rows from their lag features.
y_pred = rf.predict(X_test)

# Naive baseline: predict each test row's sales as its lag-1 value
# (the previous row of the date-sorted series, i.e. yesterday's sales).
y_naive = test["lag1"].values

# Mean absolute error of each forecast against the held-out sales.
# The built-in round() is used instead of the result's .round() method:
# that method exists only on NumPy scalars, so the call would raise
# AttributeError whenever mean_absolute_error returns a plain Python float.
# round() accepts both types and prints the same value.
print("MAE RF:", round(mean_absolute_error(y_test, y_pred), 2))
print("MAE Naive:", round(mean_absolute_error(y_test, y_naive), 2))

# Overlay the actual sales, the random-forest forecast and the naive
# baseline on the test-period dates in a single figure.
plt.figure()
test_dates = test["date"]
plotted_series = (
    (y_test, "actual"),
    (y_pred, "rf_pred"),
    (y_naive, "naive"),
)
for values, series_label in plotted_series:
    plt.plot(test_dates, values, label=series_label)
plt.title("Forecast vs Actual")
plt.legend()
plt.show()
