# 05_Forecasting.ipynb

## 1. Load cleaned transactions & segments  
## 2. Prepare time series (daily aggregate sales)  
## 3. Fit Prophet model  
## 4. Evaluate simple metrics (MAE, MAPE)  
## 5. Save forecast to `data/processed/forecast.csv`


In [1]:
import pandas as pd
from prophet import Prophet

# 1️⃣ Load cleaned transactions and segments
# `order_date` is parsed as datetime on load; `segments` is loaded here but not
# used in the statements of this section.
df_clean = pd.read_csv("../data/processed/cleaned_transactions.csv", parse_dates=["order_date"])
segments = pd.read_csv("../data/processed/customer_segments.csv")

# 2️⃣ Prepare daily aggregate sales
# Truncate every timestamp to midnight before grouping so that orders placed at
# different times on the same calendar day are summed into a single row.
# Grouping on the raw column would yield one row per distinct timestamp if the
# data carries a time-of-day component. For date-only data this is a no-op.
order_day = pd.to_datetime(df_clean["order_date"]).dt.normalize()

# Result: one row per day with Prophet's expected column names
# (`ds` = date, `y` = total of `price` for that day).
daily = (
    df_clean
    .groupby(order_day)["price"]
    .sum()
    .rename_axis("ds")
    .reset_index(name="y")
)

# 3️⃣ Fit Prophet model
# Default Prophet configuration, fitted on the `ds`/`y` frame built above;
# `fit` returns the fitted model, so construction and fitting are chained.
m = Prophet().fit(daily)

# 4️⃣ Create a future dataframe and forecast
# The frame holds every historical date plus the next 30 days at daily
# frequency, so `forecast` contains in-sample fits as well as future values.
future = m.make_future_dataframe(periods=30, freq="D", include_history=True)
forecast = m.predict(future)

# 5️⃣ Evaluate (using only the training period for simplicity)
# NOTE: these are in-sample errors (the model has already seen these points),
# so they understate the error to expect on genuinely unseen dates.
from sklearn.metrics import mean_absolute_error
y_true = daily.set_index("ds")["y"]
# `forecast` has more rows than `daily` (it also covers future dates), so pick
# out only the predictions whose date has an observed value.
y_pred  = forecast.set_index("ds")["yhat"].loc[y_true.index]
mae = mean_absolute_error(y_true, y_pred)
print(f"MAE on training data: {mae:.2f}")

# MAPE (in percent) complements MAE with a scale-free error measure.
# It is undefined where the actual value is zero, so those dates are skipped
# instead of letting a division by zero blow up the average.
nonzero = y_true != 0
mape = ((y_true[nonzero] - y_pred[nonzero]).abs() / y_true[nonzero].abs()).mean() * 100
print(f"MAPE on training data: {mape:.2f}%")

# 6️⃣ Save forecast
import os

# Make sure the target directory is there; no error if it already exists.
os.makedirs("../data/processed", exist_ok=True)

# Write only the date, the point forecast and its uncertainty bounds,
# without the positional index.
forecast.to_csv(
    "../data/processed/forecast.csv",
    columns=["ds", "yhat", "yhat_lower", "yhat_upper"],
    index=False,
)
print("Forecast saved to ../data/processed/forecast.csv")


  from .autonotebook import tqdm as notebook_tqdm
14:29:06 - cmdstanpy - INFO - Chain [1] start processing
14:29:06 - cmdstanpy - INFO - Chain [1] done processing


MAE on training data: 3.35
Forecast saved to ../data/processed/forecast.csv
