# In [None]:
# %%
import pandas as pd
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Load the raw order log and convert its timestamp column to datetimes.
orders = pd.read_csv("../data/raw/synthetic_orders.csv")
orders["created_at"] = pd.to_datetime(orders["created_at"])

# Aggregate demand by day + SKU.
# The grouping key is the calendar date of each order; the Series keeps the
# name "created_at", which is why that column is renamed to "date" below.
# groupby sorts its keys by default, so rows come out ordered by date, then sku.
order_day = orders["created_at"].dt.date
qty_per_day_sku = orders.groupby([order_day, "sku"])["qty"].sum()
daily_demand = qty_per_day_sku.reset_index().rename(
    columns={"created_at": "date", "qty": "demand"}
)

# %%
# Simple features: day of week (0 = Monday ... 6 = Sunday) kept as a single
# integer column, plus one indicator column per SKU.
weekday = pd.to_datetime(daily_demand["date"]).dt.dayofweek
daily_demand["dow"] = weekday
feature_frame = daily_demand[["dow", "sku"]]
X = pd.get_dummies(feature_frame, columns=["sku"])
y = daily_demand["demand"]

# shuffle=False preserves row order: the first 80% of rows train the model and
# the last 20% are held out. NOTE(review): this is only a chronological split
# if daily_demand is already ordered by date -- confirm upstream ordering.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=False,
)

# %%
# Fit a LightGBM regressor with 200 boosting rounds on the training split.
model = LGBMRegressor(n_estimators=200)
model.fit(X_train, y_train)

# Score on the held-out split. RMSE is taken as the square root of the MSE
# instead of passing `squared=False`: that keyword was deprecated in
# scikit-learn 1.4 and removed in 1.6, where it raises TypeError. This form
# gives the same value on every scikit-learn version.
preds = model.predict(X_test)
rmse = mean_squared_error(y_test, preds) ** 0.5
print(f"RMSE: {rmse:.2f}")

# %%
import matplotlib.pyplot as plt

# Overlay the test-split targets and the model's predictions on the current
# axes. Both are passed as plain arrays, so the x-axis is simply the row
# position within the test split.
ax = plt.gca()
ax.plot(y_test.values, label="True")
ax.plot(preds, label="Predicted")
ax.set_title("Demand Forecast vs Actual")
ax.legend()
plt.show()
