<a href="https://colab.research.google.com/github/huckfive/ImageClassification/blob/main/forecast1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import pandas as pd
import numpy as np
import xgboost as xgb
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from dateutil.relativedelta import relativedelta
import os

# === CONFIGURATION ===
# Input and output locations.
CSV_FILE = "monthly_demand.csv"    # demand table loaded during setup
PLOT_DIR = "plots"                 # directory that receives one PNG per article
PDF_FILE = "forecast_report.pdf"   # PDF that collects every article's plot

# Forecast settings.
FORECAST_HORIZON = 6               # number of future months to predict
CONFIDENCE_LEVEL_Z = 1.15          # multiplier on RMSE for the 75% interval band

# === SETUP ===
# Make sure the plot directory exists before any figure is written to it.
os.makedirs(PLOT_DIR, exist_ok=True)

# Load the raw table and strip whitespace from the headers *before* any column
# is referenced by name. Asking read_csv to parse "date" directly would fail
# on a padded header such as " date", which is exactly what the strip is
# there to tolerate.
df = pd.read_csv(CSV_FILE)
df.columns = df.columns.str.strip()
# Convert explicitly so unparseable dates fail here rather than later at the
# first `.dt` access.
df["date"] = pd.to_datetime(df["date"])
# Chronological order within each article; the lag features rely on it.
df = df.sort_values(by=["article_id", "date"])

# One dict per (article, future month), filled by the per-article loop.
forecast_results = []
# Multi-page PDF that receives one page per article; closed in the final step.
pdf = PdfPages(PDF_FILE)

# === PROCESS EACH ARTICLE ===
# Model inputs, in the column order used for both fitting and prediction.
feature_cols = ["month", "year", "lag_1", "lag_12"]

# groupby hands over each article's rows once, instead of re-filtering the
# whole frame with a boolean mask for every article.
for article_id, article_rows in df.groupby("article_id"):
    # Ensure monthly frequency. Summing demand per calendar month keeps rows
    # whose date is not the 1st of the month, merges duplicate dates, and
    # yields 0 for months that have no rows at all.
    article_df = (
        article_rows.set_index("date")["demand"]
        .resample("MS")
        .sum()
        .astype(float)
        .reset_index()
    )

    # Feature engineering: calendar position plus last-month and
    # same-month-last-year demand.
    article_df["month"] = article_df["date"].dt.month
    article_df["year"] = article_df["date"].dt.year
    article_df["lag_1"] = article_df["demand"].shift(1)
    article_df["lag_12"] = article_df["demand"].shift(12)
    # The first 12 months have no lag_12 value and are dropped here.
    article_df = article_df.dropna()

    if len(article_df) < 24:
        print(f"Skipping article {article_id}: not enough data.")
        continue

    # Hold out the most recent FORECAST_HORIZON months to measure error.
    train_df = article_df.iloc[:-FORECAST_HORIZON]
    holdout_df = article_df.iloc[-FORECAST_HORIZON:]

    eval_model = xgb.XGBRegressor(objective="reg:squarederror", n_estimators=100)
    eval_model.fit(train_df[feature_cols], train_df["demand"])

    # RMSE on the held-out months, not on the rows the model was fitted to:
    # in-sample error understates the real error and makes the band too
    # narrow. NOTE: holdout rows carry their *actual* lag values, so this is
    # a one-step-ahead error and is likely optimistic for later steps.
    holdout_pred = eval_model.predict(holdout_df[feature_cols])
    rmse = np.sqrt(np.mean((holdout_df["demand"].to_numpy() - holdout_pred) ** 2))

    # Refit on the full history so the forecasting model has seen the most
    # recent months it is extrapolating from.
    model = xgb.XGBRegressor(objective="reg:squarederror", n_estimators=100)
    model.fit(article_df[feature_cols], article_df["demand"])

    # Forecast into the future, one month at a time. Each prediction is fed
    # back as the next step's lag_1 and is also available as lag_12 to any
    # step 12 months later.
    demand_by_date = dict(zip(article_df["date"], article_df["demand"]))
    last_date = article_df["date"].max()
    prev_demand = article_df["demand"].iloc[-1]
    future_rows = []

    for step in range(1, FORECAST_HORIZON + 1):
        future_date = last_date + relativedelta(months=step)
        # Falls back to 0 when the same month of the previous year is unknown.
        lag_12 = demand_by_date.get(future_date - relativedelta(months=12), 0)

        X_pred = pd.DataFrame(
            [{
                "month": future_date.month,
                "year": future_date.year,
                "lag_1": prev_demand,
                "lag_12": lag_12,
            }],
            columns=feature_cols,
        )
        pred = model.predict(X_pred)[0]

        # Store forecast
        forecast_results.append({
            "article_id": article_id,
            "date": future_date,
            "predicted_demand": pred
        })

        # Symmetric band of +/- z * RMSE around the point forecast.
        future_rows.append({
            "date": future_date,
            "predicted_demand": pred,
            "lower": pred - CONFIDENCE_LEVEL_Z * rmse,
            "upper": pred + CONFIDENCE_LEVEL_Z * rmse
        })

        # Make the prediction visible to the following steps.
        demand_by_date[future_date] = pred
        prev_demand = pred

    # === Plotting ===
    # Show at most the last 24 observed months next to the forecast.
    past = article_df[["date", "demand"]].iloc[-24:]
    future_df = pd.DataFrame(future_rows)

    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        ax.plot(past["date"], past["demand"], label="Historical", marker="o")
        ax.plot(future_df["date"], future_df["predicted_demand"], label="Forecast", marker="o")
        ax.fill_between(future_df["date"], future_df["lower"], future_df["upper"], alpha=0.3, label="75% CI")
        ax.set_title(f"Forecast for Article {article_id}")
        ax.set_xlabel("Date")
        ax.set_ylabel("Demand")
        ax.grid(True)
        ax.legend()
        fig.tight_layout()

        # Save PNG and add to PDF
        fig.savefig(os.path.join(PLOT_DIR, f"forecast_{article_id}.png"))
        pdf.savefig(fig)
    finally:
        # Always release the figure, even if saving fails, so figures do not
        # accumulate across articles.
        plt.close(fig)

# === FINISH UP ===
# Finalise the multi-page PDF report.
pdf.close()

# Name the columns explicitly so the CSV still gets a header row when every
# article was skipped and forecast_results is empty.
output_csv = "forecast_output.csv"
forecast_df = pd.DataFrame(
    forecast_results,
    columns=["article_id", "date", "predicted_demand"],
)
forecast_df.to_csv(output_csv, index=False)

print("✅ Forecast complete.")
print(f"📂 Plots saved in: {PLOT_DIR}/")
print(f"📄 PDF report saved as: {PDF_FILE}")
print(f"📊 Forecast data saved as: {output_csv}")


✅ Forecast complete.
📂 Plots saved in: plots/
📄 PDF report saved as: forecast_report.pdf
📊 Forecast data saved as: forecast_output.csv
