In [None]:
# Setup
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import sys

# Put the parent directory first on the import path so the `ml` package used
# by later cells can be imported.
# NOTE(review): assumes the notebook sits one level below the project root — confirm.
sys.path.insert(0, "..")

# Shared colour palette; plotting cells index into this list.
COLORS = [
    "#FF6B6B",
    "#4ECDC4",
    "#45B7D1",
    "#96CEB4",
    "#FFEAA7",
]

# NOTE(review): this silences every warning category for the whole session.
warnings.filterwarnings("ignore")
plt.style.use("seaborn-v0_8-whitegrid")

print("Setup complete!")

In [None]:
# Load the two datasets used throughout the notebook: one frame of daily
# aggregates and one frame of per-item daily sales.
from ml.utils.data_utils import (
    load_daily_aggregates,
    load_item_daily_sales,
)

daily_sales = load_daily_aggregates()
item_sales = load_item_daily_sales()

# Quick size summary of what was loaded.
n_days = len(daily_sales)
n_records = len(item_sales)
n_items = item_sales["item_id"].nunique()

print(f"Daily aggregate data: {n_days} days")
print(f"Item-level data: {n_records} records")
print(f"Unique items: {n_items}")

## 1. Data Exploration


In [None]:
# Peek at the first rows of the daily aggregates and report the date span.
date_column = daily_sales["date"]

print("Daily Aggregate Sales:")
print(daily_sales.head())
print(f"\nDate range: {date_column.min()} to {date_column.max()}")

In [None]:
# Visualize time series: a 2x2 overview of orders, revenue, the distribution of
# daily orders, and the autocorrelation of daily orders.
from pandas.plotting import autocorrelation_plot

# Parse the dates into a local series (daily_sales itself is left untouched).
# The frame's "date" column is only converted with pd.to_datetime in the later
# day-of-week cell, so on a fresh top-to-bottom run this cell would otherwise
# plot whatever dtype the loader returned. Parsing here guarantees a real
# datetime x-axis; it is a no-op when the column is already datetime.
plot_dates = pd.to_datetime(daily_sales["date"])

fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Total orders over time
axes[0, 0].plot(plot_dates, daily_sales["total_orders"], color=COLORS[0])
axes[0, 0].set_title("Daily Orders Over Time", fontweight="bold")
axes[0, 0].set_xlabel("Date")
axes[0, 0].set_ylabel("Total Orders")

# Total revenue over time
axes[0, 1].plot(plot_dates, daily_sales["total_revenue"], color=COLORS[1])
axes[0, 1].set_title("Daily Revenue Over Time", fontweight="bold")
axes[0, 1].set_xlabel("Date")
axes[0, 1].set_ylabel("Revenue ($)")

# Distribution of daily orders
axes[1, 0].hist(daily_sales["total_orders"], bins=30, color=COLORS[2], edgecolor="white")
axes[1, 0].set_title("Distribution of Daily Orders", fontweight="bold")
axes[1, 0].set_xlabel("Number of Orders")
axes[1, 0].set_ylabel("Frequency")

# Autocorrelation of daily orders; the x-axis is clipped to the first 60 lags
autocorrelation_plot(daily_sales["total_orders"], ax=axes[1, 1])
axes[1, 1].set_title("Order Autocorrelation", fontweight="bold")
axes[1, 1].set_xlim([0, 60])

plt.tight_layout()
plt.show()

In [None]:
# Day of week patterns: mean and standard deviation of daily orders per weekday.
# These two assignments modify daily_sales in place, so the parsed dates and
# the day_of_week column are visible to every later cell that reads the frame.
daily_sales["date"] = pd.to_datetime(daily_sales["date"])
daily_sales["day_of_week"] = daily_sales["date"].dt.day_name()

# Calendar order for the bars (groupby alone would sort the names alphabetically).
dow_order = [
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
    "Sunday",
]
orders_by_weekday = daily_sales.groupby("day_of_week")["total_orders"]
dow_stats = orders_by_weekday.agg(["mean", "std"]).reindex(dow_order)

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(
    dow_stats.index,
    dow_stats["mean"],
    yerr=dow_stats["std"],  # error bars show one standard deviation
    capsize=5,
    color=COLORS[0],
    alpha=0.8,
)
ax.set_title("Average Daily Orders by Day of Week", fontweight="bold")
ax.set(xlabel="Day of Week", ylabel="Average Orders")
ax.tick_params(axis="x", rotation=45)
plt.tight_layout()
plt.show()

## 2. Feature Engineering


In [None]:
from ml.utils.data_utils import (
    create_time_features,
    create_lag_features,
    create_rolling_features,
)

# Work on a copy so daily_sales keeps its current columns; the feature
# engineering cells below keep extending `df`.
df = create_time_features(daily_sales.copy(), date_col="date")

# Report only the columns that the time-feature step added.
original_columns = set(daily_sales.columns)
print("Time features created:")
print([name for name in df.columns if name not in original_columns])

In [None]:
# Lag features of the target at 1, 7, 14 and 28 rows back
# (presumably days, given one row per day — confirm against the helper).
df = create_lag_features(df, target_col="total_orders", lags=[1, 7, 14, 28])

lag_columns = [name for name in df.columns if "lag" in name]
print("\nLag features created:")
print(lag_columns)

In [None]:
# Rolling-window features of the target over windows of 7, 14 and 28.
df = create_rolling_features(df, target_col="total_orders", windows=[7, 14, 28])

rolling_columns = [name for name in df.columns if "rolling" in name]
print("\nRolling features created:")
print(rolling_columns)

In [None]:
# Final feature set: report the feature count and preview complete rows.
# NOTE(review): the "- 2" presumably excludes the date and target columns — confirm.
feature_count = df.shape[1] - 2

print(f"\nTotal features: {feature_count}")
print("\nSample of engineered data:")
# Rows containing any NaN are dropped for the preview only; df is unchanged.
df.dropna().head()

## 3. Model Training


In [None]:
from ml.pipelines.demand_forecasting import DemandForecaster

# Baseline forecaster for total daily orders.
forecaster = DemandForecaster(
    target_col="total_orders",
    lags=[1, 7, 14, 21, 28],
    rolling_windows=[7, 14, 28],
)

# Reload the aggregates rather than reusing daily_sales, which earlier cells
# extended with extra columns.
train_df = load_daily_aggregates()
metrics = forecaster.train(train_df)

print("\nTraining Complete!")
print("=" * 40)
for name in metrics:
    print(f"  {name}: {metrics[name]:.4f}")

In [None]:
# Feature importance: horizontal bar chart of the top 20 features reported by
# the basic forecaster.
importance = forecaster.get_feature_importance(20)

fig, ax = plt.subplots(figsize=(10, 8))
ax.barh(importance["feature"], importance["importance"], color=COLORS[1])
ax.set_title("Top 20 Features for Demand Forecasting", fontweight="bold")
ax.set_xlabel("Feature Importance")
# Flip the y-axis so the first row of `importance` is drawn at the top.
ax.invert_yaxis()
plt.tight_layout()
plt.show()

## 4. Hyperparameter Optimization with Optuna


In [None]:
from ml.pipelines.enhanced_forecasting import EnhancedDemandForecaster

# Enhanced forecaster configured with the same target, lags and rolling
# windows as the basic forecaster cell.
enhanced_forecaster = EnhancedDemandForecaster(
    target_col="total_orders",
    lags=[1, 7, 14, 21, 28],
    rolling_windows=[7, 14, 28],
)

# Hyperparameter search on freshly loaded data; the trial count is kept small
# for the demo.
train_df = load_daily_aggregates()
results = enhanced_forecaster.train_with_optimization(
    train_df,
    n_trials=20,  # Increase for better results
    n_cv_splits=3,
    experiment_name="demand_forecast_notebook",
)

print("\nOptimization Complete!")

In [None]:
# Best parameters found by the optimization run
print("\nBest Hyperparameters:")
print("=" * 40)
best_params = results["best_params"]
for name in best_params:
    print(f"  {name}: {best_params[name]}")

In [None]:
# Final metrics reported by the optimization run
print("\nFinal Model Metrics:")
print("=" * 40)
final_metrics = results["metrics"]
for name in final_metrics:
    print(f"  {name}: {final_metrics[name]:.4f}")

In [None]:
# Optimization history: score per trial plus the distribution of scores.
history = pd.DataFrame(results["optimization_history"])
trial_scores = history["value"]
# The minimum score is treated as the best one in both panels.
best_score = trial_scores.min()

fig, axes = plt.subplots(1, 2, figsize=(12, 5))
progress_ax, distribution_ax = axes

# Left panel: score of each trial, with the best score as a reference line
progress_ax.plot(history["trial"], trial_scores, "o-", color=COLORS[0], alpha=0.7)
progress_ax.axhline(
    best_score,
    color="green",
    linestyle="--",
    label=f"Best: {best_score:.4f}",
)
progress_ax.set_title("Optimization Progress", fontweight="bold")
progress_ax.set(xlabel="Trial", ylabel="CV Score (RMSE)")
progress_ax.legend()

# Right panel: histogram of the scores across all trials
distribution_ax.hist(trial_scores, bins=20, color=COLORS[1], edgecolor="white")
distribution_ax.axvline(best_score, color="green", linestyle="--", label="Best")
distribution_ax.set_title("Score Distribution Across Trials", fontweight="bold")
distribution_ax.set(xlabel="CV Score (RMSE)", ylabel="Frequency")
distribution_ax.legend()

plt.tight_layout()
plt.show()

## 5. Model Explainability with SHAP


In [None]:
# Get SHAP explanations for the last 100 complete rows of the feature frame.
# NOTE(review): this calls the forecaster's underscore-prefixed
# _prepare_features helper directly — confirm there is no public equivalent.
test_df = enhanced_forecaster._prepare_features(train_df, is_training=False).dropna()

explain_sample = test_df[enhanced_forecaster.feature_cols].tail(100)
explanations = enhanced_forecaster.explain_predictions(explain_sample, num_features=15)

print("\nSHAP-based Feature Importance:")
shap_importance = pd.DataFrame(explanations["feature_importance_shap"])
shap_importance

In [None]:
# SHAP visualization: bar-style summary plot over the last 100 prepared rows.
# The whole cell is skipped with a hint when an ImportError is raised.
try:
    import shap

    shap_sample = test_df[enhanced_forecaster.feature_cols].tail(100)
    tree_explainer = shap.TreeExplainer(enhanced_forecaster.model)
    sample_shap_values = tree_explainer.shap_values(shap_sample)

    plt.figure(figsize=(10, 8))
    # show=False lets the title and layout be applied before the figure is shown.
    shap.summary_plot(
        sample_shap_values,
        shap_sample,
        plot_type="bar",
        max_display=15,
        show=False,
    )
    plt.title("SHAP Feature Importance", fontweight="bold")
    plt.tight_layout()
    plt.show()
except ImportError:
    print("SHAP not installed. Install with: pip install shap")

## 6. Generate Forecasts


In [None]:
# Generate 14-day forecast from freshly loaded history using the optimized
# forecaster; the result is displayed as the cell output.
historical = load_daily_aggregates()
forecast = enhanced_forecaster.forecast_future(
    historical,
    days_ahead=14,
)

print("\n14-Day Forecast:")
forecast

In [None]:
# Visualize forecast: last 60 rows of history followed by the forecast.
fig, ax = plt.subplots(figsize=(14, 6))

# `historical` was freshly loaded and, unlike daily_sales in the day-of-week
# cell, never passed through pd.to_datetime. All dates are parsed locally so
# the historical line, the forecast line and the cutoff marker share one
# datetime x-axis. Parsing is a no-op for columns that are already datetime.

# Historical data (last 60 days); `recent` is a copy, so assigning to it does
# not modify `historical`.
recent = historical.tail(60).copy()
recent["date"] = pd.to_datetime(recent["date"])
ax.plot(recent["date"], recent["total_orders"], "b-", label="Historical", linewidth=1.5)

# Forecast (only drawn when there is at least one forecast row)
if len(forecast) > 0:
    forecast_dates = pd.to_datetime(forecast["date"])
    ax.plot(forecast_dates, forecast["total_orders"], "r--", label="Forecast", linewidth=2)
    ax.scatter(forecast_dates, forecast["total_orders"], color="red", s=50, zorder=5)

# Vertical marker at the latest historical date
history_end = pd.to_datetime(historical["date"]).max()
ax.axvline(history_end, color="gray", linestyle=":", alpha=0.7)
ax.set_title("Demand Forecast - Next 14 Days", fontweight="bold", fontsize=14)
ax.set_xlabel("Date")
ax.set_ylabel("Total Orders")
ax.legend()
plt.tight_layout()
plt.show()

## 7. Item-Level Forecasting


In [None]:
# Train item-level forecaster; the helper returns the fitted forecaster and
# its metrics.
from ml.pipelines.demand_forecasting import train_item_level_forecaster

item_forecaster, item_metrics = train_item_level_forecaster()

print("\nItem-Level Forecaster Metrics:")
for name in item_metrics:
    print(f"  {name}: {item_metrics[name]:.4f}")

In [None]:
# Item-level feature importance: top 15 features of the item-level forecaster.
item_importance = item_forecaster.get_feature_importance(15)

fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(item_importance["feature"], item_importance["importance"], color=COLORS[3])
ax.set_title("Item-Level Forecaster: Feature Importance", fontweight="bold")
ax.set_xlabel("Feature Importance")
# Flip the y-axis so the first row of `item_importance` is drawn at the top.
ax.invert_yaxis()
plt.tight_layout()
plt.show()

## 8. Cross-Validation Analysis


In [None]:
# Run cross-validation with 5 splits on freshly loaded data.
cv_input = load_daily_aggregates()
cv_results = enhanced_forecaster.cross_validate(cv_input, n_splits=5)

print("\nCross-Validation Results:")
print("=" * 40)
for name in cv_results:
    print(f"  {name}: {cv_results[name]:.4f}")

In [None]:
# Visualize CV results: one bar chart per metric showing the score of each
# fold, with the mean across folds as a dashed reference line.
cv_scores = enhanced_forecaster.cv_results
n_metrics = len(cv_scores)

if n_metrics == 0:
    # plt.subplots cannot build a grid with zero columns.
    print("No cross-validation scores available to plot.")
else:
    # Size the grid from the data instead of hard-coding three panels, so any
    # number of metrics is handled without an IndexError or blank panels.
    # squeeze=False always returns a 2-D array, even for a single metric.
    fig, axes = plt.subplots(
        1,
        n_metrics,
        figsize=(max(14.0, 14 * n_metrics / 3), 4),
        squeeze=False,
    )

    for i, ((metric, scores), ax) in enumerate(zip(cv_scores.items(), axes[0])):
        mean_score = np.mean(scores)
        # Cycle through the palette so more metrics than colours still works.
        ax.bar(range(1, len(scores) + 1), scores, color=COLORS[i % len(COLORS)])
        ax.axhline(mean_score, color="red", linestyle="--", label=f"Mean: {mean_score:.4f}")
        ax.set_title(f"{metric.upper()} by Fold", fontweight="bold")
        ax.set_xlabel("Fold")
        ax.set_ylabel(metric.upper())
        ax.legend()

    plt.tight_layout()
    plt.show()

## 9. Model Summary


In [None]:
# Get optimization summary and print its headline numbers.
summary = enhanced_forecaster.get_optimization_summary()

print("\nOptimization Summary:")
print("=" * 50)
print(f"  Total Trials: {summary['n_trials']}")
# The three score lines share one format, so they are printed from a table.
score_rows = (
    ("Best Score", "best_score"),
    ("Worst Score", "worst_score"),
    ("Mean Score", "mean_score"),
)
for label, key in score_rows:
    print(f"  {label}: {summary[key]:.4f}")
print(f"  Improvement: {summary['improvement']:.1f}%")
print(f"\nBest Parameters: {summary['best_params']}")

In [None]:
# Save model under the name "demand_forecaster_notebook"; where and in what
# format it is written is decided by the forecaster's save method.
enhanced_forecaster.save("demand_forecaster_notebook")
# The confirmation previously printed a mis-decoded byte sequence instead of
# the intended check-mark character.
print("\n✅ Model saved successfully!")