# FMCG Demand & Sales Forecasting — End-to-End Demo

This notebook walks through the full pipeline:
1. Generate synthetic FMCG data (10 SKUs, 2 warehouses, 6 months)
2. Visualize Indonesian calendar features
3. Preprocess demand data (OOS detection, lag features, Lebaran signal)
4. Train demand model (BiLSTM + Attention) and plot forecast
5. Run individual sales forecasting (LSTM + MultiheadAttention)
6. Run merged sales + COGS forecasting (plain LSTM, sequential)
7. Generate purchase recommendations
8. Summary table

> **Fast demo config:** 10 SKUs, 2 warehouses, 6 months of data, 50 epochs.

In [None]:
import sys
sys.path.insert(0, '../src')

import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Notebook-wide matplotlib defaults: wide figures and slightly larger text.
plt.rcParams['figure.figsize'] = (12, 5)
plt.rcParams['font.size'] = 11
print('Setup complete')

## 1. Generate Synthetic Data

In [None]:
from fmcg_forecast.data.generator import generate_products, generate_orders, generate_sales_data

# One seed is shared by all three generators, and the date range is kept in a
# single place, so the demo dataset is easy to resize.
demo_seed = 42
demo_period = {'start_date': '2024-01-01', 'end_date': '2024-06-30'}

products = generate_products(num_skus=10, seed=demo_seed)
orders = generate_orders(products, num_warehouses=2, seed=demo_seed, **demo_period)
sales = generate_sales_data(orders, products, seed=demo_seed)

# Quick size check of each generated table, then a peek at the sales rows.
print(
    f'Products: {len(products)} SKUs across {products["category"].nunique()} categories',
    f'Orders:   {len(orders):,} rows',
    f'Sales:    {len(sales):,} rows  |  columns: {list(sales.columns)}',
    sep='\n',
)
sales.head(3)

## 2. Visualize Calendar Features

In [None]:
from fmcg_forecast.data.calendar import create_feature_calendar

# Build the feature calendar for the demo window and make sure the date
# column is datetime so matplotlib formats the shared x-axis correctly.
cal = create_feature_calendar('2024-01-01', '2024-06-30')
cal['date'] = pd.to_datetime(cal['date'])
dates = cal['date']

# Three stacked panels sharing the x-axis: Ramadan/Lebaran flags, the
# Lebaran proximity signal, and the payday-period flag.
fig, (ax_ramadan, ax_signal, ax_payday) = plt.subplots(3, 1, figsize=(14, 8), sharex=True)

ax_ramadan.fill_between(dates, cal['is_ramadan_month'], alpha=0.4, color='green', label='Ramadan')
ax_ramadan.fill_between(dates, cal['is_lebaran_peak_week'], alpha=0.6, color='red', label='Lebaran peak')
ax_ramadan.set_ylabel('Ramadan/Lebaran')
ax_ramadan.legend(loc='upper right')

ax_signal.plot(dates, cal['lebaran_proximity_signal'], color='darkorange', label='Lebaran proximity')
ax_signal.set_ylabel('Signal (0-1)')
ax_signal.legend(loc='upper right')

ax_payday.fill_between(dates, cal['is_payday_period'], alpha=0.4, color='navy', label='Payday period (25th-5th)')
ax_payday.set_ylabel('Payday')
ax_payday.legend(loc='upper right')
ax_payday.xaxis.set_major_formatter(mdates.DateFormatter('%b %d'))

fig.suptitle('Indonesian FMCG Calendar Features — Jan to Jun 2024', fontsize=13)
fig.tight_layout()
plt.show()

## 3. Preprocess Demand Data

In [None]:
from fmcg_forecast.demand.preprocessing import detect_oos_periods, remove_outliers, preprocess_demand_data

# Build a single demand series for the first product that appears in `sales`.
# NOTE: rows are filtered by product only and then summed per date, so the
# series covers whatever warehouses `sales` holds for that product. The
# 'WH-A' value assigned below is a placeholder id, not a warehouse filter.
sample_product = sales['product_name'].iloc[0]
sample_demand = (
    sales[sales['product_name'] == sample_product]
    .groupby('date')
    .agg({'main_product_sales': 'sum', 'promo_sales': 'sum'})
    .reset_index()
)
sample_demand['id_gudang'] = 'WH-A'
sample_demand['product_name'] = sample_product
sample_demand['prioritas_sales'] = 0

# Payday window: the 25th of one month through the 5th of the next.
# Parse the date column once and reuse the day-of-month for both bounds.
day_of_month = pd.to_datetime(sample_demand['date']).dt.day
sample_demand['is_payday_period'] = ((day_of_month >= 25) | (day_of_month <= 5)).astype(int)

# Detect OOS periods
oos = detect_oos_periods(sample_demand)
print(f'OOS periods detected: {len(oos)}')

# Preprocess, then list the engineered columns (sales_* and lebaran*).
processed = preprocess_demand_data(sample_demand)
feature_cols = [c for c in processed.columns if c.startswith(('sales_', 'lebaran'))]
print(f'Preprocessed shape: {processed.shape}')
print(f'Features: {feature_cols}')

## 4. Train Demand Model and Plot Forecast

In [None]:
from fmcg_forecast.config import DemandConfig
from fmcg_forecast.demand.forecaster import TimeSeriesForecaster

# Fast demo config. The epoch count lives in one variable so the progress
# message below can never disagree with what the model is trained with.
demand_epochs = 50
cfg = DemandConfig(
    epochs=demand_epochs,
    input_window=7,
    forecast_horizon=14,
    batch_size=8,
    cv_splits=2,
)
forecaster = TimeSeriesForecaster(cfg)

print(f'Training demand model ({demand_epochs} epochs)...')
forecaster.train_global_model(processed)
print('Training complete.')

In [None]:
# The forecast is anchored on the last date present in the preprocessed data.
# NOTE(review): whether predict_future treats this date as inclusive is not
# visible from this notebook — confirm against the forecaster.
last_observed = processed['date'].max()
forecast_start = str(last_observed.date())
forecast_df = forecaster.predict_future(processed, forecast_start)

# Historical series as a solid line, forecast as dashed markers.
fig, ax = plt.subplots(figsize=(13, 5))
ax.plot(
    processed['date'], processed['main_product_sales'],
    label='Historical', color='steelblue', alpha=0.8,
)
ax.plot(
    forecast_df['forecast_date'], forecast_df['forecast_value'],
    label='Forecast', color='darkorange', linestyle='--', marker='o', markersize=4,
)
ax.set_title('Demand Forecast — BiLSTM + Additive Attention', fontsize=13)
ax.set_xlabel('Date')
ax.set_ylabel('Units')
ax.legend()
ax.grid(True, linestyle=':')
fig.tight_layout()
plt.show()

print(f'Forecast rows: {len(forecast_df)}')
forecast_df.head()

## 5. Individual Sales Forecasting (per region-warehouse)

In [None]:
from fmcg_forecast.config import SalesConfig
from fmcg_forecast.sales.individual import SeasonalFinancialForecaster, preprocess_raw_data

# Use first 2 region-warehouse combos for speed
pair_cols = ['id_region', 'id_gudang']
sales_small = sales.copy()
if 'id_region' not in sales_small.columns:
    # The generated data may not carry a region; fall back to a single one.
    sales_small['id_region'] = 'R-1'

# First two distinct (region, warehouse) pairs, as plain tuples.
pairs = list(
    sales_small[pair_cols].drop_duplicates().head(2).itertuples(index=False, name=None)
)
# Vectorised membership test: avoids a Python-level row-wise apply that
# rebuilt the pair list on every row.
in_demo_pairs = pd.MultiIndex.from_frame(sales_small[pair_cols]).isin(pairs)
sales_small = sales_small[in_demo_pairs]

cfg_sales = SalesConfig(epochs=50, input_window=7, forecast_horizon=10, batch_size=8)
# No holiday calendar is supplied in this demo (empty set).
data_dict = preprocess_raw_data(sales_small, holiday_dates=set())

ind_forecaster = SeasonalFinancialForecaster(cfg_sales, holiday_dates=set())
print('Training individual models (2 region-warehouse pairs)...')
ind_results = ind_forecaster.run_forecasting(data_dict)
print(f'Trained {len(ind_results)} models.')

In [None]:
# Plot the first result whose key mentions 'sales'; nothing is drawn when
# no such key exists.
sales_keys = [name for name in ind_results if 'sales' in name]
if sales_keys:
    key = sales_keys[0]
    res = ind_results[key]
    hist, fcst = res['historical_df'], res['forecast_df']

    fig, ax = plt.subplots(figsize=(13, 5))
    ax.plot(hist['date'], hist['sales'], label='Historical', color='steelblue', alpha=0.8)
    ax.plot(
        fcst['date'], fcst['sales'],
        label='Forecast', color='darkorange', linestyle='--', marker='o', markersize=4,
    )
    ax.set_title(f'Individual Sales Forecast — {key}', fontsize=13)
    ax.set_xlabel('Date')
    ax.set_ylabel('Sales (IDR)')
    ax.legend()
    ax.grid(True, linestyle=':')
    fig.tight_layout()
    plt.show()

## 6. Merged Sales + COGS Forecasting

In [None]:
from fmcg_forecast.sales.merged import FinancialForecaster

# Demo assumption: COGS is approximated as a fixed share of daily sales.
cogs_ratio = 0.6

# Aggregate all rows to daily sales totals. Only main_product_sales is
# summed: COGS is derived from the sales total below, so aggregating
# promo_sales here would just be overwritten.
daily = (
    sales.groupby('date')
    .agg({'main_product_sales': 'sum'})
    .reset_index()
    .rename(columns={'main_product_sales': 'sales'})
)
daily['cogs'] = daily['sales'] * cogs_ratio

merged_forecaster = FinancialForecaster(cfg_sales, holiday_dates=set())
print('Training merged sales + COGS model...')
# The same frame is passed twice because it carries both the 'sales' and
# 'cogs' columns.
# NOTE(review): presumably the two arguments are the sales and COGS inputs —
# confirm against FinancialForecaster.run_forecasting.
merged_results = merged_forecaster.run_forecasting(daily, daily)
print('Done.')

In [None]:
# One panel per metric (sales on top, COGS below); a metric missing from
# merged_results leaves its panel empty.
panel_specs = (('sales', 'steelblue'), ('cogs', 'tomato'))
fig, axes = plt.subplots(2, 1, figsize=(13, 8), sharex=True)

for ax, (metric, color) in zip(axes, panel_specs):
    if metric not in merged_results:
        continue
    result = merged_results[metric]
    hist, fcst = result['historical_df'], result['forecast_df']
    ax.plot(hist['date'], hist[metric], label='Historical', color=color, alpha=0.8)
    ax.plot(fcst['date'], fcst[metric], label='Forecast', color='darkorange', linestyle='--')
    ax.set_ylabel(f'{metric.upper()} (IDR)')
    ax.legend()
    ax.grid(True, linestyle=':')
    ax.set_title(f'Merged {metric.upper()} Forecast')

fig.tight_layout()
plt.show()

# Display the forecaster's own summary as the cell output.
summary = merged_forecaster.generate_summary(merged_results)
summary

## 7. Purchase Recommendations

In [None]:
from fmcg_forecast.sales.recommendations import calculate_recommendations

# Recommendations need both forecasts; the cell is a no-op if either is missing.
if 'sales' in merged_results and 'cogs' in merged_results:
    sales_fcst = merged_results['sales']['forecast_df']
    cogs_fcst = merged_results['cogs']['forecast_df']
    merged_forecast = pd.merge(sales_fcst, cogs_fcst, on='date')

    # Empty holiday table: the demo supplies no holiday dates.
    holidays = pd.DataFrame({'date': pd.to_datetime([])})
    rec = calculate_recommendations(merged_forecast, holidays, mode='merged')

    # Colour each bar by the is_working_day flag returned with the recommendations.
    bar_colors = ['steelblue' if working else 'lightgray' for working in rec['is_working_day']]

    fig, ax = plt.subplots(figsize=(13, 5))
    ax.bar(rec['date'], rec['recommended_buy'], color=bar_colors, label='Rec Buy')
    ax.plot(rec['date'], rec['cogs'], 'r--', label='COGS Forecast', linewidth=2)
    ax.set_title('Purchase Recommendations (blue=working day, grey=non-working day redistributed)')
    ax.set_xlabel('Date')
    ax.set_ylabel('IDR')
    ax.legend()
    ax.grid(True, linestyle=':', axis='y')
    fig.tight_layout()
    plt.show()
    print(f'Total COGS: {rec["cogs"].sum():,.0f} | Total Rec Buy: {rec["recommended_buy"].sum():,.0f}')

## 8. Summary

In [None]:
# Static overview of the four pipelines; horizons are read from the configs
# used above so the table follows any change to them.
# pandas is already imported as `pd` in the setup cell, so it is not
# re-imported here.
summary_columns = ['Pipeline', 'Model', 'Features', 'Horizon', 'Outputs']
sales_horizon = f'{cfg_sales.forecast_horizon} business days'

summary_rows = [
    ('Demand', 'BiLSTM + Additive Attention', 'Lag + Calendar + Lebaran',
     f'{cfg.forecast_horizon} days', 'Quantile forecast per product-WH'),
    ('Sales Individual', 'LSTM + MultiheadAttention', 'Sin/cos seasonal',
     sales_horizon, 'Sales + COGS per region-WH'),
    ('Sales Merged', 'Plain LSTM', 'DOW one-hot',
     sales_horizon, 'Aggregated Sales + COGS'),
    ('Recommendations', 'Rule-based', 'COGS forecast + holidays',
     'Same as sales', 'Rec buy per working day (60/40 redistribution)'),
]

pd.DataFrame(summary_rows, columns=summary_columns).set_index('Pipeline')