# Reconciliation Algorithm Testing

This notebook tests the forecast reconciliation algorithm, which brings the ML and TS forecasts to the same time-granularity level so that they can be compared record by record.


In [None]:
import sys
import os

sys.path.insert(0, os.path.abspath('../src'))

from reconciliation import reconciliation
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns

# Apply matplotlib's 'seaborn-v0_8' style sheet to every figure drawn below.
# NOTE(review): this style-sheet name presumably only exists in newer matplotlib
# releases — confirm against the environment's pinned version.
plt.style.use('seaborn-v0_8')
# Make seaborn's "husl" palette the default color cycle.
sns.set_palette("husl")


## 1. Generate Test Data

Generate sample TS (monthly) and ML (weekly) forecasts over the same forecast window, plus a segment assignment for each product–location pair.


In [None]:
# Synthetic inputs for the reconciliation test: a monthly TS forecast, a weekly
# ML forecast and a segment assignment per product/location combination.

# A dedicated, seeded generator makes "Restart Kernel -> Run All" reproduce
# exactly the same data (and leaves the global np.random state untouched).
SEED = 42
rng = np.random.default_rng(SEED)

hist_end_dt = datetime(2024, 1, 31)
# NOTE(review): treated as a number of days here — the previously hard-coded
# window (2024-02-01 .. 2024-04-30) is exactly 90 days; confirm the unit
# expected by reconciliation().
fc_horizon = 90

# Derive the forecast window from the configuration instead of repeating
# literal date strings, so the data stays in sync with the config.
fc_start_dt = hist_end_dt + timedelta(days=1)
fc_end_dt = hist_end_dt + timedelta(days=fc_horizon)

dates_ts = pd.date_range(start=fc_start_dt, end=fc_end_dt, freq='MS')      # month starts
dates_ml = pd.date_range(start=fc_start_dt, end=fc_end_dt, freq='W-MON')   # Mondays

# Dimension members shared by all three generated tables.
PRODUCT_IDS = ['P001', 'P002', 'P003']
LOCATION_IDS = ['L001', 'L002']
CUSTOMER_ID = 'C001'
DISTR_CHANNEL_ID = 'CH1'

# One TS record per (month, product, location).
ts_data = [
    {
        'PRODUCT_LVL_ID': prod,
        'LOCATION_LVL_ID': loc,
        'CUSTOMER_LVL_ID': CUSTOMER_ID,
        'DISTR_CHANNEL_LVL_ID': DISTR_CHANNEL_ID,
        'PERIOD_DT': date,
        'FORECAST_VALUE': rng.uniform(50, 150),
    }
    for date in dates_ts
    for prod in PRODUCT_IDS
    for loc in LOCATION_IDS
]

# One ML record per (week, product, location), with randomly drawn
# demand/assortment attributes.
ml_data = [
    {
        'PRODUCT_LVL_ID': prod,
        'LOCATION_LVL_ID': loc,
        'CUSTOMER_LVL_ID': CUSTOMER_ID,
        'DISTR_CHANNEL_LVL_ID': DISTR_CHANNEL_ID,
        'PERIOD_DT': date,
        'FORECAST_VALUE_total': rng.uniform(60, 140),
        'DEMAND_TYPE': rng.choice(['promo', 'regular']),
        'ASSORTMENT_TYPE': rng.choice(['new', 'old']),
    }
    for date in dates_ml
    for prod in PRODUCT_IDS
    for loc in LOCATION_IDS
]

ts_forecast = pd.DataFrame(ts_data)
ml_forecast = pd.DataFrame(ml_data)

# One segment per (product, location). The key columns are intentionally
# lower-case here, unlike the forecast frames — kept exactly as in the original.
segments_data = [
    {
        'product_lvl_id': prod,
        'location_lvl_id': loc,
        'customer_lvl_id': CUSTOMER_ID,
        'distr_channel_lvl_id': DISTR_CHANNEL_ID,
        'SEGMENT_NAME': rng.choice(['Regular', 'Short', 'Retired', 'Low Volume']),
    }
    for prod in PRODUCT_IDS
    for loc in LOCATION_IDS
]

segments = pd.DataFrame(segments_data)


In [None]:
# Visual sanity check of the raw inputs: forecast levels (top panel) and the
# number of records per period (bottom panel) for the TS and ML forecasts.
fig, axes = plt.subplots(2, 1, figsize=(14, 10))

ts_forecast_sorted = ts_forecast.sort_values('PERIOD_DT')
ml_forecast_sorted = ml_forecast.sort_values('PERIOD_DT')

# Top panel: first two products only, to keep the chart readable.
for product_id in ts_forecast_sorted['PRODUCT_LVL_ID'].unique()[:2]:
    ts_product = ts_forecast_sorted[ts_forecast_sorted['PRODUCT_LVL_ID'] == product_id]
    ml_product = ml_forecast_sorted[ml_forecast_sorted['PRODUCT_LVL_ID'] == product_id]

    # Each product has one row per location, so plotting the filtered frame
    # directly puts several points on the same date and the line zig-zags
    # between locations. Sum over locations to get one value per date.
    ts_product_total = ts_product.groupby('PERIOD_DT')['FORECAST_VALUE'].sum()
    ml_product_total = ml_product.groupby('PERIOD_DT')['FORECAST_VALUE_total'].sum()

    axes[0].plot(ts_product_total.index, ts_product_total.values,
                 marker='o', linewidth=2, markersize=8, label=f'TS Product {product_id}', alpha=0.7)
    axes[0].plot(ml_product_total.index, ml_product_total.values,
                 marker='s', linewidth=1.5, markersize=6, label=f'ML Product {product_id}', alpha=0.7, linestyle='--')

axes[0].set_title('TS (Monthly) vs ML (Weekly) Forecasts - Before Reconciliation', fontsize=12, fontweight='bold')
axes[0].set_xlabel('Date')
axes[0].set_ylabel('Forecast Value (sum over locations)')
axes[0].legend()
axes[0].tick_params(axis='x', rotation=45)
axes[0].grid(True, alpha=0.3)

# Bottom panel: record counts per period start date, which makes the
# difference in time granularity between the two sources visible.
ts_counts = ts_forecast.groupby('PERIOD_DT').size()
ml_counts = ml_forecast.groupby('PERIOD_DT').size()

axes[1].bar(ts_counts.index, ts_counts.values, alpha=0.6, label='TS Forecasts (Monthly)', color='#4ECDC4', width=5)
axes[1].bar(ml_counts.index, ml_counts.values, alpha=0.6, label='ML Forecasts (Weekly)', color='#FF6B6B', width=1)
axes[1].set_title('Number of Forecast Records by Date - Different Granularities', fontsize=12, fontweight='bold')
axes[1].set_xlabel('Date')
axes[1].set_ylabel('Number of Records')
axes[1].legend()
axes[1].tick_params(axis='x', rotation=45)
axes[1].grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.show()


## 2. Run Reconciliation

Align the TS (monthly) and ML (weekly) forecasts to the same granularity.


In [None]:
# Settings handed to reconciliation(). The history end date and horizon come
# from the data-generation cell so that config and test data stay consistent.
# NOTE(review): 'MONTH' / 'WEEK.2' presumably identify the time levels of the
# TS and ML inputs; their exact meaning is defined by the reconciliation module.
config = {
    'IB_HIST_END_DT': hist_end_dt,
    'IB_FC_HORIZ': fc_horizon,
    'ts_time_lvl': 'MONTH',
    'ml_time_lvl': 'WEEK.2'
}

result = reconciliation(
    ts_forecast=ts_forecast,
    ml_forecast=ml_forecast,
    ts_segments=segments,
    config=config
)

# Columns that the preview below and the later plotting cells rely on.
preview_columns = ['PERIOD_DT', 'PERIOD_END_DT', 'product_lvl_id',
                   'TS_FORECAST_VALUE_REC', 'ML_FORECAST_VALUE',
                   'SEGMENT_NAME', 'DEMAND_TYPE']

# Fail fast with a readable message instead of a KeyError (or silently empty
# charts) further down — this notebook is a test, so it should actually check.
missing_columns = [col for col in preview_columns if col not in result.columns]
assert not missing_columns, f"reconciliation() output is missing columns: {missing_columns}"
assert not result.empty, "reconciliation() returned no rows"

result[preview_columns].head(10)


In [None]:
# Four views of the reconciled output: per-product time series, TS-vs-ML
# scatter, record counts per period and value distributions.
fig, axes = plt.subplots(2, 2, figsize=(16, 10))

result_sorted = result.sort_values('PERIOD_DT')

value_columns = ['TS_FORECAST_VALUE_REC', 'ML_FORECAST_VALUE']

# Top-left: first two products. Colors encode the source (TS vs ML) and the
# line style encodes the product, so the two products can be told apart
# (previously both products were drawn with identical colors and styles).
product_line_styles = ['-', '--']
for line_style, product_id in zip(product_line_styles, result_sorted['product_lvl_id'].unique()[:2]):
    result_product = result_sorted[result_sorted['product_lvl_id'] == product_id]

    # Collapse to one value per date so the line does not zig-zag when a
    # product has several rows per period (e.g. one per location — assumption,
    # verify against the reconciliation output). min_count=1 keeps all-NaN
    # periods as NaN instead of turning them into 0.
    product_totals = result_product.groupby('PERIOD_DT')[value_columns].sum(min_count=1)

    axes[0, 0].plot(product_totals.index, product_totals['TS_FORECAST_VALUE_REC'],
                    marker='o', linewidth=2, markersize=6, label=f'TS Product {product_id}', alpha=0.7,
                    color='#4ECDC4', linestyle=line_style)
    axes[0, 0].plot(product_totals.index, product_totals['ML_FORECAST_VALUE'],
                    marker='s', linewidth=2, markersize=6, label=f'ML Product {product_id}', alpha=0.7,
                    color='#FF6B6B', linestyle=line_style)

axes[0, 0].set_title('Reconciled Forecasts - TS vs ML', fontsize=12, fontweight='bold')
axes[0, 0].set_xlabel('Date')
axes[0, 0].set_ylabel('Forecast Value (sum per product and date)')
axes[0, 0].legend()
axes[0, 0].tick_params(axis='x', rotation=45)
axes[0, 0].grid(True, alpha=0.3)

# Top-right: record-level TS vs ML values with a y=x reference line. The line
# spans the range of BOTH series so it covers the whole scatter cloud.
axes[0, 1].scatter(result['TS_FORECAST_VALUE_REC'], result['ML_FORECAST_VALUE'],
                   alpha=0.6, s=50, color='steelblue')
diag_min = result[value_columns].min().min()
diag_max = result[value_columns].max().max()
axes[0, 1].plot([diag_min, diag_max], [diag_min, diag_max], 'r--', lw=2, label='y=x')
axes[0, 1].set_title('TS vs ML Forecast Values (Reconciled)', fontsize=12, fontweight='bold')
axes[0, 1].set_xlabel('TS Forecast Value (Reconciled)')
axes[0, 1].set_ylabel('ML Forecast Value')
axes[0, 1].legend()
axes[0, 1].grid(True, alpha=0.3)

# Bottom-left: number of reconciled records per period start date.
result_counts = result.groupby('PERIOD_DT').size()
axes[1, 0].bar(result_counts.index, result_counts.values, alpha=0.7, color='mediumseagreen', width=2)
axes[1, 0].set_title('Number of Reconciled Records by Date', fontsize=12, fontweight='bold')
axes[1, 0].set_xlabel('Date')
axes[1, 0].set_ylabel('Number of Records')
axes[1, 0].tick_params(axis='x', rotation=45)
axes[1, 0].grid(True, alpha=0.3, axis='y')

# Bottom-right: overlaid histograms of the two reconciled value columns.
result['TS_FORECAST_VALUE_REC'].hist(bins=20, ax=axes[1, 1], alpha=0.6, label='TS', color='#4ECDC4', edgecolor='black')
result['ML_FORECAST_VALUE'].hist(bins=20, ax=axes[1, 1], alpha=0.6, label='ML', color='#FF6B6B', edgecolor='black')
axes[1, 1].set_title('Distribution of Reconciled Forecast Values', fontsize=12, fontweight='bold')
axes[1, 1].set_xlabel('Forecast Value')
axes[1, 1].set_ylabel('Frequency')
axes[1, 1].legend()
axes[1, 1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()


In [None]:
# Bar charts of how often each segment and each demand type occurs in the
# reconciled output.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

segment_counts = result['SEGMENT_NAME'].value_counts()
demand_counts = result['DEMAND_TYPE'].value_counts()

# One entry per panel, left to right:
# (counts, bar color, title, x-axis label, x-tick rotation)
panel_specs = [
    (segment_counts, 'steelblue', 'Distribution of Segments in Reconciled Data', 'Segment', 45),
    (demand_counts, 'coral', 'Distribution of Demand Types in Reconciled Data', 'Demand Type', 0),
]

for panel_ax, (counts, bar_color, panel_title, x_label, tick_rotation) in zip(axes, panel_specs):
    counts.plot(kind='bar', ax=panel_ax, color=bar_color, edgecolor='black')
    panel_ax.set_title(panel_title, fontsize=12, fontweight='bold')
    panel_ax.set_xlabel(x_label)
    panel_ax.set_ylabel('Count')
    panel_ax.tick_params(axis='x', rotation=tick_rotation)
    panel_ax.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.show()


## 3. Summary

If every cell above ran without errors, the reconciliation test completed successfully and the output shown below is ready for hybridization.


In [None]:
# Show the reconciled output as this cell's rich display.
result
