# Notebook 18: Forecast Validation - 2025 Actual vs ML vs Human

This notebook validates our forecasting models against actual 2025 data (Jan-Sep).

**Three Methods Compared:**
1. **Human/Traditional**: 2024 annual total ÷ 12 (current budgeting method)
2. **Machine/ML**: Model predictions from consolidated forecasts
3. **Actual**: Real 2025 data (ground truth)

**CRITICAL**: This is validation only - do NOT retrain models with 2025 data!

**User Emphasis**: "This will be the most crucial visualisation of the whole project"

In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import sys
import os
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Put the parent of the current working directory on sys.path so the project's utils can be imported
project_root = Path.cwd().parent
sys.path.append(str(project_root))

notebook_title = 'Notebook 18: Forecast Validation - 2025 Actual vs ML vs Human'
print(notebook_title)
print('=' * 80)

Notebook 18: Forecast Validation - 2025 Actual vs ML vs Human


## Section 1: Load 2025 Actual Data (Jan-Sep)

In [2]:
# Load all 9 monthly order workbooks (Jan-Sep 2025)
data_dir = Path('../data/raw/2025')

# Each workbook is named "2025 <nn> <month> QS Auftragsanalyse.xlsx"
month_tokens = [
    '01 Jan', '02 Feb', '03 M√§r',
    '04 Apr', '05 Mai', '06 Jun',
    '07 Jul', '08 Aug', '09 Sep',
]
monthly_files = [f'2025 {token} QS Auftragsanalyse.xlsx' for token in month_tokens]

print('Loading 2025 actual data (Jan-Sep)...')
dfs_2025 = []

# Read one workbook at a time so progress and the per-file shape are reported as we go
for file_name in monthly_files:
    print(f'  Loading {file_name}...')
    month_frame = pd.read_excel(data_dir / file_name)
    dfs_2025.append(month_frame)
    print(f'    Shape: {month_frame.shape}')

# Stack the monthly frames into one raw frame with a fresh 0..n-1 index
df_2025_raw = pd.concat(dfs_2025, ignore_index=True)
tour_dates = df_2025_raw["Datum.Tour"]
print(f'\n‚úì Total 2025 data loaded: {df_2025_raw.shape}')
print(f'  Date range: {tour_dates.min()} to {tour_dates.max()}')

Loading 2025 actual data (Jan-Sep)...
  Loading 2025 01 Jan QS Auftragsanalyse.xlsx...
    Shape: (141699, 104)
  Loading 2025 02 Feb QS Auftragsanalyse.xlsx...
    Shape: (135739, 104)
  Loading 2025 03 M√§r QS Auftragsanalyse.xlsx...
    Shape: (149431, 104)
  Loading 2025 04 Apr QS Auftragsanalyse.xlsx...
    Shape: (143718, 104)
  Loading 2025 05 Mai QS Auftragsanalyse.xlsx...
    Shape: (143614, 104)
  Loading 2025 06 Jun QS Auftragsanalyse.xlsx...
    Shape: (136156, 104)
  Loading 2025 07 Jul QS Auftragsanalyse.xlsx...
    Shape: (150103, 104)
  Loading 2025 08 Aug QS Auftragsanalyse.xlsx...
    Shape: (136073, 104)
  Loading 2025 09 Sep QS Auftragsanalyse.xlsx...
    Shape: (146849, 104)

‚úì Total 2025 data loaded: (1283382, 104)
  Date range: 2025-01-01 00:00:00 to 2025-09-30 00:00:00


In [3]:
# Load the tours and Sparten workbooks stored alongside the monthly order files
print('Loading supporting data...')

tours_path = data_dir / '2025 QS Tourenaufstellung bis Sept.xlsx'
df_tours_2025 = pd.read_excel(tours_path)
print(f'  Tours data: {df_tours_2025.shape}')

sparten_path = data_dir / '2025 Sparten.xlsx'
df_sparten_2025 = pd.read_excel(sparten_path)
print(f'  Sparten data: {df_sparten_2025.shape}')

Loading supporting data...
  Tours data: (134940, 25)
  Sparten data: (384, 7)


## Section 2: Process 2025 Data Through Full Pipeline

Apply same processing as training data (Notebooks 02-04 logic)

In [7]:
print('Processing 2025 data with simplified aggregation...')
print('='*80)

# Step 1: Data Cleaning
print('1. Data Cleaning...')
# Start from a fresh copy of the raw frame so re-running this cell never stacks
# transformations on an already-processed df_2025
df_2025 = df_2025_raw.copy()

# Convert dates
df_2025['Datum.Tour'] = pd.to_datetime(df_2025['Datum.Tour'])

# Exclude Lager orders (warehouse operations) - inline filtering
if 'Lieferart 2.0' in df_2025.columns:
    # .copy() makes the filtered result an independent frame, so the column
    # assignments below never write into a slice of another frame
    df_2025 = df_2025[df_2025['Lieferart 2.0'] != 'Lager Auftrag'].copy()
    print(f'  After filtering Lager orders: {df_2025.shape[0]:,} records')
else:
    print('  No Lager filtering applied (column not found)')

# Step 2: Add temporal features (simplified)
print('2. Adding temporal features...')
df_2025['year'] = df_2025['Datum.Tour'].dt.year
df_2025['month'] = df_2025['Datum.Tour'].dt.month
df_2025['year_month'] = df_2025['Datum.Tour'].dt.to_period('M')

# Step 3: Identify carrier type (internal vs external)
print('3. Identifying carrier types...')
if 'Nummer.Spedition' in df_2025.columns:
    # Convert to numeric first, then classify
    # Internal carriers: 1-8889, External: 9000+
    df_2025['carrier_numeric'] = pd.to_numeric(df_2025['Nummer.Spedition'], errors='coerce')
    # Vectorised classification. NaN < 9000 evaluates to False, so rows without a
    # usable carrier number land in 'external', the same bucket the row-wise
    # lambda put them in. NOTE(review): confirm 'external' is the intended bucket
    # for missing carrier numbers, since it feeds the external_drivers count.
    df_2025['carrier_type'] = np.where(df_2025['carrier_numeric'] < 9000, 'internal', 'external')
    print('  Carrier types identified')
else:
    print('  Warning: Nummer.Spedition column not found')
    df_2025['carrier_type'] = 'unknown'

# Step 4: Map Betriebszentrale (dispatch centers)
print('4. Mapping Betriebszentralen...')
if 'Nummer.Auftraggeber' in df_2025.columns:
    # Load betriebszentrale mapping
    bz_mapping = pd.read_csv('../data/raw/TRAVECO_Betriebszentralen.csv')
    # Convert both key columns to nullable Int64 so the merge compares like with like
    df_2025['Nummer.Auftraggeber'] = pd.to_numeric(df_2025['Nummer.Auftraggeber'], errors='coerce').astype('Int64')
    bz_mapping['Nummer.Auftraggeber'] = pd.to_numeric(bz_mapping['Nummer.Auftraggeber'], errors='coerce').astype('Int64')

    # Keep exactly one name per customer number. A repeated key would duplicate
    # order rows in the merge (inflating every downstream count), and a missing
    # key would be matched against orders whose own key is missing.
    bz_lookup = (
        bz_mapping[['Nummer.Auftraggeber', 'Name1']]
        .dropna(subset=['Nummer.Auftraggeber'])
        .drop_duplicates(subset='Nummer.Auftraggeber', keep='first')
    )
    ignored_mapping_rows = len(bz_mapping) - len(bz_lookup)
    if ignored_mapping_rows:
        print(f'  Warning: ignored {ignored_mapping_rows} mapping row(s) with missing or repeated Nummer.Auftraggeber')

    # Merge to add betriebszentrale_name; validate guards the one-name-per-key assumption
    df_2025 = df_2025.merge(
        bz_lookup,
        on='Nummer.Auftraggeber',
        how='left',
        validate='many_to_one'
    )
    df_2025 = df_2025.rename(columns={'Name1': 'betriebszentrale_name'})
    # Fill missing values with "Unknown BZ". Assign the result back instead of
    # calling fillna(inplace=True) on the selected column: the chained in-place
    # form is deprecated and has no effect under pandas Copy-on-Write.
    df_2025['betriebszentrale_name'] = df_2025['betriebszentrale_name'].fillna('Unknown BZ')
    print(f'  ‚úì Mapped {df_2025["betriebszentrale_name"].nunique()} betriebszentralen')
    print(f'  BZ counts: {dict(sorted(df_2025["betriebszentrale_name"].value_counts().items()))}')
else:
    print('  ‚ö†Ô∏è Warning: Nummer.Auftraggeber column not found')
    df_2025['betriebszentrale_name'] = 'Unknown BZ'

print(f'‚úì Data processed: {df_2025.shape}')

Processing 2025 data with simplified aggregation...
1. Data Cleaning...
  After filtering Lager orders: 1,279,181 records
2. Adding temporal features...
3. Identifying carrier types...
  Carrier types identified
4. Mapping Betriebszentralen...
  ‚úì Mapped 12 betriebszentralen
  BZ counts: {'B&T Landquart': 4251, 'B&T Puidoux': 21759, 'B&T Winterthur': 78445, 'BZ Herzogenbuchsee': 62805, 'BZ Intermodal / Rail': 3566, 'BZ Landquart': 155391, 'BZ Oberbipp': 341879, 'BZ Puidoux': 16933, 'BZ Sierre': 55769, 'BZ Sursee': 274739, 'BZ Winterthur': 260136, 'Unknown BZ': 3508}
‚úì Data processed: (1279181, 110)


In [None]:
# Step 3: Monthly Aggregation (Orders & Revenue Focus)
print('\n3. Monthly Aggregation (Orders & Revenue)...')

# Monthly period key shared by every groupby in this cell
df_2025['year_month'] = df_2025['Datum.Tour'].dt.to_period('M')

# Company-wide order metrics per month; output columns are named directly in the aggregation
df_2025_monthly = (
    df_2025.groupby('year_month')
    .agg(
        total_orders=('NummerKomplett.Auftrag', 'count'),
        total_km_billed=('Distanz_BE.Auftrag', 'sum'),  # order-based billing KM
        # number of order rows whose carrier_type is 'external'
        external_drivers=('carrier_type', lambda carriers: (carriers == 'external').sum()),
        revenue_total=('‚àë Einnahmen', 'sum'),
    )
    .reset_index()
)

# First day of each month as a timestamp
df_2025_monthly['date'] = df_2025_monthly['year_month'].dt.to_timestamp()

# total_drivers = distinct Nummer.Spedition values seen in the month
drivers_per_month = (
    df_2025.groupby('year_month')['Nummer.Spedition']
    .nunique()
    .reset_index(name='total_drivers')
)
df_2025_monthly = df_2025_monthly.merge(drivers_per_month, on='year_month', how='left')

print(f'  ‚úì Aggregated to monthly level: {df_2025_monthly.shape}')
print(f'  ‚úì Metrics: {list(df_2025_monthly.columns)}')
print(f'\n  Monthly aggregated data (first 3 months):')
display_cols = ['date', 'total_orders', 'total_drivers', 'revenue_total']
display(df_2025_monthly[display_cols].head(3))

print('\n  ‚ö†Ô∏è  Note: Tour-level metrics (total_km_actual, vehicle costs) not available')
print('      2025 tour data structure differs from 2024 (missing Nummer.Auftraggeber, IST KM PraCar)')
print('      Validation focuses on key business metrics: orders & revenue')

# Step 4: Branch-level aggregation (keep existing)
print('\n4. Branch-Level Aggregation (by Betriebszentrale)...')

# Same metrics, split by month AND Betriebszentrale
branch_keys = ['year_month', 'betriebszentrale_name']
df_2025_monthly_bz = (
    df_2025.groupby(branch_keys)
    .agg(
        total_orders=('NummerKomplett.Auftrag', 'count'),
        total_km=('Distanz_BE.Auftrag', 'sum'),
        external_drivers=('carrier_type', lambda carriers: (carriers == 'external').sum()),
        revenue_total=('‚àë Einnahmen', 'sum'),
    )
    .reset_index()
    .rename(columns={'betriebszentrale_name': 'betriebszentrale'})
)

df_2025_monthly_bz['date'] = df_2025_monthly_bz['year_month'].dt.to_timestamp()

# Distinct carriers per branch and month
drivers_per_branch_month = (
    df_2025.groupby(branch_keys)['Nummer.Spedition']
    .nunique()
    .reset_index(name='total_drivers')
    .rename(columns={'betriebszentrale_name': 'betriebszentrale'})
)
df_2025_monthly_bz = df_2025_monthly_bz.merge(
    drivers_per_branch_month,
    on=['year_month', 'betriebszentrale'],
    how='left',
)

# Persist the branch-level 2025 actuals for Notebook 19
df_2025_monthly_bz.to_csv('../data/processed/2025_actual_by_branch.csv', index=False)

print(f'  ‚úì Aggregated by branch: {df_2025_monthly_bz.shape}')
print(f'  ‚úì Betriebszentralen: {df_2025_monthly_bz["betriebszentrale"].nunique()} branches')
print(f'  ‚úì Saved: data/processed/2025_actual_by_branch.csv')
print(f'\n  Branch-level sample (first 5 rows):')
display(df_2025_monthly_bz[['date', 'betriebszentrale', 'total_orders', 'revenue_total']].head(5))

## Section 3: Plausibility Check - Compare 2025 vs 2024 Same Months

Ensure 2025 data is comparable to 2024 (within ±20%)

In [None]:
# Load the processed monthly history and derive calendar keys for the comparison
print('Loading 2024 data for plausibility check...')
df_2024_monthly = pd.read_csv('../data/processed/monthly_aggregated_full_company.csv')
df_2024_monthly['date'] = pd.to_datetime(df_2024_monthly['date'])
df_2024_monthly = df_2024_monthly.assign(
    year=df_2024_monthly['date'].dt.year,
    month=df_2024_monthly['date'].dt.month,
)

# Keep only the 2024 rows
is_2024 = df_2024_monthly['year'].eq(2024)
df_2024_comparison = df_2024_monthly.loc[is_2024].copy()
print(f'  2024 data: {len(df_2024_comparison)} months')

# Same calendar keys on a copy of the 2025 monthly actuals
df_2025_comparison = df_2025_monthly.assign(
    year=df_2025_monthly['date'].dt.year,
    month=df_2025_monthly['date'].dt.month,
)
print(f'  2025 data: {len(df_2025_comparison)} months')

In [None]:
# Compare 2025 vs 2024 same months
print('\nPlausibility Check: 2025 vs 2024 Same Months')
print('='*80)

# Metrics to compare
metrics = ['total_orders', 'revenue_total']

# Merge on month for comparison (inner join keeps only months present in both years)
comparison = df_2025_comparison[['month', 'total_orders', 'revenue_total']].merge(
    df_2024_comparison[['month', 'total_orders', 'revenue_total']],
    on='month',
    suffixes=('_2025', '_2024'),
    how='inner'
)

# Calculate percentage changes relative to the 2024 value
for metric in metrics:
    comparison[f'{metric}_change_%'] = (
        (comparison[f'{metric}_2025'] - comparison[f'{metric}_2024'])
        / comparison[f'{metric}_2024'] * 100
    )


# Add validation status
def validate_change(change):
    """Classify a year-over-year percentage change into a traffic-light label.

    Within +/-10% is green, beyond that up to +/-20% is yellow, anything else
    (including NaN, which fails both range checks) is red.
    """
    if -10 <= change <= 10:
        return '‚úì Green (Normal)'
    elif -20 <= change < -10 or 10 < change <= 20:
        return '‚ö†Ô∏è  Yellow (Significant but plausible)'
    else:
        return '‚ùå Red (Investigate data quality)'


# Display comparison
month_names = {1: 'Jan', 2: 'Feb', 3: 'Mar', 4: 'Apr', 5: 'May', 6: 'Jun', 7: 'Jul', 8: 'Aug', 9: 'Sep'}
comparison['month_name'] = comparison['month'].map(month_names)

print('\nTotal Orders Comparison:')
for _, row in comparison.iterrows():
    change = row['total_orders_change_%']
    status = validate_change(change)
    print(f"  {row['month_name']}: {row['total_orders_2025']:,.0f} vs {row['total_orders_2024']:,.0f} ({change:+.1f}%) {status}")

print('\nRevenue Total Comparison:')
for _, row in comparison.iterrows():
    change = row['revenue_total_change_%']
    status = validate_change(change)
    print(f"  {row['month_name']}: CHF {row['revenue_total_2025']:,.0f} vs CHF {row['revenue_total_2024']:,.0f} ({change:+.1f}%) {status}")

# Summary
print('\nPlausibility Summary:')
for metric in metrics:
    col = f'{metric}_change_%'
    avg_change = comparison[col].mean()
    max_change = comparison[col].max()
    min_change = comparison[col].min()
    print(f'  {metric}:')
    print(f'    Average change: {avg_change:+.1f}%')
    print(f'    Range: {min_change:+.1f}% to {max_change:+.1f}%')

    # Check if any month is in red zone (same +/-20% boundary as validate_change)
    red_months = comparison[comparison[col].abs() > 20]
    if len(red_months) > 0:
        print(f'    ‚ö†Ô∏è  {len(red_months)} month(s) in RED zone - investigate!')
    else:
        print(f'    ‚úì All months within acceptable range')

print('='*80)

## Section 4: Calculate Human Baseline Forecast

Traditional budgeting method: 2024 annual total ÷ 12 months

In [None]:
print('Calculating Human/Traditional Forecast (2024 total √∑ 12)...')
print('='*80)

# Key business metrics the baseline is built for
target_metrics = ['total_orders', 'revenue_total']

# Flat monthly value per metric = 2024 annual total divided by 12
print(f'\n2024 Annual Totals:')
human_forecast_values = {}

for metric in target_metrics:
    # Metrics absent from the 2024 data get a zero forecast and a warning
    if metric not in df_2024_comparison.columns:
        print(f'  ‚ö†Ô∏è  {metric}: Not available in 2024 data')
        human_forecast_values[metric] = 0
        continue

    total_2024 = df_2024_comparison[metric].sum()
    monthly_avg = total_2024 / 12
    human_forecast_values[metric] = monthly_avg

    # Revenue is shown in CHF with two decimals, everything else as whole numbers
    is_revenue = 'revenue' in metric
    unit = 'CHF ' if is_revenue else ''
    decimals = 2 if is_revenue else 0
    print(f'  {metric:25s}: {unit}{total_2024:,.{decimals}f} ‚Üí {unit}{monthly_avg:,.{decimals}f}/month')

# One row per month Jan-Sep 2025, carrying the same flat value in every row
dates_2025 = pd.date_range('2025-01-01', '2025-09-01', freq='MS')
df_human = pd.DataFrame({'date': dates_2025}).assign(
    **{name: human_forecast_values[name] for name in target_metrics},
    method='Human (2024√∑12)',
)

print(f'\n‚úì Human forecast created for {len(df_human)} months √ó {len(target_metrics)} metrics')
print(f'  Metrics: {target_metrics}')

## Section 5: Load Machine/ML Forecasts

In [None]:
print('Loading Machine/ML Forecasts...')
print('='*80)

# Consolidated forecasts (best model per metric)
df_ml_full = pd.read_csv('../data/processed/consolidated_forecast_2025.csv')
df_ml_full['date'] = pd.to_datetime(df_ml_full['date'])

# Restrict to the Jan-Sep 2025 validation window (both bounds inclusive)
in_validation_window = df_ml_full['date'].between('2025-01-01', '2025-09-01')
df_ml = df_ml_full[in_validation_window].copy()
df_ml['method'] = 'Machine (ML Models)'

# Of the key metrics, keep those the forecast file actually contains
focus_metrics = ['total_orders', 'revenue_total']
available_metrics = [name for name in focus_metrics if name in df_ml.columns]

print(f'\n‚úì ML forecasts loaded: {len(df_ml)} months')
print(f'  Available metrics: {available_metrics}')

# Preview the first few months
print(f'\nML Forecast Sample (first 3 months):')
display_cols = ['date', *available_metrics]
display(df_ml[display_cols].head(3))

## Section 6: Calculate Error Metrics

Compare both methods against actual 2025 data

In [None]:
# Actual 2025 values, tagged like the two forecast frames
df_actual = df_2025_monthly[['date', 'total_orders', 'revenue_total']].copy()
df_actual['method'] = 'Actual'

# Stack actual, human and machine rows into one long frame keyed by `source`
value_columns = ['date', 'total_orders', 'revenue_total']
df_comparison = pd.concat(
    [
        df_actual.assign(source='Actual'),
        df_human[value_columns].assign(source='Human'),
        df_ml[value_columns].assign(source='Machine'),
    ],
    ignore_index=True,
)

# Per metric: one row per date, one column per source, plus derived error columns
metrics = ['total_orders', 'revenue_total']
results = {}
forecasters = {'human': 'Human', 'machine': 'Machine'}

for metric in metrics:
    pivot = df_comparison.pivot(index='date', columns='source', values=metric)

    # Signed error: forecast minus actual
    for prefix, source_column in forecasters.items():
        pivot[f'{prefix}_error'] = pivot[source_column] - pivot['Actual']
    # Signed error as a percentage of the actual value
    for prefix in forecasters:
        pivot[f'{prefix}_error_%'] = pivot[f'{prefix}_error'] / pivot['Actual'] * 100
    # Magnitude of the error
    for prefix in forecasters:
        pivot[f'{prefix}_abs_error'] = pivot[f'{prefix}_error'].abs()

    results[metric] = pivot

print('Error Metrics Calculated')
print('='*80)

In [None]:
# Calculate summary statistics
def _summarize_errors(frame):
    """Return MAPE, MAE and cumulative error for both methods from one results frame.

    Order of the returned tuple:
    (human_mape, machine_mape, human_mae, machine_mae, human_cumulative, machine_cumulative).
    """
    return (
        frame['human_error_%'].abs().mean(),    # MAPE (Mean Absolute Percentage Error)
        frame['machine_error_%'].abs().mean(),
        frame['human_abs_error'].mean(),        # MAE (Mean Absolute Error)
        frame['machine_abs_error'].mean(),
        frame['human_error'].sum(),             # cumulative signed error
        frame['machine_error'].sum(),
    )


def _print_method_stats(title, mape, mae, cumulative, prefix='', suffix=''):
    """Print the three statistics of one method; prefix/suffix carry the unit."""
    print(f'\n{title}:')
    print(f'  MAPE: {mape:.2f}%')
    print(f'  MAE: {prefix}{mae:,.0f}{suffix}')
    print(f'  Cumulative Error (Jan-Sep): {prefix}{cumulative:,.0f}{suffix}')


print('\nSummary Statistics: TOTAL ORDERS')
print('-'*80)

orders_results = results['total_orders']
(human_mape, machine_mape,
 human_mae, machine_mae,
 human_cumulative, machine_cumulative) = _summarize_errors(orders_results)

_print_method_stats('Human Method (2024√∑12)', human_mape, human_mae, human_cumulative, suffix=' orders')
_print_method_stats('Machine Method (ML Models)', machine_mape, machine_mae, machine_cumulative, suffix=' orders')

# Relative MAPE reduction of the ML forecast versus the human baseline
improvement_mape = ((human_mape - machine_mape) / human_mape * 100)
print(f'\n‚úì ML Improvement over Human: {improvement_mape:.1f}% reduction in MAPE')

print('\n' + '='*80)
print('Summary Statistics: REVENUE TOTAL')
print('-'*80)

revenue_results = results['revenue_total']
(human_mape_rev, machine_mape_rev,
 human_mae_rev, machine_mae_rev,
 human_cumulative_rev, machine_cumulative_rev) = _summarize_errors(revenue_results)

_print_method_stats('Human Method (2024√∑12)', human_mape_rev, human_mae_rev, human_cumulative_rev, prefix='CHF ')
_print_method_stats('Machine Method (ML Models)', machine_mape_rev, machine_mae_rev, machine_cumulative_rev, prefix='CHF ')

improvement_mape_rev = ((human_mape_rev - machine_mape_rev) / human_mape_rev * 100)
print(f'\n‚úì ML Improvement over Human: {improvement_mape_rev:.1f}% reduction in MAPE')

print('='*80)

## Section 7: Create Visualizations

**The Most Crucial Visualization**: Human Error vs Machine Error

In [None]:
# Visualization 1: Monthly Comparison - Total Orders
fig1 = go.Figure()

# One entry per line on the chart: (results column, legend label, line style, marker size)
orders_series = [
    ('Actual', 'Actual 2025', dict(color='black', width=3), 10),
    ('Human', 'Human (2024√∑12)', dict(color='#FF6B6B', width=2, dash='dash'), 8),
    ('Machine', 'Machine (ML)', dict(color='#4ECDC4', width=2, dash='dot'), 8),
]
for column, label, line_style, marker_size in orders_series:
    fig1.add_trace(go.Scatter(
        x=orders_results.index,
        y=orders_results[column],
        mode='lines+markers',
        name=label,
        line=line_style,
        marker=dict(size=marker_size),
    ))

fig1.update_layout(
    title='Total Orders: Actual vs Human vs Machine Forecasts (Jan-Sep 2025)',
    xaxis_title='Month',
    yaxis_title='Total Orders',
    hovermode='x unified',
    height=500,
    template='plotly_white',
)

fig1.show()

# Keep an interactive copy next to the other validation outputs
fig1.write_html('../results/forecast_validation_orders_comparison.html')
print('‚úì Saved: results/forecast_validation_orders_comparison.html')

In [None]:
# Visualization 2: Monthly Comparison - Revenue Total
fig2 = go.Figure()

# One entry per line on the chart: (results column, legend label, line style, marker size)
revenue_series = [
    ('Actual', 'Actual 2025', dict(color='black', width=3), 10),
    ('Human', 'Human (2024√∑12)', dict(color='#FF6B6B', width=2, dash='dash'), 8),
    ('Machine', 'Machine (ML)', dict(color='#4ECDC4', width=2, dash='dot'), 8),
]
for column, label, line_style, marker_size in revenue_series:
    fig2.add_trace(go.Scatter(
        x=revenue_results.index,
        y=revenue_results[column],
        mode='lines+markers',
        name=label,
        line=line_style,
        marker=dict(size=marker_size),
    ))

fig2.update_layout(
    title='Revenue Total: Actual vs Human vs Machine Forecasts (Jan-Sep 2025)',
    xaxis_title='Month',
    yaxis_title='Revenue (CHF)',
    hovermode='x unified',
    height=500,
    template='plotly_white',
)

fig2.show()

# Keep an interactive copy next to the other validation outputs
fig2.write_html('../results/forecast_validation_revenue_comparison.html')
print('‚úì Saved: results/forecast_validation_revenue_comparison.html')

In [None]:
# Visualization 3: THE CRUCIAL ONE - Error Comparison Side-by-Side
# Rows are metrics (orders, revenue); columns are methods (human, machine)
fig3 = make_subplots(
    rows=2, cols=2,
    subplot_titles=[
        f'{metric_label}: {method_label} Error (%)'
        for metric_label in ('Total Orders', 'Revenue Total')
        for method_label in ('Human', 'Machine')
    ],
    vertical_spacing=0.15,
    horizontal_spacing=0.12,
)

# (row, col, results frame, error column, bar colour, trace name) for each panel
error_panels = [
    (1, 1, orders_results, 'human_error_%', '#FF6B6B', 'Human Error'),
    (1, 2, orders_results, 'machine_error_%', '#4ECDC4', 'Machine Error'),
    (2, 1, revenue_results, 'human_error_%', '#FF6B6B', 'Human Error'),
    (2, 2, revenue_results, 'machine_error_%', '#4ECDC4', 'Machine Error'),
]
for row, col, frame, error_column, bar_color, trace_name in error_panels:
    fig3.add_trace(
        go.Bar(
            x=frame.index,
            y=frame[error_column],
            marker_color=bar_color,
            name=trace_name,
            showlegend=False,
        ),
        row=row, col=col,
    )

subplot_cells = [(r, c) for r in (1, 2) for c in (1, 2)]

# Zero reference line in every panel
for row, col in subplot_cells:
    fig3.add_hline(y=0, line_dash='dash', line_color='gray', row=row, col=col)

fig3.update_layout(
    title_text='<b>Human vs Machine Forecast Error Comparison (Jan-Sep 2025)</b>',
    title_font_size=18,
    height=800,
    template='plotly_white',
)

# Same y-axis label on all four panels
for row, col in subplot_cells:
    fig3.update_yaxes(title_text='Error (%)', row=row, col=col)

fig3.show()

# Keep an interactive copy next to the other validation outputs
fig3.write_html('../results/forecast_validation_error_comparison.html')
print('‚úì Saved: results/forecast_validation_error_comparison.html')
print('\nüéØ THIS IS THE MOST CRUCIAL VISUALIZATION OF THE WHOLE PROJECT!')

In [None]:
# Visualization 4: Cumulative Error Over Time
fig4 = make_subplots(
    rows=1, cols=2,
    subplot_titles=['Total Orders: Cumulative Error', 'Revenue Total: Cumulative Error'],
)

# (error column, legend label, line colour) for the two methods
method_lines = [
    ('human_error', 'Human', '#FF6B6B'),
    ('machine_error', 'Machine', '#4ECDC4'),
]
# Left panel = orders, right panel = revenue. The right panel's traces are hidden
# from the legend so each method is listed only once.
for frame, col, hide_from_legend in ((orders_results, 1, False), (revenue_results, 2, True)):
    for error_column, label, line_color in method_lines:
        legend_options = {'showlegend': False} if hide_from_legend else {}
        fig4.add_trace(
            go.Scatter(
                x=frame.index,
                y=frame[error_column].cumsum(),
                mode='lines+markers',
                name=label,
                line=dict(color=line_color, width=2),
                **legend_options,
            ),
            row=1, col=col,
        )

# Zero reference line in both panels
for col in (1, 2):
    fig4.add_hline(y=0, line_dash='dash', line_color='gray', row=1, col=col)

fig4.update_layout(
    title='Cumulative Forecast Error (Jan-Sep 2025)',
    height=500,
    template='plotly_white',
)

fig4.update_yaxes(title_text='Cumulative Error (Orders)', row=1, col=1)
fig4.update_yaxes(title_text='Cumulative Error (CHF)', row=1, col=2)

fig4.show()

# Keep an interactive copy next to the other validation outputs
fig4.write_html('../results/forecast_validation_cumulative_error.html')
print('‚úì Saved: results/forecast_validation_cumulative_error.html')

In [None]:
# Visualization 5: Error Distribution Box Plot
fig5 = go.Figure()

# (results frame, error column, box label, colour): orders first, then revenue
box_specs = [
    (orders_results, 'human_error_%', 'Human (Orders)', '#FF6B6B'),
    (orders_results, 'machine_error_%', 'Machine (Orders)', '#4ECDC4'),
    (revenue_results, 'human_error_%', 'Human (Revenue)', '#FFB6C1'),
    (revenue_results, 'machine_error_%', 'Machine (Revenue)', '#95E1D3'),
]
for frame, error_column, box_label, box_color in box_specs:
    fig5.add_trace(go.Box(
        y=frame[error_column],
        name=box_label,
        marker_color=box_color,
        boxmean='sd',
    ))

# Zero reference line
fig5.add_hline(y=0, line_dash='dash', line_color='gray')

fig5.update_layout(
    title='Forecast Error Distribution (Jan-Sep 2025)',
    yaxis_title='Error (%)',
    height=500,
    template='plotly_white',
)

fig5.show()

# Keep an interactive copy next to the other validation outputs
fig5.write_html('../results/forecast_validation_error_distribution.html')
print('‚úì Saved: results/forecast_validation_error_distribution.html')

## Section 8: Executive Summary Table

In [None]:
# Create executive summary table: one row per (metric, method) pair
summary_columns = ['Metric', 'Method', 'MAPE (%)', 'MAE', 'Cumulative Error']
summary_rows = [
    ('Total Orders', 'Human (2024√∑12)', human_mape, human_mae, human_cumulative),
    ('Total Orders', 'Machine (ML)', machine_mape, machine_mae, machine_cumulative),
    ('Revenue Total', 'Human (2024√∑12)', human_mape_rev, human_mae_rev, human_cumulative_rev),
    ('Revenue Total', 'Machine (ML)', machine_mape_rev, machine_mae_rev, machine_cumulative_rev),
]
# Transpose the rows into the column -> values mapping the DataFrame is built from
summary_data = {
    column: list(values)
    for column, values in zip(summary_columns, zip(*summary_rows))
}

df_summary = pd.DataFrame(summary_data)

print('\nExecutive Summary: Forecast Validation Results (Jan-Sep 2025)')
print('='*80)
display(df_summary)

# Persist the table alongside the figures
df_summary.to_csv('../results/forecast_validation_summary.csv', index=False)
print('\n‚úì Saved: results/forecast_validation_summary.csv')

## Section 9: Insights and Recommendations

In [None]:
print('\n' + '='*80)
print('FORECAST VALIDATION INSIGHTS')
print('='*80)

# 1. MAPE of both methods per metric, plus the relative improvement
print('\n1. ACCURACY COMPARISON:')
for heading, human_value, machine_value, gain in (
    ('   Total Orders:', human_mape, machine_mape, improvement_mape),
    ('\n   Revenue Total:', human_mape_rev, machine_mape_rev, improvement_mape_rev),
):
    print(heading)
    print(f'     - Human MAPE: {human_value:.2f}%')
    print(f'     - Machine MAPE: {machine_value:.2f}%')
    print(f'     - Improvement: {gain:.1f}%')

# 2. Cumulative signed error per metric; prefix/suffix carry the unit
print('\n2. CUMULATIVE IMPACT (Jan-Sep 2025):')
for heading, human_total, machine_total, prefix, suffix in (
    ('   Total Orders:', human_cumulative, machine_cumulative, '', ' orders'),
    ('\n   Revenue Total:', human_cumulative_rev, machine_cumulative_rev, 'CHF ', ''),
):
    print(heading)
    print(f'     - Human cumulative error: {prefix}{human_total:,.0f}{suffix}')
    print(f'     - Machine cumulative error: {prefix}{machine_total:,.0f}{suffix}')
    print(f'     - Difference: {prefix}{abs(human_total - machine_total):,.0f}{suffix}')

# 3. Which method has the lower MAPE for each metric
print('\n3. KEY FINDINGS:')
for topic, human_value, machine_value, gain in (
    ('orders', human_mape, machine_mape, improvement_mape),
    ('revenue', human_mape_rev, machine_mape_rev, improvement_mape_rev),
):
    if machine_value < human_value:
        print(f'   ‚úì ML models outperform traditional method by {gain:.1f}% for {topic}')
    else:
        print(f'   ‚ö†Ô∏è  Traditional method performs better for {topic} by {-gain:.1f}%')

# 4. Recommendation depends on whether ML wins on both, one, or neither metric
print('\n4. RECOMMENDATIONS:')
ml_wins_orders = machine_mape < human_mape
ml_wins_revenue = machine_mape_rev < human_mape_rev
if ml_wins_orders and ml_wins_revenue:
    print('   ‚úì ADOPT ML forecasting for both orders and revenue planning')
    print('   ‚úì Replace 2024√∑12 method with ML-based forecasts')
    print('   ‚úì Monitor monthly actuals vs forecasts to track ongoing accuracy')
elif ml_wins_orders or ml_wins_revenue:
    print('   ‚ö†Ô∏è  Consider hybrid approach: ML for some metrics, traditional for others')
    print('   ‚ö†Ô∏è  Continue validation with additional months of data')
else:
    print('   ‚ö†Ô∏è  Further model refinement needed before deployment')
    print('   ‚ö†Ô∏è  Investigate why ML models underperform')

print('\n' + '='*80)
print('VALIDATION COMPLETE')
print('='*80)

In [None]:
# Save detailed monthly comparison: actual, both forecasts and both percentage
# errors for orders and revenue, one row per month
results_dir = Path('../results')

monthly_comparison = pd.DataFrame({
    'date': orders_results.index,
    'orders_actual': orders_results['Actual'],
    'orders_human': orders_results['Human'],
    'orders_machine': orders_results['Machine'],
    'orders_human_error_%': orders_results['human_error_%'],
    'orders_machine_error_%': orders_results['machine_error_%'],
    'revenue_actual': revenue_results['Actual'],
    'revenue_human': revenue_results['Human'],
    'revenue_machine': revenue_results['Machine'],
    'revenue_human_error_%': revenue_results['human_error_%'],
    'revenue_machine_error_%': revenue_results['machine_error_%']
})

monthly_comparison.to_csv(results_dir / 'forecast_validation_monthly_detail.csv', index=False)
print('\n‚úì Saved detailed monthly comparison: results/forecast_validation_monthly_detail.csv')

print('\n' + '='*80)
# Report where the files actually landed on this machine instead of a path
# hard-coded from one developer's checkout
print(f'ALL OUTPUTS SAVED TO: {results_dir.resolve()}')
print('='*80)
print('Files created:')
print('  1. forecast_validation_orders_comparison.html (interactive)')
print('  2. forecast_validation_revenue_comparison.html (interactive)')
print('  3. forecast_validation_error_comparison.html (MOST CRUCIAL)')
print('  4. forecast_validation_cumulative_error.html (interactive)')
print('  5. forecast_validation_error_distribution.html (interactive)')
print('  6. forecast_validation_summary.csv (executive summary)')
print('  7. forecast_validation_monthly_detail.csv (detailed data)')
print('='*80)

## Section 10: Comprehensive Model Comparison - ALL Forecasting Approaches

**Goal**: Compare ALL 5 forecasting approaches to understand which truly performs best:
1. **Seasonal Naive** (current consolidated forecast)
2. **XGBoost** (original "best" model from training)
3. **CatBoost** (alternative ML model)
4. **Ensemble Best** (model selection from 14b)
5. **Human Baseline** (2024÷12)

In [None]:
print('Loading ALL Forecasting Approaches for Comprehensive Comparison...')
print('='*80)

# Validation window (both ends inclusive) and the columns every approach must provide.
FORECAST_WINDOW_START = '2025-01-01'
FORECAST_WINDOW_END = '2025-09-01'
FORECAST_COLUMNS = ['date', 'total_orders', 'revenue_total']


def load_forecast_window(csv_path, start=FORECAST_WINDOW_START, end=FORECAST_WINDOW_END):
    """Read a forecast CSV and keep the rows whose ``date`` lies in [start, end].

    Args:
        csv_path: Path of a CSV file containing at least a ``date`` column.
        start: First date to keep (inclusive).
        end: Last date to keep (inclusive).

    Returns:
        A copy of the filtered DataFrame with ``date`` parsed to datetime.

    Raises:
        FileNotFoundError: Propagated from ``pd.read_csv`` when the file is missing.
    """
    forecast = pd.read_csv(csv_path)
    forecast['date'] = pd.to_datetime(forecast['date'])
    in_window = (forecast['date'] >= start) & (forecast['date'] <= end)
    return forecast[in_window].copy()


# One entry per file-based approach, in the order they are reported.
# 'required' sources abort the cell when missing; optional ones are recorded as None.
forecast_sources = [
    {'key': 'Seasonal_Naive', 'display': 'Seasonal Naive',
     'headline': 'Seasonal Naive (from consolidated forecast)',
     'path': '../data/processed/consolidated_forecast_2025.csv',
     'missing_label': 'Seasonal Naive', 'required': True},
    {'key': 'XGBoost', 'display': 'XGBoost',
     'headline': 'XGBoost forecasts',
     'path': '../data/processed/xgboost_forecast_2025.csv',
     'missing_label': 'XGBoost', 'required': False},
    {'key': 'CatBoost', 'display': 'CatBoost',
     'headline': 'CatBoost forecasts',
     'path': '../data/processed/catboost_forecast_2025.csv',
     'missing_label': 'CatBoost', 'required': False},
    {'key': 'Ensemble_Best', 'display': 'Ensemble Best',
     'headline': 'Ensemble Best Model forecasts',
     'path': '../data/processed/ensemble_best_model_2025.csv',
     'missing_label': 'Ensemble', 'required': False},
]

# Dictionary to store all forecasts (value is None when an approach is unavailable)
all_forecasts = {}
# Full (all-column) windowed frames, kept so the historical df_* names stay defined.
windowed_frames = {}

for position, source in enumerate(forecast_sources, start=1):
    key = source['key']
    print(f'\n{position}. Loading {source["headline"]}...')

    try:
        windowed = load_forecast_window(source['path'])
    except FileNotFoundError:
        if source['required']:
            raise
        print(f'   ‚ö†Ô∏è  {source["missing_label"]} forecast file not found')
        windowed = None

    # A file that exists but has no rows in the window cannot be compared;
    # previously this surfaced as an IndexError from .iloc[0].
    if windowed is not None and windowed.empty:
        if source['required']:
            raise ValueError(
                f'{source["path"]} has no rows between '
                f'{FORECAST_WINDOW_START} and {FORECAST_WINDOW_END}'
            )
        print(f'   ‚ö†Ô∏è  {source["missing_label"]} forecast has no rows between '
              f'{FORECAST_WINDOW_START} and {FORECAST_WINDOW_END}')
        windowed = None

    windowed_frames[key] = windowed
    if windowed is None:
        all_forecasts[key] = None
        continue

    all_forecasts[key] = windowed[FORECAST_COLUMNS].copy()
    first_row = windowed.iloc[0]
    print(f'   ‚úì {source["display"]} loaded: {len(all_forecasts[key])} months')
    print(f'     Jan Orders: {first_row["total_orders"]:,.0f}')
    print(f'     Jan Revenue: CHF {first_row["revenue_total"]:,.0f}')

# Preserve the per-model frame names this cell has always exposed
# (None when the corresponding approach is unavailable).
df_seasonal_naive = windowed_frames['Seasonal_Naive']
df_xgboost = windowed_frames['XGBoost']
df_catboost = windowed_frames['CatBoost']
df_ensemble = windowed_frames['Ensemble_Best']

# 5. Human Baseline (already loaded earlier in the notebook as df_human)
all_forecasts['Human'] = df_human[['date', 'total_orders', 'revenue_total']].copy()
print('\n5. Human Baseline (2024√∑12):')
print(f'   ‚úì Human forecast loaded: {len(all_forecasts["Human"])} months')
print(f'     Monthly Orders: {df_human.iloc[0]["total_orders"]:,.0f}')
print(f'     Monthly Revenue: CHF {df_human.iloc[0]["revenue_total"]:,.0f}')

# Summary
print(f'\n{"="*80}')
print('SUMMARY:')
available_approaches = [k for k, v in all_forecasts.items() if v is not None]
print(f'  Total approaches loaded: {len(available_approaches)}')
print(f'  Approaches: {", ".join(available_approaches)}')
print('='*80)

In [None]:
print('Calculating MAPE and MAE for ALL Approaches...')
print('='*80)

# Prepare actual data for comparison
df_actual_comp = df_2025_monthly[['date', 'total_orders', 'revenue_total']].copy()

# Dictionary to store results: all_results[approach][metric] -> dict of scores
all_results = {}
metrics_to_compare = ['total_orders', 'revenue_total']


def score_forecast(actual_df, forecast_df, metric):
    """Score one forecast column against the actuals on their shared dates.

    Args:
        actual_df: DataFrame with ``date`` and ``metric`` columns (ground truth).
        forecast_df: DataFrame with ``date`` and ``metric`` columns (forecast).
        metric: Name of the column to compare.

    Returns:
        dict with ``mape`` (mean absolute % error), ``mae`` (mean absolute
        error), ``cumulative_error`` (sum of forecast - actual),
        ``monthly_errors`` (signed % error per matched row) and ``n_months``
        (number of rows that matched on ``date``).
    """
    merged = actual_df[['date', metric]].merge(
        forecast_df[['date', metric]],
        on='date',
        suffixes=('_actual', '_forecast'),
        how='inner'
    )
    actual = merged[f'{metric}_actual']
    error = merged[f'{metric}_forecast'] - actual
    # A zero actual has no defined percentage error; mark it NaN so it is
    # skipped by the mean instead of turning the whole MAPE into inf.
    error_pct = error / actual.replace(0, np.nan) * 100

    return {
        'mape': error_pct.abs().mean(),
        'mae': error.abs().mean(),
        'cumulative_error': error.sum(),
        'monthly_errors': error_pct.tolist(),
        'n_months': len(merged),
    }


for approach_name, forecast_df in all_forecasts.items():
    if forecast_df is None:
        print(f'\n‚ö†Ô∏è  Skipping {approach_name} (not available)')
        continue

    print(f'\n{approach_name}:')
    all_results[approach_name] = {}

    for metric in metrics_to_compare:
        scores = score_forecast(df_actual_comp, forecast_df, metric)
        all_results[approach_name][metric] = scores

        mape = scores['mape']
        mae = scores['mae']
        cumulative_error = scores['cumulative_error']

        # Print results
        if 'revenue' in metric:
            print(f'  {metric:20s}: MAPE={mape:5.2f}%  MAE=CHF {mae:,.0f}  Cumulative=CHF {cumulative_error:,.0f}')
        else:
            print(f'  {metric:20s}: MAPE={mape:5.2f}%  MAE={mae:,.0f}  Cumulative={cumulative_error:,.0f}')

        # The inner merge drops months without a matching forecast date; say so,
        # otherwise approaches are silently ranked on different sets of months.
        if scores['n_months'] < len(df_actual_comp):
            print(f'  {metric:20s}: WARNING only {scores["n_months"]} of '
                  f'{len(df_actual_comp)} actual months matched the forecast dates')

print('\n' + '='*80)

In [None]:
print('Creating Comprehensive Comparison Summary Table...')
print('='*80)

# One row per (approach, metric) pair, in the order the scores were computed.
summary_rows = [
    {
        'Approach': approach_name,
        'Metric': metric,
        'MAPE (%)': all_results[approach_name][metric]['mape'],
        'MAE': all_results[approach_name][metric]['mae'],
        'Cumulative Error': all_results[approach_name][metric]['cumulative_error'],
    }
    for approach_name in all_results
    for metric in metrics_to_compare
]
df_all_approaches_summary = pd.DataFrame(summary_rows)

# Columns shown in each per-metric ranking table.
ranking_columns = ['Approach', 'MAPE (%)', 'MAE', 'Cumulative Error']

print('\nüìä COMPREHENSIVE MAPE COMPARISON (Lower is Better):')
print('='*80)

# --- Orders: rank approaches by ascending MAPE; the first row is the winner ---
print('\n** TOTAL ORDERS **')
is_orders_row = df_all_approaches_summary['Metric'] == 'total_orders'
orders_pivot = df_all_approaches_summary[is_orders_row].copy().sort_values('MAPE (%)')
print(orders_pivot[ranking_columns].to_string(index=False))

orders_leader = orders_pivot.iloc[0]
best_orders = orders_leader['Approach']
best_orders_mape = orders_leader['MAPE (%)']
print(f'\nüèÜ WINNER (Orders): {best_orders} with MAPE = {best_orders_mape:.2f}%')

# --- Revenue: same ranking for the revenue metric ---
print('\n\n** REVENUE TOTAL **')
is_revenue_row = df_all_approaches_summary['Metric'] == 'revenue_total'
revenue_pivot = df_all_approaches_summary[is_revenue_row].copy().sort_values('MAPE (%)')
print(revenue_pivot[ranking_columns].to_string(index=False))

revenue_leader = revenue_pivot.iloc[0]
best_revenue = revenue_leader['Approach']
best_revenue_mape = revenue_leader['MAPE (%)']
print(f'\nüèÜ WINNER (Revenue): {best_revenue} with MAPE = {best_revenue_mape:.2f}%')

# Persist the long-format summary (one row per approach and metric).
df_all_approaches_summary.to_csv('../results/forecast_validation_all_approaches_summary.csv', index=False)
print('\n‚úì Saved: results/forecast_validation_all_approaches_summary.csv')
print('='*80)

In [None]:
print('Creating Comprehensive Visualization: All Approaches Comparison...')
print('='*80)

# Line colour per approach; names missing from this map fall back to grey below.
colors = {
    'Seasonal_Naive': '#FF6B6B',
    'XGBoost': '#4ECDC4',
    'CatBoost': '#95E1D3',
    'Ensemble_Best': '#FFD93D',
    'Human': '#A8E6CF'
}

# Interactive comparison chart - ORDERS
fig_all_orders = go.Figure()

# Ground truth is added first, drawn thicker and in black.
actual_orders_trace = go.Scatter(
    x=df_actual_comp['date'],
    y=df_actual_comp['total_orders'],
    mode='lines+markers',
    name='Actual 2025',
    line={'color': 'black', 'width': 4},
    marker={'size': 12, 'symbol': 'diamond'},
)
fig_all_orders.add_trace(actual_orders_trace)

# One line per available approach, labelled with its orders MAPE.
for approach_name, forecast_df in all_forecasts.items():
    if forecast_df is not None:
        mape = all_results[approach_name]['total_orders']['mape']
        approach_trace = go.Scatter(
            x=forecast_df['date'],
            y=forecast_df['total_orders'],
            mode='lines+markers',
            name=f'{approach_name} (MAPE={mape:.2f}%)',
            line={'color': colors.get(approach_name, '#999999'), 'width': 2},
            marker={'size': 8},
        )
        fig_all_orders.add_trace(approach_trace)

# Legend sits in the top-left corner on a mostly opaque white box.
orders_legend = {
    'orientation': 'v',
    'yanchor': 'top',
    'y': 0.99,
    'xanchor': 'left',
    'x': 0.01,
    'bgcolor': 'rgba(255, 255, 255, 0.9)',
}
orders_layout = {
    'title': '<b>Total Orders: All Forecasting Approaches vs Actual (Jan-Sep 2025)</b>',
    'xaxis_title': 'Month',
    'yaxis_title': 'Total Orders',
    'hovermode': 'x unified',
    'height': 600,
    'template': 'plotly_white',
    'legend': orders_legend,
}
fig_all_orders.update_layout(**orders_layout)

fig_all_orders.show()

# Export the interactive chart as standalone HTML.
fig_all_orders.write_html('../results/forecast_validation_all_approaches_orders.html')
print('‚úì Saved: results/forecast_validation_all_approaches_orders.html')

In [None]:
# Interactive comparison chart - REVENUE
fig_all_revenue = go.Figure()

# Ground truth is added first, drawn thicker and in black.
actual_revenue_trace = go.Scatter(
    x=df_actual_comp['date'],
    y=df_actual_comp['revenue_total'],
    mode='lines+markers',
    name='Actual 2025',
    line={'color': 'black', 'width': 4},
    marker={'size': 12, 'symbol': 'diamond'},
)
fig_all_revenue.add_trace(actual_revenue_trace)

# One line per available approach, labelled with its revenue MAPE.
# `colors` is the approach -> colour map defined with the orders chart.
for approach_name, forecast_df in all_forecasts.items():
    if forecast_df is not None:
        mape = all_results[approach_name]['revenue_total']['mape']
        approach_trace = go.Scatter(
            x=forecast_df['date'],
            y=forecast_df['revenue_total'],
            mode='lines+markers',
            name=f'{approach_name} (MAPE={mape:.2f}%)',
            line={'color': colors.get(approach_name, '#999999'), 'width': 2},
            marker={'size': 8},
        )
        fig_all_revenue.add_trace(approach_trace)

# Legend sits in the top-left corner on a mostly opaque white box.
revenue_legend = {
    'orientation': 'v',
    'yanchor': 'top',
    'y': 0.99,
    'xanchor': 'left',
    'x': 0.01,
    'bgcolor': 'rgba(255, 255, 255, 0.9)',
}
revenue_layout = {
    'title': '<b>Revenue Total: All Forecasting Approaches vs Actual (Jan-Sep 2025)</b>',
    'xaxis_title': 'Month',
    'yaxis_title': 'Revenue (CHF)',
    'hovermode': 'x unified',
    'height': 600,
    'template': 'plotly_white',
    'legend': revenue_legend,
}
fig_all_revenue.update_layout(**revenue_layout)

fig_all_revenue.show()

# Export the interactive chart as standalone HTML.
fig_all_revenue.write_html('../results/forecast_validation_all_approaches_revenue.html')
print('‚úì Saved: results/forecast_validation_all_approaches_revenue.html')

In [None]:
print('Creating MAPE Ranking Bar Chart...')
print('='*80)


def _rank_bar_colors(bar_count):
    """Return bar colours for a best-to-worst ranking: green first, red last, blue between."""
    palette = ['#3498DB'] * bar_count
    if bar_count:
        palette[-1] = '#E74C3C'
        # Assigned last so a single bar is shown as the best (green).
        palette[0] = '#2ECC71'
    return palette


# MAPE per approach, sorted ascending so the best approach comes first.
orders_mape_data = [
    {'Approach': approach_name, 'MAPE': scores['total_orders']['mape']}
    for approach_name, scores in all_results.items()
]
df_orders_mape = pd.DataFrame(orders_mape_data).sort_values('MAPE')

revenue_mape_data = [
    {'Approach': approach_name, 'MAPE': scores['revenue_total']['mape']}
    for approach_name, scores in all_results.items()
]
df_revenue_mape = pd.DataFrame(revenue_mape_data).sort_values('MAPE')

# Side-by-side panels: orders on the left, revenue on the right.
fig_mape_comparison = make_subplots(
    rows=1, cols=2,
    subplot_titles=['Total Orders - MAPE Comparison', 'Revenue Total - MAPE Comparison'],
    horizontal_spacing=0.15
)

for panel_col, ranking in enumerate([df_orders_mape, df_revenue_mape], start=1):
    ranking_bars = go.Bar(
        x=ranking['Approach'],
        y=ranking['MAPE'],
        marker_color=_rank_bar_colors(len(ranking)),
        text=[f'{value:.2f}%' for value in ranking['MAPE']],
        textposition='outside',
        showlegend=False
    )
    fig_mape_comparison.add_trace(ranking_bars, row=1, col=panel_col)

fig_mape_comparison.update_layout(
    title_text='<b>MAPE Comparison: All Forecasting Approaches (Lower is Better)</b>',
    title_font_size=18,
    height=500,
    template='plotly_white'
)

for panel_col in (1, 2):
    fig_mape_comparison.update_yaxes(title_text='MAPE (%)', row=1, col=panel_col)

fig_mape_comparison.show()

# Export the interactive chart as standalone HTML.
fig_mape_comparison.write_html('../results/forecast_validation_all_approaches_mape_ranking.html')
print('‚úì Saved: results/forecast_validation_all_approaches_mape_ranking.html')

## Section 11: Final Recommendations - All Approaches Evaluated

In [None]:
print('\n' + '='*80)
print('FINAL RECOMMENDATIONS: ALL FORECASTING APPROACHES EVALUATED')
print('='*80)


def _print_mape_ranking(ranking_df):
    """Print one line per approach from a frame already sorted by ascending MAPE.

    The rank is the row's position in the frame (1 = best), so it does not
    depend on the index labels left over from sorting.
    """
    medals = {1: 'ü•á', 2: 'ü•à', 3: 'ü•â'}
    for rank, (_, row) in enumerate(ranking_df.iterrows(), start=1):
        medal = medals.get(rank, f'  {rank}.')
        print(f'   {medal} {row["Approach"]:20s} MAPE = {row["MAPE"]:5.2f}%')


print('\nüìä PERFORMANCE RANKINGS:')
print('\n1. Total Orders (Jan-Sep 2025):')
print('-' * 60)
_print_mape_ranking(df_orders_mape)

print('\n2. Revenue Total (Jan-Sep 2025):')
print('-' * 60)
_print_mape_ranking(df_revenue_mape)

print('\n\nüéØ KEY FINDINGS:')
print('-' * 80)

# Best = first row, worst = last row of each ascending-MAPE ranking.
best_orders_approach = df_orders_mape.iloc[0]['Approach']
best_orders_mape_val = df_orders_mape.iloc[0]['MAPE']
worst_orders_approach = df_orders_mape.iloc[-1]['Approach']
worst_orders_mape_val = df_orders_mape.iloc[-1]['MAPE']

best_revenue_approach = df_revenue_mape.iloc[0]['Approach']
best_revenue_mape_val = df_revenue_mape.iloc[0]['MAPE']
worst_revenue_approach = df_revenue_mape.iloc[-1]['Approach']
worst_revenue_mape_val = df_revenue_mape.iloc[-1]['MAPE']

print('\n1. BEST PERFORMERS:')
print(f'   - Orders:  {best_orders_approach} ({best_orders_mape_val:.2f}% MAPE)')
print(f'   - Revenue: {best_revenue_approach} ({best_revenue_mape_val:.2f}% MAPE)')

print('\n2. WORST PERFORMERS:')
print(f'   - Orders:  {worst_orders_approach} ({worst_orders_mape_val:.2f}% MAPE)')
print(f'   - Revenue: {worst_revenue_approach} ({worst_revenue_mape_val:.2f}% MAPE)')

print('\n3. PERFORMANCE SPREAD:')
print(f'   - Orders:  {worst_orders_mape_val - best_orders_mape_val:.2f}% difference between best and worst')
print(f'   - Revenue: {worst_revenue_mape_val - best_revenue_mape_val:.2f}% difference between best and worst')

print('\n\n‚úÖ FINAL RECOMMENDATION:')
print('='*80)

# A single approach is recommended only when it wins on both metrics;
# otherwise each metric's winner is recommended for its own planning use.
if best_orders_approach == best_revenue_approach:
    print(f'\nüéØ CLEAR WINNER: {best_orders_approach}')
    print('   - Performs best for BOTH orders and revenue')
    print(f'   - Orders MAPE: {best_orders_mape_val:.2f}%')
    print(f'   - Revenue MAPE: {best_revenue_mape_val:.2f}%')
    print(f'\n   ‚úì RECOMMENDATION: Adopt {best_orders_approach} as the official forecasting method')
else:
    print('\n‚ö†Ô∏è  SPLIT RESULTS:')
    print(f'   - Best for Orders:  {best_orders_approach} ({best_orders_mape_val:.2f}% MAPE)')
    print(f'   - Best for Revenue: {best_revenue_approach} ({best_revenue_mape_val:.2f}% MAPE)')
    print('\n   ‚úì RECOMMENDATION: Hybrid Approach')
    print(f'     - Use {best_orders_approach} for operational planning (orders, drivers, capacity)')
    print(f'     - Use {best_revenue_approach} for financial planning (revenue, budgets)')

# Only explain Seasonal Naive's success when it actually won a metric;
# the header used to print on its own even when the section body was skipped.
if 'Seasonal_Naive' in [best_orders_approach, best_revenue_approach]:
    print('\n\nüìà WHY SEASONAL NAIVE PERFORMS WELL (if it wins):')
    print('-' * 80)
    print('   1. CAPTURES SEASONALITY: 2024 monthly pattern repeats in 2025')
    print('   2. BUSINESS STABILITY: Transport demand follows predictable seasonal cycles')
    print('   3. SIMPLICITY: Easy to explain and understand for stakeholders')
    print('   4. ROBUSTNESS: No overfitting to training data noise')
    print('   5. LIMITED TRAINING DATA: Only 36 months (2022-2024) may be insufficient for complex ML')
    print('\n   üí° INSIGHT: Sometimes simple methods outperform complex ML when:')
    print('      - Strong seasonal patterns exist')
    print('      - Limited training data available')
    print('      - Business environment is stable')

print('\n\nüìÅ OUTPUT FILES CREATED:')
print('='*80)
print('   1. forecast_validation_all_approaches_summary.csv')
print('   2. forecast_validation_all_approaches_orders.html')
print('   3. forecast_validation_all_approaches_revenue.html')
print('   4. forecast_validation_all_approaches_mape_ranking.html')
print('\n' + '='*80)
print('‚úÖ COMPREHENSIVE MODEL COMPARISON COMPLETE!')
print('='*80)