# Notebook 18: Forecast Validation - 2025 Actual vs ML vs Human

This notebook validates our forecasting models against actual 2025 data (Jan-Sep).

**Three Methods Compared:**
1. **Human/Traditional**: 2024 annual total ÷ 12 (current budgeting method)
2. **Machine/ML**: Model predictions from consolidated forecasts
3. **Actual**: Real 2025 data (ground truth)

**CRITICAL**: This is validation only - do NOT retrain models with 2025 data!

**User Emphasis**: "This will be the most crucial visualisation of the whole project"

In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import sys
import os
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Add parent directory to path for utils
sys.path.append(str(Path.cwd().parent))

print('Notebook 18: Forecast Validation - 2025 Actual vs ML vs Human')
print('='*80)

Notebook 18: Forecast Validation - 2025 Actual vs ML vs Human


## Section 1: Load 2025 Actual Data (Jan-Sep)

In [2]:
# Load all 9 monthly files
data_dir = Path('../data/raw/2025')

monthly_files = [
    '2025 01 Jan QS Auftragsanalyse.xlsx',
    '2025 02 Feb QS Auftragsanalyse.xlsx',
    '2025 03 Mär QS Auftragsanalyse.xlsx',
    '2025 04 Apr QS Auftragsanalyse.xlsx',
    '2025 05 Mai QS Auftragsanalyse.xlsx',
    '2025 06 Jun QS Auftragsanalyse.xlsx',
    '2025 07 Jul QS Auftragsanalyse.xlsx',
    '2025 08 Aug QS Auftragsanalyse.xlsx',
    '2025 09 Sep QS Auftragsanalyse.xlsx'
]

print('Loading 2025 actual data (Jan-Sep)...')
dfs_2025 = []

for file in monthly_files:
    filepath = data_dir / file
    print(f'  Loading {file}...')
    df = pd.read_excel(filepath)
    dfs_2025.append(df)
    print(f'    Shape: {df.shape}')

# Concatenate all months
df_2025_raw = pd.concat(dfs_2025, ignore_index=True)
print(f'\n✓ Total 2025 data loaded: {df_2025_raw.shape}')
print(f'  Date range: {df_2025_raw["Datum.Tour"].min()} to {df_2025_raw["Datum.Tour"].max()}')

Loading 2025 actual data (Jan-Sep)...
  Loading 2025 01 Jan QS Auftragsanalyse.xlsx...


    Shape: (141699, 104)
  Loading 2025 02 Feb QS Auftragsanalyse.xlsx...


    Shape: (135739, 104)
  Loading 2025 03 Mär QS Auftragsanalyse.xlsx...


    Shape: (149431, 104)
  Loading 2025 04 Apr QS Auftragsanalyse.xlsx...


    Shape: (143718, 104)
  Loading 2025 05 Mai QS Auftragsanalyse.xlsx...


    Shape: (143614, 104)
  Loading 2025 06 Jun QS Auftragsanalyse.xlsx...


    Shape: (136156, 104)
  Loading 2025 07 Jul QS Auftragsanalyse.xlsx...


    Shape: (150103, 104)
  Loading 2025 08 Aug QS Auftragsanalyse.xlsx...


    Shape: (136073, 104)
  Loading 2025 09 Sep QS Auftragsanalyse.xlsx...


    Shape: (146849, 104)



✓ Total 2025 data loaded: (1283382, 104)


  Date range: 2025-01-01 00:00:00 to 2025-09-30 00:00:00


In [3]:
# Load tours and Sparten data
print('Loading supporting data...')

df_tours_2025 = pd.read_excel(data_dir / '2025 QS Tourenaufstellung bis Sept.xlsx')
print(f'  Tours data: {df_tours_2025.shape}')

df_sparten_2025 = pd.read_excel(data_dir / '2025 Sparten.xlsx')
print(f'  Sparten data: {df_sparten_2025.shape}')

Loading supporting data...


  Tours data: (134940, 25)


  Sparten data: (384, 7)


## Section 2: Process 2025 Data Through Full Pipeline

Apply same processing as training data (Notebooks 02-04 logic)

In [4]:
print('Processing 2025 data with simplified aggregation...')
print('='*80)

# Step 1: Data Cleaning
print('
1. Data Cleaning...')
df_2025 = df_2025_raw.copy()

# Convert dates
df_2025['Datum.Tour'] = pd.to_datetime(df_2025['Datum.Tour'])

# Exclude Lager orders (warehouse operations) - inline filtering
if 'Lieferart 2.0' in df_2025.columns:
    df_2025 = df_2025[df_2025['Lieferart 2.0'] != 'Lager Auftrag']
    print(f'  After filtering Lager orders: {df_2025.shape[0]:,} records')
else:
    print(f'  No Lager filtering applied (column not found)')

# Step 2: Add temporal features (simplified)
print('
2. Adding temporal features...')
df_2025['year'] = df_2025['Datum.Tour'].dt.year
df_2025['month'] = df_2025['Datum.Tour'].dt.month
df_2025['year_month'] = df_2025['Datum.Tour'].dt.to_period('M')

# Step 3: Identify carrier type (internal vs external)
print('
3. Identifying carrier types...')
if 'Nummer.Spedition' in df_2025.columns:
    # Convert to numeric first, then classify
    # Internal carriers: 1-8889, External: 9000+
    df_2025['carrier_numeric'] = pd.to_numeric(df_2025['Nummer.Spedition'], errors='coerce')
    df_2025['carrier_type'] = df_2025['carrier_numeric'].apply(
        lambda x: 'internal' if pd.notna(x) and x < 9000 else 'external'
    )
    print(f'  Carrier types identified')
else:
    print(f'  Warning: Nummer.Spedition column not found')
    df_2025['carrier_type'] = 'unknown'

# Step 4: Map Betriebszentrale (dispatch centers)
print('
4. Mapping Betriebszentralen...')
if 'Nummer.Auftraggeber' in df_2025.columns:
    # Load betriebszentrale mapping
    bz_mapping = pd.read_csv('../data/raw/TRAVECO_Betriebszentralen.csv')
    # Convert both columns to Int64 to ensure matching types (FIX for merge error)
    df_2025['Nummer.Auftraggeber'] = pd.to_numeric(df_2025['Nummer.Auftraggeber'], errors='coerce').astype('Int64')
    bz_mapping['Nummer.Auftraggeber'] = pd.to_numeric(bz_mapping['Nummer.Auftraggeber'], errors='coerce').astype('Int64')
    # Merge to add betriebszentrale_name
    df_2025 = df_2025.merge(
        bz_mapping[['Nummer.Auftraggeber', 'Name1']],
        on='Nummer.Auftraggeber',
        how='left'
    )
    df_2025.rename(columns={'Name1': 'betriebszentrale_name'}, inplace=True)
    # Fill missing values with "Unknown BZ"
    df_2025['betriebszentrale_name'].fillna('Unknown BZ', inplace=True)
    print(f'  ✓ Mapped {df_2025["betriebszentrale_name"].nunique()} betriebszentralen')
    print(f'  BZ counts: {dict(sorted(df_2025["betriebszentrale_name"].value_counts().items()))}')
else:
    print('  ⚠️ Warning: Nummer.Auftraggeber column not found')
    df_2025['betriebszentrale_name'] = 'Unknown BZ'

print(f'
✓ Data processed: {df_2025.shape}')

Processing 2025 data with simplified aggregation...

1. Data Cleaning...


  After filtering Lager orders: 1,279,181 records

2. Adding temporal features...



3. Identifying carrier types...


  Carrier types identified

4. Mapping Betriebszentralen...


  ✓ Mapped 12 betriebszentralen
  BZ counts: {'B&T Landquart': 4251, 'B&T Puidoux': 21759, 'B&T Winterthur': 78445, 'BZ Herzogenbuchsee': 62805, 'BZ Intermodal / Rail': 3566, 'BZ Landquart': 155391, 'BZ Oberbipp': 341879, 'BZ Puidoux': 16933, 'BZ Sierre': 55769, 'BZ Sursee': 274739, 'BZ Winterthur': 260136, 'Unknown BZ': 3508}

✓ Data processed: (1279181, 110)


In [5]:
# Step 3: Monthly Aggregation (ALL 10 METRICS)
print('
3. Monthly Aggregation (ALL 10 METRICS)...')

# Group by year_month for aggregation
df_2025['year_month'] = df_2025['Datum.Tour'].dt.to_period('M')

# First: Aggregate order-level metrics
df_2025_monthly = df_2025.groupby('year_month').agg({
    'NummerKomplett.Auftrag': 'count',  # total_orders
    'Distanz_BE.Auftrag': 'sum',  # This is total_km_billed (order-based billing KM)
    'carrier_type': lambda x: (x == 'external').sum(),  # external_drivers count
    '∑ Einnahmen': 'sum'  # revenue_total
}).reset_index()

# Rename columns
df_2025_monthly.columns = ['year_month', 'total_orders', 'total_km_billed', 'external_drivers', 'revenue_total']

# Add date column
df_2025_monthly['date'] = df_2025_monthly['year_month'].dt.to_timestamp()

# Count total drivers (unique carriers per month)
drivers_per_month = df_2025.groupby('year_month')['Nummer.Spedition'].nunique().reset_index()
drivers_per_month.columns = ['year_month', 'total_drivers']
df_2025_monthly = df_2025_monthly.merge(drivers_per_month, on='year_month', how='left')

# Now process tour-level data for tour/cost metrics
print('  Processing tour-level metrics...')

# Merge orders with tours to get tour-level data
df_2025_with_tours = df_2025.merge(
    df_tours_2025[['Nummer.Tour', 'Nummer.Auftraggeber', 'IST KM PraCar', 'IST Zeit PraCar', 'PC KM Kosten', 'PC Minuten Kosten']],
    on=['Nummer.Tour', 'Nummer.Auftraggeber'],
    how='left'
)

print(f'    Merged with tours: {df_2025_with_tours.shape}')

# Calculate vehicle costs per order
# Cost = (Actual KM × KM Cost) + (IST Zeit × 60 × Minute Cost)
df_2025_with_tours['order_km_cost'] = df_2025_with_tours['IST KM PraCar'] * df_2025_with_tours['PC KM Kosten']
df_2025_with_tours['order_time_cost'] = (df_2025_with_tours['IST Zeit PraCar'] * 60) * df_2025_with_tours['PC Minuten Kosten']
df_2025_with_tours['order_total_cost'] = df_2025_with_tours['order_km_cost'] + df_2025_with_tours['order_time_cost']

# Aggregate tour-level metrics by month
tour_metrics = df_2025_with_tours.groupby('year_month').agg({
    'IST KM PraCar': 'sum',  # total_km_actual (tour-based actual driven KM)
    'Nummer.Tour': 'nunique',  # total_tours (unique tour count)
    'order_km_cost': 'sum',  # vehicle_km_cost
    'order_time_cost': 'sum',  # vehicle_time_cost
    'order_total_cost': 'sum'  # total_vehicle_cost
}).reset_index()

tour_metrics.columns = ['year_month', 'total_km_actual', 'total_tours', 'vehicle_km_cost', 'vehicle_time_cost', 'total_vehicle_cost']

# Merge tour metrics into monthly aggregation
df_2025_monthly = df_2025_monthly.merge(tour_metrics, on='year_month', how='left')

# Fill any NaN values with 0
df_2025_monthly = df_2025_monthly.fillna(0)

print(f'  ✓ Aggregated to monthly level: {df_2025_monthly.shape}')
print(f'  ✓ Metrics: {list(df_2025_monthly.columns)}')
print(f'
  Monthly aggregated data (first 3 months):')
display_cols = ['date', 'total_orders', 'total_km_billed', 'total_km_actual', 'total_tours', 'total_drivers', 'revenue_total']
display(df_2025_monthly[display_cols].head(3))

# Step 4: Branch-level aggregation (keep existing)
print('
4. Branch-Level Aggregation (by Betriebszentrale)...')

# Aggregate by year_month AND betriebszentrale
df_2025_monthly_bz = df_2025.groupby(['year_month', 'betriebszentrale_name']).agg({
    'NummerKomplett.Auftrag': 'count',  # total_orders
    'Distanz_BE.Auftrag': 'sum',  # total_km
    'carrier_type': lambda x: (x == 'external').sum(),  # external_drivers count
    '∑ Einnahmen': 'sum'  # revenue_total
}).reset_index()

# Rename columns
df_2025_monthly_bz.columns = ['year_month', 'betriebszentrale', 'total_orders', 'total_km', 'external_drivers', 'revenue_total']

# Add date column
df_2025_monthly_bz['date'] = df_2025_monthly_bz['year_month'].dt.to_timestamp()

# Count total drivers per branch per month
drivers_per_branch_month = df_2025.groupby(['year_month', 'betriebszentrale_name'])['Nummer.Spedition'].nunique().reset_index()
drivers_per_branch_month.columns = ['year_month', 'betriebszentrale', 'total_drivers']
df_2025_monthly_bz = df_2025_monthly_bz.merge(drivers_per_branch_month, on=['year_month', 'betriebszentrale'], how='left')

# Save branch-level 2025 actual data for Notebook 19
df_2025_monthly_bz.to_csv('../data/processed/2025_actual_by_branch.csv', index=False)

print(f'  ✓ Aggregated by branch: {df_2025_monthly_bz.shape}')
print(f'  ✓ Betriebszentralen: {df_2025_monthly_bz["betriebszentrale"].nunique()} branches')
print(f'  ✓ Saved: data/processed/2025_actual_by_branch.csv')
print(f'
  Branch-level sample (first 5 rows):')
display(df_2025_monthly_bz[['date', 'betriebszentrale', 'total_orders', 'revenue_total']].head(5))




3. Monthly Aggregation...


  ✓ Aggregated to monthly level: (9, 7)

  Monthly aggregated data (first 3 months):


Unnamed: 0,date,total_orders,total_km,total_drivers,revenue_total,external_drivers
0,2025-01-01,141389,1551551551551551551291291291291291011011011011...,114,13711460.0,29303
1,2025-02-01,135100,5757112727241111303030302727272424272727161618...,109,12916930.0,27504
2,2025-03-01,149176,7674637074687359625850456362584574747070767676...,136,13956520.0,31173



4. Branch-Level Aggregation (by Betriebszentrale)...


  ✓ Aggregated by branch: (108, 8)
  ✓ Betriebszentralen: 12 branches
  ✓ Saved: data/processed/2025_actual_by_branch.csv

  Branch-level sample (first 5 rows):


Unnamed: 0,date,betriebszentrale,total_orders,revenue_total
0,2025-01-01,B&T Landquart,368,70859.6
1,2025-01-01,B&T Puidoux,2387,483141.1
2,2025-01-01,B&T Winterthur,8435,793004.4
3,2025-01-01,BZ Herzogenbuchsee,8043,3760446.0
4,2025-01-01,BZ Intermodal / Rail,364,138176.1


## Section 3: Plausibility Check - Compare 2025 vs 2024 Same Months

Ensure 2025 data is comparable to 2024 (within ±20%)

In [6]:
# Load 2024 processed data for comparison
print('Loading 2024 data for plausibility check...')
df_2024_monthly = pd.read_csv('../data/processed/monthly_aggregated_full_company.csv')
df_2024_monthly['date'] = pd.to_datetime(df_2024_monthly['date'])
df_2024_monthly['year'] = df_2024_monthly['date'].dt.year
df_2024_monthly['month'] = df_2024_monthly['date'].dt.month

# Filter to 2024 data only
df_2024_comparison = df_2024_monthly[df_2024_monthly['year'] == 2024].copy()
print(f'  2024 data: {len(df_2024_comparison)} months')

# Prepare 2025 data for comparison
df_2025_comparison = df_2025_monthly.copy()
df_2025_comparison['year'] = df_2025_comparison['date'].dt.year
df_2025_comparison['month'] = df_2025_comparison['date'].dt.month
print(f'  2025 data: {len(df_2025_comparison)} months')

Loading 2024 data for plausibility check...
  2024 data: 12 months
  2025 data: 9 months


In [7]:
# Compare 2025 vs 2024 same months
print('
Plausibility Check: 2025 vs 2024 Same Months')
print('='*80)

# Metrics to compare
metrics = ['total_orders', 'revenue_total']

# Merge on month for comparison
comparison = df_2025_comparison[['month', 'total_orders', 'revenue_total']].merge(
    df_2024_comparison[['month', 'total_orders', 'revenue_total']],
    on='month',
    suffixes=('_2025', '_2024'),
    how='inner'
)

# Calculate percentage changes
for metric in metrics:
    comparison[f'{metric}_change_%'] = ((comparison[f'{metric}_2025'] - comparison[f'{metric}_2024']) / comparison[f'{metric}_2024'] * 100)

# Add validation status
def validate_change(change):
    if -10 <= change <= 10:
        return '✓ Green (Normal)'
    elif -20 <= change < -10 or 10 < change <= 20:
        return '⚠️  Yellow (Significant but plausible)'
    else:
        return '❌ Red (Investigate data quality)'

# Display comparison
month_names = {1: 'Jan', 2: 'Feb', 3: 'Mar', 4: 'Apr', 5: 'May', 6: 'Jun', 7: 'Jul', 8: 'Aug', 9: 'Sep'}
comparison['month_name'] = comparison['month'].map(month_names)

print('
Total Orders Comparison:')
for _, row in comparison.iterrows():
    change = row['total_orders_change_%']
    status = validate_change(change)
    print(f"  {row['month_name']}: {row['total_orders_2025']:,.0f} vs {row['total_orders_2024']:,.0f} ({change:+.1f}%) {status}")

print('
Revenue Total Comparison:')
for _, row in comparison.iterrows():
    change = row['revenue_total_change_%']
    status = validate_change(change)
    print(f"  {row['month_name']}: CHF {row['revenue_total_2025']:,.0f} vs CHF {row['revenue_total_2024']:,.0f} ({change:+.1f}%) {status}")

# Summary
print('
Plausibility Summary:')
for metric in metrics:
    col = f'{metric}_change_%'
    avg_change = comparison[col].mean()
    max_change = comparison[col].max()
    min_change = comparison[col].min()
    print(f'  {metric}:')
    print(f'    Average change: {avg_change:+.1f}%')
    print(f'    Range: {min_change:+.1f}% to {max_change:+.1f}%')
    # Check if any month is in red zone
    red_months = comparison[comparison[col].abs() > 20]
    if len(red_months) > 0:
        print(f'    ⚠️  {len(red_months)} month(s) in RED zone - investigate!')
    else:
        print(f'    ✓ All months within acceptable range')

print('='*80)


Plausibility Check: 2025 vs 2024 Same Months

Total Orders Comparison:
  Jan: 141,389 vs 132,440 (+6.8%) ✓ Green (Normal)
  Feb: 135,100 vs 131,269 (+2.9%) ✓ Green (Normal)
  Mar: 149,176 vs 134,670 (+10.8%) ⚠️  Yellow (Significant but plausible)
  Apr: 143,159 vs 135,502 (+5.7%) ✓ Green (Normal)
  May: 143,243 vs 137,209 (+4.4%) ✓ Green (Normal)
  Jun: 135,643 vs 130,283 (+4.1%) ✓ Green (Normal)
  Jul: 149,569 vs 145,610 (+2.7%) ✓ Green (Normal)
  Aug: 135,650 vs 139,036 (-2.4%) ✓ Green (Normal)
  Sep: 146,252 vs 140,808 (+3.9%) ✓ Green (Normal)

Revenue Total Comparison:
  Jan: CHF 13,711,456 vs CHF 12,968,295 (+5.7%) ✓ Green (Normal)
  Feb: CHF 12,916,931 vs CHF 12,790,931 (+1.0%) ✓ Green (Normal)
  Mar: CHF 13,956,517 vs CHF 13,140,099 (+6.2%) ✓ Green (Normal)
  Apr: CHF 14,252,342 vs CHF 13,675,285 (+4.2%) ✓ Green (Normal)
  May: CHF 13,803,539 vs CHF 13,969,124 (-1.2%) ✓ Green (Normal)
  Jun: CHF 12,756,544 vs CHF 12,064,081 (+5.7%) ✓ Green (Normal)
  Jul: CHF 14,172,580 vs CHF 

## Section 4: Calculate Human Baseline Forecast

Traditional budgeting method: 2024 annual total ÷ 12 months

In [8]:
print('Calculating Human/Traditional Forecast (2024 total ÷ 12) - ALL 10 METRICS...')
print('='*80)

# Define all 10 target metrics
target_metrics = [
    'total_orders',
    'total_km_billed',
    'total_km_actual',
    'total_tours',
    'total_drivers',
    'revenue_total',
    'external_drivers',
    'vehicle_km_cost',
    'vehicle_time_cost',
    'total_vehicle_cost'
]

# Calculate 2024 annual totals for all metrics
print(f'\n2024 Annual Totals:')
human_forecast_values = {}

for metric in target_metrics:
    if metric in df_2024_comparison.columns:
        total_2024 = df_2024_comparison[metric].sum()
        monthly_avg = total_2024 / 12
        human_forecast_values[metric] = monthly_avg

        if 'cost' in metric or 'revenue' in metric:
            print(f'  {metric:25s}: CHF {total_2024:,.2f} → CHF {monthly_avg:,.2f}/month')
        else:
            print(f'  {metric:25s}: {total_2024:,.0f} → {monthly_avg:,.0f}/month')
    else:
        print(f'  ⚠️  {metric}: Not available in 2024 data')
        human_forecast_values[metric] = 0

# Create human forecast dataframe for Jan-Sep 2025
dates_2025 = pd.date_range('2025-01-01', '2025-09-01', freq='MS')
df_human = pd.DataFrame({'date': dates_2025})

for metric in target_metrics:
    df_human[metric] = human_forecast_values[metric]

df_human['method'] = 'Human (2024÷12)'

print(f'\n✓ Human forecast created for {len(df_human)} months × {len(target_metrics)} metrics')
\n

Calculating Human/Traditional Forecast (2024 total ÷ 12)...

2024 Annual Totals:
  Total Orders: 1,641,250
  Revenue Total: CHF 157,996,583.14

Human Forecast (Monthly Average):
  Total Orders: 136,771 orders/month
  Revenue Total: CHF 13,166,381.93/month

✓ Human forecast created for 9 months (Jan-Sep 2025)


## Section 5: Load Machine/ML Forecasts

In [9]:
print('Loading Machine/ML Forecasts (ALL 10 METRICS)...')
print('='*80)

# Load consolidated forecasts (best model per metric - now includes all 10 metrics)
df_ml_full = pd.read_csv('../data/processed/consolidated_forecast_2025.csv')
df_ml_full['date'] = pd.to_datetime(df_ml_full['date'])

# Filter to Jan-Sep 2025 for comparison
df_ml = df_ml_full[(df_ml_full['date'] >= '2025-01-01') & (df_ml_full['date'] <= '2025-09-01')].copy()
df_ml['method'] = 'Machine (ML Models)'

print(f'\n✓ ML forecasts loaded: {len(df_ml)} months × {len([c for c in df_ml.columns if c not in ["date", "method"]])} metrics')
print(f'  Metrics available: {[c for c in df_ml.columns if c not in ["date", "method"]]}')

# Display first few months
print(f'\nML Forecast Sample (first 3 months, key metrics):')
display_cols = ['date', 'total_orders', 'revenue_total', 'total_tours', 'total_vehicle_cost']
display_cols = [c for c in display_cols if c in df_ml.columns]
display(df_ml[display_cols].head(3))
\n

Loading Machine/ML Forecasts...

✓ ML forecasts loaded: 9 months
  Metrics available: ['date', 'total_orders', 'total_km_billed', 'total_km_actual', 'total_tours', 'total_drivers', 'revenue_total', 'external_drivers', 'vehicle_km_cost', 'vehicle_time_cost', 'total_vehicle_cost', 'method']

ML Forecast Sample (first 3 months):


Unnamed: 0,date,total_orders,revenue_total
0,2025-01-01,131959.666667,12120520.0
1,2025-02-01,130556.333333,12139450.0
2,2025-03-01,147637.666667,14367050.0


## Section 6: Calculate Error Metrics

Compare both methods against actual 2025 data

In [10]:
# Prepare actual data
df_actual = df_2025_monthly[['date', 'total_orders', 'revenue_total']].copy()
df_actual['method'] = 'Actual'

# Merge all three datasets
df_comparison = pd.concat([
    df_actual.assign(source='Actual'),
    df_human[['date', 'total_orders', 'revenue_total']].assign(source='Human'),
    df_ml[['date', 'total_orders', 'revenue_total']].assign(source='Machine')
], ignore_index=True)

# Pivot for easier comparison
metrics = ['total_orders', 'revenue_total']
results = {}

for metric in metrics:
    pivot = df_comparison.pivot(index='date', columns='source', values=metric)
    # Calculate errors
    pivot['human_error'] = pivot['Human'] - pivot['Actual']
    pivot['machine_error'] = pivot['Machine'] - pivot['Actual']
    pivot['human_error_%'] = (pivot['human_error'] / pivot['Actual'] * 100)
    pivot['machine_error_%'] = (pivot['machine_error'] / pivot['Actual'] * 100)
    pivot['human_abs_error'] = pivot['human_error'].abs()
    pivot['machine_abs_error'] = pivot['machine_error'].abs()
    results[metric] = pivot

print('Error Metrics Calculated')
print('='*80)

Error Metrics Calculated


In [11]:
# Calculate summary statistics
print('\nSummary Statistics: TOTAL ORDERS')
print('-'*80)

orders_results = results['total_orders']

# MAPE (Mean Absolute Percentage Error)
human_mape = orders_results['human_error_%'].abs().mean()
machine_mape = orders_results['machine_error_%'].abs().mean()

# MAE (Mean Absolute Error)
human_mae = orders_results['human_abs_error'].mean()
machine_mae = orders_results['machine_abs_error'].mean()

# Cumulative Error
human_cumulative = orders_results['human_error'].sum()
machine_cumulative = orders_results['machine_error'].sum()

print(f'\nHuman Method (2024÷12):')
print(f'  MAPE: {human_mape:.2f}%')
print(f'  MAE: {human_mae:,.0f} orders')
print(f'  Cumulative Error (Jan-Sep): {human_cumulative:,.0f} orders')

print(f'\nMachine Method (ML Models):')
print(f'  MAPE: {machine_mape:.2f}%')
print(f'  MAE: {machine_mae:,.0f} orders')
print(f'  Cumulative Error (Jan-Sep): {machine_cumulative:,.0f} orders')

improvement_mape = ((human_mape - machine_mape) / human_mape * 100)
print(f'\n✓ ML Improvement over Human: {improvement_mape:.1f}% reduction in MAPE')

print('\n' + '='*80)
print('Summary Statistics: REVENUE TOTAL')
print('-'*80)

revenue_results = results['revenue_total']

# MAPE
human_mape_rev = revenue_results['human_error_%'].abs().mean()
machine_mape_rev = revenue_results['machine_error_%'].abs().mean()

# MAE
human_mae_rev = revenue_results['human_abs_error'].mean()
machine_mae_rev = revenue_results['machine_abs_error'].mean()

# Cumulative Error
human_cumulative_rev = revenue_results['human_error'].sum()
machine_cumulative_rev = revenue_results['machine_error'].sum()

print(f'\nHuman Method (2024÷12):')
print(f'  MAPE: {human_mape_rev:.2f}%')
print(f'  MAE: CHF {human_mae_rev:,.0f}')
print(f'  Cumulative Error (Jan-Sep): CHF {human_cumulative_rev:,.0f}')

print(f'\nMachine Method (ML Models):')
print(f'  MAPE: {machine_mape_rev:.2f}%')
print(f'  MAE: CHF {machine_mae_rev:,.0f}')
print(f'  Cumulative Error (Jan-Sep): CHF {machine_cumulative_rev:,.0f}')

improvement_mape_rev = ((human_mape_rev - machine_mape_rev) / human_mape_rev * 100)
print(f'\n✓ ML Improvement over Human: {improvement_mape_rev:.1f}% reduction in MAPE')

print('='*80)


Summary Statistics: TOTAL ORDERS
--------------------------------------------------------------------------------

Human Method (2024÷12):
  MAPE: 4.28%
  MAE: 6,231 orders
  Cumulative Error (Jan-Sep): -48,243 orders

Machine Method (ML Models):
  MAPE: 4.04%
  MAE: 5,770 orders
  Cumulative Error (Jan-Sep): -45,464 orders

✓ ML Improvement over Human: 5.7% reduction in MAPE

Summary Statistics: REVENUE TOTAL
--------------------------------------------------------------------------------

Human Method (2024÷12):
  MAPE: 5.39%
  MAE: CHF 732,682
  Cumulative Error (Jan-Sep): CHF -3,311,197

Machine Method (ML Models):
  MAPE: 5.82%
  MAE: CHF 793,530
  Cumulative Error (Jan-Sep): CHF -4,892,517

✓ ML Improvement over Human: -8.0% reduction in MAPE


## Section 7: Create Visualizations

**The Most Crucial Visualization**: Human Error vs Machine Error

In [12]:
# Visualization 1: Monthly Comparison - Total Orders
fig1 = go.Figure()

# Actual data
fig1.add_trace(go.Scatter(
    x=orders_results.index,
    y=orders_results['Actual'],
    mode='lines+markers',
    name='Actual 2025',
    line=dict(color='black', width=3),
    marker=dict(size=10)
))

# Human forecast
fig1.add_trace(go.Scatter(
    x=orders_results.index,
    y=orders_results['Human'],
    mode='lines+markers',
    name='Human (2024÷12)',
    line=dict(color='#FF6B6B', width=2, dash='dash'),
    marker=dict(size=8)
))

# Machine forecast
fig1.add_trace(go.Scatter(
    x=orders_results.index,
    y=orders_results['Machine'],
    mode='lines+markers',
    name='Machine (ML)',
    line=dict(color='#4ECDC4', width=2, dash='dot'),
    marker=dict(size=8)
))

fig1.update_layout(
    title='Total Orders: Actual vs Human vs Machine Forecasts (Jan-Sep 2025)',
    xaxis_title='Month',
    yaxis_title='Total Orders',
    hovermode='x unified',
    height=500,
    template='plotly_white'
)

fig1.show()

# Save
fig1.write_html('../results/forecast_validation_orders_comparison.html')
print('✓ Saved: results/forecast_validation_orders_comparison.html')

✓ Saved: results/forecast_validation_orders_comparison.html


In [13]:
# Visualization 2: Monthly Comparison - Revenue Total
fig2 = go.Figure()

# Actual data
fig2.add_trace(go.Scatter(
    x=revenue_results.index,
    y=revenue_results['Actual'],
    mode='lines+markers',
    name='Actual 2025',
    line=dict(color='black', width=3),
    marker=dict(size=10)
))

# Human forecast
fig2.add_trace(go.Scatter(
    x=revenue_results.index,
    y=revenue_results['Human'],
    mode='lines+markers',
    name='Human (2024÷12)',
    line=dict(color='#FF6B6B', width=2, dash='dash'),
    marker=dict(size=8)
))

# Machine forecast
fig2.add_trace(go.Scatter(
    x=revenue_results.index,
    y=revenue_results['Machine'],
    mode='lines+markers',
    name='Machine (ML)',
    line=dict(color='#4ECDC4', width=2, dash='dot'),
    marker=dict(size=8)
))

fig2.update_layout(
    title='Revenue Total: Actual vs Human vs Machine Forecasts (Jan-Sep 2025)',
    xaxis_title='Month',
    yaxis_title='Revenue (CHF)',
    hovermode='x unified',
    height=500,
    template='plotly_white'
)

fig2.show()

# Save
fig2.write_html('../results/forecast_validation_revenue_comparison.html')
print('✓ Saved: results/forecast_validation_revenue_comparison.html')

✓ Saved: results/forecast_validation_revenue_comparison.html


In [14]:
# Visualization 3: THE CRUCIAL ONE - Error Comparison Side-by-Side
fig3 = make_subplots(
    rows=2, cols=2,
    subplot_titles=[
        'Total Orders: Human Error (%)',
        'Total Orders: Machine Error (%)',
        'Revenue Total: Human Error (%)',
        'Revenue Total: Machine Error (%)'
    ],
    vertical_spacing=0.15,
    horizontal_spacing=0.12
)

# Orders - Human Error
fig3.add_trace(go.Bar(
    x=orders_results.index,
    y=orders_results['human_error_%'],
    marker_color='#FF6B6B',
    name='Human Error',
    showlegend=False
), row=1, col=1)

# Orders - Machine Error
fig3.add_trace(go.Bar(
    x=orders_results.index,
    y=orders_results['machine_error_%'],
    marker_color='#4ECDC4',
    name='Machine Error',
    showlegend=False
), row=1, col=2)

# Revenue - Human Error
fig3.add_trace(go.Bar(
    x=revenue_results.index,
    y=revenue_results['human_error_%'],
    marker_color='#FF6B6B',
    name='Human Error',
    showlegend=False
), row=2, col=1)

# Revenue - Machine Error
fig3.add_trace(go.Bar(
    x=revenue_results.index,
    y=revenue_results['machine_error_%'],
    marker_color='#4ECDC4',
    name='Machine Error',
    showlegend=False
), row=2, col=2)

# Add zero line to all subplots
for row in [1, 2]:
    for col in [1, 2]:
        fig3.add_hline(y=0, line_dash='dash', line_color='gray', row=row, col=col)

fig3.update_layout(
    title_text='<b>Human vs Machine Forecast Error Comparison (Jan-Sep 2025)</b>',
    title_font_size=18,
    height=800,
    template='plotly_white'
)

# Update y-axes labels
fig3.update_yaxes(title_text='Error (%)', row=1, col=1)
fig3.update_yaxes(title_text='Error (%)', row=1, col=2)
fig3.update_yaxes(title_text='Error (%)', row=2, col=1)
fig3.update_yaxes(title_text='Error (%)', row=2, col=2)

fig3.show()

# Save
fig3.write_html('../results/forecast_validation_error_comparison.html')
print('✓ Saved: results/forecast_validation_error_comparison.html')
print('\n🎯 THIS IS THE MOST CRUCIAL VISUALIZATION OF THE WHOLE PROJECT!')

✓ Saved: results/forecast_validation_error_comparison.html

🎯 THIS IS THE MOST CRUCIAL VISUALIZATION OF THE WHOLE PROJECT!


In [15]:
# Visualization 4: Cumulative Error Over Time
fig4 = make_subplots(
    rows=1, cols=2,
    subplot_titles=['Total Orders: Cumulative Error', 'Revenue Total: Cumulative Error']
)

# Orders cumulative error
fig4.add_trace(go.Scatter(
    x=orders_results.index,
    y=orders_results['human_error'].cumsum(),
    mode='lines+markers',
    name='Human',
    line=dict(color='#FF6B6B', width=2)
), row=1, col=1)

fig4.add_trace(go.Scatter(
    x=orders_results.index,
    y=orders_results['machine_error'].cumsum(),
    mode='lines+markers',
    name='Machine',
    line=dict(color='#4ECDC4', width=2)
), row=1, col=1)

# Revenue cumulative error
fig4.add_trace(go.Scatter(
    x=revenue_results.index,
    y=revenue_results['human_error'].cumsum(),
    mode='lines+markers',
    name='Human',
    line=dict(color='#FF6B6B', width=2),
    showlegend=False
), row=1, col=2)

fig4.add_trace(go.Scatter(
    x=revenue_results.index,
    y=revenue_results['machine_error'].cumsum(),
    mode='lines+markers',
    name='Machine',
    line=dict(color='#4ECDC4', width=2),
    showlegend=False
), row=1, col=2)

# Add zero lines
fig4.add_hline(y=0, line_dash='dash', line_color='gray', row=1, col=1)
fig4.add_hline(y=0, line_dash='dash', line_color='gray', row=1, col=2)

fig4.update_layout(
    title='Cumulative Forecast Error (Jan-Sep 2025)',
    height=500,
    template='plotly_white'
)

fig4.update_yaxes(title_text='Cumulative Error (Orders)', row=1, col=1)
fig4.update_yaxes(title_text='Cumulative Error (CHF)', row=1, col=2)

fig4.show()

# Save
fig4.write_html('../results/forecast_validation_cumulative_error.html')
print('✓ Saved: results/forecast_validation_cumulative_error.html')

✓ Saved: results/forecast_validation_cumulative_error.html


In [16]:
# Visualization 5: Error Distribution Box Plot
fig5 = go.Figure()

# Orders
fig5.add_trace(go.Box(
    y=orders_results['human_error_%'],
    name='Human (Orders)',
    marker_color='#FF6B6B',
    boxmean='sd'
))

fig5.add_trace(go.Box(
    y=orders_results['machine_error_%'],
    name='Machine (Orders)',
    marker_color='#4ECDC4',
    boxmean='sd'
))

# Revenue
fig5.add_trace(go.Box(
    y=revenue_results['human_error_%'],
    name='Human (Revenue)',
    marker_color='#FFB6C1',
    boxmean='sd'
))

fig5.add_trace(go.Box(
    y=revenue_results['machine_error_%'],
    name='Machine (Revenue)',
    marker_color='#95E1D3',
    boxmean='sd'
))

fig5.add_hline(y=0, line_dash='dash', line_color='gray')

fig5.update_layout(
    title='Forecast Error Distribution (Jan-Sep 2025)',
    yaxis_title='Error (%)',
    height=500,
    template='plotly_white'
)

fig5.show()

# Save
fig5.write_html('../results/forecast_validation_error_distribution.html')
print('✓ Saved: results/forecast_validation_error_distribution.html')

✓ Saved: results/forecast_validation_error_distribution.html


## Section 8: Executive Summary Table

In [17]:
# Create executive summary table
summary_data = {
    'Metric': ['Total Orders', 'Total Orders', 'Revenue Total', 'Revenue Total'],
    'Method': ['Human (2024÷12)', 'Machine (ML)', 'Human (2024÷12)', 'Machine (ML)'],
    'MAPE (%)': [human_mape, machine_mape, human_mape_rev, machine_mape_rev],
    'MAE': [human_mae, machine_mae, human_mae_rev, machine_mae_rev],
    'Cumulative Error': [human_cumulative, machine_cumulative, human_cumulative_rev, machine_cumulative_rev]
}

df_summary = pd.DataFrame(summary_data)

print('\nExecutive Summary: Forecast Validation Results (Jan-Sep 2025)')
print('='*80)
display(df_summary)

# Save to CSV
df_summary.to_csv('../results/forecast_validation_summary.csv', index=False)
print('\n✓ Saved: results/forecast_validation_summary.csv')


Executive Summary: Forecast Validation Results (Jan-Sep 2025)


Unnamed: 0,Metric,Method,MAPE (%),MAE,Cumulative Error
0,Total Orders,Human (2024÷12),4.277402,6231.388889,-48243.5
1,Total Orders,Machine (ML),4.03503,5769.888889,-45464.33
2,Revenue Total,Human (2024÷12),5.388458,732682.422865,-3311197.0
3,Revenue Total,Machine (ML),5.818298,793530.318975,-4892517.0



✓ Saved: results/forecast_validation_summary.csv


## Section 9: Insights and Recommendations

In [18]:
print('\n' + '='*80)
print('FORECAST VALIDATION INSIGHTS')
print('='*80)

print('\n1. ACCURACY COMPARISON:')
print(f'   Total Orders:')
print(f'     - Human MAPE: {human_mape:.2f}%')
print(f'     - Machine MAPE: {machine_mape:.2f}%')
print(f'     - Improvement: {improvement_mape:.1f}%')
print(f'\n   Revenue Total:')
print(f'     - Human MAPE: {human_mape_rev:.2f}%')
print(f'     - Machine MAPE: {machine_mape_rev:.2f}%')
print(f'     - Improvement: {improvement_mape_rev:.1f}%')

print('\n2. CUMULATIVE IMPACT (Jan-Sep 2025):')
print(f'   Total Orders:')
print(f'     - Human cumulative error: {human_cumulative:,.0f} orders')
print(f'     - Machine cumulative error: {machine_cumulative:,.0f} orders')
print(f'     - Difference: {abs(human_cumulative - machine_cumulative):,.0f} orders')
print(f'\n   Revenue Total:')
print(f'     - Human cumulative error: CHF {human_cumulative_rev:,.0f}')
print(f'     - Machine cumulative error: CHF {machine_cumulative_rev:,.0f}')
print(f'     - Difference: CHF {abs(human_cumulative_rev - machine_cumulative_rev):,.0f}')

print('\n3. KEY FINDINGS:')
if machine_mape < human_mape:
    print(f'   ✓ ML models outperform traditional method by {improvement_mape:.1f}% for orders')
else:
    print(f'   ⚠️  Traditional method performs better for orders by {-improvement_mape:.1f}%')

if machine_mape_rev < human_mape_rev:
    print(f'   ✓ ML models outperform traditional method by {improvement_mape_rev:.1f}% for revenue')
else:
    print(f'   ⚠️  Traditional method performs better for revenue by {-improvement_mape_rev:.1f}%')

print('\n4. RECOMMENDATIONS:')
if machine_mape < human_mape and machine_mape_rev < human_mape_rev:
    print('   ✓ ADOPT ML forecasting for both orders and revenue planning')
    print('   ✓ Replace 2024÷12 method with ML-based forecasts')
    print('   ✓ Monitor monthly actuals vs forecasts to track ongoing accuracy')
elif machine_mape < human_mape or machine_mape_rev < human_mape_rev:
    print('   ⚠️  Consider hybrid approach: ML for some metrics, traditional for others')
    print('   ⚠️  Continue validation with additional months of data')
else:
    print('   ⚠️  Further model refinement needed before deployment')
    print('   ⚠️  Investigate why ML models underperform')

print('\n' + '='*80)
print('VALIDATION COMPLETE')
print('='*80)


FORECAST VALIDATION INSIGHTS

1. ACCURACY COMPARISON:
   Total Orders:
     - Human MAPE: 4.28%
     - Machine MAPE: 4.04%
     - Improvement: 5.7%

   Revenue Total:
     - Human MAPE: 5.39%
     - Machine MAPE: 5.82%
     - Improvement: -8.0%

2. CUMULATIVE IMPACT (Jan-Sep 2025):
   Total Orders:
     - Human cumulative error: -48,243 orders
     - Machine cumulative error: -45,464 orders
     - Difference: 2,779 orders

   Revenue Total:
     - Human cumulative error: CHF -3,311,197
     - Machine cumulative error: CHF -4,892,517
     - Difference: CHF 1,581,320

3. KEY FINDINGS:
   ✓ ML models outperform traditional method by 5.7% for orders
   ⚠️  Traditional method performs better for revenue by 8.0%

4. RECOMMENDATIONS:
   ⚠️  Consider hybrid approach: ML for some metrics, traditional for others
   ⚠️  Continue validation with additional months of data

VALIDATION COMPLETE


In [19]:
# Save detailed monthly comparison
monthly_comparison = pd.DataFrame({
    'date': orders_results.index,
    'orders_actual': orders_results['Actual'],
    'orders_human': orders_results['Human'],
    'orders_machine': orders_results['Machine'],
    'orders_human_error_%': orders_results['human_error_%'],
    'orders_machine_error_%': orders_results['machine_error_%'],
    'revenue_actual': revenue_results['Actual'],
    'revenue_human': revenue_results['Human'],
    'revenue_machine': revenue_results['Machine'],
    'revenue_human_error_%': revenue_results['human_error_%'],
    'revenue_machine_error_%': revenue_results['machine_error_%']
})

monthly_comparison.to_csv('../results/forecast_validation_monthly_detail.csv', index=False)
print('\n✓ Saved detailed monthly comparison: results/forecast_validation_monthly_detail.csv')

print('\n' + '='*80)
print('ALL OUTPUTS SAVED TO: /Users/kk/dev/customer_traveco/results/')
print('='*80)
print('Files created:')
print('  1. forecast_validation_orders_comparison.html (interactive)')
print('  2. forecast_validation_revenue_comparison.html (interactive)')
print('  3. forecast_validation_error_comparison.html (MOST CRUCIAL)')
print('  4. forecast_validation_cumulative_error.html (interactive)')
print('  5. forecast_validation_error_distribution.html (interactive)')
print('  6. forecast_validation_summary.csv (executive summary)')
print('  7. forecast_validation_monthly_detail.csv (detailed data)')
print('='*80)


✓ Saved detailed monthly comparison: results/forecast_validation_monthly_detail.csv

ALL OUTPUTS SAVED TO: /Users/kk/dev/customer_traveco/results/
Files created:
  1. forecast_validation_orders_comparison.html (interactive)
  2. forecast_validation_revenue_comparison.html (interactive)
  3. forecast_validation_error_comparison.html (MOST CRUCIAL)
  4. forecast_validation_cumulative_error.html (interactive)
  5. forecast_validation_error_distribution.html (interactive)
  6. forecast_validation_summary.csv (executive summary)
  7. forecast_validation_monthly_detail.csv (detailed data)
