<a href="https://colab.research.google.com/github/john-d-noble/callcenter/blob/main/FRESH_START.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Why Market Data Could Help:**

- VIX spikes → Customer anxiety → More service calls
- Stock crashes → Portfolio concerns → Support calls surge
- Crypto volatility → Trading platform issues → Call spikes
- Dollar movements → International customer impacts
- Gold rallies → Economic uncertainty → Increased activity

**Ideas for tomorrow:** add a toggle to switch between weekly and daily views.

In [None]:
# Shell out to nvidia-smi to show which GPU (if any) this runtime exposes
!nvidia-smi

In [None]:
import torch

# Select the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU
if not torch.cuda.is_available():
    device = torch.device("cpu")
    print("Using CPU")
else:
    device = torch.device("cuda")
    print("Using GPU:", torch.cuda.get_device_name(0))

# Example: create a random 10x10 tensor and place it on the selected device
x = torch.randn(10, 10).to(device)

# Example: Move a model to the GPU
# model = YourModel().to(device)

In [None]:
"""
Cell 1: Import all required libraries and configure GPU
"""

!pip install catboost optuna

import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

# Core libraries
from datetime import datetime, timedelta
import json
import pickle
from typing import Dict, List, Tuple, Optional
import holidays

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('seaborn-v0_8-darkgrid')

# ML Libraries
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
from sklearn.model_selection import TimeSeriesSplit

# Gradient Boosting
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostRegressor

# Deep Learning
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam

# Prophet
from prophet import Prophet

# Hyperparameter tuning
from sklearn.model_selection import RandomizedSearchCV
import optuna

# Configure GPU: enable on-demand memory growth on every GPU TensorFlow can see
gpus = tf.config.experimental.list_physical_devices('GPU')
if not gpus:
    print("⚠️ No GPU detected, using CPU")
else:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Reported rather than re-raised so the rest of the cell still runs
        print(f"⚠️ GPU initialization error: {e}")
    else:
        print(f"✅ GPU Available: {len(gpus)} GPU(s) detected")

# Fix the NumPy and TensorFlow random seeds
np.random.seed(42)
tf.random.set_seed(42)

print("✅ All libraries imported successfully!")

In [None]:
# Call Center Volume Forecasting - Fresh Start Approach
# This notebook provides a systematic approach to forecasting call center volumes

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# Statistical and time series
from scipy import stats
from scipy.stats import jarque_bera, shapiro
from scipy.signal import find_peaks
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller, acf, pacf
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.stats.diagnostic import acorr_ljungbox

# Prophet
try:
    from prophet import Prophet
    PROPHET_AVAILABLE = True
except ImportError:
    print("Prophet not available - install with: pip install prophet")
    PROPHET_AVAILABLE = False

# ML models
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.preprocessing import StandardScaler

# Global matplotlib / seaborn styling
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
plt.rcParams.update({'figure.figsize': (12, 6)})

print("📞 Call Center Forecasting Notebook - Ready to Roll!", "=" * 50, sep="\n")

# ============================================================================
# CELL 1: REGIME CHANGE IMPOSSIBILITY PROOF - Independent Validation
# ============================================================================

# Section banner announcing the objective and hypothesis of the analysis below
print(
    "\n🔬 STEP 1: VALIDATING FORECAST IMPOSSIBILITY",
    "=" * 60,
    "📊 Objective: Prove that frequent regime changes make forecasting impossible",
    "🎯 Hypothesis: '37 changes in 36 days = impossible forecasting'",
    "=" * 60,
    sep="\n",
)

def load_data_for_impossibility_analysis():
    """Load the call-volume series and merge in any market-data CSVs found locally.

    Reads ``enhanced_eda_data.csv`` (indexed by its ``Date`` column), picks the
    call-volume column (first match from a list of common names, otherwise the
    first numeric column, otherwise the first column), renames it to ``calls``,
    sorts by date and drops the first and last rows.

    For each ticker in the built-in ticker map it then tries a few candidate
    file names in the working directory. Every market frame that loads is
    sorted by date, de-duplicated on its index, trimmed the same way, reduced
    to the expected columns, and forward-filled onto the call-data dates.

    Returns
    -------
    pandas.DataFrame or None
        Frame indexed by date with a ``calls`` column followed by any market
        columns that were found. ``None`` if anything in the load fails; the
        error is printed rather than raised.
    """
    try:
        print("📁 Loading call center data...")
        df = pd.read_csv('enhanced_eda_data.csv', index_col='Date', parse_dates=True)

        # Auto-detect volume column: first known name wins
        volume_cols = ['calls', 'Calls', 'call_volume', 'Call_Volume', 'volume', 'Volume']
        volume_col = next((col for col in volume_cols if col in df.columns), None)

        if volume_col is None:
            # Fall back to the first numeric column, or the first column of any dtype
            numeric_cols = df.select_dtypes(include=[np.number]).columns
            volume_col = numeric_cols[0] if len(numeric_cols) > 0 else df.columns[0]

        df = df.rename(columns={volume_col: 'calls'})
        df = df[['calls']].sort_index()

        # TRIM FIRST AND LAST ROWS (bad call volume data)
        print("🧹 Trimming first and last rows (bad call volume data)...")
        if len(df) > 2:
            df = df.iloc[1:-1]  # Remove first and last rows
            print(f"   Trimmed to {len(df)} rows")

        print(f"✅ Loaded {len(df)} days of call data ({df.index.min()} to {df.index.max()})")

        # LOAD MARKET DATA
        print("\n📈 Loading market data...")
        market_tickers = {
            '^VIX': ['^VIX_close'],
            'SPY': ['SPY_close', 'SPY_volume'],
            'QQQ': ['QQQ_close', 'QQQ_volume'],
            'DX-Y.NYB': ['DX-Y.NYB_close'],
            'GC=F': ['GC=F_close', 'GC=F_volume'],
            'BTC-USD': ['BTC-USD_close', 'BTC-USD_volume'],
            'ETH-USD': ['ETH-USD_close', 'ETH-USD_volume']
        }

        market_data = {}

        for ticker, columns in market_tickers.items():
            try:
                # Try different possible filenames
                safe_ticker = ticker.replace("^", "").replace("=", "_").replace("-", "_")
                possible_files = [
                    f'{ticker}_data.csv',
                    f'{safe_ticker}_data.csv',
                    f'market_data_{safe_ticker}.csv'
                ]

                ticker_df = None
                for filename in possible_files:
                    try:
                        ticker_df = pd.read_csv(filename, index_col='Date', parse_dates=True)
                        print(f"   ✅ Loaded {ticker} from {filename}")
                        break
                    except FileNotFoundError:
                        continue

                if ticker_df is not None:
                    # reindex(..., method='ffill') below needs a monotonic, unique
                    # index. Without this, one unsorted market file raises a
                    # ValueError that the outer handler turns into "return None".
                    ticker_df = ticker_df.sort_index()
                    ticker_df = ticker_df[~ticker_df.index.duplicated(keep='last')]

                    # Trim first and last rows for market data too
                    if len(ticker_df) > 2:
                        ticker_df = ticker_df.iloc[1:-1]

                    # Select only the columns we want
                    available_cols = [col for col in columns if col in ticker_df.columns]
                    if available_cols:
                        market_data[ticker] = ticker_df[available_cols]
                        print(f"     Columns: {available_cols}")
                    else:
                        print(f"   ⚠️ No matching columns found for {ticker}")
                else:
                    print(f"   ❌ Could not load {ticker} data")

            except Exception as e:
                # Best effort: one bad market file must not abort the whole load
                print(f"   ❌ Error loading {ticker}: {e}")

        # Combine market data with call data
        if market_data:
            print(f"\n🔗 Integrating {len(market_data)} market datasets...")

            # Align every market frame to the call-data dates; forward fill
            # carries the last known value across dates the market file lacks.
            aligned_frames = [
                ticker_df.reindex(df.index, method='ffill')
                for ticker_df in market_data.values()
            ]
            df = pd.concat([df, *aligned_frames], axis=1)

            print(f"   ✅ Combined dataset: {df.shape[1]} columns, {len(df)} rows")
            print(f"   📊 Market columns added: {[col for ticker_df in market_data.values() for col in ticker_df.columns]}")
        else:
            print("   ⚠️ No market data loaded - proceeding with call data only")

        # Basic data quality check
        print(f"\n📊 Final Dataset Overview:")
        print(f"   Date range: {df.index.min()} to {df.index.max()}")
        print(f"   Total days: {len(df)}")
        print(f"   Columns: {list(df.columns)}")
        print(f"   Call volume missing: {df['calls'].isna().sum()}")

        return df

    except Exception as e:
        print(f"❌ Error loading data: {e}")
        return None

def detect_regime_changes_proof(df, method='multiple', window=7):
    """Flag days whose call-volume or market statistics change unusually sharply.

    A day is flagged when any of these signals exceeds its own 95th percentile:

    * absolute day-over-day change of the rolling mean, std or variance of
      ``calls`` (rolling window of ``window`` rows);
    * absolute day-over-day change of the cumulative sum of mean-centred
      ``calls``;
    * if ``^VIX_close`` is present: absolute day-over-day change of the VIX;
    * if ``SPY_close`` / ``BTC-USD_close`` are present: absolute change in the
      rolling std of their daily returns.

    NOTE(review): ``cusum.diff()`` equals ``calls - mean``, so the "CUSUM"
    signal is an absolute-deviation-from-mean test, not a true CUSUM detector.
    NOTE(review): each threshold is a quantile of its own signal, so roughly
    the top 5% of days are flagged per signal regardless of the data.

    Parameters
    ----------
    df : pandas.DataFrame
        Date-indexed frame with a ``calls`` column and optional market columns.
    method : str
        Accepted for interface compatibility; not used by the implementation.
    window : int
        Rolling window length (rows) for the rolling statistics.

    Returns
    -------
    (regime_changes, df_analysis) : tuple of pandas.DataFrame
        ``df_analysis`` is a copy of ``df`` with the intermediate signal columns
        added (plus ``regime_source`` when market data is present).
        ``regime_changes`` holds the rows of ``df_analysis`` that were flagged.
    """

    print(f"\n🔍 DETECTING REGIME CHANGES (Call Center + Market)")
    print("-" * 40)

    df_analysis = df.copy()

    # CALL CENTER REGIME CHANGES: rolling statistics of the call series
    rolling_calls = df_analysis['calls'].rolling(window)
    df_analysis['rolling_mean'] = rolling_calls.mean()
    df_analysis['rolling_std'] = rolling_calls.std()
    df_analysis['rolling_var'] = rolling_calls.var()

    # Absolute day-over-day change of each rolling statistic
    df_analysis['mean_change'] = df_analysis['rolling_mean'].diff().abs()
    df_analysis['std_change'] = df_analysis['rolling_std'].diff().abs()
    df_analysis['var_change'] = df_analysis['rolling_var'].diff().abs()

    # Set thresholds (adaptive based on data): top 5% of changes
    mean_threshold = df_analysis['mean_change'].quantile(0.95)
    std_threshold = df_analysis['std_change'].quantile(0.95)
    var_threshold = df_analysis['var_change'].quantile(0.95)

    # CUSUM Detection for calls
    target = df_analysis['calls'].mean()
    df_analysis['cusum'] = (df_analysis['calls'] - target).cumsum()
    df_analysis['cusum_change'] = df_analysis['cusum'].diff().abs()
    cusum_threshold = df_analysis['cusum_change'].quantile(0.95)

    # MARKET REGIME CHANGES (if market data available)
    market_regime_signals = []

    # VIX regime changes (volatility spikes): change in the index level itself
    if '^VIX_close' in df_analysis.columns:
        vix_changes = df_analysis['^VIX_close'].diff().abs()
        vix_regime = vix_changes > vix_changes.quantile(0.95)
        market_regime_signals.append(vix_regime)
        print(f"   📊 VIX regime changes: {vix_regime.sum()}")

    # Stock and crypto regime changes share one recipe: change in the rolling
    # volatility of daily returns.
    for close_col, label in (('SPY_close', '📈 SPY'), ('BTC-USD_close', '₿ BTC')):
        if close_col not in df_analysis.columns:
            continue
        returns = df_analysis[close_col].pct_change()
        vol_changes = returns.rolling(window).std().diff().abs()
        vol_regime = vol_changes > vol_changes.quantile(0.95)
        market_regime_signals.append(vol_regime)
        print(f"   {label} regime changes: {vol_regime.sum()}")

    # Combine call center and market regime detection
    call_regime_mask = (
        (df_analysis['mean_change'] > mean_threshold) |
        (df_analysis['std_change'] > std_threshold) |
        (df_analysis['var_change'] > var_threshold) |
        (df_analysis['cusum_change'] > cusum_threshold)
    )

    # Add market regime signals
    if market_regime_signals:
        market_regime_mask = pd.concat(market_regime_signals, axis=1).any(axis=1)
        print(f"   🌐 Market-driven regime changes: {market_regime_mask.sum()}")

        # Label every day by which side(s) triggered it
        df_analysis['regime_source'] = 'none'
        df_analysis.loc[call_regime_mask & ~market_regime_mask, 'regime_source'] = 'call_only'
        df_analysis.loc[~call_regime_mask & market_regime_mask, 'regime_source'] = 'market_only'
        df_analysis.loc[call_regime_mask & market_regime_mask, 'regime_source'] = 'both'

        regime_mask = call_regime_mask | market_regime_mask

        # Analyze regime change sources
        source_counts = df_analysis['regime_source'].value_counts()
        print(f"   📊 Regime change attribution:")
        for source, count in source_counts.items():
            if source != 'none':
                print(f"     {source}: {count} changes")
    else:
        regime_mask = call_regime_mask
        print(f"   📊 No market data - using call center changes only")

    regime_mask = regime_mask.fillna(False)
    regime_changes = df_analysis[regime_mask].copy()

    # Guard the per-day average so an empty frame does not raise ZeroDivisionError
    changes_per_day = len(regime_changes) / len(df) if len(df) else 0.0

    print(f"\n📊 DETECTION RESULTS:")
    print(f"   Total regime changes detected: {len(regime_changes)}")
    print(f"   Average changes per day: {changes_per_day:.3f}")

    # Focus on recent period (last 36 rows of the input frame)
    if len(df) >= 36:
        recent_data = df.tail(36)
        recent_changes = regime_changes[regime_changes.index.isin(recent_data.index)]
        print(f"   🚨 CRITICAL: Recent changes (last 36 days): {len(recent_changes)}")
        print(f"   🚨 CRITICAL: Recent change frequency: {len(recent_changes)/36:.3f} per day")

        # Analyze recent change sources if market data available
        if market_regime_signals and len(recent_changes) > 0:
            recent_sources = recent_changes['regime_source'].value_counts()
            print(f"   📊 Recent change sources:")
            for source, count in recent_sources.items():
                print(f"     {source}: {count} ({count/len(recent_changes)*100:.1f}%)")

        if len(recent_changes) >= 30:
            print(f"   ⚠️ WARNING: EXTREME INSTABILITY DETECTED!")
            if market_regime_signals:
                market_driven = recent_changes['regime_source'].isin(['market_only', 'both']).sum()
                print(f"   🌐 Market-influenced changes: {market_driven}/{len(recent_changes)} ({market_driven/len(recent_changes)*100:.1f}%)")

    return regime_changes, df_analysis

def create_impossibility_proof_visualization(df, regime_changes):
    """Draw a 2x2 diagnostic figure about regime-change frequency and naive forecast error.

    Panels:
      1. Call volume for the last (up to) 50 rows with flagged regime changes.
      2. Number of regime changes inside trailing windows of 1..50 calendar
         days, against fixed 10% / 30% / 80% change-rate reference lines
         (only drawn when ``df`` has at least 36 rows).
      3. MAPE, in percent, of a last-value naive forecast for horizons
         1, 2, 3, 5 and 7 rows.
      4. Heatmap of regime changes per week for the most recent full rows of
         five weeks (up to 20 weeks).

    Parameters
    ----------
    df : pandas.DataFrame
        Date-indexed frame containing a ``calls`` column.
    regime_changes : pandas.DataFrame
        Rows of the analysis frame flagged as regime changes; must share the
        index type of ``df`` and contain ``calls``.

    Returns
    -------
    list[int] or None
        Regime-change counts for each trailing window (window of 1 day first),
        or ``None`` when ``df`` has fewer than 36 rows.
    """

    print(f"\n📊 CREATING IMPOSSIBILITY PROOF")
    print("-" * 40)

    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle('EMPIRICAL PROOF: Why Forecasting Is Impossible', fontsize=16, fontweight='bold')

    # 1. Recent period with regime changes (tail() already copes with short frames)
    recent_data = df.tail(50)
    recent_regime = regime_changes[regime_changes.index.isin(recent_data.index)]

    ax_recent = axes[0, 0]
    ax_recent.plot(recent_data.index, recent_data['calls'], 'b-', alpha=0.7, linewidth=2, label='Call Volume')
    ax_recent.scatter(recent_regime.index, recent_regime['calls'], color='red', s=60, alpha=0.9,
                      label=f'Regime Changes ({len(recent_regime)})', zorder=5)
    ax_recent.set_title(f'Last {len(recent_data)} Days: {len(recent_regime)} Structural Breaks')
    ax_recent.set_ylabel('Call Volume')
    ax_recent.legend()
    ax_recent.grid(True, alpha=0.3)

    # Add impossibility annotation
    if len(recent_regime) > len(recent_data) * 0.8:
        ax_recent.text(0.05, 0.95, 'CHAOS ZONE\nNo Stable Patterns',
                       transform=ax_recent.transAxes, fontsize=12, fontweight='bold',
                       color='red', va='top', bbox=dict(boxstyle="round,pad=0.3",
                       facecolor="yellow", alpha=0.8))

    # 2. Impossibility threshold analysis
    recent_changes_count = None
    if len(df) >= 36:
        ax_thresh = axes[0, 1]

        # Windows of 1..50 days, capped at the number of rows. The +1 makes the
        # upper bound inclusive so a 36-row frame still reaches the 36-day
        # window (the original range stopped one short).
        days = np.arange(1, min(50, len(df)) + 1)
        end_date = df.index[-1]
        change_dates = regime_changes.index

        recent_changes_count = []
        for day in days:
            # Trailing window measured in calendar days back from the last date
            start_date = end_date - pd.Timedelta(days=int(day) - 1)
            in_window = (change_dates >= start_date) & (change_dates <= end_date)
            recent_changes_count.append(int(in_window.sum()))

        ax_thresh.plot(days, recent_changes_count, 'ro-', linewidth=3, markersize=6, label='Actual Changes')

        # Theoretical impossibility thresholds
        stable_threshold = days * 0.1      # 1 change per 10 days = stable
        challenging_threshold = days * 0.3  # 1 change per 3 days = challenging
        impossible_threshold = days * 0.8   # 4+ changes per 5 days = impossible

        ax_thresh.plot(days, stable_threshold, 'g--', alpha=0.7, linewidth=2, label='Stable (10% change rate)')
        ax_thresh.plot(days, challenging_threshold, 'y--', alpha=0.7, linewidth=2, label='Challenging (30% rate)')
        ax_thresh.plot(days, impossible_threshold, 'r--', alpha=0.7, linewidth=2, label='Impossible (80% rate)')

        # Highlight key finding
        if len(days) >= 36:
            changes_36_days = recent_changes_count[35]  # 36th day (0-indexed)
            impossible_36 = impossible_threshold[35]

            ax_thresh.axhline(y=changes_36_days, color='red', linewidth=4, alpha=0.8)
            ax_thresh.axvline(x=36, color='blue', linewidth=2, alpha=0.5, linestyle=':')

            if changes_36_days > impossible_36:
                status = "IMPOSSIBLE"
                color = "red"
            elif changes_36_days > challenging_threshold[35]:
                status = "EXTREMELY DIFFICULT"
                color = "orange"
            else:
                status = "FEASIBLE"
                color = "green"

            ax_thresh.text(36, changes_36_days + 2, f'{changes_36_days} changes\nin 36 days\n= {status}',
                           fontsize=11, fontweight='bold', ha='center',
                           bbox=dict(boxstyle="round,pad=0.3", facecolor=color, alpha=0.3))

        ax_thresh.set_xlabel('Time Window (Days)')
        ax_thresh.set_ylabel('Number of Regime Changes')
        ax_thresh.set_title('Forecasting Impossibility Threshold Analysis')
        ax_thresh.legend(fontsize=9)
        ax_thresh.grid(True, alpha=0.3)

    # 3. Forecast accuracy simulation
    print("   🔬 Running forecast accuracy simulation...")

    horizons = [1, 2, 3, 5, 7]
    naive_errors = []

    # Quick forecast simulation
    min_train = 10
    for horizon in horizons:
        errors = []
        for i in range(min_train, min(len(df) - horizon, min_train + 50)):  # Limit iterations for speed
            train_data = df.iloc[max(0, i-14):i]['calls']  # Last 14 days only
            test_data = df.iloc[i:i+horizon]['calls']

            if len(train_data) > 0 and len(test_data) == horizon:
                # Simple naive forecast: repeat the last observed value
                pred = [train_data.iloc[-1]] * horizon
                # sklearn returns MAPE as a fraction (0.25 == 25%). The axis
                # label, the 50/100 reference lines and the fallback of 100
                # are all in percent, so convert here.
                mape = mean_absolute_percentage_error(test_data, pred) * 100
                errors.append(mape)

        naive_errors.append(np.mean(errors) if errors else 100)

    ax_mape = axes[1, 0]
    ax_mape.plot(horizons, naive_errors, 'ro-', linewidth=3, markersize=8, label='Naive Forecast MAPE')
    ax_mape.axhline(y=50, color='orange', linestyle='--', alpha=0.7, label='50% Error (Poor)')
    ax_mape.axhline(y=100, color='red', linestyle='--', alpha=0.7, label='100% Error (Random)')
    ax_mape.set_xlabel('Forecast Horizon (Days)')
    ax_mape.set_ylabel('MAPE (%)')
    ax_mape.set_title('Forecast Accuracy Collapse')
    ax_mape.legend()
    ax_mape.grid(True, alpha=0.3)
    ax_mape.set_ylim(0, min(150, max(naive_errors) + 10))

    # 4. Change frequency heatmap
    change_indicator = pd.Series(0, index=df.index)
    change_indicator.loc[regime_changes.index] = 1

    # Create weekly change frequency
    weekly_changes = change_indicator.groupby(change_indicator.index.to_period('W')).sum()

    # Reshape for heatmap (take last 20 weeks max, in full rows of 5 weeks)
    ax_heat = axes[1, 1]
    weeks_per_row = 5
    recent_weeks = weekly_changes.tail(20)
    n_rows = len(recent_weeks) // weeks_per_row

    if n_rows > 0:
        # Keep the most recent complete rows; any leftover weeks are dropped
        # from the old end, not the new end.
        heatmap_data = recent_weeks.iloc[-n_rows * weeks_per_row:].values.reshape(n_rows, weeks_per_row)

        im = ax_heat.imshow(heatmap_data, cmap='Reds', aspect='auto')
        ax_heat.set_title('Weekly Regime Change Intensity\n(Red = More Changes)')
        ax_heat.set_xlabel('Week (within row)')
        ax_heat.set_ylabel('Time Period')

        # Add colorbar
        plt.colorbar(im, ax=ax_heat, label='Changes per Week')
    else:
        # Fewer than five weeks of data: say so instead of leaving a blank panel
        ax_heat.text(0.5, 0.5, 'Insufficient data\nfor heatmap',
                     transform=ax_heat.transAxes, ha='center', va='center')
        ax_heat.set_title('Change Frequency Analysis')

    plt.tight_layout()
    plt.show()

    return recent_changes_count

def deliver_scientific_verdict(df, regime_changes):
    """Print a verdict on forecastability from the recent regime-change rate.

    The change rate is the number of flagged regime changes whose date falls
    within the last 36 rows of ``df``, divided by 36. The rate is mapped onto
    four fixed bands (> 0.8, > 0.5, > 0.2, otherwise) and the matching verdict,
    recommendation and, when market columns are present, market-correlation
    diagnostics are printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Date-indexed frame with a ``calls`` column; any other column is treated
        as market data.
    regime_changes : pandas.DataFrame
        Flagged rows, indexed like ``df``. An optional ``regime_source`` column
        is used for market attribution.

    Returns
    -------
    float or int
        Changes per day over the last 36 rows, or ``0`` when ``df`` has fewer
        than 36 rows (in which case nothing beyond the header is printed).
    """

    print(f"\n⚖️ SCIENTIFIC VERDICT")
    print("=" * 50)

    if len(df) < 36:
        return 0

    # Count only the changes dated inside the last 36 rows of df. The previous
    # `len(regime_changes.tail(36))` counted the last 36 change *events*, so
    # any history with 36 or more changes reported a rate of 1.0.
    recent_index = df.tail(36).index
    recent_36_changes = int(regime_changes.index.isin(recent_index).sum())
    change_rate = recent_36_changes / 36

    print(f"📊 EMPIRICAL EVIDENCE:")
    print(f"   • Regime changes in last 36 days: {recent_36_changes}")
    print(f"   • Change rate: {change_rate:.3f} changes per day")
    if change_rate > 0:
        print(f"   • Average time between changes: {1/change_rate:.1f} days")
    else:
        print(f"   • No recent changes detected")

    # MARKET CORRELATION ANALYSIS (if market data available)
    market_columns = [col for col in df.columns if col not in ['calls']]
    if market_columns:
        print(f"\n🌐 MARKET CORRELATION ANALYSIS:")

        # Analyze correlation between market volatility and call spikes
        if '^VIX_close' in df.columns:
            call_returns = df['calls'].pct_change().abs()
            vix_correlation = df['^VIX_close'].corr(call_returns)
            print(f"   📊 VIX vs Call Volatility correlation: {vix_correlation:.3f}")

            # VIX spike days vs high call days (top decile of each series)
            vix_spikes = df['^VIX_close'] > df['^VIX_close'].quantile(0.9)
            call_spikes = df['calls'] > df['calls'].quantile(0.9)
            spike_overlap = (vix_spikes & call_spikes).sum()
            total_call_spikes = call_spikes.sum()
            if total_call_spikes > 0:
                overlap_pct = spike_overlap / total_call_spikes * 100
                print(f"   🔥 High-VIX days with call spikes: {spike_overlap}/{total_call_spikes} ({overlap_pct:.1f}%)")

        # Analyze if market regime changes predict call center changes
        if 'regime_source' in regime_changes.columns:
            market_driven = regime_changes['regime_source'].isin(['market_only', 'both']).sum()
            total_changes = len(regime_changes)
            if total_changes > 0:
                market_pct = market_driven / total_changes * 100
                print(f"   📈 Market-influenced regime changes: {market_driven}/{total_changes} ({market_pct:.1f}%)")

                if market_pct > 50:
                    print(f"   💡 INSIGHT: Majority of instability is market-driven!")
                elif market_pct > 25:
                    print(f"   💡 INSIGHT: Significant market influence on call patterns")

        # Economic uncertainty impact
        if 'market_uncertainty_index' in df.columns:
            uncertainty_call_corr = df['market_uncertainty_index'].corr(df['calls'])
            print(f"   🌊 Market uncertainty vs calls correlation: {uncertainty_call_corr:.3f}")

    # Map the change rate onto the fixed verdict bands
    if change_rate > 0.8:  # More than 4 changes per 5 days
        verdict = "MATHEMATICALLY IMPOSSIBLE"
        confidence = "99.9%"
        color = "🔴"
        recommendation = "ABANDON FORECASTING - FOCUS ON REAL-TIME ADAPTATION"
    elif change_rate > 0.5:  # More than 1 change per 2 days
        verdict = "EXTREMELY DIFFICULT"
        confidence = "95%"
        color = "🟠"
        recommendation = "USE ONLY 1-DAY FORECASTS WITH HOURLY UPDATES"
    elif change_rate > 0.2:  # More than 1 change per 5 days
        verdict = "CHALLENGING BUT POSSIBLE"
        confidence = "80%"
        color = "🟡"
        recommendation = "SHORT-TERM FORECASTS ONLY (1-3 DAYS)"
    else:
        verdict = "FEASIBLE"
        confidence = "High"
        color = "🟢"
        recommendation = "STANDARD FORECASTING APPROACHES CAN WORK"

    print(f"\n{color} FINAL CONCLUSION:")
    print(f"   FORECASTING IS {verdict}")
    print(f"   Confidence Level: {confidence}")
    print(f"   Scientific Basis: Change frequency exceeds model adaptation capacity")

    print(f"\n💡 STRATEGIC RECOMMENDATION:")
    print(f"   {recommendation}")

    # Enhanced recommendations based on market data
    if market_columns:
        print(f"\n🎯 MARKET-INFORMED STRATEGY:")
        if '^VIX_close' in df.columns:
            print(f"   • Monitor VIX for early warning of call volume spikes")
            print(f"   • Implement VIX-based staffing alerts (VIX >25 = prepare for volume)")
        if 'SPY_close' in df.columns:
            print(f"   • Track market drops for customer service demand surges")
            print(f"   • Set up automated alerts for market stress indicators")
        if 'BTC-USD_close' in df.columns:
            print(f"   • Monitor crypto volatility for trading platform support load")

        print(f"   • Use market regime changes as leading indicators")
        print(f"   • Implement market-based capacity planning")

    if change_rate > 0.5:
        print(f"\n🧪 SCIENTIFIC REASONING:")
        print(f"   • Forecasting models require stable patterns lasting longer than forecast horizon")
        print(f"   • With changes every {1/change_rate:.1f} days, patterns expire before use")
        print(f"   • Model training time exceeds pattern stability duration")
        print(f"   • Signal-to-noise ratio insufficient for meaningful prediction")

        if market_columns:
            print(f"   • External market forces create unpredictable regime changes")
            print(f"   • Market volatility cascades into operational instability")

        print(f"\n⚡ BUSINESS IMPLICATIONS:")
        print(f"   • Traditional accuracy metrics become meaningless")
        print(f"   • Focus must shift from prediction to adaptation speed")
        print(f"   • Operational flexibility more valuable than forecast precision")
        print(f"   • Real-time monitoring and rapid response capabilities essential")

        if market_columns:
            print(f"   • Market data provides better leading indicators than historical calls")
            print(f"   • Implement market-driven operational triggers")

    return change_rate

# Run the impossibility-proof pipeline: load -> detect -> visualize -> verdict
print("🚀 Starting Impossibility Proof Analysis...")

df_proof = load_data_for_impossibility_analysis()

if df_proof is None:
    # The loader prints its own error and returns None; use a zero change rate
    print("❌ Cannot proceed with proof analysis - check data file")
    change_rate_proof = 0
else:
    # Flag regime changes on the loaded frame
    regime_changes_proof, df_analysis_proof = detect_regime_changes_proof(df_proof)

    # Draw the 2x2 diagnostic figure
    recent_changes_count = create_impossibility_proof_visualization(df_proof, regime_changes_proof)

    # Print the verdict and keep the recent change rate
    change_rate_proof = deliver_scientific_verdict(df_proof, regime_changes_proof)

    print(f"\n✅ IMPOSSIBILITY PROOF COMPLETE")
    print("=" * 50)
    print("🎯 Evidence gathered - proceeding with adaptive modeling approach...")
    print("📈 The following analysis will focus on rapid adaptation rather than accuracy")

print("\n" + "="*70)
print("PROCEEDING TO FULL ADAPTIVE FORECASTING ANALYSIS")
print("="*70)

# ============================================================================
# CELL 2: Data Loading and Initial Setup
# ============================================================================

def _detect_volume_column(df):
    """Return the name of the call-volume column in ``df``.

    Well-known column names are tried first; if none is present the first
    numeric column is used. Raises ValueError when ``df`` has no numeric column.
    """
    possible_cols = ['calls', 'Calls', 'call_volume', 'Call_Volume', 'volume', 'Volume',
                     'call_count', 'Call_Count', 'total_calls', 'Total_Calls']

    for col in possible_cols:
        if col in df.columns:
            print(f"🔍 Found volume column: '{col}'")
            return col

    numeric_cols = df.select_dtypes(include=[np.number]).columns
    if len(numeric_cols) == 0:
        raise ValueError("No numeric columns found for call volume")

    print(f"🔍 Auto-detected volume column: '{numeric_cols[0]}'")
    return numeric_cols[0]


def _load_market_frames(market_tickers):
    """Read one CSV per ticker and return ``{ticker: DataFrame}`` for those found.

    ``market_tickers`` maps a ticker to the column names wanted from its file.
    Several filename spellings are tried per ticker. A problem with one ticker
    is reported and skipped; it never aborts the others.
    """
    market_data_loaded = {}

    for ticker, columns in market_tickers.items():
        try:
            sanitized = ticker.replace("^", "").replace("=", "_").replace("-", "_")
            possible_files = [
                f'{ticker}_data.csv',
                f'{sanitized}_data.csv',
                f'market_data_{sanitized}.csv',
                f'{ticker.replace("^", "VIX_").replace("=F", "").replace("-", "_")}.csv'
            ]

            ticker_df = None
            for filename in possible_files:
                try:
                    ticker_df = pd.read_csv(filename, index_col='Date', parse_dates=True)
                    print(f"   ✅ Loaded {ticker} from {filename}")
                    break
                except FileNotFoundError:
                    continue

            if ticker_df is None:
                print(f"   ❌ Could not find data files for {ticker}")
                continue

            # reindex(..., method='ffill') needs a monotonic, unique source index.
            # Normalise here so one unsorted file cannot abort the whole load.
            ticker_df = ticker_df.sort_index()
            ticker_df = ticker_df[~ticker_df.index.duplicated(keep='first')]

            # Trim first and last rows for consistency with call data
            original_market_len = len(ticker_df)
            if len(ticker_df) > 2:
                ticker_df = ticker_df.iloc[1:-1]
                print(f"     🧹 Trimmed {ticker} from {original_market_len} to {len(ticker_df)} rows")

            # Select only the columns we want
            available_cols = [col for col in columns if col in ticker_df.columns]
            if available_cols:
                market_data_loaded[ticker] = ticker_df[available_cols]
                print(f"     📊 Columns: {available_cols}")
            else:
                print(f"   ⚠️ No matching columns found for {ticker}")

        except Exception as e:
            print(f"   ❌ Error loading {ticker}: {e}")

    return market_data_loaded


def _merge_market_data(df, market_data_loaded):
    """Align every market frame to ``df``'s dates (forward fill) and append its columns."""
    print(f"\n🔗 Integrating {len(market_data_loaded)} market datasets...")

    # Forward fill carries the last known quote over dates missing from the market file.
    aligned_frames = [
        ticker_df.reindex(df.index, method='ffill')
        for ticker_df in market_data_loaded.values()
    ]
    merged = pd.concat([df, *aligned_frames], axis=1)
    market_columns_added = [col for frame in aligned_frames for col in frame.columns]

    print(f"   ✅ Market integration complete!")
    print(f"   📊 Added columns: {market_columns_added}")
    print(f"   📏 Final dataset: {merged.shape[1]} columns × {len(merged)} rows")
    return merged


def _add_market_features(df):
    """Add market-derived columns to ``df`` in place and return it.

    Thresholds such as the VIX 80th percentile are computed over every row of
    ``df``, i.e. over the full loaded history.
    """
    print(f"\n🔧 Creating market-based features...")

    # VIX-based features (fear/volatility)
    if '^VIX_close' in df.columns:
        df['vix_high'] = (df['^VIX_close'] > df['^VIX_close'].quantile(0.8)).astype(int)
        df['vix_spike'] = (df['^VIX_close'].pct_change() > 0.2).astype(int)
        print("   📈 VIX volatility features created")

    # Stock market stress features
    if 'SPY_close' in df.columns:
        df['spy_returns'] = df['SPY_close'].pct_change()
        df['market_stress'] = (df['spy_returns'] < -0.02).astype(int)  # 2%+ daily drop
        df['spy_volatility'] = df['spy_returns'].rolling(7).std()
        print("   📉 Stock market stress features created")

    # Crypto volatility features
    if 'BTC-USD_close' in df.columns:
        df['btc_returns'] = df['BTC-USD_close'].pct_change()
        df['crypto_volatility'] = df['btc_returns'].rolling(7).std()
        df['btc_extreme_move'] = (abs(df['btc_returns']) > 0.1).astype(int)  # 10%+ moves
        print("   ₿ Crypto volatility features created")

    # Economic uncertainty composite: row-wise mean of whichever inputs exist
    uncertainty_features = []
    if '^VIX_close' in df.columns:
        uncertainty_features.append(df['^VIX_close'])
    if 'spy_volatility' in df.columns:
        uncertainty_features.append(df['spy_volatility'] * 100)  # Scale to match VIX
    if 'crypto_volatility' in df.columns:
        uncertainty_features.append(df['crypto_volatility'] * 100)

    if uncertainty_features:
        uncertainty_matrix = pd.concat(uncertainty_features, axis=1)
        df['market_uncertainty_index'] = uncertainty_matrix.mean(axis=1)
        print("   🌊 Market uncertainty composite index created")

    return df


def load_and_prepare_data(file_path='enhanced_eda_data.csv', volume_col=None, include_market_data=True):
    """
    Load call center data, clean it, and optionally merge market data.

    Parameters:
    - file_path: CSV with a 'Date' column (used as the index) and a call volume column
    - volume_col: Name of the call volume column; auto-detected when None
    - include_market_data: When True, look for per-ticker market CSVs in the
      working directory, align them to the call dates and add derived features

    Returns:
    - DataFrame indexed by date with a standardized 'calls' column plus any
      market columns/features, or None if loading failed (the error is printed)

    Cleaning steps, in order: sort by date, drop duplicate dates (first kept),
    then remove the first and last rows, which are treated as bad data.
    """

    try:
        print(f"📁 Loading call center data from {file_path}...")

        # Load call center data with Date as index
        df = pd.read_csv(file_path, index_col='Date', parse_dates=True)

        # Auto-detect the volume column if not specified
        if volume_col is None:
            volume_col = _detect_volume_column(df)

        # Rename to standardized 'calls' column
        if volume_col != 'calls':
            df = df.rename(columns={volume_col: 'calls'})

        # Keep only the calls column initially
        df = df[['calls']].sort_index()

        # De-duplicate BEFORE trimming and merging so that "first/last row" means
        # first/last day and no derived feature is computed across repeated dates.
        if df.index.duplicated().any():
            print("⚠️  Warning: Duplicate dates found - removing duplicates")
            df = df[~df.index.duplicated(keep='first')]

        # TRIM FIRST AND LAST ROWS (problematic call volume data)
        print("🧹 DATA CLEANING: Removing problematic first and last rows")
        print("   ⚠️  As requested - first and last days have bad call volume data")
        original_len = len(df)
        if len(df) > 2:
            first_row_date = df.index[0].strftime('%Y-%m-%d')
            last_row_date = df.index[-1].strftime('%Y-%m-%d')
            first_row_calls = df['calls'].iloc[0]
            last_row_calls = df['calls'].iloc[-1]

            print(f"   🗑️  Removing first row: {first_row_date} ({first_row_calls:.0f} calls)")
            print(f"   🗑️  Removing last row:  {last_row_date} ({last_row_calls:.0f} calls)")

            df = df.iloc[1:-1]  # Remove first and last rows

            print(f"   ✅ Cleaned dataset: {original_len} → {len(df)} rows ({original_len-len(df)} removed)")
        else:
            print("   ⚠️  Dataset too small to trim (≤2 rows)")

        print(f"   📅 Final date range: {df.index.min().strftime('%Y-%m-%d')} to {df.index.max().strftime('%Y-%m-%d')}")

        # INTEGRATE MARKET DATA
        if include_market_data:
            print(f"\n📈 Loading and integrating market data...")

            # ticker -> columns wanted from that ticker's CSV
            market_tickers = {
                '^VIX': ['^VIX_close'],
                'SPY': ['SPY_close', 'SPY_volume'],
                'QQQ': ['QQQ_close', 'QQQ_volume'],
                'DX-Y.NYB': ['DX-Y.NYB_close'],
                'GC=F': ['GC=F_close', 'GC=F_volume'],
                'BTC-USD': ['BTC-USD_close', 'BTC-USD_volume'],
                'ETH-USD': ['ETH-USD_close', 'ETH-USD_volume']
            }

            market_data_loaded = _load_market_frames(market_tickers)

            if market_data_loaded:
                df = _merge_market_data(df, market_data_loaded)
                df = _add_market_features(df)
            else:
                print("   ⚠️ No market data files found - proceeding with call data only")
                print("   💡 Expected files: VIX_data.csv, SPY_data.csv, etc.")

        # Final data quality checks
        print(f"\n📊 Data Overview:")
        print(f"   Date range: {df.index.min()} to {df.index.max()}")
        print(f"   Total days: {len(df)}")
        print(f"   Total columns: {len(df.columns)}")
        print(f"   Call volume missing: {df['calls'].isna().sum()}")

        if len(df.columns) > 1:
            market_missing = df.drop('calls', axis=1).isna().sum().sum()
            print(f"   Market data missing: {market_missing} values")

        # Check for any obvious data issues
        if df['calls'].min() < 0:
            print("⚠️  Warning: Negative call volumes detected")

        if df['calls'].isna().sum() > 0:
            print(f"⚠️  Warning: {df['calls'].isna().sum()} missing values in call volume")

        return df

    except FileNotFoundError:
        print(f"❌ File '{file_path}' not found!")
        print("💡 Make sure the file is in the same directory as this notebook")
        return None

    except Exception as e:
        print(f"❌ Error loading data: {e}")
        print("💡 Check your file format and column names")
        return None

# Load the call data together with market data.
# volume_col=None lets the loader auto-detect the call volume column;
# pass include_market_data=False when no market CSVs are available.
df = load_and_prepare_data(file_path='enhanced_eda_data.csv', volume_col=None, include_market_data=True)

# ============================================================================
# CELL 3: Exploratory Data Analysis
# ============================================================================

def comprehensive_eda(df):
    """Comprehensive exploratory data analysis for call center data.

    Plots the series, its distribution and its weekday/month profiles, prints
    summary statistics, runs an ADF stationarity test, a weekly additive
    seasonal decomposition, ACF/PACF plots and an IQR outlier scan.

    Parameters:
    - df: DataFrame with a DatetimeIndex and a 'calls' column

    Returns:
    - (decomposition, outliers): the object returned by seasonal_decompose and
      the rows of df whose 'calls' fall outside Q1 - 1.5*IQR .. Q3 + 1.5*IQR

    Relies on plt, stats, adfuller, seasonal_decompose, plot_acf and plot_pacf
    being in scope (imported elsewhere in the notebook).
    """

    print("🔍 EXPLORATORY DATA ANALYSIS")
    print("=" * 50)

    calls = df['calls']

    # 1. Time Series Plot
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))

    # Main time series
    axes[0,0].plot(df.index, calls, alpha=0.7, linewidth=0.8)
    axes[0,0].set_title('Call Volume Over Time')
    axes[0,0].set_ylabel('Number of Calls')
    axes[0,0].grid(True, alpha=0.3)

    # Distribution
    axes[0,1].hist(calls, bins=50, alpha=0.7, edgecolor='black')
    axes[0,1].set_title('Call Volume Distribution')
    axes[0,1].set_xlabel('Number of Calls')
    axes[0,1].set_ylabel('Frequency')
    axes[0,1].grid(True, alpha=0.3)

    # Day of week pattern.
    # Bars are placed at the weekdays that actually occur, so data without
    # weekends (or any other missing weekday) no longer breaks bar() with a
    # length mismatch between positions and heights.
    dow_means = calls.groupby(df.index.dayofweek).mean()
    dow_names = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    axes[1,0].bar(dow_means.index, dow_means.values, alpha=0.7)
    axes[1,0].set_xticks(range(7))
    axes[1,0].set_xticklabels(dow_names)
    axes[1,0].set_title('Average Calls by Day of Week')
    axes[1,0].set_ylabel('Average Calls')
    axes[1,0].grid(True, alpha=0.3)

    # Monthly pattern (same idea: only months present in the data get a bar)
    month_means = calls.groupby(df.index.month).mean()
    month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    axes[1,1].bar(month_means.index, month_means.values, alpha=0.7)
    axes[1,1].set_xticks(range(1, 13))
    axes[1,1].set_xticklabels(month_names, rotation=45)
    axes[1,1].set_title('Average Calls by Month')
    axes[1,1].set_ylabel('Average Calls')
    axes[1,1].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # 2. Statistical Summary
    print("\n📈 Statistical Summary:")
    print(f"   Mean: {calls.mean():.1f}")
    print(f"   Median: {calls.median():.1f}")
    print(f"   Std Dev: {calls.std():.1f}")
    print(f"   CV: {calls.std()/calls.mean():.3f}")
    print(f"   Skewness: {stats.skew(calls):.3f}")
    print(f"   Kurtosis: {stats.kurtosis(calls):.3f}")

    # 3. Stationarity Test
    print("\n🔬 Stationarity Tests:")
    adf_result = adfuller(calls)
    print(f"   ADF Statistic: {adf_result[0]:.4f}")
    print(f"   p-value: {adf_result[1]:.4f}")
    print(f"   Critical Values: {adf_result[4]}")

    if adf_result[1] <= 0.05:
        print("   ✅ Series appears stationary")
    else:
        print("   ❌ Series appears non-stationary")

    # 4. Seasonal Decomposition (weekly period)
    print("\n🔄 Seasonal Decomposition:")
    decomposition = seasonal_decompose(calls, model='additive', period=7)

    fig, axes = plt.subplots(4, 1, figsize=(15, 12))
    decomposition.observed.plot(ax=axes[0], title='Original')
    decomposition.trend.plot(ax=axes[1], title='Trend')
    decomposition.seasonal.plot(ax=axes[2], title='Seasonal')
    decomposition.resid.plot(ax=axes[3], title='Residual')

    for ax in axes:
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # 5. Autocorrelation Analysis
    print("\n📊 Autocorrelation Analysis:")
    fig, axes = plt.subplots(1, 2, figsize=(15, 5))

    plot_acf(calls, ax=axes[0], lags=40)
    plot_pacf(calls, ax=axes[1], lags=40)

    plt.tight_layout()
    plt.show()

    # 6. Outlier Analysis (Tukey fences: 1.5 * IQR beyond the quartiles)
    print("\n🚨 Outlier Analysis:")
    Q1 = calls.quantile(0.25)
    Q3 = calls.quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR

    outliers = df[(calls < lower_bound) | (calls > upper_bound)]
    print(f"   Outliers detected: {len(outliers)} ({len(outliers)/len(df)*100:.1f}%)")

    # The range and the top-5 list only make sense when something was flagged;
    # on an empty selection min()/max() are NaN and would print "nan - nan".
    if len(outliers) > 0:
        print(f"   Outlier range: {outliers['calls'].min():.0f} - {outliers['calls'].max():.0f}")
        print(f"   Top 5 outlier dates:")
        for date, row in outliers.nlargest(5, 'calls').iterrows():
            print(f"     {date.strftime('%Y-%m-%d')}: {row['calls']:.0f} calls")

    return decomposition, outliers

# ============================================================================
# CELL 3.5: Regime Change Analysis (Added for Structural Break Detection)
# ============================================================================

def detect_regime_changes(df, window=7, threshold=2.0, recent_days=60):
    """
    Detect potential regime changes in call volume data

    A row is flagged when the absolute day-over-day change of the rolling mean
    or of the rolling std exceeds `threshold` times the standard deviation of
    that change series. When anything is flagged, the full series and a zoom on
    the most recent rows are plotted with the flagged points.

    Parameters:
    - df: DataFrame with a 'calls' column, one row per day
    - window: Rolling window for calculating statistics
    - threshold: Standard deviations for change detection
    - recent_days: Number of trailing rows of df treated as the "recent" period

    Returns:
    - DataFrame of the flagged rows of df with the helper columns
      'rolling_mean', 'rolling_std', 'mean_change' and 'std_change'
    """

    print("🔄 REGIME CHANGE ANALYSIS")
    print("=" * 50)

    # Calculate rolling statistics
    df_regime = df.copy()
    rolling_calls = df_regime['calls'].rolling(window)
    df_regime['rolling_mean'] = rolling_calls.mean()
    df_regime['rolling_std'] = rolling_calls.std()

    # Detect significant changes in mean and spread
    df_regime['mean_change'] = df_regime['rolling_mean'].diff().abs()
    df_regime['std_change'] = df_regime['rolling_std'].diff().abs()

    # Flag potential regime changes
    mean_threshold = df_regime['mean_change'].std() * threshold
    std_threshold = df_regime['std_change'].std() * threshold

    regime_changes = df_regime[
        (df_regime['mean_change'] > mean_threshold) |
        (df_regime['std_change'] > std_threshold)
    ].copy()

    print(f"📊 Detected {len(regime_changes)} potential regime changes")

    if len(regime_changes) > 0:
        # "Recent" means: flagged rows whose date lies inside the last
        # `recent_days` rows of df. Taking the last N *flagged* rows instead
        # would pull in old change points whenever fewer than N exist recently.
        recent_data = df.tail(recent_days)
        recent_changes = regime_changes[regime_changes.index.isin(recent_data.index)]
        print(f"📅 Recent changes (last {recent_days} days): {len(recent_changes)}")

        # Plot regime changes
        fig, axes = plt.subplots(2, 1, figsize=(15, 10))

        # Main time series with change points
        axes[0].plot(df.index, df['calls'], alpha=0.7, linewidth=1, label='Call Volume')
        axes[0].scatter(regime_changes.index, regime_changes['calls'],
                       color='red', s=50, alpha=0.7, label=f'Regime Changes ({len(regime_changes)})')
        axes[0].set_title('Call Volume with Detected Regime Changes')
        axes[0].set_ylabel('Number of Calls')
        axes[0].legend()
        axes[0].grid(True, alpha=0.3)

        # Recent period zoom (only when df is longer than the recent window)
        if len(df) > recent_days:
            axes[1].plot(recent_data.index, recent_data['calls'], alpha=0.7, linewidth=2, label='Call Volume')
            if len(recent_changes) > 0:
                axes[1].scatter(recent_changes.index, recent_changes['calls'],
                               color='red', s=60, alpha=0.8, label=f'Recent Changes ({len(recent_changes)})')
            axes[1].set_title(f'Recent Period (Last {recent_days} Days) - Regime Changes')
            axes[1].set_ylabel('Number of Calls')
            axes[1].legend()
            axes[1].grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

        # Print recent change dates (at most the 10 latest)
        if len(recent_changes) > 0:
            print(f"\n📋 Most Recent Regime Changes:")
            for date, row in recent_changes.tail(10).iterrows():
                print(f"   {date.strftime('%Y-%m-%d')}: {row['calls']:.0f} calls "
                      f"(mean Δ: {row['mean_change']:.1f}, std Δ: {row['std_change']:.1f})")

    return regime_changes

# Flag regime changes in the loaded data using a weekly rolling window and a
# 1.5-sigma threshold.
regime_changes = detect_regime_changes(
    df,
    window=7,
    threshold=1.5,
)

# ============================================================================
# CELL 3.6: Adaptive Modeling Strategy for High-Change Data
# ============================================================================

def recommend_adaptive_strategy(regime_changes, df, lookback_days=36):
    """Recommend modeling strategy based on regime change frequency

    Parameters:
    - regime_changes: DataFrame of flagged rows, indexed by date
    - df: The full call data the changes were detected on; its last
      `lookback_days` rows define the recent period
    - lookback_days: Length of the recent period, in rows of df

    Prints the recommendation; returns None.
    Raises ValueError if lookback_days is not positive.
    """

    if lookback_days <= 0:
        raise ValueError("lookback_days must be a positive integer")

    print(f"\n💡 ADAPTIVE MODELING RECOMMENDATIONS")
    print("=" * 50)

    # Count only change points dated inside the last `lookback_days` rows of df.
    # Counting the last N flagged rows instead would saturate the frequency at
    # 1.0 as soon as N or more changes exist anywhere in the history.
    recent_index = df.index[-lookback_days:]
    recent_changes = int(regime_changes.index.isin(recent_index).sum())
    change_frequency = recent_changes / lookback_days

    print(f"📊 Regime Change Frequency: {recent_changes} changes in {lookback_days} days ({change_frequency:.2f}/day)")

    if change_frequency > 0.5:  # More than 1 change every 2 days
        print("🚨 EXTREMELY HIGH VOLATILITY - Traditional forecasting not recommended")
        print("\n✅ RECOMMENDED APPROACHES:")
        print("   1. ONLINE LEARNING models (update with each new observation)")
        print("   2. VERY SHORT training windows (3-7 days max)")
        print("   3. ENSEMBLE of simple models with different lookback periods")
        print("   4. MOVING WINDOW forecasts (retrain daily)")
        print("   5. REGIME-SWITCHING models")

        print("\n❌ AVOID:")
        print("   • SARIMA/Prophet with long training periods")
        print("   • Complex seasonal patterns (too unstable)")
        print("   • Any model assuming stationarity")

    elif change_frequency > 0.2:  # 1 change every 5 days
        print("⚠️ HIGH VOLATILITY - Need adaptive approaches")
        print("\n✅ RECOMMENDED APPROACHES:")
        print("   1. SHORT training windows (7-14 days)")
        print("   2. WEIGHTED recent observations more heavily")
        print("   3. ENSEMBLE methods with model combination")
        print("   4. EXPONENTIAL SMOOTHING with high alpha")

    else:
        print("✅ MODERATE VOLATILITY - Standard approaches may work")
        print("   • Use normal forecasting approaches but monitor closely")
        print("   • Consider shorter cross-validation windows")

    print(f"\n🔧 IMMEDIATE ACTIONS:")
    print("   1. Reduce training window to 7-14 days maximum")
    print("   2. Implement daily model retraining")
    print("   3. Use simple, adaptive models (exponential smoothing)")
    print("   4. Focus on 1-3 day forecasts only")
    print("   5. Set up change point detection alerts")

# Print the modeling recommendations derived from the detected regime changes.
recommend_adaptive_strategy(regime_changes=regime_changes, df=df)

# Full exploratory pass; keep the decomposition and the flagged outlier rows.
decomposition, outliers = comprehensive_eda(df=df)

# ============================================================================
# CELL 5: Time Series Cross-Validation Framework (BEFORE Feature Engineering)
# ============================================================================

def create_time_series_splits_early(df, n_splits=5, test_size=7, gap=0):
    """
    Build walk-back train/test index splits on the raw frame, before any
    feature engineering, so features can later be derived per split.

    Splits are generated from the end of the data backwards: the first split
    tests on the final `test_size` rows, the next one on the `test_size` rows
    before those, and so on. Generation stops early once the training part
    would hold fewer than `test_size` rows.

    Parameters:
    - df: DataFrame whose index supplies the split labels
    - n_splits: Maximum number of splits
    - test_size: Number of rows in each test set
    - gap: Rows left out between the end of train and the start of test

    Returns:
    - List of dicts with keys 'train_idx', 'test_idx', 'train_size',
      'test_size' and 'split_date' (first test label, or None)
    """

    print("🔒 PREVENTING DATA LEAKAGE")
    print("=" * 50)
    print("⚠️  Creating splits BEFORE feature engineering to prevent future data leakage")

    n_rows = len(df)
    splits = []

    for fold in range(n_splits):
        # Positional boundaries of this fold, counted back from the end.
        test_stop = n_rows - fold * test_size
        test_begin = test_stop - test_size
        train_stop = test_begin - gap

        # Not enough history left for a meaningful training set.
        if train_stop < test_size:
            break

        train_labels = df.index[:train_stop]
        test_labels = df.index[test_begin:test_stop]
        first_test_label = test_labels[0] if len(test_labels) > 0 else None

        splits.append(dict(
            train_idx=train_labels,
            test_idx=test_labels,
            train_size=len(train_labels),
            test_size=len(test_labels),
            split_date=first_test_label,
        ))

    date_fmt = '%Y-%m-%d'
    print(f"✅ Created {len(splits)} data-leakage-free splits:")
    for number, split in enumerate(splits, start=1):
        train_labels, test_labels = split['train_idx'], split['test_idx']
        print(f"  Split {number}: Train {split['train_size']} days → Test {split['test_size']} days")
        print(f"    Train: {train_labels[0].strftime(date_fmt)} to {train_labels[-1].strftime(date_fmt)}")
        print(f"    Test:  {test_labels[0].strftime(date_fmt)} to {test_labels[-1].strftime(date_fmt)}")
        print()

    return splits

def _safe_mape(actual, predicted):
    """Mean absolute percentage error in percent over rows with a non-zero actual; NaN if there are none."""
    nonzero = actual != 0
    if not nonzero.any():
        return np.nan
    return np.mean(np.abs((actual[nonzero] - predicted[nonzero]) / actual[nonzero])) * 100


def _pct_improvement(baseline_error, model_error):
    """Percent by which model_error undercuts baseline_error; NaN when the baseline is 0 or NaN."""
    if np.isnan(baseline_error) or baseline_error == 0:
        return np.nan
    return (baseline_error - model_error) / baseline_error * 100


def evaluate_forecast_relative(y_true, y_pred, seasonal_naive_pred, model_name="Model"):
    """
    Calculate forecast evaluation metrics RELATIVE to Seasonal Naive baseline
    This makes results much easier to explain to business stakeholders

    Parameters:
    - y_true: Actual values (any 1-D array-like: list, ndarray, Series)
    - y_pred: Model forecasts, same length as y_true
    - seasonal_naive_pred: Baseline forecasts, same length as y_true
    - model_name: Label stored under 'model' in the result

    Returns:
    - Dict with the same keys in every case: 'model', absolute metrics
      ('mae', 'mape', 'rmse', 'r2'), baseline metrics ('baseline_mae',
      'baseline_mape', 'baseline_rmse'), percent improvements over the
      baseline ('mae_vs_baseline', 'mape_vs_baseline', 'rmse_vs_baseline',
      'improvement_pct'; positive = better than baseline) and 'n_obs'.
      All metrics are NaN when no row survives NaN removal.

    Notes:
    - Inputs are compared position by position; any pandas index is ignored.
    - MAPE skips rows whose actual value is 0, because the percentage error is
      undefined there and would otherwise turn the whole metric into inf.
    - An improvement is NaN when its baseline error is 0.
    """

    # Work on plain float arrays so lists are accepted and Series with
    # different indexes cannot be silently re-aligned by pandas.
    y_true_arr = np.asarray(y_true, dtype=float)
    y_pred_arr = np.asarray(y_pred, dtype=float)
    baseline_arr = np.asarray(seasonal_naive_pred, dtype=float)

    # Remove any NaN values (a row is dropped if any of the three is NaN)
    mask = ~(np.isnan(y_true_arr) | np.isnan(y_pred_arr) | np.isnan(baseline_arr))
    y_true_clean = y_true_arr[mask]
    y_pred_clean = y_pred_arr[mask]
    seasonal_naive_clean = baseline_arr[mask]

    if len(y_true_clean) == 0:
        return {
            'model': model_name,
            'mae': np.nan,
            'mape': np.nan,
            'rmse': np.nan,
            'r2': np.nan,
            'baseline_mae': np.nan,
            'baseline_mape': np.nan,
            'baseline_rmse': np.nan,
            'mae_vs_baseline': np.nan,
            'mape_vs_baseline': np.nan,
            'rmse_vs_baseline': np.nan,
            'improvement_pct': np.nan,
            'n_obs': 0
        }

    # Calculate absolute metrics
    mae = mean_absolute_error(y_true_clean, y_pred_clean)
    mape = _safe_mape(y_true_clean, y_pred_clean)
    rmse = np.sqrt(mean_squared_error(y_true_clean, y_pred_clean))
    r2 = r2_score(y_true_clean, y_pred_clean)

    # Calculate baseline metrics
    baseline_mae = mean_absolute_error(y_true_clean, seasonal_naive_clean)
    baseline_mape = _safe_mape(y_true_clean, seasonal_naive_clean)
    baseline_rmse = np.sqrt(mean_squared_error(y_true_clean, seasonal_naive_clean))

    # Calculate relative performance (positive = better than baseline)
    mae_improvement = _pct_improvement(baseline_mae, mae)
    mape_improvement = _pct_improvement(baseline_mape, mape)
    rmse_improvement = _pct_improvement(baseline_rmse, rmse)

    # Overall improvement score (based on MAPE)
    improvement_pct = mape_improvement

    return {
        'model': model_name,
        'mae': mae,
        'mape': mape,
        'rmse': rmse,
        'r2': r2,
        'baseline_mae': baseline_mae,
        'baseline_mape': baseline_mape,
        'baseline_rmse': baseline_rmse,
        'mae_vs_baseline': mae_improvement,
        'mape_vs_baseline': mape_improvement,
        'rmse_vs_baseline': rmse_improvement,
        'improvement_pct': improvement_pct,
        'n_obs': len(y_true_clean)
    }

# Build the cross-validation splits up front, before any feature engineering.
# Without loaded data there is nothing to split, so fall back to an empty list.
if df is None:
    print("❌ Cannot create splits - no data loaded")
    cv_splits = []
else:
    print("\n🔒 STEP 1: CREATE DATA SPLITS (Preventing Data Leakage)")
    print("=" * 70)
    cv_splits = create_time_series_splits_early(
        df,
        n_splits=5,
        test_size=7,
        gap=0,
    )

# ============================================================================
# CELL 6: Feature Engineering (Applied ONLY to Training Data)
# ============================================================================

def _add_calendar_features(frame):
    """Add calendar, cyclical and binary date features to ``frame`` in place (needs a DatetimeIndex)."""
    idx = frame.index

    # Time-based features (no leakage risk: they depend on the date only)
    frame['year'] = idx.year
    frame['month'] = idx.month
    frame['day'] = idx.day
    frame['dayofweek'] = idx.dayofweek
    frame['dayofyear'] = idx.dayofyear
    frame['quarter'] = idx.quarter
    frame['week'] = idx.isocalendar().week

    # Cyclical encoding
    frame['month_sin'] = np.sin(2 * np.pi * frame['month'] / 12)
    frame['month_cos'] = np.cos(2 * np.pi * frame['month'] / 12)
    frame['dow_sin'] = np.sin(2 * np.pi * frame['dayofweek'] / 7)
    frame['dow_cos'] = np.cos(2 * np.pi * frame['dayofweek'] / 7)
    frame['doy_sin'] = np.sin(2 * np.pi * frame['dayofyear'] / 365.25)
    frame['doy_cos'] = np.cos(2 * np.pi * frame['dayofyear'] / 365.25)

    # Binary features
    frame['is_weekend'] = (frame['dayofweek'] >= 5).astype(int)
    frame['is_monday'] = (frame['dayofweek'] == 0).astype(int)
    frame['is_friday'] = (frame['dayofweek'] == 4).astype(int)
    frame['is_month_start'] = idx.is_month_start.astype(int)
    frame['is_month_end'] = idx.is_month_end.astype(int)


def _add_call_history_features(frame, calls_history, start):
    """Add lag and trailing rolling features of ``calls_history`` to ``frame`` in place.

    ``frame`` must correspond row-for-row to ``calls_history.iloc[start:]``.
    """
    for lag in [1, 2, 3, 7]:
        frame[f'calls_lag_{lag}'] = calls_history.shift(lag).to_numpy()[start:]

    # Shift by one row before rolling so each window ends on the PREVIOUS row.
    # Rolling directly over `calls` would fold the current row's target value
    # into its own features.
    previous_calls = calls_history.shift(1)
    for window in [7, 14]:
        trailing = previous_calls.rolling(window)
        frame[f'calls_mean_{window}d'] = trailing.mean().to_numpy()[start:]
        frame[f'calls_std_{window}d'] = trailing.std().to_numpy()[start:]


def _add_vix_features(frame, vix_history, start, high_threshold):
    """Add 'vix_high' and 'vix_spike' to ``frame`` in place; spikes are day-over-day moves within ``vix_history``."""
    frame['vix_high'] = (frame['^VIX_close'] > high_threshold).astype(int)
    frame['vix_spike'] = (vix_history.pct_change() > 0.2).astype(int).to_numpy()[start:]


def _add_spy_features(frame, spy_history, start):
    """Add 'spy_returns' and 'market_stress' to ``frame`` in place; returns are day-over-day within ``spy_history``."""
    frame['spy_returns'] = spy_history.pct_change().to_numpy()[start:]
    frame['market_stress'] = (frame['spy_returns'] < -0.02).astype(int)


def create_features_no_leakage(df_train, df_test=None):
    """
    Create features using ONLY training data to prevent data leakage
    Apply the same transformations to test data using training statistics

    Parameters:
    - df_train: Training rows; DatetimeIndex and a 'calls' column, optionally
      '^VIX_close' and 'SPY_close'
    - df_test: Optional test rows that directly follow df_train in time

    Returns:
    - (train_features, test_features); test_features is None when df_test is None

    Notes:
    - Rolling mean/std of calls use the rows strictly BEFORE each row, so a
      row's own target never enters its features.
    - Test-row lag/rolling/return features are computed on train followed by
      test. Each test row therefore sees the actual calls of earlier test rows,
      which matches a one-step-ahead setup.
      TODO confirm: multi-step forecasts made from the split date would need
      these recomputed from predictions.
    - The VIX "high" threshold is the 80th percentile of the training VIX only.
    """

    print("🛠️ FEATURE ENGINEERING (No Data Leakage)")
    print("=" * 50)
    print("✅ Using ONLY training data statistics for feature creation")

    # ---- Training data ----
    df_features_train = df_train.copy()
    _add_calendar_features(df_features_train)
    _add_call_history_features(df_features_train, df_features_train['calls'], 0)

    # Market features (if available, using training statistics only)
    market_features = []
    train_vix_high_threshold = None
    if '^VIX_close' in df_features_train.columns:
        # VIX threshold based on training data
        train_vix_high_threshold = df_features_train['^VIX_close'].quantile(0.8)
        _add_vix_features(df_features_train, df_features_train['^VIX_close'], 0, train_vix_high_threshold)
        market_features.extend(['vix_high', 'vix_spike'])

    if 'SPY_close' in df_features_train.columns:
        _add_spy_features(df_features_train, df_features_train['SPY_close'], 0)
        market_features.extend(['spy_returns', 'market_stress'])

    print(f"✅ Created {len(df_features_train.columns)-len(df_train.columns)} new features for training data")
    print(f"   Market features: {len(market_features)}")

    if df_test is None:
        return df_features_train, None

    # ---- Test data: same transformations, training statistics ----
    df_features_test = df_test.copy()
    _add_calendar_features(df_features_test)

    # History-based features look back across the train/test boundary, so they
    # are computed on train followed by test and the test rows are sliced off.
    n_train = len(df_features_train)
    combined_calls = pd.concat([df_features_train['calls'], df_features_test['calls']])
    _add_call_history_features(df_features_test, combined_calls, n_train)

    # Market features for test (training threshold; changes measured against the
    # previous day, which for the first test row is the last training day)
    if '^VIX_close' in df_features_test.columns and 'vix_high' in market_features:
        combined_vix = pd.concat([df_features_train['^VIX_close'], df_features_test['^VIX_close']])
        _add_vix_features(df_features_test, combined_vix, n_train, train_vix_high_threshold)

    if 'SPY_close' in df_features_test.columns and 'spy_returns' in market_features:
        combined_spy = pd.concat([df_features_train['SPY_close'], df_features_test['SPY_close']])
        _add_spy_features(df_features_test, combined_spy, n_train)

    print(f"✅ Applied same transformations to test data")
    return df_features_train, df_features_test

# Reminder printed when this cell runs: features are built per CV split.
print(
    "🔒 Feature engineering will be applied to each CV split individually\n"
    "📊 This prevents any future information from leaking into the models"
)

# ============================================================================
# CELL 6: Baseline Models
# ============================================================================

class BaselineModels:
    """Collection of simple baseline forecasting models.

    Each ``fit_*`` method stores what it needs in ``self.models`` under a fixed key
    (``'naive'``, ``'seasonal_naive'``, ``'mean'``, ``'drift'``) and returns ``self`` so
    calls can be chained. ``predict`` turns one fitted entry into a NumPy array of forecasts.
    """

    def __init__(self):
        # Maps model key -> fitted state (a scalar or a small dict, depending on the model).
        self.models = {}

    def fit_naive(self, y_train):
        """Naive forecast: remember the last observed value.

        Args:
            y_train: pandas Series of training observations (positional ``iloc`` access is used).

        Returns:
            self
        """
        self.models['naive'] = y_train.iloc[-1]
        return self

    def fit_seasonal_naive(self, y_train, season_length=7):
        """Seasonal naive: remember the last ``season_length`` observations.

        When ``y_train`` is shorter than ``season_length`` the slice simply holds every
        available observation; ``predict`` copes with that shorter cycle.

        Args:
            y_train: pandas Series of training observations.
            season_length: number of trailing observations that form one season.

        Returns:
            self
        """
        self.models['seasonal_naive'] = {
            'values': y_train.iloc[-season_length:],
            'season_length': season_length
        }
        return self

    def fit_mean(self, y_train):
        """Mean forecast: remember the historical average of ``y_train``."""
        self.models['mean'] = y_train.mean()
        return self

    def fit_drift(self, y_train):
        """Drift forecast: straight line through the first and last observation.

        The slope is ``(last - first) / (n - 1)``; a single-observation series gets slope 0.

        Returns:
            self
        """
        n = len(y_train)
        if n > 1:
            slope = (y_train.iloc[-1] - y_train.iloc[0]) / (n - 1)
            self.models['drift'] = {
                'last_value': y_train.iloc[-1],
                'slope': slope
            }
        else:
            self.models['drift'] = {'last_value': y_train.iloc[-1], 'slope': 0}
        return self

    def predict(self, steps, model_type='naive'):
        """Generate ``steps`` forecasts from a previously fitted baseline.

        Args:
            steps: forecast horizon (number of values to return).
            model_type: one of ``'naive'``, ``'seasonal_naive'``, ``'mean'``, ``'drift'``.

        Returns:
            numpy.ndarray of length ``steps``.

        Raises:
            ValueError: if ``model_type`` is not a known baseline, or if the seasonal naive
                model was fitted on an empty series and ``steps`` is positive.
            KeyError: if the requested baseline has not been fitted yet.
        """
        if model_type == 'naive':
            return np.full(steps, self.models['naive'])

        elif model_type == 'seasonal_naive':
            season_values = self.models['seasonal_naive']['values'].values
            # The stored slice can be shorter than the requested season length when the
            # training history was shorter than one season, so cycle over what was
            # actually stored instead of indexing past the end of the array.
            cycle_length = len(season_values)
            if steps > 0 and cycle_length == 0:
                raise ValueError("seasonal_naive was fitted on an empty series")
            return np.array([season_values[i % cycle_length] for i in range(steps)])

        elif model_type == 'mean':
            return np.full(steps, self.models['mean'])

        elif model_type == 'drift':
            model_info = self.models['drift']
            last_value = model_info['last_value']
            slope = model_info['slope']
            # Step i (0-based) lies i + 1 periods after the last training observation.
            return np.array([last_value + slope * (i + 1) for i in range(steps)])

        else:
            raise ValueError(f"Unknown model type: {model_type}")

def fit_traditional_models(y_train, forecast_steps):
    """Fit the classical time-series models and forecast ``forecast_steps`` ahead.

    Holt-Winters and SARIMA are always attempted; Prophet is attempted only when the
    notebook-level flag ``PROPHET_AVAILABLE`` is truthy. Any model that raises is reported
    on stdout and replaced by a flat forecast at the training mean.

    Args:
        y_train: pandas Series of training observations; its index is used to build
            Prophet's ``ds`` column and the future dates.
        forecast_steps: number of periods to forecast.

    Returns:
        dict mapping ``'holt_winters'``, ``'sarima'`` and (optionally) ``'prophet'`` to
        their forecasts.
    """

    def mean_fallback():
        # Flat forecast used whenever a model cannot be fitted.
        return np.full(forecast_steps, y_train.mean())

    forecasts = {}

    # --- Holt-Winters: additive trend, additive weekly seasonality ---
    try:
        fitted_hw = ExponentialSmoothing(
            y_train,
            seasonal='add',
            seasonal_periods=7,
            trend='add'
        ).fit()
        forecasts['holt_winters'] = fitted_hw.forecast(steps=forecast_steps)
    except Exception as err:
        print(f"Holt-Winters failed: {err}")
        forecasts['holt_winters'] = mean_fallback()

    # --- SARIMA(1,1,1)(1,1,1,7); orders are a fixed starting point, not tuned here ---
    try:
        fitted_sarima = SARIMAX(
            y_train,
            order=(1, 1, 1),
            seasonal_order=(1, 1, 1, 7),
            enforce_stationarity=False,
            enforce_invertibility=False
        ).fit(disp=False)
        forecasts['sarima'] = fitted_sarima.forecast(steps=forecast_steps)
    except Exception as err:
        print(f"SARIMA failed: {err}")
        forecasts['sarima'] = mean_fallback()

    # --- Prophet is optional; without it the two models above are the whole result ---
    if not PROPHET_AVAILABLE:
        return forecasts

    try:
        history = pd.DataFrame({
            'ds': y_train.index,
            'y': y_train.values
        })

        prophet_model = Prophet(
            daily_seasonality=False,
            weekly_seasonality=True,
            yearly_seasonality=True,
            changepoint_prior_scale=0.05
        )
        prophet_model.fit(history)

        # Daily dates starting the day after the last training observation.
        horizon = pd.DataFrame({
            'ds': pd.date_range(
                start=y_train.index[-1] + pd.Timedelta(days=1),
                periods=forecast_steps,
                freq='D'
            )
        })
        forecasts['prophet'] = prophet_model.predict(horizon)['yhat'].values

    except Exception as err:
        print(f"Prophet failed: {err}")
        forecasts['prophet'] = mean_fallback()

    return forecasts

# ============================================================================
# CELL 7: Machine Learning Models
# ============================================================================

def prepare_ml_features(df_features, target_col='calls'):
    """Split an engineered frame into a clean feature matrix and target vector.

    Every column except the target becomes a feature; columns whose name starts with
    ``'calls_lag'`` are placed after all other features. Rows with a missing value in any
    feature or in the target are dropped.

    Args:
        df_features: DataFrame holding the target and the engineered features.
        target_col: name of the target column.

    Returns:
        tuple ``(X_clean, y_clean, feature_cols)`` where ``feature_cols`` is the ordered
        list of feature column names.
    """
    regular_cols = []
    lag_cols = []
    for name in df_features.columns:
        if name.startswith('calls_lag'):
            lag_cols.append(name)
        elif name != target_col:
            regular_cols.append(name)

    # Lag columns go last, after every non-lag feature.
    feature_cols = regular_cols + lag_cols

    features = df_features[feature_cols].copy()
    target = df_features[target_col].copy()

    # A row is usable only if every feature AND the target are present.
    row_is_complete = features.notna().all(axis=1) & target.notna()

    return features[row_is_complete], target[row_is_complete], feature_cols

def fit_ml_models(X_train, y_train, X_test):
    """Train the three ML regressors and predict on ``X_test``.

    A ``StandardScaler`` is fitted on ``X_train``; only the linear model consumes the
    scaled matrices, the tree ensembles are trained on the raw features. A model that
    raises is reported on stdout and replaced by a flat prediction at the training mean.

    Args:
        X_train: training feature matrix.
        y_train: training target (must support ``.mean()``).
        X_test: test feature matrix with the same columns as ``X_train``.

    Returns:
        tuple ``(results, scaler)``: ``results`` maps ``'linear_regression'``,
        ``'random_forest'`` and ``'gradient_boosting'`` to prediction arrays; ``scaler`` is
        the fitted ``StandardScaler``.
    """
    predictions = {}

    # Standardise using training statistics only; the test set is merely transformed.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Linear regression (scaled inputs)
    try:
        predictions['linear_regression'] = (
            LinearRegression().fit(X_train_scaled, y_train).predict(X_test_scaled)
        )
    except Exception as err:
        print(f"Linear Regression failed: {err}")
        predictions['linear_regression'] = np.full(len(X_test), y_train.mean())

    # Random forest (raw inputs)
    try:
        forest = RandomForestRegressor(
            n_estimators=100,
            max_depth=10,
            random_state=42,
            n_jobs=-1
        )
        predictions['random_forest'] = forest.fit(X_train, y_train).predict(X_test)
    except Exception as err:
        print(f"Random Forest failed: {err}")
        predictions['random_forest'] = np.full(len(X_test), y_train.mean())

    # Gradient boosting (raw inputs)
    try:
        booster = GradientBoostingRegressor(
            n_estimators=100,
            max_depth=6,
            learning_rate=0.1,
            random_state=42
        )
        predictions['gradient_boosting'] = booster.fit(X_train, y_train).predict(X_test)
    except Exception as err:
        print(f"Gradient Boosting failed: {err}")
        predictions['gradient_boosting'] = np.full(len(X_test), y_train.mean())

    return predictions, scaler

# Status banner listing the model families defined so far.
print(
    "🤖 MODEL DEFINITIONS READY",
    "=" * 50,
    "✅ Baseline models: Naive, Seasonal Naive, Mean, Drift",
    "✅ Traditional models: Holt-Winters, SARIMA, Prophet",
    "✅ ML models: Linear Regression, Random Forest, Gradient Boosting",
    sep="\n",
)

# ============================================================================
# CELL 7.5: Adaptive Models for High Regime Change Data
# ============================================================================

class AdaptiveModels:
    """Models specifically designed for high regime change environments.

    Every ``fit_*`` method condenses the training series into a single level stored in
    ``self.models`` under a fixed key and returns ``self``. ``predict`` repeats that level
    over the requested horizon, so all forecasts produced here are flat.
    """

    def __init__(self):
        # Maps model key -> fitted scalar level.
        self.models = {}

    def fit_exponential_smoothing_adaptive(self, y_train, alpha=0.8):
        """High-alpha simple exponential smoothing for rapid adaptation.

        Stores the final smoothed level under ``'exp_smooth_adaptive'``. A series with
        fewer than two observations stores its last value unchanged.

        Args:
            y_train: pandas Series of training observations.
            alpha: smoothing weight given to the newest observation.

        Returns:
            self
        """
        if len(y_train) < 2:
            self.models['exp_smooth_adaptive'] = y_train.iloc[-1]
            return self

        # Only the running level is needed, so the full smoothed history is not kept.
        level = y_train.iloc[0]
        for observation in y_train.iloc[1:]:
            level = alpha * observation + (1 - alpha) * level

        self.models['exp_smooth_adaptive'] = level
        return self

    def fit_moving_window_mean(self, y_train, window=3):
        """Mean of the last ``window`` observations, stored under ``'moving_window'``.

        Falls back to the mean of the whole series when it is shorter than ``window``.
        """
        if len(y_train) < window:
            self.models['moving_window'] = y_train.mean()
        else:
            self.models['moving_window'] = y_train.tail(window).mean()
        return self

    def fit_weighted_recent(self, y_train, weights=None):
        """Weighted average of the most recent observations, stored under ``'weighted_recent'``.

        Args:
            y_train: pandas Series of training observations.
            weights: optional sequence of weights, oldest first. When omitted,
                exponentially increasing weights over the last (at most) 7 observations
                are used. If there are fewer observations than weights, the plain mean of
                the series is stored instead.

        Returns:
            self
        """
        if weights is None:
            # Exponentially decaying weights
            n = min(7, len(y_train))  # Use last 7 days max
            weights = np.exp(np.linspace(-2, 0, n))
            weights = weights / weights.sum()

        if len(y_train) >= len(weights):
            recent_data = y_train.tail(len(weights))
            self.models['weighted_recent'] = np.average(recent_data, weights=weights)
        else:
            self.models['weighted_recent'] = y_train.mean()
        return self

    def fit_linear_trend_short(self, y_train, window=5):
        """Least-squares line through the last ``window`` points, extrapolated one step.

        The one-step-ahead value is stored under ``'linear_trend_short'``. The window is
        shrunk to the series length when necessary; a single observation is stored as is.
        """
        if len(y_train) < window:
            window = len(y_train)

        recent_data = y_train.tail(window)
        if len(recent_data) > 1:
            x = np.arange(len(recent_data))
            slope, intercept = np.polyfit(x, recent_data.values, 1)
            # Forecast one step ahead
            self.models['linear_trend_short'] = slope * len(recent_data) + intercept
        else:
            self.models['linear_trend_short'] = recent_data.iloc[-1]
        return self

    def predict(self, steps, model_type):
        """Generate a flat forecast of length ``steps`` from a fitted adaptive model.

        Every adaptive model (including ``'linear_trend_short'``, whose stored value is
        already the one-step-ahead extrapolation) is forecast by repeating its stored level.

        Args:
            steps: forecast horizon.
            model_type: key of a model previously fitted on this instance.

        Returns:
            numpy.ndarray of length ``steps``.

        Raises:
            ValueError: if ``model_type`` has not been fitted. A misspelt or unfitted key
                would otherwise yield an all-zero forecast that looks like a real result.
        """
        if model_type not in self.models:
            raise ValueError(
                f"Adaptive model '{model_type}' has not been fitted "
                f"(fitted models: {sorted(self.models)})"
            )
        return np.full(steps, self.models[model_type])

def fit_adaptive_models(y_train, forecast_steps):
    """Fit adaptive models designed for regime change environments.

    Runs the four ``AdaptiveModels`` estimators plus a damped-trend exponential smoothing
    fitted on the last 14 observations.

    Args:
        y_train: pandas Series of training observations.
        forecast_steps: number of periods to forecast.

    Returns:
        dict mapping ``'adaptive_exp_smooth'``, ``'adaptive_moving_3d'``,
        ``'adaptive_weighted'``, ``'adaptive_linear_trend'`` and
        ``'adaptive_damped_trend'`` to their forecasts.
    """

    results = {}

    # 1. High-alpha exponential smoothing
    adaptive = AdaptiveModels()
    adaptive.fit_exponential_smoothing_adaptive(y_train, alpha=0.9)
    results['adaptive_exp_smooth'] = adaptive.predict(forecast_steps, 'exp_smooth_adaptive')

    # 2. Very short moving window
    adaptive.fit_moving_window_mean(y_train, window=3)
    results['adaptive_moving_3d'] = adaptive.predict(forecast_steps, 'moving_window')

    # 3. Weighted recent observations
    adaptive.fit_weighted_recent(y_train)
    results['adaptive_weighted'] = adaptive.predict(forecast_steps, 'weighted_recent')

    # 4. Short linear trend
    adaptive.fit_linear_trend_short(y_train, window=5)
    results['adaptive_linear_trend'] = adaptive.predict(forecast_steps, 'linear_trend_short')

    # 5. Damped trend (conservative)
    try:
        # Imported inside the try so a missing statsmodels also takes the fallback path.
        from statsmodels.tsa.holtwinters import ExponentialSmoothing
        damped_model = ExponentialSmoothing(
            y_train.tail(14),  # Use only last 14 days
            trend='add',
            damped_trend=True
        ).fit()
        results['adaptive_damped_trend'] = damped_model.forecast(steps=forecast_steps)
    except Exception as e:
        # Catch Exception rather than everything: KeyboardInterrupt / SystemExit must still
        # stop a long evaluation run. Report the reason instead of silently substituting
        # the fallback.
        print(f"Damped trend failed: {e}")
        results['adaptive_damped_trend'] = np.full(forecast_steps, y_train.tail(7).mean())

    return results

# Status banner listing the adaptive model family.
print(
    "🔄 ADAPTIVE MODELS READY",
    "=" * 50,
    "✅ Adaptive models for high regime change environments:",
    "   • High-alpha exponential smoothing (α=0.9)",
    "   • Very short moving windows (3 days)",
    "   • Weighted recent observations",
    "   • Short-term linear trends (5 days)",
    "   • Damped trend models",
    sep="\n",
)

# ============================================================================
# CELL 8: Model Evaluation and Comparison
# ============================================================================

def run_comprehensive_evaluation():
    """Run all models on all CV splits and rank them relative to Seasonal Naive.

    Reads the notebook-level globals ``cv_splits`` (each split is indexed with
    ``'train_idx'`` and ``'test_idx'``) and ``df`` (raw frame with a ``'calls'`` column),
    and calls ``create_features_no_leakage`` and ``evaluate_forecast_relative``, both
    defined elsewhere in the notebook. For each split it:

    1. trims the training window to the most recent days,
    2. engineers features for that split only,
    3. scores baseline, adaptive, traditional and ML models against the seasonal-naive
       forecast for the same days.

    Per-split metrics are collected into one frame, then averaged per model and printed
    as a ranking with a short narrative.

    Returns:
        tuple ``(results_df, avg_results)``: the per-split metrics and the per-model
        mean/std table sorted by ``improvement_pct_mean`` (best first). Returns
        ``(None, None)`` when no metrics were produced.
    """

    print("🎯 RUNNING LEAKAGE-FREE MODEL EVALUATION")
    print("=" * 50)
    print("🔒 Features engineered separately for each split")
    print("📊 Results shown relative to Seasonal Naive baseline")

    all_results = []

    for split_idx, split in enumerate(cv_splits):
        print(f"\n📊 Evaluating Split {split_idx + 1}/{len(cv_splits)}")
        print("-" * 30)

        # Get raw train/test data (before feature engineering)
        train_data_raw = df.loc[split['train_idx']]
        test_data_raw = df.loc[split['test_idx']]

        # Limit training data for LONG-TERM regime changes (not daily volatility)
        # NOTE(review): a window of 61-90 days is cut to 60 while a longer one keeps 90 —
        # confirm this two-tier rule is intended.
        if len(train_data_raw) > 90:  # Use only last 90 days for structural changes
            train_data_raw = train_data_raw.tail(90)
            print(f"  ⚠️  Limited training to last 90 days due to long-term regime changes")
        elif len(train_data_raw) > 60:  # Fallback to 60 days
            train_data_raw = train_data_raw.tail(60)
            print(f"  ⚠️  Limited training to last 60 days due to regime changes")

        print(f"  📅 Using {len(train_data_raw)} days for training (recent regime only)")

        # Apply feature engineering ONLY to this split's data
        train_features, test_features = create_features_no_leakage(train_data_raw, test_data_raw)

        y_train = train_features['calls']
        y_test = test_data_raw['calls'].values
        forecast_steps = len(test_data_raw)

        # 1. BASELINE MODELS (including Seasonal Naive for reference)
        print("  🔵 Fitting baseline models...")
        baseline = BaselineModels()
        baseline.fit_naive(y_train)
        baseline.fit_seasonal_naive(y_train, season_length=7)
        baseline.fit_mean(y_train)
        baseline.fit_drift(y_train)

        # Get seasonal naive prediction for relative comparison
        seasonal_naive_pred = baseline.predict(forecast_steps, 'seasonal_naive')

        # Evaluate all baseline models
        baseline_models = ['naive', 'seasonal_naive', 'mean', 'drift']
        for model_name in baseline_models:
            pred = baseline.predict(forecast_steps, model_name)
            metrics = evaluate_forecast_relative(y_test, pred, seasonal_naive_pred, f"baseline_{model_name}")
            metrics['split'] = split_idx + 1
            all_results.append(metrics)

        # 2. ADAPTIVE MODELS (designed for regime changes)
        print("  🟠 Fitting adaptive models...")
        adaptive_results = fit_adaptive_models(y_train, forecast_steps)

        for model_name, pred in adaptive_results.items():
            metrics = evaluate_forecast_relative(y_test, pred, seasonal_naive_pred, model_name)
            metrics['split'] = split_idx + 1
            all_results.append(metrics)

        # 3. TRADITIONAL MODELS (with short windows)
        print("  🟢 Fitting traditional models (short window)...")
        try:
            traditional_results = fit_traditional_models(y_train, forecast_steps)

            for model_name, pred in traditional_results.items():
                metrics = evaluate_forecast_relative(y_test, pred, seasonal_naive_pred, f"{model_name}_short")
                metrics['split'] = split_idx + 1
                all_results.append(metrics)
        except Exception as e:
            print(f"    Traditional models failed: {e}")

        # 4. ML MODELS (with market-enhanced features)
        print("  🟡 Fitting market-enhanced ML models...")
        try:
            # Prepare ML features INCLUDING MARKET DATA (FIXED VERSION)
            all_feature_cols = [col for col in train_features.columns if col not in ['calls']]

            # Show what features are available
            market_cols = [col for col in all_feature_cols if any(market in col for market in
                          ['^VIX', 'SPY', 'QQQ', 'BTC', 'ETH', 'DX-Y', 'GC=F', 'vix_', 'market_', 'spy_', 'btc_', 'crypto_'])]

            print(f"    📊 Available features: {len(all_feature_cols)} total")
            print(f"    📈 Market features found: {len(market_cols)}")
            if market_cols:
                print(f"    🎯 Market features: {market_cols[:5]}{'...' if len(market_cols) > 5 else ''}")

            # Use ALL available features (including market data)
            X_train_ml = train_features[all_feature_cols].dropna()
            y_train_ml = train_features.loc[X_train_ml.index, 'calls']

            # Rows with missing features can sit anywhere in the test window (e.g. the
            # first row of a pct_change feature), so keep an explicit positional mask and
            # apply the SAME mask to features, targets and the seasonal-naive reference.
            X_test_full = test_features[all_feature_cols].reindex(test_data_raw.index)
            complete_test_rows = X_test_full.notna().all(axis=1).values
            X_test_ml = X_test_full[complete_test_rows]

            print(f"    🔧 ML training data: {X_train_ml.shape} (features: {X_train_ml.shape[1]})")

            if len(X_train_ml) > 5 and len(X_test_ml) > 0:
                y_test_aligned = y_test[complete_test_rows]
                # Slicing the first len(y_test_aligned) reference values would compare the
                # ML predictions against the wrong days whenever a non-trailing row was dropped.
                seasonal_naive_aligned = seasonal_naive_pred[complete_test_rows]

                ml_results, scaler = fit_ml_models(X_train_ml, y_train_ml, X_test_ml)

                for model_name, pred in ml_results.items():
                    if len(pred) == len(y_test_aligned):
                        # Clear naming to show market enhancement
                        enhanced_model_name = f"{model_name}_MARKET_ENHANCED"
                        metrics = evaluate_forecast_relative(y_test_aligned, pred, seasonal_naive_aligned, enhanced_model_name)
                        metrics['split'] = split_idx + 1
                        all_results.append(metrics)

                print(f"    ✅ Market-enhanced ML models: {len(ml_results)} completed")
            else:
                print(f"    ❌ Insufficient ML data: train={len(X_train_ml)}, test={len(X_test_ml)}")

        except Exception as e:
            print(f"    ❌ Market-enhanced ML failed: {e}")
            import traceback
            print(f"    🔍 Error details: {traceback.format_exc()[-200:]}")  # Last 200 chars of error

    # Convert to DataFrame and analyze
    results_df = pd.DataFrame(all_results)

    if len(results_df) == 0:
        print("❌ No results generated!")
        return None, None

    # Calculate average performance across splits
    avg_results = results_df.groupby('model').agg({
        'mae': ['mean', 'std'],
        'mape': ['mean', 'std'],
        'rmse': ['mean', 'std'],
        'r2': ['mean', 'std'],
        'improvement_pct': ['mean', 'std'],
        'mape_vs_baseline': ['mean', 'std']
    }).round(3)

    # Flatten column names: ('mae', 'mean') -> 'mae_mean'
    avg_results.columns = [f"{metric}_{stat}" for metric, stat in avg_results.columns]

    # Sort by improvement over baseline (best model ends up in row 0)
    avg_results = avg_results.sort_values('improvement_pct_mean', ascending=False)

    print("\n🏆 FINAL MODEL RANKINGS (Relative to Seasonal Naive)")
    print("=" * 60)

    # Business-friendly results table
    display_results = pd.DataFrame({
        'Model': avg_results.index,
        'MAPE (%)': avg_results['mape_mean'].round(1),
        'vs Seasonal Naive': avg_results['improvement_pct_mean'].round(1).astype(str) + '%',
        'Consistency (± std)': avg_results['improvement_pct_std'].round(1).astype(str) + '%',
        'Status': ['Better' if x > 0 else 'Worse' for x in avg_results['improvement_pct_mean']]
    })

    print(display_results.to_string(index=False))

    # Highlight key findings
    print(f"\n📋 KEY BUSINESS INSIGHTS:")
    print("=" * 30)

    # Find best model
    best_model = avg_results.index[0]
    best_improvement = avg_results.iloc[0]['improvement_pct_mean']

    if best_improvement > 10:
        print(f"✅ WINNER: {best_model}")
        print(f"   • {best_improvement:.1f}% better than seasonal naive")
        print(f"   • Clear performance advantage")
    elif best_improvement > 0:
        print(f"✅ MODEST WINNER: {best_model}")
        print(f"   • {best_improvement:.1f}% better than seasonal naive")
        print(f"   • Small but consistent improvement")
    else:
        print(f"❌ NO CLEAR WINNER")
        print(f"   • Best model: {best_model} ({best_improvement:.1f}% vs baseline)")
        print(f"   • Seasonal naive is hard to beat!")

    # Count how many models beat baseline
    better_models = (avg_results['improvement_pct_mean'] > 0).sum()
    total_models = len(avg_results)

    print(f"\n📊 OVERALL ASSESSMENT:")
    print(f"   • Models beating seasonal naive: {better_models}/{total_models}")
    print(f"   • Average improvement range: {avg_results['improvement_pct_mean'].min():.1f}% to {avg_results['improvement_pct_mean'].max():.1f}%")

    if better_models < total_models / 2:
        print(f"   ⚠️ WARNING: Most complex models underperform simple baseline")
        print(f"   💡 INSIGHT: High regime changes favor simple adaptive models")

    # Adaptive models performance
    adaptive_models = [idx for idx in avg_results.index if 'adaptive' in idx]
    if adaptive_models:
        adaptive_improvements = [avg_results.loc[model, 'improvement_pct_mean'] for model in adaptive_models]
        avg_adaptive_improvement = np.mean(adaptive_improvements)
        print(f"\n🔄 ADAPTIVE MODELS ASSESSMENT:")
        print(f"   • Average improvement: {avg_adaptive_improvement:.1f}%")

        if avg_adaptive_improvement > 5:
            print(f"   ✅ Adaptive models are working well for regime changes")
        elif avg_adaptive_improvement > 0:
            print(f"   ⚠️ Adaptive models show modest improvement")
        else:
            print(f"   ❌ Even adaptive models struggle with this data")

    return results_df, avg_results

def display_business_summary(avg_results):
    """Print an executive-style recap of the ranked model table.

    Args:
        avg_results: per-model summary frame indexed by model name, with at least the
            columns ``'improvement_pct_mean'`` and ``'mape_mean'``. Row 0 is treated as
            the best model. ``None`` prints a short "no results" notice instead.

    Returns:
        None. Everything is written to stdout.
    """
    print(f"\n💼 EXECUTIVE SUMMARY")
    print("=" * 40)

    if avg_results is None:
        print("❌ No results to summarize")
        return

    # Row 0 is reported as the winner.
    top_row = avg_results.iloc[0]
    best_model = avg_results.index[0]
    best_improvement = top_row['improvement_pct_mean']
    best_mape = top_row['mape_mean']

    # Look up the seasonal naive row (first name match) to give the headline context.
    is_seasonal_naive = avg_results.index.str.contains('seasonal_naive', case=False)
    seasonal_rows = avg_results[is_seasonal_naive]
    baseline_mape = seasonal_rows.iloc[0]['mape_mean'] if len(seasonal_rows) > 0 else "Unknown"

    print(f"📊 BOTTOM LINE:")
    print(f"   • Best performing model: {best_model}")
    print(f"   • Performance: {best_mape:.1f}% MAPE")
    print(f"   • Improvement over simple baseline: {best_improvement:.1f}%")
    if baseline_mape != "Unknown":
        print(f"   • Seasonal naive baseline: {baseline_mape:.1f}% MAPE")
    else:
        # An empty line is emitted when no seasonal naive row exists.
        print("")

    print(f"\n🎯 BUSINESS RECOMMENDATION:")
    # Pick the recommendation tier first, then print its lines in one place.
    if best_improvement > 15:
        recommendation = [
            f"   ✅ IMPLEMENT {best_model}",
            f"   • Significant improvement over baseline",
            f"   • Justifies implementation complexity",
        ]
    elif best_improvement > 5:
        recommendation = [
            f"   ⚠️ CONSIDER {best_model}",
            f"   • Modest improvement over baseline",
            f"   • Evaluate implementation cost vs benefit",
        ]
    elif best_improvement > 0:
        recommendation = [
            f"   ⚠️ STICK WITH SEASONAL NAIVE",
            f"   • Complex models provide minimal benefit",
            f"   • Simple baseline is nearly optimal",
        ]
    else:
        recommendation = [
            f"   ❌ STICK WITH SEASONAL NAIVE",
            f"   • Complex models perform worse than baseline",
            f"   • High regime changes make forecasting extremely difficult",
        ]
    for line in recommendation:
        print(line)

    print(f"\n📈 OPERATIONAL INSIGHT:")
    for line in (
        f"   • Your data has extreme volatility (confirmed by regime analysis)",
        f"   • Focus on rapid adaptation rather than prediction accuracy",
        f"   • Use forecasts for directional guidance, not precise planning",
    ):
        print(line)

# Kick off the leakage-free evaluation; without CV splits there is nothing to evaluate,
# so both result variables are set to None for the cells that follow.
print("🚀 Starting comprehensive leakage-free evaluation...")
if not cv_splits:
    print("❌ No CV splits available - cannot run evaluation")
    results_df, avg_results = None, None
else:
    results_df, avg_results = run_comprehensive_evaluation()

    # Only summarise when the evaluation actually produced a ranking table.
    if avg_results is not None:
        display_business_summary(avg_results)

# ============================================================================
# CELL 9: Ensemble Methods (FIXED - No Data Leakage)
# ============================================================================

def create_ensemble_forecasts():
    """Build ensemble forecasts per CV split and merge their metrics into the rankings.

    Reads the notebook-level globals ``results_df``, ``avg_results``, ``cv_splits`` and
    ``df``, and calls ``create_features_no_leakage`` and ``evaluate_forecast_relative``
    (both defined elsewhere in the notebook). For every split it refits the baseline,
    adaptive and traditional models on at most the last 10 training days, then scores
    three combinations against the seasonal-naive forecast: the mean of all valid
    predictions, their median, and the mean of the adaptive models only.

    NOTE(review): the base-model rows in ``results_df`` come from
    ``run_comprehensive_evaluation``, which trains on up to 90 days, while the ensembles
    here are built from models trained on at most 10 days — confirm that ranking the two
    side by side is intended.

    Returns:
        tuple ``(combined_results, avg_with_ensemble)`` — per-split metrics including the
        ensemble rows, and the per-model mean/std table sorted by
        ``improvement_pct_mean``. Returns ``(None, None)`` when ``results_df`` is missing
        or empty, and ``(results_df, avg_results or None)`` when no ensemble could be
        built.
    """

    print("\n🔗 CREATING ENSEMBLE FORECASTS (LEAKAGE-FREE)")
    print("=" * 50)

    # Ensembles are only meaningful alongside the base-model results.
    if results_df is None or len(results_df) == 0:
        print("❌ No base model results available for ensembling")
        return None, None

    print("⚠️  Using short-window ensembles due to regime changes")
    print("🔒 Applying same leakage-free approach as main evaluation")

    ensemble_results = []

    for split_idx, split in enumerate(cv_splits):
        print(f"\n📊 Ensemble for Split {split_idx + 1}")

        # Get RAW train/test data (same approach as main evaluation)
        train_data_raw = df.loc[split['train_idx']]
        test_data_raw = df.loc[split['test_idx']]

        # Limit training data to last 10 days for ensembles
        if len(train_data_raw) > 10:
            train_data_raw = train_data_raw.tail(10)
            print(f"  📅 Limited to last {len(train_data_raw)} days for training")

        # Apply feature engineering ONLY to this split (no data leakage)
        train_features, test_features = create_features_no_leakage(train_data_raw, test_data_raw)

        y_train = train_features['calls']
        y_test = test_data_raw['calls'].values
        forecast_steps = len(test_data_raw)

        # Collect all model predictions, keyed by model name
        predictions = {}

        print("  🔵 Baseline models for ensemble...")
        # Baseline models
        baseline = BaselineModels()
        baseline.fit_naive(y_train)
        baseline.fit_seasonal_naive(y_train, season_length=7)
        baseline.fit_mean(y_train)
        baseline.fit_drift(y_train)

        predictions['naive'] = baseline.predict(forecast_steps, 'naive')
        predictions['seasonal_naive'] = baseline.predict(forecast_steps, 'seasonal_naive')
        predictions['mean'] = baseline.predict(forecast_steps, 'mean')
        predictions['drift'] = baseline.predict(forecast_steps, 'drift')

        print("  🟠 Adaptive models for ensemble...")
        # Adaptive models (a failure here only removes them from the ensemble)
        try:
            adaptive_results = fit_adaptive_models(y_train, forecast_steps)
            predictions.update(adaptive_results)
        except Exception as e:
            print(f"    Adaptive models failed: {e}")

        print("  🟢 Traditional models for ensemble...")
        # Traditional models (if they work); stored with a "_short" suffix
        try:
            traditional_results = fit_traditional_models(y_train, forecast_steps)
            for model_name, pred in traditional_results.items():
                predictions[f"{model_name}_short"] = pred
        except Exception as e:
            print(f"    Traditional models failed: {e}")

        # Get seasonal naive baseline for relative evaluation
        seasonal_naive_pred = predictions['seasonal_naive']

        # Create ensembles: keep only full-length predictions that contain no NaN
        valid_predictions = {name: pred for name, pred in predictions.items()
                           if len(pred) == forecast_steps and not np.any(np.isnan(pred))}

        if len(valid_predictions) > 1:
            # One row per model, one column per forecast step
            pred_array = np.array(list(valid_predictions.values()))

            print(f"  🔗 Creating ensembles from {len(valid_predictions)} models...")

            # 1. Simple average ensemble
            ensemble_mean = np.mean(pred_array, axis=0)
            metrics = evaluate_forecast_relative(y_test, ensemble_mean, seasonal_naive_pred, "ensemble_mean")
            metrics['split'] = split_idx + 1
            ensemble_results.append(metrics)

            # 2. Median ensemble (robust to outliers)
            ensemble_median = np.median(pred_array, axis=0)
            metrics = evaluate_forecast_relative(y_test, ensemble_median, seasonal_naive_pred, "ensemble_median")
            metrics['split'] = split_idx + 1
            ensemble_results.append(metrics)

            # 3. Adaptive-only ensemble (if we have adaptive models)
            adaptive_preds = {name: pred for name, pred in valid_predictions.items() if 'adaptive' in name}
            if len(adaptive_preds) > 1:
                adaptive_array = np.array(list(adaptive_preds.values()))
                ensemble_adaptive = np.mean(adaptive_array, axis=0)
                metrics = evaluate_forecast_relative(y_test, ensemble_adaptive, seasonal_naive_pred, "ensemble_adaptive_only")
                metrics['split'] = split_idx + 1
                ensemble_results.append(metrics)
        else:
            print(f"  ⚠️  Not enough valid models for ensembling")

    # Add ensemble results to main results
    if ensemble_results:
        ensemble_df = pd.DataFrame(ensemble_results)
        combined_results = pd.concat([results_df, ensemble_df], ignore_index=True)

        # Recalculate averages including ensembles
        avg_with_ensemble = combined_results.groupby('model').agg({
            'mae': ['mean', 'std'],
            'mape': ['mean', 'std'],
            'rmse': ['mean', 'std'],
            'r2': ['mean', 'std'],
            'improvement_pct': ['mean', 'std'],
            'mape_vs_baseline': ['mean', 'std']
        }).round(3)

        # Flatten the (metric, stat) column pairs to "metric_stat" and rank best-first
        avg_with_ensemble.columns = [f"{metric}_{stat}" for metric, stat in avg_with_ensemble.columns]
        avg_with_ensemble = avg_with_ensemble.sort_values('improvement_pct_mean', ascending=False)

        print("\n🏆 UPDATED RANKINGS WITH ENSEMBLES")
        print("=" * 50)

        # Business-friendly display
        display_results = pd.DataFrame({
            'Model': avg_with_ensemble.index,
            'MAPE (%)': avg_with_ensemble['mape_mean'].round(1),
            'vs Seasonal Naive': avg_with_ensemble['improvement_pct_mean'].round(1).astype(str) + '%',
            'Consistency (± std)': avg_with_ensemble['improvement_pct_std'].round(1).astype(str) + '%',
            'Status': ['Better' if x > 0 else 'Worse' for x in avg_with_ensemble['improvement_pct_mean']]
        })

        print(display_results.to_string(index=False))

        # Highlight ensemble performance
        ensemble_models = [idx for idx in avg_with_ensemble.index if 'ensemble' in idx]
        if ensemble_models:
            print(f"\n🔗 ENSEMBLE ANALYSIS:")
            best_ensemble = None
            # Sentinel below any improvement expected here; the first ensemble beating it wins
            best_improvement = -999

            for model in ensemble_models:
                improvement = avg_with_ensemble.loc[model, 'improvement_pct_mean']
                print(f"   • {model}: {improvement:.1f}% vs seasonal naive")
                if improvement > best_improvement:
                    best_improvement = improvement
                    best_ensemble = model

            if best_improvement > 5:
                print(f"\n✅ ENSEMBLE SUCCESS: {best_ensemble} shows {best_improvement:.1f}% improvement")
                print("   💡 Model combination is adding value!")
            elif best_improvement > 0:
                print(f"\n⚠️ MODEST ENSEMBLE GAIN: {best_ensemble} shows {best_improvement:.1f}% improvement")
                print("   💡 Small but consistent ensemble benefit")
            else:
                print(f"\n❌ ENSEMBLE UNDERPERFORMING: Best ensemble {best_improvement:.1f}% vs baseline")
                print("   💡 Individual models may be better than combinations")

        return combined_results, avg_with_ensemble

    else:
        print("❌ No ensemble results generated")
        # The conditional applies to the second tuple element only:
        # (results_df, avg_results) when the global exists, otherwise (results_df, None).
        return results_df, avg_results if 'avg_results' in globals() else None

# Build the ensembles only when the base evaluation produced results; otherwise both
# "final" variables are set to None for the cells that follow.
final_results_df, final_avg_results = (
    create_ensemble_forecasts() if results_df is not None else (None, None)
)

# ============================================================================
# CELL 10: Model Diagnostics and Residual Analysis
# ============================================================================

def _seasonal_pattern_residuals(y_train, season_length=7):
    """Residuals of a series against its own final season, repeated in phase.

    Position ``i`` is compared with the value from the last ``season_length``
    observations that lies a whole number of seasons away from it, so every
    observation lines up with the matching slot of the closing season even
    when the series length is not a multiple of ``season_length``.

    Returns a float array with one residual per observation.
    """
    values = np.asarray(y_train, dtype=float)
    n_obs = len(values)
    if n_obs == 0:
        return values
    season_values = values[-season_length:]
    # (i - n_obs) counts back from the end of the series, so the final
    # len(season_values) positions map onto themselves (residual 0).
    slot = (np.arange(n_obs) - n_obs) % len(season_values)
    return values - season_values[slot]


def analyze_best_model_residuals():
    """Refit the top-ranked model on a final hold-out split and report residual diagnostics.

    Reads the module-level ``final_avg_results`` (its first index entry is taken
    as the best model) and ``df`` (must contain a ``calls`` column). The last
    30 rows of ``df`` are held out; when ``df`` has 30 rows or fewer the split
    falls back to the midpoint. Features are built per split with
    ``create_features_no_leakage``.

    Residuals used for the diagnostics:
      * baseline models - in-sample residuals of the corresponding rule
        (seasonal naive is compared with the phase-aligned last-week pattern);
      * every other model family - training values minus their mean. These are
        a simplified stand-in, not the fitted model's own in-sample errors.

    Side effects: prints progress, statistics and test metrics; shows a 2x3
    matplotlib figure. Returns None in all cases.
    """

    print("\n🔬 RESIDUAL ANALYSIS OF BEST MODEL")
    print("=" * 50)

    if final_avg_results is None:
        print("❌ No model results available for analysis")
        return

    # Get best model
    best_model = final_avg_results.index[0]
    print(f"🏆 Analyzing best model: {best_model}")

    # Hold out exactly the last `test_days` rows. Indexing with -test_days and
    # selecting `>= split_date` yields test_days rows (the previous -31 gave 31).
    test_days = 30
    if len(df) > test_days:
        split_date = df.index[-test_days]
    else:
        split_date = df.index[len(df) // 2]  # Fallback for short histories
    train_data_raw = df[df.index < split_date]
    test_data_raw = df[df.index >= split_date]

    # Apply feature engineering to this final split (consistent with CV approach)
    train_features, test_features = create_features_no_leakage(train_data_raw, test_data_raw)

    y_train = train_features['calls']
    y_test = test_data_raw['calls']

    print(f"📊 Using {len(y_train)} days for training, {len(y_test)} days for testing")

    def mean_fallback():
        """Constant-mean forecast plus the matching mean-centred training residuals."""
        centre = y_train.mean()
        return np.full(len(y_test), centre), y_train.values - centre

    # Fit the best model using same approach as CV
    if best_model.startswith('baseline_'):
        model_type = best_model.replace('baseline_', '')
        baseline = BaselineModels()
        baseline.fit_naive(y_train)
        baseline.fit_seasonal_naive(y_train, season_length=7)
        baseline.fit_mean(y_train)
        baseline.fit_drift(y_train)

        # In-sample residuals of the selected baseline rule
        if model_type == 'seasonal_naive':
            residuals = _seasonal_pattern_residuals(y_train, season_length=7)
        elif model_type == 'naive':
            residuals = y_train.values - y_train.iloc[-1]
        elif model_type == 'mean':
            residuals = y_train.values - y_train.mean()
        else:  # drift: straight line from the first to the last observation
            n = len(y_train)
            slope = (y_train.iloc[-1] - y_train.iloc[0]) / (n - 1) if n > 1 else 0
            residuals = y_train.values - (y_train.iloc[0] + slope * np.arange(n))

        test_pred = baseline.predict(len(y_test), model_type)

    elif 'adaptive' in best_model:
        # Adaptive model: forecast from the model, simplified (mean-centred) residuals
        adaptive_results = fit_adaptive_models(y_train, len(y_test))
        fallback_pred, residuals = mean_fallback()
        test_pred = adaptive_results.get(best_model, fallback_pred)

    elif best_model in ['holt_winters_short', 'sarima_short', 'prophet_short']:
        # Traditional model: results are keyed without the '_short' suffix
        base_model = best_model.replace('_short', '')
        traditional_results = fit_traditional_models(y_train, len(y_test))
        fallback_pred, residuals = mean_fallback()
        test_pred = traditional_results.get(base_model, fallback_pred)

    elif 'market_enhanced' in best_model:
        # Market-enhanced ML model; start from the mean fallback and only
        # replace it when the model can actually be fitted.
        test_pred, residuals = mean_fallback()
        try:
            # Every non-target column, each selected once. (Re-appending the
            # market-derived columns would duplicate them in the design matrix.)
            feature_cols = list(dict.fromkeys(
                col for col in train_features.columns if col != 'calls'
            ))

            X_train_ml = train_features[feature_cols].dropna()
            y_train_ml = train_features.loc[X_train_ml.index, 'calls']
            X_test_ml = test_features[feature_cols].reindex(test_data_raw.index).dropna()

            if len(X_train_ml) > 5 and len(X_test_ml) > 0:
                ml_results, _ = fit_ml_models(X_train_ml, y_train_ml, X_test_ml)
                base_model_name = best_model.replace('_market_enhanced', '')

                if base_model_name in ml_results:
                    test_pred = ml_results[base_model_name]
                    residuals = y_train_ml.values - y_train_ml.mean()  # Simplified
        except Exception as e:
            print(f"⚠️ ML model analysis failed: {e}")
            test_pred, residuals = mean_fallback()

    else:
        # Fallback for other models
        test_pred, residuals = mean_fallback()

    # Remove NaN residuals (coerce first so np.isnan accepts any numeric dtype)
    residuals = np.asarray(residuals, dtype=float)
    residuals_clean = residuals[~np.isnan(residuals)]

    if len(residuals_clean) == 0:
        print("❌ No valid residuals for analysis")
        return

    # Residual analysis plots
    fig, axes = plt.subplots(2, 3, figsize=(18, 10))

    # 1. Residuals over time
    axes[0,0].plot(residuals_clean)
    axes[0,0].set_title('Residuals Over Time')
    axes[0,0].set_ylabel('Residuals')
    axes[0,0].grid(True, alpha=0.3)
    axes[0,0].axhline(y=0, color='r', linestyle='--', alpha=0.5)

    # 2. Residual distribution
    axes[0,1].hist(residuals_clean, bins=30, alpha=0.7, edgecolor='black')
    axes[0,1].set_title('Residual Distribution')
    axes[0,1].set_xlabel('Residuals')
    axes[0,1].set_ylabel('Frequency')
    axes[0,1].grid(True, alpha=0.3)

    # 3. Q-Q plot
    stats.probplot(residuals_clean, dist="norm", plot=axes[0,2])
    axes[0,2].set_title('Q-Q Plot (Normality Check)')
    axes[0,2].grid(True, alpha=0.3)

    # 4. ACF of residuals
    if len(residuals_clean) > 20:
        plot_acf(residuals_clean, ax=axes[1,0], lags=min(20, len(residuals_clean)//4))
        axes[1,0].set_title('ACF of Residuals')
    else:
        axes[1,0].text(0.5, 0.5, 'Insufficient data\nfor ACF plot', ha='center', va='center', transform=axes[1,0].transAxes)
        axes[1,0].set_title('ACF of Residuals (insufficient data)')

    # 5. Residuals vs fitted (simplified)
    # Check lengths BEFORE subtracting: a shorter residual vector (NaNs removed
    # or rows dropped in the ML branch) cannot be broadcast against y_train.
    if len(residuals_clean) == len(y_train):
        fitted_approx = y_train.values - residuals_clean
        axes[1,1].scatter(fitted_approx, residuals_clean, alpha=0.6)
        axes[1,1].set_xlabel('Fitted Values')
        axes[1,1].set_ylabel('Residuals')
        axes[1,1].set_title('Residuals vs Fitted')
        axes[1,1].grid(True, alpha=0.3)
        axes[1,1].axhline(y=0, color='r', linestyle='--', alpha=0.5)
    else:
        axes[1,1].text(0.5, 0.5, 'Cannot align\nfitted values', ha='center', va='center', transform=axes[1,1].transAxes)
        axes[1,1].set_title('Residuals vs Fitted (alignment issue)')

    # 6. Forecast vs actual
    axes[1,2].plot(y_test.index, y_test.values, label='Actual', linewidth=2)
    if len(test_pred) == len(y_test):
        axes[1,2].plot(y_test.index, test_pred, label='Forecast', linewidth=2, alpha=0.8)
    axes[1,2].set_title('Forecast vs Actual (Test Set)')
    axes[1,2].set_ylabel('Calls')
    axes[1,2].legend()
    axes[1,2].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # Statistical tests
    print(f"\n📊 RESIDUAL STATISTICS:")
    print(f"   Mean: {np.mean(residuals_clean):.3f}")
    print(f"   Std: {np.std(residuals_clean):.3f}")
    print(f"   Skewness: {stats.skew(residuals_clean):.3f}")
    print(f"   Kurtosis: {stats.kurtosis(residuals_clean):.3f}")

    # Normality tests
    if len(residuals_clean) > 8:  # Minimum for Shapiro test
        try:
            shapiro_stat, shapiro_p = shapiro(residuals_clean)
            jb_stat, jb_p = jarque_bera(residuals_clean)

            print(f"\n🧪 NORMALITY TESTS:")
            print(f"   Shapiro-Wilk: statistic={shapiro_stat:.4f}, p-value={shapiro_p:.4f}")
            print(f"   Jarque-Bera: statistic={jb_stat:.4f}, p-value={jb_p:.4f}")

            if shapiro_p > 0.05 and jb_p > 0.05:
                print("   ✅ Residuals appear normally distributed")
            else:
                print("   ❌ Residuals are not normally distributed")
        except Exception as e:
            print(f"\n🧪 Normality tests failed: {e}")

    # Test set performance (only when the forecast covers every test row;
    # the ML branch can return fewer predictions after dropping NaN rows)
    if len(test_pred) == len(y_test):
        seasonal_naive_baseline = BaselineModels()
        seasonal_naive_baseline.fit_seasonal_naive(y_train, season_length=7)
        baseline_pred = seasonal_naive_baseline.predict(len(y_test), 'seasonal_naive')

        test_metrics = evaluate_forecast_relative(y_test.values, test_pred, baseline_pred, best_model)
        print(f"\n🎯 TEST SET PERFORMANCE:")
        print(f"   MAPE: {test_metrics['mape']:.2f}%")
        print(f"   MAE: {test_metrics['mae']:.1f}")
        print(f"   vs Seasonal Naive: {test_metrics['improvement_pct']:.1f}%")
        print(f"   R²: {test_metrics['r2']:.3f}")
    else:
        print(f"\n🎯 TEST SET PERFORMANCE: Unable to calculate (prediction length mismatch)")

# Run residual analysis
# Only invoked when the ensemble step produced a rankings table. The function
# repeats the same None check internally, so this guard simply skips the call
# (and its header output) when there is nothing to analyze.
if final_avg_results is not None:
    analyze_best_model_residuals()

# ============================================================================
# CELL 11: Final Recommendations and Next Steps
# ============================================================================

def generate_recommendations():
    """Print the closing, regime-change-oriented recommendations for the best-ranked model.

    Reads the module-level ``final_avg_results`` table: its first index entry is
    treated as the best model and its ``mape_mean`` / ``r2_mean`` values are
    reported. The MAPE value selects a performance verdict, and the presence of
    'adaptive' / 'baseline' in the model names selects which guidance sections
    are shown. Everything is written to stdout and nothing is returned; when
    ``final_avg_results`` is None only a notice is printed.
    """

    def say(*lines):
        # One print per entry, so the output matches separate print() calls
        for text in lines:
            print(text)

    say("\n💡 REGIME CHANGE ADAPTED RECOMMENDATIONS", "=" * 50)

    if final_avg_results is None:
        say("❌ No results available for recommendations")
        return

    ranked_models = final_avg_results.index
    best_model = ranked_models[0]
    best_mape = final_avg_results.loc[best_model, 'mape_mean']
    best_r2 = final_avg_results.loc[best_model, 'r2_mean']

    say(
        f"🏆 BEST MODEL FOR REGIME CHANGE DATA: {best_model}",
        f"   MAPE: {best_mape:.2f}%",
        f"   R²: {best_r2:.3f}",
        "",
    )

    # Performance assessment adapted for regime change environment:
    # the first band whose ceiling covers the MAPE wins, else the last message.
    mape_bands = (
        (15, "✅ EXCELLENT performance given extreme regime changes"),
        (25, "✅ GOOD performance for high volatility environment"),
        (35, "⚠️ ACCEPTABLE given structural instability"),
    )
    say(next(
        (verdict for ceiling, verdict in mape_bands if best_mape <= ceiling),
        "❌ Consider even simpler models or different approach",
    ))

    say(f"\n📋 REGIME CHANGE SPECIFIC RECOMMENDATIONS:")

    # Check if adaptive models performed well (any of the three best-ranked)
    adaptive_in_top3 = any('adaptive' in name for name in ranked_models[:3])

    if not adaptive_in_top3:
        say(
            "   ⚠️ Even adaptive models struggling - consider:",
            "   • HOURLY forecasting instead of daily",
            "   • REAL-TIME updating with streaming data",
            "   • BUSINESS RULE-BASED forecasts during unstable periods",
        )
    else:
        say(
            "   ✅ Adaptive models are working - continue with short-window approaches",
            "   • Implement DAILY model retraining",
            "   • Use EXPONENTIAL SMOOTHING with high alpha (0.8-0.9)",
            "   • Focus on 1-3 day forecasts maximum",
        )

    # Specific tactical recommendations
    say(
        f"\n🎯 IMMEDIATE TACTICAL ACTIONS:",
        "   1. ABANDON long-term forecasting (>3 days)",
        "   2. SET UP change point detection alerts",
        "   3. IMPLEMENT rolling window validation (max 7 days)",
        "   4. CREATE forecast confidence bands (very wide)",
        "   5. ESTABLISH business escalation for detected regime changes",
    )

    # Model-specific guidance ('adaptive' takes precedence over 'baseline')
    if 'adaptive' in best_model:
        say(
            f"\n🔧 ADAPTIVE MODEL OPTIMIZATION:",
            "   • Tune smoothing parameters daily",
            "   • Monitor forecast accuracy hourly",
            "   • Have fallback rules for extreme changes",
        )
    elif 'baseline' in best_model:
        say(
            f"\n🔧 BASELINE MODEL SUCCESS INDICATES:",
            "   • Data has extremely high noise-to-signal ratio",
            "   • Simple is better in chaos",
            "   • Focus on business process improvements vs. forecasting",
        )

    # Sections printed for every model, in fixed order
    closing_sections = (
        # Risk management recommendations
        (
            f"\n⚠️ RISK MANAGEMENT FOR REGIME CHANGE ENVIRONMENT:",
            "   • BUFFER INVENTORY: Increase safety stock significantly",
            "   • STAFFING FLEXIBILITY: Cross-train agents for rapid scaling",
            "   • SCENARIO PLANNING: Prepare for sudden volume changes",
            "   • EARLY WARNING SYSTEM: Real-time anomaly detection",
            "   • BUSINESS INTELLIGENCE: Identify regime change triggers",
        ),
        # Technology recommendations
        (
            f"\n🔧 TECHNOLOGY IMPROVEMENTS:",
            "   • STREAMING ANALYTICS: Real-time model updates",
            "   • ENSEMBLE FORECASTING: Combine multiple short-term models",
            "   • ONLINE LEARNING: Models that adapt with each observation",
            "   • CHANGE DETECTION: Automated structural break identification",
        ),
        (
            f"\n🎯 SUCCESS METRICS FOR UNSTABLE DATA:",
            "   • DIRECTIONAL ACCURACY: % of correct up/down predictions",
            "   • FORECAST HORIZON: How many days ahead remain useful",
            "   • CHANGE DETECTION SPEED: How quickly models adapt",
            "   • BUSINESS VALUE: Impact on operational decisions",
        ),
        (
            f"\n⚡ REGIME CHANGE REALITY CHECK:",
            "   • With 37 changes in 36 days, perfect forecasting is impossible",
            "   • Focus on RAPID ADAPTATION over accuracy",
            "   • Invest in FLEXIBILITY over prediction precision",
            "   • Consider if the underlying business process can be stabilized",
        ),
    )
    for section in closing_sections:
        say(*section)

# Update ensemble creation for regime change data
def create_ensemble_forecasts():
    """Evaluate short-window ensemble forecasts on every CV split and merge them into the rankings.

    For each entry of the module-level ``cv_splits`` the training data (taken
    from ``df``) is cut to its last 10 rows, features are rebuilt for that split
    with ``create_features_no_leakage``, and the adaptive models plus the naive
    and seasonal-naive baselines are forecast. Two ensembles are scored against
    the seasonal-naive forecast with ``evaluate_forecast_relative``:

      * ``ensemble_adaptive_median`` - element-wise median of every forecast
        that has the expected horizon length;
      * ``ensemble_adaptive_only``   - equal-weight mean of the adaptive
        forecasts that have the expected horizon length.

    Both ensembles are scored on every split so their averages are comparable
    with the other models in the ranking.

    Returns:
        ``(combined_results, avg_with_ensemble)`` - the per-split results with
        ensemble rows appended, and the per-model mean/std table sorted by
        ``improvement_pct_mean`` (descending). When there are no base results
        or no ensemble could be built, returns ``(results_df, avg_results)``
        with ``avg_results`` replaced by None if that global does not exist.

    NOTE(review): this definition appears after the "Create ensembles" call
    earlier in the notebook, so on a top-to-bottom run that call uses whichever
    definition precedes it - confirm the intended cell order.
    """

    print("\n🔗 CREATING ADAPTIVE ENSEMBLE FORECASTS")
    print("=" * 50)

    if results_df is None or len(results_df) == 0:
        print("❌ No base model results available for ensembling")
        return results_df, avg_results if 'avg_results' in globals() else None

    print("⚠️  Using short-window ensembles due to regime changes")
    print("🔒 Applying leakage-free feature engineering for ensembles")

    ensemble_results = []

    for split_idx, split in enumerate(cv_splits):
        print(f"\n📊 Adaptive Ensemble for Split {split_idx + 1}")

        # Get raw train/test data (same as main evaluation)
        train_data_raw = df.loc[split['train_idx']]
        test_data_raw = df.loc[split['test_idx']]

        # Limit training to last 10 days for ensembles
        if len(train_data_raw) > 10:
            train_data_raw = train_data_raw.tail(10)

        # Apply feature engineering to this split's data; only the training
        # side is needed here because the ensembles are target-only models.
        train_features, _ = create_features_no_leakage(train_data_raw, test_data_raw)

        y_train = train_features['calls']
        y_test = test_data_raw['calls'].values
        forecast_steps = len(test_data_raw)

        # Collect predictions from adaptive models only
        predictions = {}

        # Adaptive models
        adaptive_results = fit_adaptive_models(y_train, forecast_steps)
        predictions.update(adaptive_results)

        # Add best baseline models
        baseline = BaselineModels()
        baseline.fit_naive(y_train)
        baseline.fit_seasonal_naive(y_train, season_length=7)
        predictions['naive'] = baseline.predict(forecast_steps, 'naive')
        predictions['seasonal_naive'] = baseline.predict(forecast_steps, 'seasonal_naive')

        # Get seasonal naive for relative comparison
        seasonal_naive_pred = predictions['seasonal_naive']

        # Keep only forecasts that cover the full horizon
        pred_array = np.array([pred for pred in predictions.values() if len(pred) == forecast_steps])

        if len(pred_array) > 0:
            # Median ensemble (robust to outliers - important for regime changes)
            ensemble_median = np.median(pred_array, axis=0)
            metrics = evaluate_forecast_relative(y_test, ensemble_median, seasonal_naive_pred, "ensemble_adaptive_median")
            metrics['split'] = split_idx + 1
            ensemble_results.append(metrics)

            # Equal-weight ensemble of the adaptive models. It needs no history
            # from earlier splits, so it is scored on every split, and it uses
            # the same length filter as the median ensemble so one malformed
            # forecast cannot knock the whole ensemble out.
            try:
                adaptive_preds = [
                    pred for name, pred in predictions.items()
                    if 'adaptive' in name and len(pred) == forecast_steps
                ]
                if adaptive_preds:
                    ensemble_adaptive = np.mean(adaptive_preds, axis=0)
                    metrics = evaluate_forecast_relative(y_test, ensemble_adaptive, seasonal_naive_pred, "ensemble_adaptive_only")
                    metrics['split'] = split_idx + 1
                    ensemble_results.append(metrics)
            except Exception as e:
                print(f"    Adaptive ensemble failed: {e}")

    # Combine results
    if ensemble_results:
        ensemble_df = pd.DataFrame(ensemble_results)
        combined_results = pd.concat([results_df, ensemble_df], ignore_index=True)

        # Recalculate averages
        avg_with_ensemble = combined_results.groupby('model').agg({
            'mae': ['mean', 'std'],
            'mape': ['mean', 'std'],
            'rmse': ['mean', 'std'],
            'r2': ['mean', 'std'],
            'improvement_pct': ['mean', 'std'],
            'mape_vs_baseline': ['mean', 'std']
        }).round(3)

        # Flatten the (metric, stat) column pairs into names like 'mape_mean'
        avg_with_ensemble.columns = [f"{metric}_{stat}" for metric, stat in avg_with_ensemble.columns]
        avg_with_ensemble = avg_with_ensemble.sort_values('improvement_pct_mean', ascending=False)

        print("\n🏆 FINAL RANKINGS WITH ADAPTIVE ENSEMBLES")
        print("=" * 50)

        # Business-friendly display with ensembles
        display_results_ensemble = pd.DataFrame({
            'Model': avg_with_ensemble.index,
            'MAPE (%)': avg_with_ensemble['mape_mean'].round(1),
            'vs Seasonal Naive': avg_with_ensemble['improvement_pct_mean'].round(1).astype(str) + '%',
            'Consistency (± std)': avg_with_ensemble['improvement_pct_std'].round(1).astype(str) + '%',
            'Status': ['Better' if x > 0 else 'Worse' for x in avg_with_ensemble['improvement_pct_mean']]
        })

        print(display_results_ensemble.to_string(index=False))

        # Highlight ensemble performance
        ensemble_models = [idx for idx in avg_with_ensemble.index if 'ensemble' in idx]
        if ensemble_models:
            print(f"\n🔗 ENSEMBLE PERFORMANCE:")
            for model in ensemble_models:
                improvement = avg_with_ensemble.loc[model, 'improvement_pct_mean']
                print(f"   {model}: {improvement:.1f}% vs seasonal naive")

        return combined_results, avg_with_ensemble

    return results_df, avg_results if 'avg_results' in globals() else None

# Print the closing recommendations, then the end-of-notebook banner
generate_recommendations()

print(
    "\n🎉 ANALYSIS COMPLETE!",
    "=" * 50,
    "Ready to forecast the future! 📞📈",
    sep="\n",
)