<a href="https://colab.research.google.com/github/john-d-noble/callcenter/blob/main/XGB_Tuned.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import RandomizedSearchCV, TimeSeriesSplit
from sklearn.metrics import mean_absolute_error
import numpy as np

# --- Configuration ---
DATA_FILE = 'fully_enriched_training_data.csv'
TARGET_COLUMN = 'adjusted_call_volume'


def clean_training_frame(frame, target=TARGET_COLUMN):
    """Return a chronologically ordered, NaN-free copy of *frame*.

    Args:
        frame: DataFrame whose index carries the observation dates and which
            contains the *target* column alongside the feature columns.
        target: Name of the column the model will predict.

    Returns:
        A new DataFrame (the input is left untouched) in which rows are
        sorted by index, rows with a missing target are removed, and
        missing feature values are replaced with 0.

    Raises:
        KeyError: If *target* is not a column of *frame*.
    """
    # TimeSeriesSplit slices by row position, so rows must be in time order.
    # mergesort is stable: rows sharing a date keep their original file order.
    ordered = frame.sort_index(kind='mergesort')
    # A missing target is an unknown label; imputing 0 would teach the model
    # (and the MAE score) that call volume was zero on those days.
    labelled = ordered.dropna(subset=[target])
    # Fill any remaining NaNs (features only at this point)
    return labelled.fillna(0)


print("🚀 Starting the model tuning process...")
print("This may take several minutes depending on your computer's power.")

try:
    # 1. Load and Prepare Data
    df = pd.read_csv(DATA_FILE, parse_dates=['Date'])
    df = clean_training_frame(df.set_index('Date'))

    # Define features (X) and target (y)
    X = df.drop(TARGET_COLUMN, axis=1)
    y = df[TARGET_COLUMN]

    # 2. Define the Hyperparameter Grid to Search
    # These are the "knobs" we are tuning
    param_grid = {
        'n_estimators': [100, 200, 300, 500, 700],
        'max_depth': [3, 4, 5, 6, 7],
        'learning_rate': [0.01, 0.05, 0.1, 0.2],
        'subsample': [0.7, 0.8, 0.9, 1.0],
        'colsample_bytree': [0.7, 0.8, 0.9, 1.0]
    }

    # 3. Set up the XGBoost Model and the Search
    # n_jobs=1 keeps each model single-threaded: the search below already
    # parallelises across all cores, and letting every worker spawn its own
    # full thread pool causes contention that slows the whole search down.
    xgb_model = xgb.XGBRegressor(
        objective='reg:squarederror',
        eval_metric='mae',
        n_jobs=1
    )

    # Use a time-series-aware cross-validation (relies on the rows being in
    # chronological order, which clean_training_frame guarantees)
    tscv = TimeSeriesSplit(n_splits=5)

    # RandomizedSearchCV will test 50 different combinations from the grid
    random_search = RandomizedSearchCV(
        estimator=xgb_model,
        param_distributions=param_grid,
        n_iter=50,  # Number of combinations to try
        scoring='neg_mean_absolute_error',
        cv=tscv,
        n_jobs=-1,  # Use all available CPU cores
        verbose=1,
        random_state=42
    )

    # 4. Run the Automated Tuning
    print("\n--- Searching for the best hyperparameters ---")
    random_search.fit(X, y)

    # 5. Report the Results
    # best_score_ is the negated MAE (sklearn maximises scores), so flip the sign
    print("\n--- Tuning Complete ---")
    print(f"✅ Best MAE Score found: {-random_search.best_score_:.2f}")
    print("✅ Best Hyperparameters found:")
    print(random_search.best_params_)

except FileNotFoundError:
    print(f"❌ Error: The file '{DATA_FILE}' was not found.")
except Exception as e:
    # Top-level boundary of the notebook cell: report instead of crashing
    print(f"❌ An error occurred: {e}")