<a href="https://colab.research.google.com/github/john-d-noble/callcenter/blob/main/XGB_High_Performance_Model_Final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
!pip install yfinance



In [5]:
!pip install xgboost

Collecting xgboost
  Downloading xgboost-3.0.4-py3-none-manylinux_2_28_x86_64.whl.metadata (2.1 kB)
Collecting nvidia-nccl-cu12 (from xgboost)
  Downloading nvidia_nccl_cu12-2.27.7-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (2.0 kB)
Downloading xgboost-3.0.4-py3-none-manylinux_2_28_x86_64.whl (94.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.9/94.9 MB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading nvidia_nccl_cu12-2.27.7-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (322.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m322.5/322.5 MB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: nvidia-nccl-cu12, xgboost
Successfully installed nvidia-nccl-cu12-2.27.7 xgboost-3.0.4


In [6]:
#
# MASTER SCRIPT: From Data Collection to Live Forecasting
# This script combines all steps into a single, sequential process.
#

import pandas as pd
import numpy as np
import yfinance as yf
import xgboost as xgb
from sklearn.model_selection import RandomizedSearchCV, TimeSeriesSplit
from datetime import date, timedelta

# --- MASTER CONFIGURATION ---
# Data Collection & Simulation
TICKERS = ['BTC-USD', 'ETH-USD', 'SOL-USD', '^VIX']
START_DATE = '2022-06-25'

# Feature Engineering
VOLATILITY_WINDOW = 14
SPIKE_WINDOW = 30
SPIKE_THRESHOLD = 2.0

# Model Tuning
TUNING_ITERATIONS = 50 # Number of hyperparameter combinations to test

# Final Output Files
MARKET_DATA_FILE = 'crypto_price_and_volume_data_complete.csv'
SIMULATED_CALLS_FILE = 'final_simulated_data_with_volatility.csv'
ADVANCED_TRAINING_FILE = 'advanced_feature_training_data.csv'
FINAL_MODEL_FILENAME = 'final_advanced_xgboost_model.json'


# ==============================================================================
# STEP 1: DATA COLLECTION - Fetch all required price and volume data
# ==============================================================================
print("--- [STEP 1/7] Starting: Data Collection ---")
try:
    data = yf.download(TICKERS, start=START_DATE)
    prices = data['Close'].rename(columns={'BTC-USD': 'btc_price', 'ETH-USD': 'eth_price', 'SOL-USD': 'sol_price', '^VIX': 'vix_price'})
    volumes = data['Volume'].rename(columns={'BTC-USD': 'btc_volume', 'ETH-USD': 'eth_volume', 'SOL-USD': 'sol_volume', '^VIX': 'vix_volume'})
    market_data_df = pd.concat([prices, volumes], axis=1)
    market_data_df.to_csv(MARKET_DATA_FILE)
    print(f"✅ Success! Market data saved to '{MARKET_DATA_FILE}'\n")
except Exception as e:
    print(f"❌ FAILED: {e}")
    exit()

# ==============================================================================
# STEP 2: SIMULATION - Create the intelligent, volatility-driven call volume
# ==============================================================================
print(f"--- [STEP 2/7] Starting: Call Volume Simulation ---")
try:
    df_sim = pd.read_csv(MARKET_DATA_FILE, parse_dates=['Date'])

    # Baseline simulation parameters
    NUMBER_OF_AGENTS = 3000
    AVG_WEEKDAY_CALLS_PER_AGENT = 110
    AVG_WEEKEND_CALLS_PER_AGENT = 50
    RANDOMNESS_FACTOR = 15000

    def simulate_base_volume(d):
        mean_vol = NUMBER_OF_AGENTS * (AVG_WEEKDAY_CALLS_PER_AGENT if d.dayofweek < 5 else AVG_WEEKEND_CALLS_PER_AGENT)
        return abs(int(np.random.normal(loc=mean_vol, scale=RANDOMNESS_FACTOR)))
    df_sim['base_call_volume'] = df_sim['Date'].apply(simulate_base_volume)

    # Volatility adjustment parameters
    VOLATILITY_THRESHOLD = 0.05
    VOLATILITY_MULTIPLIER = 1.8
    VIX_FEAR_THRESHOLD = 30
    VIX_MULTIPLIER = 1.4

    for crypto in ['btc', 'eth', 'sol']:
        df_sim[f'{crypto}_price_pct_change'] = df_sim[f'{crypto}_price'].pct_change()

    def adjust_volume_for_volatility(row):
        final_volume = row['base_call_volume']
        crypto_volatile = any(abs(row[f'{c}_price_pct_change']) > VOLATILITY_THRESHOLD for c in ['btc', 'eth', 'sol'])
        vix_high = row['vix_price'] > VIX_FEAR_THRESHOLD
        if crypto_volatile: final_volume *= VOLATILITY_MULTIPLIER
        if vix_high: final_volume *= VIX_MULTIPLIER
        return int(final_volume)

    df_sim.fillna(0, inplace=True)
    df_sim['adjusted_call_volume'] = df_sim.apply(adjust_volume_for_volatility, axis=1)

    # Save the intermediate simulation file
    simulated_df = df_sim[['Date', 'btc_price', 'eth_price', 'sol_price', 'vix_price', 'adjusted_call_volume']]
    simulated_df.to_csv(SIMULATED_CALLS_FILE, index=False)
    print(f"✅ Success! Simulated call volume data saved to '{SIMULATED_CALLS_FILE}'\n")
except Exception as e:
    print(f"❌ FAILED: {e}")
    exit()

# ==============================================================================
# STEP 3: ADVANCED FEATURE ENGINEERING - Create the final training data
# ==============================================================================
print(f"--- [STEP 3/7] Starting: Advanced Feature Engineering ---")
try:
    df_market = pd.read_csv(MARKET_DATA_FILE, parse_dates=['Date'])
    df_calls = pd.read_csv(SIMULATED_CALLS_FILE, parse_dates=['Date'])

    for crypto in ['btc', 'eth', 'sol']:
        df_market[f'{crypto}_price_pct_change'] = df_market[f'{crypto}_price'].pct_change()
        df_market[f'{crypto}_volatility_index'] = df_market[f'{crypto}_price_pct_change'].rolling(window=VOLATILITY_WINDOW).std()

        rolling_vol_mean = df_market[f'{crypto}_volume'].rolling(window=SPIKE_WINDOW).mean()
        rolling_vol_std = df_market[f'{crypto}_volume'].rolling(window=SPIKE_WINDOW).std()
        df_market[f'{crypto}_volume_spike'] = (df_market[f'{crypto}_volume'] > (rolling_vol_mean + SPIKE_THRESHOLD * rolling_vol_std)).astype(int)

        rolling_price_mean = df_market[f'{crypto}_price_pct_change'].rolling(window=SPIKE_WINDOW).mean()
        rolling_price_std = df_market[f'{crypto}_price_pct_change'].rolling(window=SPIKE_WINDOW).std()
        df_market[f'{crypto}_price_shock'] = (abs(df_market[f'{crypto}_price_pct_change']) > (rolling_price_mean + SPIKE_THRESHOLD * rolling_price_std)).astype(int)

    df_calls_subset = df_calls[['Date', 'adjusted_call_volume']]
    combined_df = pd.merge(df_market, df_calls_subset, on='Date', how='inner')
    combined_df.drop(columns=[col for col in combined_df.columns if 'pct_change' in col], inplace=True)
    combined_df.fillna(0, inplace=True)
    combined_df.to_csv(ADVANCED_TRAINING_FILE, index=False)
    print(f"✅ Success! Advanced training file saved as '{ADVANCED_TRAINING_FILE}'\n")
except Exception as e:
    print(f"❌ FAILED: {e}")
    exit()

# ==============================================================================
# STEP 4: MODEL TUNING - Find the best hyperparameters
# ==============================================================================
print(f"--- [STEP 4/7] Starting: Automated Model Tuning (this may take several minutes) ---")
try:
    df_tune = pd.read_csv(ADVANCED_TRAINING_FILE, parse_dates=['Date'])
    df_tune.set_index('Date', inplace=True)
    df_tune.fillna(0, inplace=True)
    X = df_tune.drop('adjusted_call_volume', axis=1)
    y = df_tune['adjusted_call_volume']

    param_grid = {
        'n_estimators': [100, 300, 500, 700], 'max_depth': [3, 4, 5, 6, 7],
        'learning_rate': [0.01, 0.05, 0.1], 'subsample': [0.7, 0.8, 0.9, 1.0],
        'colsample_bytree': [0.7, 0.8, 0.9, 1.0]
    }
    xgb_model = xgb.XGBRegressor(objective='reg:squarederror', eval_metric='mae')
    tscv = TimeSeriesSplit(n_splits=5)
    random_search = RandomizedSearchCV(estimator=xgb_model, param_distributions=param_grid, n_iter=TUNING_ITERATIONS,
                                   scoring='neg_mean_absolute_error', cv=tscv, n_jobs=-1, verbose=1, random_state=42)
    random_search.fit(X, y)

    best_params = random_search.best_params_
    best_score = -random_search.best_score_

    print("\n--- Tuning Complete ---")
    print(f"✅ Best MAE Score found: {best_score:.2f}")
    print(f"✅ Best Hyperparameters found: {best_params}\n")
except Exception as e:
    print(f"❌ FAILED: {e}")
    exit()

# ==============================================================================
# STEP 5: FINAL MODEL TRAINING - Build the model with the best settings
# ==============================================================================
print(f"--- [STEP 5/7] Starting: Final Model Training ---")
try:
    final_model = xgb.XGBRegressor(
        objective='reg:squarederror',
        eval_metric='mae',
        **best_params,  # Use the best parameters found in the search
        random_state=42
    )
    final_model.fit(X, y)
    print("✅ Success! Final model trained on all available data.\n")
except Exception as e:
    print(f"❌ FAILED: {e}")
    exit()

# ==============================================================================
# STEP 6: SAVE THE MODEL - Create the final, predictive asset
# ==============================================================================
print(f"--- [STEP 6/7] Starting: Saving the Final Model ---")
try:
    final_model.save_model(FINAL_MODEL_FILENAME)
    print(f"✅ Success! Final model has been saved as '{FINAL_MODEL_FILENAME}'.\n")
except Exception as e:
    print(f"❌ FAILED: {e}")
    exit()

# ==============================================================================
# STEP 7: MAKE A PREDICTION - Use the model for a live forecast
# ==============================================================================
print(f"--- [STEP 7/7] Starting: Making a Live Forecast ---")
try:
    latest_day = date.today() - timedelta(days=1)
    hist_data = yf.download(TICKERS, period='35d', progress=False)
    latest_data = yf.download(TICKERS, start=latest_day, period='1d', progress=False)

    full_data = pd.concat([hist_data, latest_data])
    prices = full_data['Close'].rename(columns={'BTC-USD': 'btc_price', 'ETH-USD': 'eth_price', 'SOL-USD': 'sol_price', '^VIX': 'vix_price'})
    volumes = full_data['Volume'].rename(columns={'BTC-USD': 'btc_volume', 'ETH-USD': 'eth_volume', 'SOL-USD': 'sol_volume', '^VIX': 'vix_volume'})
    market_df = pd.concat([prices, volumes], axis=1)

    # Re-create the exact same features we trained on
    for crypto in ['btc', 'eth', 'sol']:
        market_df[f'{crypto}_price_pct_change'] = market_df[f'{crypto}_price'].pct_change()
        market_df[f'{crypto}_volatility_index'] = market_df[f'{crypto}_price_pct_change'].rolling(window=VOLATILITY_WINDOW).std()
        rolling_vol_mean = market_df[f'{crypto}_volume'].rolling(window=SPIKE_WINDOW).mean()
        rolling_vol_std = market_df[f'{crypto}_volume'].rolling(window=SPIKE_WINDOW).std()
        market_df[f'{crypto}_volume_spike'] = (market_df[f'{crypto}_volume'] > (rolling_vol_mean + SPIKE_THRESHOLD * rolling_vol_std)).astype(int)
        rolling_price_mean = market_df[f'{crypto}_price_pct_change'].rolling(window=SPIKE_WINDOW).mean()
        rolling_price_std = market_df[f'{crypto}_price_pct_change'].rolling(window=SPIKE_WINDOW).std()
        market_df[f'{crypto}_price_shock'] = (abs(market_df[f'{crypto}_price_pct_change']) > (rolling_price_mean + SPIKE_THRESHOLD * rolling_price_std)).astype(int)

    prediction_input = market_df.tail(1)
    training_columns = [col for col in X.columns] # Ensure exact column order
    prediction_input = prediction_input[training_columns]

    # Load the model we just saved
    loaded_model = xgb.XGBRegressor()
    loaded_model.load_model(FINAL_MODEL_FILENAME)

    prediction = loaded_model.predict(prediction_input)
    predicted_volume = int(prediction[0])

    print("\n--- Forecast Complete ---")
    print(f"✅ Based on the latest market data for {latest_day.strftime('%Y-%m-%d')}, the model forecasts a call volume of: {predicted_volume:,}")
except Exception as e:
    print(f"❌ FAILED: {e}")

--- [STEP 1/7] Starting: Data Collection ---


  data = yf.download(TICKERS, start=START_DATE)
[*********************100%***********************]  4 of 4 completed

✅ Success! Market data saved to 'crypto_price_and_volume_data_complete.csv'

--- [STEP 2/7] Starting: Call Volume Simulation ---
✅ Success! Simulated call volume data saved to 'final_simulated_data_with_volatility.csv'

--- [STEP 3/7] Starting: Advanced Feature Engineering ---
✅ Success! Advanced training file saved as 'advanced_feature_training_data.csv'

--- [STEP 4/7] Starting: Automated Model Tuning (this may take several minutes) ---
Fitting 5 folds for each of 50 candidates, totalling 250 fits






--- Tuning Complete ---
✅ Best MAE Score found: 103798.44
✅ Best Hyperparameters found: {'subsample': 1.0, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 0.7}

--- [STEP 5/7] Starting: Final Model Training ---
✅ Success! Final model trained on all available data.

--- [STEP 6/7] Starting: Saving the Final Model ---
✅ Success! Final model has been saved as 'final_advanced_xgboost_model.json'.

--- [STEP 7/7] Starting: Making a Live Forecast ---


  hist_data = yf.download(TICKERS, period='35d', progress=False)
  latest_data = yf.download(TICKERS, start=latest_day, period='1d', progress=False)



--- Forecast Complete ---
✅ Based on the latest market data for 2025-08-23, the model forecasts a call volume of: 435,261


  market_df[f'{crypto}_price_pct_change'] = market_df[f'{crypto}_price'].pct_change()
  market_df[f'{crypto}_price_pct_change'] = market_df[f'{crypto}_price'].pct_change()
  market_df[f'{crypto}_price_pct_change'] = market_df[f'{crypto}_price'].pct_change()
