<a href="https://colab.research.google.com/github/john-d-noble/callcenter/blob/main/CB_Step_3_Classical_Time_Series_Models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install numpy pmdarima prophet statsmodels

Collecting pmdarima
  Downloading pmdarima-2.0.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl.metadata (7.8 kB)
Downloading pmdarima-2.0.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl (2.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m19.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pmdarima
Successfully installed pmdarima-2.0.4


In [3]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
from sklearn.model_selection import TimeSeriesSplit
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Read the enhanced EDA dataset with 'Date' parsed as the index, then put the
# rows in index order so the time-series splits below follow the calendar
df = pd.read_csv(
    'enhanced_eda_data.csv',
    index_col='Date',
    parse_dates=True,
).sort_index()

# Name of the column to forecast
target = 'calls'

# Forecast horizon in days (7 = one week)
# NOTE(review): `horizon` is not referenced anywhere else in this cell; every
# fold is forecast for len(test) steps instead — confirm which is intended.
horizon = 7

# 5-split time-series cross-validation
# NOTE(review): the captured output shows a statsmodels warning raised from
# `_init_dates` on every fit; presumably the index carries no explicit
# frequency — confirm the series has no gaps before setting one.
tscv = TimeSeriesSplit(n_splits=5)

def calculate_metrics(y_true, y_pred):
    """Score a set of forecasts against the observed values.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Forecast values, aligned one-to-one with ``y_true``.

    Returns
    -------
    dict
        ``'MAE'``  - mean absolute error,
        ``'RMSE'`` - square root of the mean squared error,
        ``'MAPE'`` - mean absolute percentage error multiplied by 100,
        i.e. expressed in percent rather than as a fraction.
    """
    return {
        'MAE': mean_absolute_error(y_true, y_pred),
        'RMSE': np.sqrt(mean_squared_error(y_true, y_pred)),
        'MAPE': mean_absolute_percentage_error(y_true, y_pred) * 100,
    }

# Metrics for each model, keyed by model name. Each entry is computed on the
# forecasts pooled across all cross-validation folds.
model_metrics = {}


def _run_time_series_cv(build_model, **fit_kwargs):
    """Evaluate one model specification on every fold of ``tscv``.

    For each (train, test) split of ``df``, the model returned by
    ``build_model(train)`` is fitted and asked to forecast ``len(test)``
    steps ahead. This replaces three copies of the same loop that differed
    only in the model constructor and the arguments passed to ``fit``.

    Parameters
    ----------
    build_model : callable
        Receives the training slice of ``df[target]`` and returns an
        unfitted model exposing ``fit`` and, once fitted, ``forecast``.
    **fit_kwargs
        Passed through unchanged to the model's ``fit`` method.

    Returns
    -------
    tuple of (list, list)
        ``(preds, trues)``: forecasts and the matching observed values,
        concatenated fold after fold in split order.
    """
    preds, trues = [], []
    for train_idx, test_idx in tscv.split(df):
        train = df.iloc[train_idx][target]
        test = df.iloc[test_idx][target]

        fitted = build_model(train).fit(**fit_kwargs)

        # Forecast exactly as many steps as the held-out fold contains
        preds.extend(fitted.forecast(steps=len(test)))
        trues.extend(test)
    return preds, trues


# 1. ARIMA (using SARIMAX with fixed order (1,1,1), no seasonality)
arima_preds, arima_trues = _run_time_series_cv(
    lambda train: SARIMAX(train, order=(1, 1, 1)),
    disp=False,
)
arima_metrics = calculate_metrics(arima_trues, arima_preds)
model_metrics['ARIMA'] = arima_metrics

# 2. SARIMA (using SARIMAX with order (1,1,1) and seasonal_order (1,1,1,7))
sarima_preds, sarima_trues = _run_time_series_cv(
    lambda train: SARIMAX(train, order=(1, 1, 1), seasonal_order=(1, 1, 1, 7)),
    disp=False,
)
sarima_metrics = calculate_metrics(sarima_trues, sarima_preds)
model_metrics['SARIMA'] = sarima_metrics

# 3. Exponential Smoothing (Holt-Winters, additive trend and seasonality, period=7)
ets_preds, ets_trues = _run_time_series_cv(
    lambda train: ExponentialSmoothing(
        train, trend='add', seasonal='add', seasonal_periods=7
    ),
    optimized=True,
)
ets_metrics = calculate_metrics(ets_trues, ets_preds)
model_metrics['ETS'] = ets_metrics

# Tabulate the results: one row per model, one column per metric
metrics_df = pd.DataFrame(model_metrics).T
print("\nModel Performance Summary:")
print(metrics_df)

# The champion is the model with the smallest MAE (primary metric)
winner = metrics_df['MAE'].idxmin()
winner_metrics = metrics_df.loc[winner].to_dict()
print(f"\nChampion Classical Model: {winner}")
print(f"Metrics: {winner_metrics}")

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)



Model Performance Summary:
                MAE         RMSE       MAPE
ARIMA   3126.216632  3902.570129  41.321259
SARIMA  2099.066063  2602.582483  26.491808
ETS     1954.659857  2503.621301  24.763894

Champion Classical Model: ETS
Metrics: {'MAE': 1954.6598569590553, 'RMSE': 2503.6213006004446, 'MAPE': 24.76389395416084}
