In [1]:
import pandas as pd
import numpy as np
import time
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import acf

from methods.naive_1 import *
from methods.naive_2 import *
from methods.ses import *
from methods.damped import *

In [2]:
# h:  the forecasting horizon
# m:  time interval between successive observations considered
#     by the organizers for each data frequency

file_paths = {
    'yearly': {
        'train': '../archive/Yearly-train.csv',
        'test': '../archive/Yearly-test.csv',
        'm': 1,
        'h': 6
    },
    'quarterly': {
        'train': '../archive/Quarterly-train.csv',
        'test': '../archive/Quarterly-test.csv',
        'm': 4,
        'h': 8
    },
    'monthly': {
        'train': '../archive/Monthly-train.csv',
        'test': '../archive/Monthly-test.csv',
        'm': 12,
        'h': 18
    },
    'weekly': {
        'train': '../archive/Weekly-train.csv',
        'test': '../archive/Weekly-test.csv',
        'm': 1,
        'h': 13
    },
    'daily': {
        'train': '../archive/Daily-train.csv',
        'test': '../archive/Daily-test.csv',
        'm': 1,
        'h': 14
    },
    'hourly': {
        'train': '../archive/Hourly-train.csv',
        'test': '../archive/Hourly-test.csv',
        'm': 24,
        'h': 48
    }
}


In [9]:
def calculate_smape(actual, predicted, epsilon=1e-8):
    absolute_diff = np.abs(actual - predicted)
    sum_absolute = np.abs(actual) + np.abs(predicted)
    smape = np.mean(200 * (absolute_diff / (sum_absolute + epsilon)))
    
    return smape


def calculate_mase(actual, predicted, m):
    num = np.mean(np.abs(actual - predicted))
    denom = np.mean(np.abs(actual[m:] - actual[:-m]))  
    mase = num / denom if denom != 0 else np.inf 
    
    return mase


def calculate_owa(smape_method, mase_method, smape_naive2, mase_naive2):
    relative_smape = smape_method / smape_naive2
    relative_mase = mase_method / mase_naive2
    return (relative_smape + relative_mase) / 2

In [28]:
ses_alpha = 0.3

damped_alpha = 0.5
damped_beta = 0.3
damped_phi = 0.9

method_name_func = {
    'Naïve 2': {'func': evaluate_naive_2, 'extra_args': {}},
    'Naïve 1': {'func': evaluate_naive_1, 'extra_args': {}},
    'SES': {'func': evaluate_ses, 'extra_args': {'alpha': ses_alpha}},
    'SES (Statsmodels)': {
        'func': evaluate_ses_statsmodels, 
        'extra_args': {'alpha': ses_alpha}  
    },
    'Damped': {'func': evaluate_damped, 'extra_args': {'alpha': damped_alpha, 'beta': damped_beta, 'phi': damped_phi}},
    # 'Damped (Statsmodels)': {
    #     'func': evaluate_damped_statsmodels, 
    #     'extra_args': {'alpha': damped_alpha, 'beta': damped_beta, 'phi': damped_phi}
    # },
}

method_to_freq_res = {}
method_to_overall_res = {}

# Naïve 2 benchmark values from the M4 competition report
naive_2_smape = 13.564  # from Table 4
naive_2_mase = 1.912    # from Table 4



for method_name, method_func_args in method_name_func.items():
    print(f"Evaluating {method_name}...")

    freq_to_res = {}
    smape_list = []
    mase_list = []
    execution_times = []

    for freq, freq_info in file_paths.items():
        print(f"\tProcessing {freq} data...")
        train_data = pd.read_csv(freq_info['train'])
        test_data = pd.read_csv(freq_info['test'])
        method_func = method_func_args['func']
        method_extra_args = method_func_args['extra_args']
        
        start_time = time.time()
        results = method_func(train_data, test_data, freq_info['m'], **method_extra_args)
        freq_to_res[freq] = results
        
        end_time = time.time()
        execution_time = end_time - start_time
        execution_times.append(execution_time)
        
        # Calculate performance metrics
        y_true = results['Actual'].values
        y_pred = results['Forecast'].values
        smape = calculate_smape(y_true, y_pred)
        mase = calculate_mase(y_true, y_pred, freq_info['m']) 
        
        smape_list.append(smape)
        mase_list.append(mase)
        
        print(f"\tFinished processing {freq} data.")

    method_to_freq_res[method_name] = freq_to_res

    # Calculate overall metrics
    overall_smape = np.mean(smape_list)
    overall_mase = np.mean(mase_list)
    overall_owa = calculate_owa(overall_smape, overall_mase, naive_2_smape, naive_2_mase)

    overall_results = {
        'sMAPE': overall_smape,
        'MASE': overall_mase,
        'OWA': overall_owa,
        'Avg Execution Time (s)': np.mean(execution_times),
        'Method': method_name
    }

    print(f"{method_name} Results:")
    display(overall_results)
    method_to_overall_res[method_name] = overall_results


Evaluating Damped...
	Processing yearly data...
	Finished processing yearly data.
	Processing quarterly data...
	Finished processing quarterly data.
	Processing monthly data...
	Finished processing monthly data.
	Processing weekly data...
	Finished processing weekly data.
	Processing daily data...
	Finished processing daily data.
	Processing hourly data...
	Finished processing hourly data.
Damped Results:


{'sMAPE': 17.161006489942597,
 'MASE': 0.4803910997996914,
 'OWA': 0.7582191570362012,
 'Avg Execution Time (s)': 31.69542948404948,
 'Method': 'Damped'}

{'sMAPE': 16.40307495113328,
 'MASE': 0.4828177285310617,
 'OWA': 0.7309146491705939,
 'Avg Execution Time (s)': 9.592811663945517,
 'Method': 'Naïve 1'}

(damped_phi=0.3)
{'sMAPE': 17.45852854941639,
 'MASE': 0.5037549911744407,
 'OWA': 0.7752963034760332,
 'Avg Execution Time (s)': 30.096329887708027,
 'Method': 'Damped'}

(damped_phi=0.9)
 {'sMAPE': 14.615595847762558,
 'MASE': 0.48629093374207916,
 'OWA': 0.665932354437933,
 'Avg Execution Time (s)': 335.98619671662647,
 'Method': 'Damped (Statsmodels)'}