In [1]:
import pandas as pd
import numpy as np
import time
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import acf

In [2]:
file_paths = {
    'yearly': {
        'train': '../archive/Yearly-train.csv',
        'test': '../archive/Yearly-test.csv',
        'm': 1,
        'h': 6
    },
    'quarterly': {
        'train': '../archive/Quarterly-train.csv',
        'test': '../archive/Quarterly-test.csv',
        'm': 4,
        'h': 8
    },
    'monthly': {
        'train': '../archive/Monthly-train.csv',
        'test': '../archive/Monthly-test.csv',
        'm': 12,
        'h': 18
    },
    'weekly': {
        'train': '../archive/Weekly-train.csv',
        'test': '../archive/Weekly-test.csv',
        'm': 1,
        'h': 13
    },
    'daily': {
        'train': '../archive/Daily-train.csv',
        'test': '../archive/Daily-test.csv',
        'm': 1,
        'h': 14
    },
    'hourly': {
        'train': '../archive/Hourly-train.csv',
        'test': '../archive/Hourly-test.csv',
        'm': 24,
        'h': 48
    }
}


In [None]:
def calculate_smape(actual, predicted, epsilon=1e-8):
    absolute_diff = np.abs(actual - predicted)
    sum_absolute = np.abs(actual) + np.abs(predicted)
    smape = np.mean(200 * (absolute_diff / (sum_absolute + epsilon)))
    
    return smape


def calculate_mase(actual, forecast, m):
    actual = np.array(actual)
    forecast = np.array(forecast)
    
    n = len(actual)
    h = len(forecast)
    
    numerator = np.mean(np.abs(actual[-h:] - forecast))
    
    scale = np.mean(np.abs(actual[m:] - actual[:-m]))
    
    mase_value = numerator / scale
    return mase_value


def calculate_owa(smape_method, mase_method, smape_naive2, mase_naive2):
    relative_smape = smape_method / smape_naive2
    relative_mase = mase_method / mase_naive2
    return (relative_smape + relative_mase) / 2

In [3]:
# Function to implement Naïve 1 forecasting
def naive_1_forecast(series):
    return series.iloc[-1]
    
# Evaluate Naïve 1 method on the dataset
def evaluate_naive_1(train, test, m):
    results = []
    for i in range(len(train)):
        series_id = train.iloc[i, 0]
        train_series = train.iloc[i, 1:].dropna().astype(float)
        test_series = test.iloc[i, 1:].dropna().astype(float)  # Use the entire test set

        if len(train_series) > 0 and len(test_series) > 0:
            forecast = naive_1_forecast(train_series)
            forecasts = [forecast] * len(test_series)  # Extend the last observed value
            
            for j in range(len(test_series)):
                actual = test_series.iloc[j]
                error = forecasts[j] - actual
                results.append({
                    'Series': series_id,
                    'Forecast': forecasts[j],
                    'Actual': actual,
                    'Error': error,
                    'TrainSeries': train_series,
                    'm': m
                })
    
    results_df = pd.DataFrame(results)
    return results_df


In [None]:
def naive_2_forecast(series, m):
    # Perform the 90% autocorrelation test
    acf_values = acf(series, nlags=m)
    if acf_values[m] > 1.645 / np.sqrt(len(series)):  # 1.645 is the z-value for 90% confidence
        # Data is seasonal, apply multiplicative decomposition
        decomposition = seasonal_decompose(series, model='multiplicative', period=m)
        seasonal = decomposition.seasonal
        adjusted_series = series / seasonal
        forecast = adjusted_series.iloc[-m:] * seasonal.iloc[-m:]
        forecast = forecast.iloc[-1]  # Use the last seasonal value for forecasting
    else:
        # Data is not seasonal, use last value
        forecast = series.iloc[-1]
    
    return forecast

def evaluate_naive_2(train, test, m):
    results = []
    for i in range(len(train)):
        series_id = train.iloc[i, 0]
        train_series = train.iloc[i, 1:].dropna().astype(float)
        test_series = test.iloc[i, 1:].dropna().astype(float)  # Use the entire test set

        if len(train_series) > 0 and len(test_series) > 0:
            forecast = naive_2_forecast(train_series, m)
            
            for j in range(len(test_series)):
                actual = test_series.iloc[j]
                error = forecast - actual
                results.append({
                    'Series': series_id,
                    'Forecast': float(forecast),  
                    'Actual': float(actual),  
                    'Error': float(error),  
                    'TrainSeries': train_series.to_dict(),  
                    'm': m
                })
    
    results_df = pd.DataFrame(results)
    return results_df


In [11]:
method_name_func = {
    'Naïve 1': evaluate_naive_1,
    'Naïve 2': evaluate_naive_2
}

results_dict = {}

# Naïve 2 benchmark values from the M4 competition report
naive_2_smape = 13.564  # from Table 4
naive_2_mase = 1.912    # from Table 4

smape_list = []
mase_list = []
execution_times = []

freq_to_res = {}

for method_name, method_func in method_name_func.items():
    print(f"Evaluating {method_name}...")
    
    for freq, paths in file_paths.items():
        print(f"\tProcessing {freq} data...")
        train_data = pd.read_csv(paths['train'])
        test_data = pd.read_csv(paths['test'])
        
        start_time = time.time()
        
        results = method_func(train_data, test_data, paths['m'])
        freq_to_res[freq] = results
        
        end_time = time.time()
        execution_time = end_time - start_time
        execution_times.append(execution_time)
        
        # Calculate performance metrics
        y_true = results['Actual']
        y_pred = results['Forecast']
        smape = calculate_smape(y_true, y_pred)
        mase = calculate_mase(y_true, y_pred, paths['m']) # results.iloc[0]['TrainSeries']
        
        smape_list.append(smape)
        mase_list.append(mase)
        
        print(f"\tFinished processing {freq} data.")

    # Calculate overall metrics
    overall_smape = np.mean(smape_list)
    overall_mase = np.mean(mase_list)
    overall_owa = calculate_owa(overall_smape, overall_mase, naive_2_smape, naive_2_mase)

    overall_results = {
        'sMAPE': overall_smape,
        'MASE': overall_mase,
        'OWA': overall_owa,
        'Avg Execution Time (s)': np.mean(execution_times),
        'Method': method_name
    }

    print(f"{method_name} Results:")
    display(overall_results)


Evaluating Naïve 2...
	Processing yearly data...
	Finished processing yearly data.
	Processing quarterly data...
	Finished processing quarterly data.
	Processing monthly data...
	Finished processing monthly data.
	Processing weekly data...
	Finished processing weekly data.
	Processing daily data...
	Finished processing daily data.
	Processing hourly data...
	Finished processing hourly data.
Naïve 2 Results:


{'sMAPE': nan,
 'MASE': nan,
 'OWA': nan,
 'Avg Execution Time (s)': 24.928833882013958,
 'Method': 'Naïve 2'}