In [15]:
from naive_methods.last_value import predict_last_value
from naive_methods.only_mean import mean_naive
from naive_methods.random_walk import random_walk

In [16]:
import pandas as pd 
import numpy as np 

In [17]:
# Read the yearly train and test splits from the local Dataset/ directory.
train_df = pd.read_csv(filepath_or_buffer='Dataset/Yearly-train.csv')
test_df = pd.read_csv(filepath_or_buffer='Dataset/Yearly-test.csv')

In [18]:
# Only the very first time series (row label 0 of each split) is used for this test run.
first_timeseries_train, first_timeseries_test = train_df.loc[0], test_df.loc[0]

In [19]:
# The first column holds the series identifier (Y1, Y2, etc.), so it is skipped.
# NaN entries are dropped before the call, exactly as for the other baselines,
# so the result does not depend on how mean_naive itself treats missing values.
mean_naive_predictions = mean_naive(
    first_timeseries_train.iloc[1:].dropna(),
    len(first_timeseries_test.iloc[1:]),
)

In [6]:
# Show the forecasts returned by mean_naive for the first series.
print(mean_naive_predictions)

[6523.738709677418, 6523.738709677418, 6523.738709677418, 6523.738709677418, 6523.738709677418, 6523.738709677418]


In [20]:
# NaN entries must be dropped first because the majority of this row is NaN.
# For the dataset tested here the filtering could be harsher, but a generic
# dropna() generalizes better for the application.
last_value_predictions = predict_last_value(
    first_timeseries_train.iloc[1:].dropna(),
    len(first_timeseries_test.iloc[1:]),
)

In [10]:
# Show the forecasts returned by predict_last_value for the first series.
print(last_value_predictions)

[7261.1, 7261.1, 7261.1, 7261.1, 7261.1, 7261.1]


In [21]:
# NaN entries must be dropped first because the last entry of the row might be NaN.
random_walk_predictions = random_walk(
    first_timeseries_train.iloc[1:].dropna(),
    len(first_timeseries_test.iloc[1:]),
)

In [13]:
# Show the forecasts returned by random_walk for the first series.
print(random_walk_predictions)

[7261.1, 7356.02, 7467.070000000001, 7576.570000000001, 7684.340000000001, 7031.740000000001]


**Evaluation**

In [35]:
from evaluation_protocol.grubbs import grubbs_score 
from evaluation_protocol.mape import mape
from evaluation_protocol.smape import smape
from evaluation_protocol.shape_similarity import dtw

In [37]:
# Forecasts and their display names, listed in the same order so they can be paired below.
predictions = [random_walk_predictions, last_value_predictions, mean_naive_predictions]
naive_method_names = ['Random Walk', 'Last value', 'Mean']
assert len(predictions) == len(naive_method_names), "every forecast needs a display name"

# Loop-invariant inputs: the held-out values of the first series (identifier
# column skipped) and the alpha value handed to grubbs_score.
real = first_timeseries_test[1:]
alpha = 0.05

# Score every naive baseline against the held-out values and report each metric.
for method_name, predicted in zip(naive_method_names, predictions):
    # The metric calls keep their original order so that anything they print
    # themselves appears in the same sequence as before.
    grubbs_test_score = grubbs_score(predicted, real, alpha)
    smape_score = smape(real, predicted)
    shape_similarity_score = dtw(predicted, real)
    mape_score = mape(real, predicted)

    print(f"Grubbs score for {method_name} is : {grubbs_test_score}")
    print(f"SMAPE score for {method_name} is : {smape_score}")
    print(f"MAPE score for {method_name} is : {mape_score}")
    print(f"Shape similarity score for {method_name} is : {shape_similarity_score}")
    print("===================================================================================")

No outliers detected.
No outliers detected.
DTW Distance 2036.3999999999987
Optimal Alignment Path: [(0, 0), (1, 0), (2, 1), (2, 2), (3, 3), (4, 4), (5, 5)]
Grubbs score for Random Walk is : None
SMAPE score for Random Walk is : 4.76081842782954
MAPE score for Random Walk is : 4.607188060292931
Shape similarity score for Random Walk is : (2036.3999999999987, [(0, 0), (1, 0), (2, 1), (2, 2), (3, 3), (4, 4), (5, 5)])
No outliers detected.
No outliers detected.
DTW Distance 1782.4999999999982
Optimal Alignment Path: [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)]
Grubbs score for Last value is : None
SMAPE score for Last value is : 3.9799255872447654
MAPE score for Last value is : 3.874990013989825
Shape similarity score for Last value is : (1782.4999999999982, [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)])
No outliers detected.
No outliers detected.
DTW Distance 6206.667741935491
Optimal Alignment Path: [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)]
Grubbs score for Mean is : None
