In [None]:
import pandas as pd
import numpy as np 
import os
from handle_dataset.transform import create_df_with_datetimes
from ml_models.lightgbm import lightgbm


In [None]:
from evaluation_protocol.grubbs import grubbs_score
from evaluation_protocol.mape import mape
from evaluation_protocol.smape import smape
from evaluation_protocol.shape_similarity import dtw

In [None]:
from naive_methods.last_value import predict_last_value
from naive_methods.only_mean import mean_naive
from naive_methods.random_walk import random_walk

In [None]:
# Dataset: read the yearly train and test splits from CSV.
# Both paths are relative, so they resolve against the notebook's working directory.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in ('Dataset/Yearly-train.csv', 'Dataset/Yearly-test.csv')
)

In [None]:
# Smoke test for the datetime conversion helper: convert the training-set
# entries at index 0 and 1, keeping only the last result in `datetime_df`.
for counter in (0, 1):
    datetime_df = create_df_with_datetimes(train_df, counter)

# The loop above runs cleanly. Per the original author's note, every call
# produces a new dataframe with two columns, 'datetime' and 'target';
# add more indices to the tuple to convert more series.

In [None]:
# Load only the first time series (index 0) from each split.
first_timeseries_train, first_timeseries_test = (
    create_df_with_datetimes(split_df, 0)
    for split_df in (train_df, test_df)
)

In [None]:
# Naive baselines: each one receives the training targets as a plain list and
# produces as many forecasts as there are rows in the test frame.
naive_train_target = first_timeseries_train['target']
naive_horizon = len(first_timeseries_test)

# A fresh list is built per call so no baseline can see another's mutations.
last_value_forecasts = predict_last_value(naive_train_target.tolist(), naive_horizon)
mean_naive_forecasts = mean_naive(naive_train_target.tolist(), naive_horizon)
# NOTE(review): random_walk is presumably stochastic and no seed is set in this
# notebook — confirm whether results are reproducible across runs.
random_walk_forecasts = random_walk(naive_train_target.tolist(), naive_horizon)

In [None]:
# New metrics: report RMSE, NME, MAE and MSE of every naive baseline against
# the held-out 'target' values of the first series.

from evaluation_protocol.performance_metrics import rmse, nme, mae, mse

real = first_timeseries_test['target']

# NOTE(review): earlier inline drafts of these metrics (formerly commented out
# in this cell) converted both inputs to numpy arrays and computed:
#   rmse = sqrt(mean((predicted - real) ** 2))
#   nme  = mean(|(real - predicted) / real|)
#   mae  = mean(|predicted - real|)
#   mse  = mean((predicted - real) ** 2)
# The imported implementations are presumably equivalent — confirm against
# evaluation_protocol.performance_metrics.

# Baselines in report order; the keys are printed verbatim as row labels.
naive_forecasts = {
    "Last value": last_value_forecasts,
    "Mean": mean_naive_forecasts,
    "Random Walk": random_walk_forecasts,
}

# One report per metric: a header line followed by one indented, 2-decimal
# score per baseline. A single loop replaces four copy-pasted print blocks
# while emitting exactly the same text.
for metric_label, metric_fn in (("RMSE", rmse), ("NME", nme), ("MAE", mae), ("MSE", mse)):
    # The header keeps its trailing space so the output matches the old format.
    report_lines = [f"{metric_label} "]
    report_lines.extend(
        f"    {baseline_label}: {round(metric_fn(forecasts, real), 2)}"
        for baseline_label, forecasts in naive_forecasts.items()
    )
    print("\n".join(report_lines))



In [None]:
# Traditional models
from traditional_models.arima import arima_model
from traditional_models.theta_model import theta_model_forecast
from traditional_models.ets import ets_method

# Forecast exactly as many steps as there are held-out observations, the same
# way the naive baselines do, instead of hard-coding the horizon. This keeps
# every forecast the same length as the `real` series it is scored against.
forecast_horizon = len(first_timeseries_test)

# ARIMA is fed the target cast to float; Theta receives the target as-is.
arima_forecasts = arima_model(
    series=first_timeseries_train['target'].astype(float),
    forecast_periods=forecast_horizon,
)
theta_forecasts = theta_model_forecast(
    series=first_timeseries_train['target'],
    h=forecast_horizon,
)

# ETS is run three times with different `periods` settings.
# NOTE(review): `periods` is presumably the seasonal period rather than the
# forecast horizon (the test frame is passed separately) — confirm in ets_method.
ets_forecasts_2 = ets_method(train=first_timeseries_train, test=first_timeseries_test, periods=2)
ets_forecasts_6 = ets_method(train=first_timeseries_train, test=first_timeseries_test, periods=6)
ets_forecasts_12 = ets_method(train=first_timeseries_train, test=first_timeseries_test, periods=12)

In [None]:
# Machine-learning models
from ml_models.prophet_model import prophet_model

# LightGBM — currently disabled; kept for reference.
# for counter in range(3):
#     timeseries_df = create_df_with_datetimes(train_df, counter)
#     lightgbm(timeseries_df)

# Prophet, fitted on the first series' train frame and given its test frame.
# NOTE(review): the evaluation cell reads first_timeseries_test['y'] afterwards,
# so this call appears to rename the frames' columns in place (to ds / y) —
# confirm, and consider passing copies once downstream code no longer relies on it.
prophet_forecasts = prophet_model(
    train=first_timeseries_train,
    test=first_timeseries_test,
)


In [None]:
# Evaluation Protocol: score every model's forecasts against the held-out
# values of the first series and print the formatted summary.
from evaluation_protocol.evaluation_protocol_string import eval_string

# Passed straight to grubbs_score.
# NOTE(review): presumably the significance level of the Grubbs test — confirm.
alpha = 0.05

# Model name -> forecasts, in the order they appear in the summary.
predicted_dictionary = {"arima": arima_forecasts,
                        "theta": theta_forecasts,
                        "prophet": prophet_forecasts,
                        "ets_2": ets_forecasts_2,
                        "ets_6": ets_forecasts_6,
                        "ets_12": ets_forecasts_12}

# By this point the test frame's columns have usually been renamed to ds / y
# as a side effect of an earlier cell (apparently the Prophet one — confirm).
# Fall back to the original 'target' column so this cell also works when that
# rename did not happen, instead of raising a KeyError on 'y'.
real_column = 'y' if 'y' in first_timeseries_test.columns else 'target'
real = first_timeseries_test[real_column]

# One empty score list per model, derived from predicted_dictionary so the two
# key sets can never drift apart when a model is added or removed.
scores_dict = {model_name: [] for model_name in predicted_dictionary}

for key, predicted in predicted_dictionary.items():
    # Argument order differs per metric; keep each call exactly as its helper expects.
    grubbs_test_score = grubbs_score(predicted, real, alpha)
    smape_score = smape(real, predicted)
    shape_similarity_score = dtw(predicted, real)
    mape_score = mape(real, predicted)

    # Stored order: MAPE | sMAPE | Grubbs | Shape similarity
    # MAPE and sMAPE keep two decimals; Grubbs and shape similarity are rounded
    # to whole numbers.
    # NOTE(review): confirm the integer rounding of the last two is intended.
    scores_dict[key].extend([
        round(mape_score, 2),
        round(smape_score, 2),
        round(grubbs_test_score),
        round(shape_similarity_score),
    ])

# Render the collected scores and print the resulting string.
evaluation_string = eval_string(scores_dict)
print(evaluation_string)