In [24]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from tqdm import tqdm
import warnings

In [25]:
from evaluation_protocol.grubbs import grubbs_score
from evaluation_protocol.mape import mape
from evaluation_protocol.smape import smape
from evaluation_protocol.shape_similarity import dtw
from handle_dataset.transform import create_df_with_datetimes


In [26]:
# Models
from naive_methods.last_value import predict_last_value
from naive_methods.m4_naive import m4_naive
from naive_methods.only_mean import mean_naive
from naive_methods.random_walk import random_walk

from ml_models.lightgbm import lightgbm
from ml_models.prophet_model import prophet_model

from traditional_models.arima import arima_model
from traditional_models.theta_model import theta_model_forecast

In [27]:
# Load the yearly train and test series plus the M4 metadata table from CSV.
# NOTE(review): the two series files are read from 'Dataset/' while the info
# file is read from '../Dataset/' — confirm both locations are intended.
m4_train_dataset_df = pd.read_csv('Dataset/Yearly-train.csv')
m4_test_dataset_df = pd.read_csv('Dataset/Yearly-test.csv')
info_df = pd.read_csv('../Dataset/M4-info.csv')

In [28]:
# Install an 'ignore' filter with no category or module restriction, so every
# warning raised after this cell is suppressed.
warnings.filterwarnings('ignore')

In [29]:
# Keep only the metadata rows whose 'SP' column equals 'Yearly'.
info_yearly = info_df[info_df['SP']=='Yearly']

In [23]:
# Display the distinct values of the 'category' column among the yearly rows.
info_yearly['category'].unique()

array(['Macro', 'Micro', 'Demographic', 'Industry', 'Finance', 'Other'],
      dtype=object)

In [30]:
# Start a fresh results CSV and emit its header row. The handle is deliberately
# left open: the evaluation loop in a later cell writes to it and closes it.
header_row = "model,timeseries,sMAPE,MAPE,ShapeSimilarity,Grubbs"
output_file = open("m4_general_results.csv", "w")
output_file.write(header_row)
output_file.write("\n")

1

In [31]:
def create_string(model,index, predicted, real):
    """Format one CSV result row for a single model on a single series.

    Calls ``smape``, ``mape``, ``dtw`` and ``grubbs_score`` (the latter with a
    fixed alpha of 0.05) on the forecast and the ground truth, rounds each
    result to two decimals, and joins them after the model name and series
    identifier.

    Args:
        model: Label written to the first column.
        index: Series identifier written to the second column.
        predicted: Forecast values passed to the metric functions.
        real: Ground-truth values passed to the metric functions.

    Returns:
        A newline-terminated, comma-separated string in the column order
        model, index, sMAPE, MAPE, DTW, Grubbs.
    """
    significance_level = 0.05

    # Metrics are evaluated in the same order as the output columns.
    smape_value = round(smape(real, predicted), 2)
    mape_value = round(mape(real, predicted), 2)
    dtw_value = round(dtw(predicted, real), 2)
    grubbs_value = round(grubbs_score(predicted, real, significance_level), 2)

    fields = (model, index, smape_value, mape_value, dtw_value, grubbs_value)
    return ",".join(f"{field}" for field in fields) + "\n"

In [32]:
def naive_methods(timeseries_index, timeseries_train_df, timeseries_test_df):
    """Score the three naive baselines on one series.

    Each of ``predict_last_value``, ``mean_naive`` and ``random_walk`` is
    called with the training ``'target'`` column converted to a list and the
    number of rows in ``timeseries_test_df``. The ground truth is the second
    column of ``timeseries_test_df``.

    Returns:
        The ``create_string`` rows concatenated in the order
        'Last Value', 'Mean Value', 'Random Walk'.
    """
    actual = timeseries_test_df[timeseries_test_df.columns[1]]
    horizon = len(timeseries_test_df)

    baselines = (
        ('Last Value', predict_last_value),
        ('Mean Value', mean_naive),
        ('Random Walk', random_walk),
    )

    # Produce every forecast before scoring any of them; each forecaster gets
    # its own freshly built list of training values.
    forecasts = [
        (label, forecaster(timeseries_train_df['target'].tolist(), horizon))
        for label, forecaster in baselines
    ]

    return ''.join(
        create_string(label, timeseries_index, forecast, actual)
        for label, forecast in forecasts
    )


def traditional_methods(timeseries_index, timeseries_train_df, timeseries_test_df):
    """Score the ARIMA and Theta forecasts on one series.

    The horizon is the number of rows in ``timeseries_test_df`` and the ground
    truth is its second column. ``arima_model`` receives the training
    ``'target'`` column cast to float; ``theta_model_forecast`` receives the
    same column without the cast.

    Returns:
        The ``create_string`` rows concatenated in the order 'Arima', 'Theta'.
    """
    actual = timeseries_test_df[timeseries_test_df.columns[1]]
    horizon = len(timeseries_test_df)

    # Dict literals evaluate in order, so ARIMA is fitted before Theta and both
    # forecasts exist before either is scored.
    forecasts = {
        'Arima': arima_model(
            series=timeseries_train_df['target'].astype(float),
            forecast_periods=horizon,
        ),
        'Theta': theta_model_forecast(
            series=timeseries_train_df['target'],
            h=horizon,
        ),
    }

    return ''.join(
        create_string(label, timeseries_index, forecast, actual)
        for label, forecast in forecasts.items()
    )


def ml_models(timeseries_index, timeseries_train_df, timeseries_test_df):
    """Score the Prophet forecast on one series.

    ``prophet_model`` is called with the full train and test frames; the
    ground truth is the second column of ``timeseries_test_df``.

    Returns:
        The single ``create_string`` row labelled 'Prophet'.
    """
    actual = timeseries_test_df[timeseries_test_df.columns[1]]
    forecast = prophet_model(train=timeseries_train_df, test=timeseries_test_df)
    return create_string('Prophet', timeseries_index, forecast, actual)

In [33]:
naive_dict = {"timeseries":"last_value, mean_naive, random_walk"}

# `index` is the running row offset into the train/test frames. It starts at 0
# and is advanced past the Macro block at the end of this cell so the next
# cell can continue from there.
index = 0
macro_length = len(info_yearly[info_yearly['category']=='Macro'])

# NOTE(review): this takes rows index .. index + macro_length - 1 of the
# train/test frames to be the Macro series — confirm the CSV row order matches
# the ordering of M4-info.
try:
    for i in tqdm(range(index, index + macro_length)):
        timeseries_train_df = create_df_with_datetimes(m4_train_dataset_df, i)
        timeseries_test_df = create_df_with_datetimes(m4_test_dataset_df, i)

        timeseries_index = m4_test_dataset_df['V1'][i]

        output_file.write(naive_methods(timeseries_index, timeseries_train_df, timeseries_test_df))
        output_file.write(traditional_methods(timeseries_index, timeseries_train_df, timeseries_test_df))
        output_file.write(ml_models(timeseries_index, timeseries_train_df, timeseries_test_df))
finally:
    # Close (and thereby flush) the results file even if a model raises
    # mid-loop; otherwise the handle stays open and buffered rows, including
    # the header, may never reach disk.
    output_file.close()

# Only reached when the whole Macro block was processed successfully.
index += macro_length


TypeError: object of type 'int' has no len()

In [None]:
micro_length = len(info_yearly[info_yearly['category']=='Micro'])

# `index` is the running row offset left by the previous cell, so the Micro
# block spans rows [index, index + micro_length). The end of the range must be
# an absolute position: using the bare count `micro_length` as the end would
# process the wrong number of rows (or none, if index >= micro_length).
# NOTE(review): assumes the Micro series directly follow the rows already
# processed in the train/test frames — confirm against M4-info ordering.
with open("m4_general_results.csv", "a") as output_file:
    # The context manager closes the file even if a model raises mid-loop.
    for i in tqdm(range(index, index + micro_length)):
        timeseries_train_df = create_df_with_datetimes(m4_train_dataset_df, i)
        timeseries_test_df = create_df_with_datetimes(m4_test_dataset_df, i)

        timeseries_index = m4_test_dataset_df['V1'][i]

        output_file.write(naive_methods(timeseries_index, timeseries_train_df, timeseries_test_df))
        output_file.write(traditional_methods(timeseries_index, timeseries_train_df, timeseries_test_df))
        output_file.write(ml_models(timeseries_index, timeseries_train_df, timeseries_test_df))

# Only reached when the whole Micro block was processed successfully.
index += micro_length
