In [1]:
import pandas as pd
import numpy as np 
import os
from handle_dataset.transform import create_df_with_datetimes
from ml_models.lightgbm import lightgbm

In [2]:
from evaluation_protocol.grubbs import grubbs_score
from evaluation_protocol.mape import mape
from evaluation_protocol.smape import smape
from evaluation_protocol.shape_similarity import dtw

In [3]:
# Dataset: read the yearly train/test CSVs (paths are relative to the
# notebook's working directory).
TRAIN_CSV_PATH = 'Dataset/Yearly-train.csv'
TEST_CSV_PATH = 'Dataset/Yearly-test.csv'

train_df = pd.read_csv(TRAIN_CSV_PATH)
test_df = pd.read_csv(TEST_CSV_PATH)

In [4]:
# Smoke test of create_df_with_datetimes on the first two training series.
# Per the original author's note, each call builds a new dataframe with the
# two columns 'datetime' and 'target'. The call is repeated once per index
# listed below; only the last result stays bound to `datetime_df`.
for counter in (0, 1):
    datetime_df = create_df_with_datetimes(train_df, counter)

In [5]:
# Build the series at index 0 from the train frame and from the test frame,
# in that order, using the same helper for both.
first_timeseries_train, first_timeseries_test = (
    create_df_with_datetimes(split_df, 0) for split_df in (train_df, test_df)
)

In [6]:
# Display the training target as floats (returns a new Series; the frame itself is not modified).
first_timeseries_train.loc[:, 'target'].astype(float)

1     5172.1
2     5133.5
3     5186.9
4     5084.6
5     5182.0
6     5414.3
7     5576.2
8     5752.9
9     5955.2
10    6087.8
11    6238.9
12    6317.2
13    6262.7
14    6361.0
15    6427.4
16    6654.9
17    6835.4
18    6925.5
19    7073.5
20    7144.0
21    7230.6
22    7349.6
23    7339.2
24    7250.8
25    7294.6
26    7393.9
27    7560.9
28    7651.4
29    7587.3
30    7530.5
31    7261.1
Name: target, dtype: float64

In [None]:
# Traditional models: fit ARIMA and Theta on the first training series and
# forecast the same horizon with both.
from traditional_models.arima import arima_model
from traditional_models.theta_model import theta_model_forecast

# Number of future periods requested from each model.
FORECAST_HORIZON = 6

# Cast the target to float once and feed the *same* series to both models.
# Previously only ARIMA received the float-cast series while Theta got the
# raw column, so the two models being compared could see different dtypes.
train_target = first_timeseries_train['target'].astype(float)

arima_forecasts = arima_model(series=train_target, forecast_periods=FORECAST_HORIZON)
theta_forecasts = theta_model_forecast(series=train_target, h=FORECAST_HORIZON)

In [None]:
# Machine Learning models: call the project's `lightgbm` helper on each of the
# first few training series. Its return value (if any) is not captured here.
N_ML_SERIES = 3

for counter in range(N_ML_SERIES):
    timeseries_df = create_df_with_datetimes(train_df, counter)
    lightgbm(timeseries_df)

In [16]:
# Evaluation Protocol: score every model's forecast against the held-out
# test target with Grubbs, SMAPE, MAPE and DTW shape similarity.

# Third argument to grubbs_score — presumably the test's significance level; confirm.
alpha = 0.05
predicted_dictionary = {"arima": arima_forecasts,
                        "theta": theta_forecasts}

# Cast to float so the ground truth gets the same dtype treatment as the
# training target; previously the raw test column was scored uncast.
real = first_timeseries_test['target'].astype(float)

for key, predicted in predicted_dictionary.items():
    # NOTE(review): grubbs_score/dtw take (predicted, real) while smape/mape
    # take (real, predicted) — order kept as-is; confirm against the helper
    # signatures in evaluation_protocol.
    grubbs_test_score = grubbs_score(predicted, real, alpha)
    smape_score = smape(real.tolist(), predicted.tolist())
    shape_similarity_score = dtw(predicted, real)
    mape_score = mape(real.tolist(), predicted.tolist())

    print(f"Grubbs score for {key} is : {grubbs_test_score}")
    print(f"SMAPE score for {key} is : {smape_score}")
    print(f"MAPE score for {key} is : {mape_score}")
    print(f"Shape similarity score for {key} is : {shape_similarity_score}")
    print("===================================================================================")

No outliers detected.
No outliers detected.
Grubbs score for arima is : 0
SMAPE score for arima is : 9.64446964462614
MAPE score for arima is : 9.121537475242345
Shape similarity score for arima is : 4178.792879752498
No outliers detected.
No outliers detected.
Grubbs score for theta is : 0
SMAPE score for theta is : 40.00131259796595
MAPE score for theta is : 33.29745638084341
Shape similarity score for theta is : 15127.100000000006
