Tudo da vida segue uma distribuição gaussiana.
-V.C.A Marcelo

## Loading data

In [None]:
import pandas as pd
import numpy as np

# Load the gold-price table and parse its 'date' column into datetimes.
# `infer_datetime_format` is intentionally not passed: it is deprecated since
# pandas 2.0 (format inference is the default there) and only triggers a warning.
df_gold = pd.read_csv('../data/d_kilo_gold_price_in_currency.csv')
df_gold['date'] = pd.to_datetime(df_gold['date'])

# Plain NumPy arrays used by the later cells: dates and the 'China' price column.
data_x = np.array(df_gold['date'])
data_y = np.array(df_gold['China'])

## Forecasting with sktime

[source](https://www.sktime.org/en/latest/index.html)
[example1](https://towardsdatascience.com/sktime-a-unified-python-library-for-time-series-machine-learning-3c103c139a55)
[example2](https://github.com/alan-turing-institute/sktime/blob/master/examples/01_forecasting.ipynb)

In [None]:
import numpy as np
from math import sqrt
from sktime.performance_metrics.forecasting import *
from sklearn.metrics import mean_squared_error as MSE
from sklearn.metrics import mean_absolute_error as MAE
from sklearn.utils import check_array

def RMSE(y_test, y_pred):
    """Return the root mean squared error between actuals and forecasts.

    The ``squared=False`` keyword of scikit-learn's ``mean_squared_error`` was
    deprecated in 1.4 and removed in 1.6, so the root is taken here instead.
    Taking it per output before averaging reproduces what ``squared=False``
    returned, including for multi-output input.

    Args:
        y_test: Observed values.
        y_pred: Forecast values, aligned with ``y_test``.

    Returns:
        The RMSE as a float (uniform average over outputs).
    """
    per_output_mse = MSE(y_test, y_pred, multioutput='raw_values')
    return np.average(np.sqrt(per_output_mse))

# https://github.com/alan-turing-institute/sktime/blob/master/sktime/performance_metrics/forecasting/_functions.py
def print_metrics(title, y_pred, y_test, y_train):
    """Print point-forecast accuracy metrics for one forecaster.

    Every metric is called with the actual values first and the forecast
    second, which is the argument order the scikit-learn and sktime metric
    functions expect. The order matters for MAPE, whose denominator is the
    first argument: passing the forecast first would normalise the error by
    the predictions instead of by the observations.

    Args:
        title: Label printed in the header line.
        y_pred: Forecast values for the test period.
        y_test: Observed values for the test period.
        y_train: Training series, used by MASE to scale the error.
    """
    print(f'metrics for [{title}]:')
    # https://otexts.com/fpp2/accuracy.html#scale-dependent-errors
    print('    MAE', MAE(y_test, y_pred))
    print('    RMSE', RMSE(y_test, y_pred))
    # https://otexts.com/fpp2/accuracy.html#percentage-errors
    print('    MAPE', mape_loss(y_test, y_pred))
    print('    sMAPE', smape_loss(y_test, y_pred))
    # https://otexts.com/fpp2/accuracy.html#scaled-errors
    print('    MASE', mase_loss(y_test, y_pred, y_train))

In [None]:
from sktime.forecasting.all import *
from sktime.forecasting.arima import ARIMA, AutoARIMA
from sktime.forecasting.exp_smoothing import ExponentialSmoothing
from sktime.forecasting.naive import NaiveForecaster
from sktime.forecasting.theta import ThetaForecaster
from sktime.forecasting.trend import PolynomialTrendForecaster
from sktime.forecasting.fbprophet import Prophet

# Number of trailing observations held out for testing; this is also the
# length of the forecasting horizon.
days_to_predict = 7

# Wrap the raw arrays in a Series whose DatetimeIndex carries the 'd' frequency.
daily_index = pd.DatetimeIndex(data_x, freq='d')
y = pd.Series(data_y, index=daily_index)
plot_series(y)

# Hold out the last `days_to_predict` points and forecast exactly those
# timestamps (absolute horizon built from the test index).
y_train, y_test = temporal_train_test_split(y, test_size=days_to_predict)
fh = ForecastingHorizon(y_test.index, is_relative=False)

In [None]:
def _fit_predict_report(title, model):
    """Fit ``model`` on ``y_train``, forecast over ``fh`` and print its metrics.

    Args:
        title: Label passed to ``print_metrics`` for the header line.
        model: An unfitted forecaster exposing ``fit`` and ``predict``.

    Returns:
        The forecast returned by ``model.predict(fh)``.
    """
    model.fit(y_train)
    y_pred = model.predict(fh)
    print_metrics(title, y_pred, y_test, y_train)
    return y_pred


# naive forecaster (last)
# https://www.sktime.org/en/latest/api_reference/modules/auto_generated/sktime.forecasting.naive.NaiveForecaster.html
forecaster = NaiveForecaster(strategy="last")
y_pred_naive_last = _fit_predict_report('naive_last', forecaster)

# naive forecaster (mean)
# https://www.sktime.org/en/latest/api_reference/modules/auto_generated/sktime.forecasting.naive.NaiveForecaster.html
forecaster = NaiveForecaster(strategy="mean")
y_pred_naive_mean = _fit_predict_report('naive_mean', forecaster)

# naive forecaster (drift)
# https://www.sktime.org/en/latest/api_reference/modules/auto_generated/sktime.forecasting.naive.NaiveForecaster.html
forecaster = NaiveForecaster(strategy="drift")
y_pred_naive_drift = _fit_predict_report('naive_drift', forecaster)

# arima forecaster
# https://www.sktime.org/en/latest/api_reference/modules/auto_generated/sktime.forecasting.arima.ARIMA.html
forecaster = ARIMA(method='powell')
y_pred_arima = _fit_predict_report('arima', forecaster)

# auto arima forecaster
# https://www.sktime.org/en/latest/api_reference/modules/auto_generated/sktime.forecasting.arima.AutoARIMA.html
forecaster = AutoARIMA(method='powell')
y_pred_auto = _fit_predict_report('auto', forecaster)

# exponential smoothing forecaster
# https://www.sktime.org/en/latest/api_reference/modules/auto_generated/sktime.forecasting.exp_smoothing.ExponentialSmoothing.html
forecaster = ExponentialSmoothing()
y_pred_expo = _fit_predict_report('expo', forecaster)

# theta forecaster
# https://www.sktime.org/en/v0.4.2/modules/auto_generated/sktime.forecasting.theta.ThetaForecaster.html
forecaster = ThetaForecaster()
y_pred_theta = _fit_predict_report('theta', forecaster)

# polynomial trend forecaster
# https://www.sktime.org/en/v0.4.2/modules/auto_generated/sktime.forecasting.trend.PolynomialTrendForecaster.html
# Polynomial degree configured per forecast horizon (in days).
custom_degree = { 1 : 5, 7 : 5, 30 : 6 }
# A horizon without its own entry borrows the degree of the closest configured
# horizon instead of failing with KeyError; configured horizons are unaffected.
nearest_horizon = min(custom_degree, key=lambda horizon: abs(horizon - days_to_predict))
forecaster = PolynomialTrendForecaster(degree=custom_degree[nearest_horizon])
y_pred_poly = _fit_predict_report('poly', forecaster)

# prophet forecaster
# https://www.sktime.org/en/latest/api_reference/modules/auto_generated/sktime.forecasting.fbprophet.Prophet.html
forecaster = Prophet()
y_pred_prophet = _fit_predict_report('prophet', forecaster)

In [None]:
import custom_plot as cp

# Hand the training data, the hold-out data and every forecast to the custom
# plotting helper. Each label sits next to its series so the two sequences
# cannot get out of step.
labelled_series = [
    ("y_train", y_train),
    ("y_test", y_test),
    ("y_pred_naive_last", y_pred_naive_last),
    ("y_pred_naive_mean", y_pred_naive_mean),
    ("y_pred_naive_drift", y_pred_naive_drift),
    ("y_pred_arima", y_pred_arima),
    ("y_pred_auto", y_pred_auto),
    ("y_pred_expo", y_pred_expo),
    ("y_pred_theta", y_pred_theta),
    ("y_pred_poly", y_pred_poly),
    ("y_pred_prophet", y_pred_prophet),
]
series_labels, series_values = zip(*labelled_series)
cp.plot_series(*series_values, labels=list(series_labels))