## Time Series Forecasting

In [1]:
from google.colab import drive

drive.mount("./gdrive", force_remount=True)
%cd "./gdrive/My Drive/Colab Notebooks/sampco"

Mounted at ./gdrive
/content/gdrive/My Drive/Colab Notebooks/sampco


## Setup

In [79]:
!pip install plotly statsmodels prophet neuralprophet -q -U

[K     |████████████████████████████████| 61kB 4.3MB/s 
[K     |████████████████████████████████| 748.8MB 24kB/s 
[31mERROR: torchvision 0.9.1+cu101 has requirement torch==1.8.1, but you'll have torch 1.6.0 which is incompatible.[0m
[31mERROR: torchtext 0.9.1 has requirement torch==1.8.1, but you'll have torch 1.6.0 which is incompatible.[0m
[31mERROR: neuralprophet 0.2.7 has requirement tqdm>=4.50.2, but you'll have tqdm 4.41.1 which is incompatible.[0m
[?25h

In [80]:
from prophet import Prophet
from neuralprophet import NeuralProphet

from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_squared_error

from prophet.plot import plot_plotly
import plotly.graph_objects as go
import pandas as pd

In [4]:
class Plot():
    def __init__(self, df, pred=None, time=None):
        time = [i for i in range(0, len(df if pred is None else pred))] if time is None else time

        self.df = pd.DataFrame({"Time": time})
        self.df = self.df.merge(df, left_index=True, right_index=True, how="outer")

        if pred is not None:
            self.df = self.df.merge(pred, left_index=True, right_index=True)

    def prediction(self, y_col, p_col=None, p_lower_col=None, p_upper_col=None, only_future=False):
        df = self.df[self.df.isna().any(axis=1)] if only_future else self.df
        data = []

        data.append(go.Scatter(name="Actual", x=df["Time"], y=df[y_col],
                               mode="markers", marker=dict(color="black", size=4)))

        if p_lower_col in df.keys() and p_lower_col is not None:
            data.append(go.Scatter(x=df["Time"], y=df[p_lower_col],
                                   mode="lines", line=dict(width=0),
                                   hoverinfo="skip", showlegend=False))

        if p_col in df.keys():
            data.append(go.Scatter(name="Predicted", x=df["Time"], y=df[p_col],
                                   mode="lines", fill="tonexty", fillcolor="rgba(0,114,178,0.2)",
                                   line=dict(color="#0072B2", width=2)))

        if p_upper_col in df.keys() and p_upper_col is not None:
            data.append(go.Scatter(x=df["Time"], y=self.df[p_upper_col],
                                   mode="lines", fill="tonexty", fillcolor="rgba(0,114,178,0.2)",
                                   line=dict(width=0), hoverinfo="skip", showlegend=False))

        layout = go.Layout(
            yaxis=dict(title=y_col),
            title=f"{y_col} prediction",
            xaxis=dict(
                title="Time",
                rangeselector=dict(
                    buttons=list([
                        dict(count=1, label="1m", step="month", stepmode="backward"),
                        dict(count=6, label="6m", step="month", stepmode="backward"),
                        dict(count=1, label="1y", step="year", stepmode="backward"),
                        dict(step="all"),
                    ]),
                ),
                rangeslider=dict(visible=True),
            ),
            showlegend=True)

        fig = go.Figure(data=data, layout=layout)
        return fig

    def accuracy(self, y_col, p_col, p_lower_col=None, p_upper_col=None):
        df = self.df[~self.df.isna().any(axis=1)]

        match_predict = (df[y_col].astype("int") == df[p_col].astype("int"))

        lower_than_p = (df[y_col] <= df[p_col])
        higher_than_p = (df[y_col] >= df[p_col])

        acc = pd.DataFrame({"Mode": ["match"], "Accuracy": [(match_predict).mean()]})

        if p_lower_col is not None:
            higher_than_p_lower = (df[y_col] >= df[p_lower_col])
            h = pd.DataFrame({
                "Mode": ["± lower"],
                "Accuracy": [(lower_than_p & higher_than_p_lower).mean()],
            })
            acc = pd.concat([acc, h])

        if p_upper_col is not None:
            lower_than_p_upper = (df[y_col] <= df[p_upper_col])
            l = pd.DataFrame({
                "Mode": ["± upper"],
                "Accuracy": [(higher_than_p & lower_than_p_upper).mean()],
            })
            acc = pd.concat([acc, l])

        if p_lower_col is not None and p_upper_col is not None:
            b = pd.DataFrame({
                "Mode": ["± lower/upper"],
                "Accuracy": [(higher_than_p_lower & lower_than_p_upper).mean()],
            })
            acc = pd.concat([acc, b])

        acc["Accuracy"] = (acc["Accuracy"] * 100).round(2)

        data = [go.Bar(x=acc["Mode"], y=acc["Accuracy"],
                       text=acc["Accuracy"], textposition="auto",
                       width=0.5, marker_color="#0072B2")]

        layout = go.Layout(yaxis=dict(title="Accuracy (%)"),
                           title=f"{y_col} prediction accuracy",
                           xaxis=dict(title="Mode"))

        fig = go.Figure(data=data, layout=layout)
        return fig

    def error(self, y_col, p_col):
        df = self.df[~self.df.isna().any(axis=1)]

        mean, std = df[y_col].mean(), df[y_col].std()
        df[y_col][:][:] = (df[y_col] - mean) / std
        df[p_col][:][:] = (df[p_col] - mean) / std

        error = pd.DataFrame({
            "Metric": [
                "Mean Absolute Error (MAE)",
                "Mean Squared Error (MSE)",
                "Root Mean Squared Error (RMSE)",
            ],
            "Error": [
                mean_absolute_error(df[y_col], df[p_col]),
                mean_squared_error(df[y_col], df[p_col], squared=True),
                mean_squared_error(df[y_col], df[p_col], squared=False),
            ]})

        data = [go.Bar(x=error["Metric"], y=error["Error"],
                       text=error["Error"], textposition="auto", marker_color="#0072B2")]

        layout = go.Layout(yaxis=dict(title="Error (lower is better)"),
                           title=f"{y_col} prediction error", xaxis=dict(title="Error"))

        fig = go.Figure(data=data, layout=layout)
        return fig

## Variables

In [13]:
date_column = "LastDateFiscalWeek"
columns = ["LastDateFiscalWeek", "SoldVolume"]

## Diário
path, prefix = "Daily-Datasets/", "Daily-FactSales-"
periods, freq = 30, "D"

# ## Semanal
# path, prefix = "Weekly-Datasets/", "Weekly-FactSales-"
# periods, freq = 4, "W"

interval_width = 0.8


def prepare(csv):
    ## Dataset
    df = pd.read_csv(f"{path}{prefix}{csv}", parse_dates=[date_column], usecols=columns)

    ## Mensal
    # df[date_column] = df[date_column].apply(lambda x: x.strftime("%Y-%m"))

    ## Preprocessing data
    dfc = df.groupby([date_column]).size().reset_index(name="Sales")
    dfg = df.groupby(by=[date_column], as_index=False)[df.drop([date_column], axis=1).columns].sum()

    df = pd.merge(dfc, dfg, how="left", on=[date_column])
    df.sort_values(by=[date_column], ascending=True, ignore_index=True, inplace=True)

    ## Prophet
    df_pr = df[columns].copy()
    df_pr.columns = ["ds","y"]

    return df_pr

## Prophet

In [75]:
df_pr = prepare("900457119.csv")

m = Prophet(growth="linear", interval_width=interval_width)
m.fit(df_pr)

future = m.make_future_dataframe(periods=periods, freq=freq, include_history=True)
forecast = m.predict(future)

## Plot
plot = Plot(df_pr[["y"]], pred=forecast, time=forecast["ds"])

INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


In [76]:
plot.prediction(y_col="y", p_col="yhat", p_lower_col="yhat_lower", p_upper_col="yhat_upper")

In [77]:
plot.accuracy(y_col="y", p_col="yhat", p_lower_col="yhat_lower", p_upper_col="yhat_upper")

## ARIMA

In [72]:
X = df_pr["y"].values
train, test = X, X

history = [x for x in train]
predictions = []

for t in range(len(test)):
    model = ARIMA(history, order=(5,1,0))
    model_fit = model.fit(disp=0)
    output = model_fit.forecast(alpha=0.2)
    predictions.append(list(output[0]) + list(output[2][0]))
    history.append(test[t])


predictions = pd.DataFrame(predictions, columns=["yhat", "yhat_lower", "yhat_upper"])

plot = Plot(df_pr[["y"]], pred=predictions, time=forecast["ds"])



statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

removed, use:





In [73]:
plot.prediction(y_col="y", p_col="yhat", p_lower_col="yhat_lower", p_upper_col="yhat_upper")

In [74]:
plot.accuracy(y_col="y", p_col="yhat", p_lower_col="yhat_lower", p_upper_col="yhat_upper")

## Neural Prophet

In [92]:
m = NeuralProphet(growth="linear")
metrics = m.fit(df_pr, freq="D")

future = m.make_future_dataframe(df_pr, periods=periods, n_historic_predictions=len(df_pr))
forecast = m.predict(future)

INFO: nprophet.utils - set_auto_seasonalities: Disabling weekly seasonality. Run NeuralProphet with weekly_seasonality=True to override this.
INFO:nprophet.utils:Disabling weekly seasonality. Run NeuralProphet with weekly_seasonality=True to override this.
INFO: nprophet.utils - set_auto_seasonalities: Disabling daily seasonality. Run NeuralProphet with daily_seasonality=True to override this.
INFO:nprophet.utils:Disabling daily seasonality. Run NeuralProphet with daily_seasonality=True to override this.
INFO: nprophet.config - set_auto_batch_epoch: Auto-set batch_size to 8
INFO:nprophet.config:Auto-set batch_size to 8
INFO: nprophet.config - set_auto_batch_epoch: Auto-set epochs to 264
INFO:nprophet.config:Auto-set epochs to 264


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

INFO: nprophet - _lr_range_test: learning rate range test found optimal lr: 2.31E-01
INFO:nprophet:learning rate range test found optimal lr: 2.31E-01
Epoch[264/264]: 100%|██████████| 264/264 [00:07<00:00, 34.43it/s, SmoothL1Loss=0.0358, MAE=1.01e+4, RegLoss=0]


In [107]:
plot = Plot(df_pr[["y"]], pred=forecast["yhat1"], time=forecast["ds"])

In [108]:
plot.prediction(y_col="y", p_col="yhat1")