In [None]:
from pathlib import Path
from statsmodels.tsa.vector_ar.var_model import VAR

import matplotlib.pyplot as plt
import pandas as pd
import warnings

from config.config_modeling import D
from src.data_preprocessing.data_loader import load_data, time_split

from src.modeling.multivariate_modeling import adfuller_test, grid_search_var
from src.modeling.evaluation import smape

# Suppress every warning for the rest of the kernel session so that library
# warnings do not clutter cell output.
# NOTE(review): this blanket filter hides all warning categories, including
# ones raised during model fitting — consider narrowing it to specific ones.
warnings.filterwarnings("ignore")

In [None]:
# Data folder, expressed relative to the notebook's working directory
# (three levels up, then into the shared project folder).
DATA_DIR = Path("..", "..", "..").joinpath(
    "hfactory_magic_folders", "plastic_cost_prediction", "data"
)
# File name of the dataset read from DATA_DIR.
MAIN_FILE = "PA6_cleaned_dataset.csv"

In [None]:
# Read the dataset through the project loader.
df = load_data(DATA_DIR.joinpath(MAIN_FILE))

### Check for stationarity

In [None]:
# Work on a copy so the loaded frame `df` stays untouched.
df_diff = df.copy()

# D maps a column name to the number of times that column is differenced.
for col_name, diff_order in D.items():
    for _ in range(diff_order):
        df_diff[col_name] = df_diff[col_name].diff()

# Each diff leaves a NaN at the start of the column; drop every row with a NaN.
df_diff = df_diff.dropna()
df_diff

In [None]:
# Run the project's ADF helper on every differenced column, one at a time.
for col_name, series in df_diff.items():
    adfuller_test(series, name=col_name)

### VAR Modelling

In [None]:
# Search VAR configurations on the differenced data and keep the five values
# the helper returns for the winning configuration.
(
    best_order,
    best_sMAPE,
    best_MAE,
    best_model,
    best_preds,
) = grid_search_var(df_diff)

In [None]:
# Show the sMAPE value returned by grid_search_var
# (presumably the lowest score across the searched orders — confirm).
best_sMAPE

In [None]:
# Show the order returned by grid_search_var; it is passed to VAR.fit below.
best_order

In [None]:
# Refit a VAR with the selected order on each fold produced by time_split and
# print the sMAPE of the price forecast on that fold's test rows.
spl = time_split(df_diff)

for train_idx, test_idx in spl:
    train, test = df_diff.iloc[train_idx], df_diff.iloc[test_idx]

    model = VAR(train)
    model_fit = model.fit(best_order)

    # Seed the forecast with the last `best_order` training rows and predict
    # one step per test row.
    forecast_input = train.values[-best_order:]
    pred_values = model_fit.forecast(y=forecast_input, steps=len(test))
    preds = pd.DataFrame(pred_values, index=test.index, columns=df_diff.columns)

    # Score every third row, starting from position 2.
    # NOTE(review): the reason for the 2::3 stride is not visible in this
    # notebook — confirm and move it to a named constant.
    sMAPE = smape(
        test["best_price_compound"][2::3],
        preds["best_price_compound"][2::3],
    )
    print(sMAPE)

In [None]:
# Suffix the forecast columns so they can be told apart from the actuals in
# the legends. Columns that already carry the suffix are left alone, so
# re-running this cell does not produce "_forecast_forecast" names (which
# would make the lookups below fail with a KeyError).
best_preds = best_preds.rename(
    columns=lambda name: name
    if str(name).endswith("_forecast")
    else f"{name}_forecast"
)

# Two panels per row. Round the row count up so that an odd number of series
# still gets a panel for the last one, and never ask for zero rows.
n_series = len(df_diff.columns)
n_rows = max(1, (n_series + 1) // 2)
fig, axes = plt.subplots(
    nrows=n_rows, ncols=2, dpi=150, figsize=(20, 20), squeeze=False
)
panels = axes.flatten()

for col, ax in zip(df_diff.columns, panels):
    # Forecast first, then the (differenced) actual series on the same axes.
    best_preds[col + "_forecast"].plot(legend=True, ax=ax)
    df_diff[col].plot(legend=True, ax=ax)
    ax.set_title(col + ": Forecast vs Actuals")
    ax.xaxis.set_ticks_position("none")
    ax.yaxis.set_ticks_position("none")
    ax.spines["top"].set_alpha(0)
    ax.tick_params(labelsize=6)

# Hide any panel that has no series (only happens with an odd series count).
for ax in panels[n_series:]:
    ax.set_visible(False)

plt.tight_layout()