# Univariate Models

In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd

from src.data_preprocessing.data_loader import load_data
from src.modeling.univariate_modeling import get_arima_model, get_ets_model
from src.modeling.evaluation import smape, mae

In [None]:
# Relative location of the data folder: three directory levels up, then into
# hfactory_magic_folders/plastic_cost_prediction/data.
DATA_DIR = Path(
    "..",
    "..",
    "..",
    "hfactory_magic_folders",
    "plastic_cost_prediction",
    "data",
)
# Name of the CSV file (inside DATA_DIR) that the notebook loads.
MAIN_FILE = "PA6_cleaned_dataset.csv"

In [None]:
# Load the dataset file DATA_DIR / MAIN_FILE with the project's loader.
# Later cells index the result by column name (df[col]) and call .dropna()
# on each column, so it is presumably a pandas DataFrame — TODO confirm.
df = load_data(DATA_DIR / MAIN_FILE)

In [None]:
# ARIMA models obtained from df. The evaluation cell iterates this object with
# .items(), treating each key as a column name of df and each value as a model
# exposing fittedvalues, resid, summary() and model.order.
models = get_arima_model(df)

In [None]:
# ETS models obtained from df. The evaluation cell iterates this object with
# .items(), treating each key as a column name of df and each value as a model
# exposing fittedvalues and summary().
ets_models = get_ets_model(df)

In [None]:
def _print_fit_report(name, kind, fitted_model, y_true, y_pred):
    """Print the header, sMAPE, MAE and summary of one fitted model.

    Args:
        name: Column name shown in the header.
        kind: Model family label shown in the header ("ETS" or "ARIMA").
        fitted_model: Fitted model object exposing ``summary()``.
        y_true: Observed values as a pandas Series.
        y_pred: Fitted values as a pandas Series, compared with ``y_true``
            position by position.
    """
    print(f"*** {name} - {kind} Model ***")
    print(f"sMAPE: {smape(y_true.values, y_pred.values):.2f}%")
    print(f"MAE: {mae(y_true.values, y_pred.values):.2f}")
    print(fitted_model.summary())


def _plot_actual_vs_fitted(name, kind, y_true, y_pred):
    """Plot the observed series and its fitted values on one figure.

    Args:
        name: Column name used in the figure title.
        kind: Model family label used in the figure title.
        y_true: Observed values; its index is used as the time axis.
        y_pred: Fitted values, drawn against the index of ``y_true``.
    """
    plt.figure(figsize=(10, 6))
    plt.plot(y_true.index, y_true, label="Actual Series")
    plt.plot(y_true.index, y_pred, label="Fitted Values", linestyle="dashed")
    plt.xlabel("Time")
    plt.ylabel("Values")
    plt.legend()
    plt.title(f"{name} - Actual vs Fitted Values ({kind} Model)")
    plt.show()


# Report in-sample fit quality of the ETS and ARIMA model of every column.
# NOTE(review): zip pairs the two mappings by position, so both are assumed to
# list the same columns in the same order — TODO confirm in the model builders.
for (col_ets, ets_model), (col, model) in zip(
    ets_models.items(), models.items()
):
    # ETS model: fitted values are compared with the non-missing observations.
    y_true_ets = df[col_ets].dropna()
    y_pred_ets = ets_model.fittedvalues

    _print_fit_report(col_ets, "ETS", ets_model, y_true_ets, y_pred_ets)
    _plot_actual_vs_fitted(col_ets, "ETS", y_true_ets, y_pred_ets)

    # Line plot of residuals
    residuals_ets = y_true_ets - y_pred_ets
    plt.figure(figsize=(10, 4))
    plt.plot(y_true_ets.index, residuals_ets, label="Residuals")
    plt.axhline(
        0, color="red", linestyle="--", linewidth=2, label="Zero Residuals"
    )
    plt.xlabel("Time")
    plt.ylabel("Residuals")
    plt.legend()
    plt.title(f"{col_ets} - Residuals (ETS Model)")
    plt.show()

    print("--------------------------------")

    # ARIMA model: d is the differencing order, the middle entry of (p, d, q).
    # The first d fitted values and residuals are start-up artifacts of the
    # differencing, so they are dropped from the metrics and from every plot.
    d = model.model.order[1]
    y_true_arima = df[col].dropna().iloc[d:]
    y_pred_arima = model.fittedvalues.iloc[d:]

    _print_fit_report(col, "ARIMA", model, y_true_arima, y_pred_arima)
    _plot_actual_vs_fitted(col, "ARIMA", y_true_arima, y_pred_arima)

    # Line plot of residuals, restricted to the same window as the metrics so
    # the start-up residuals do not dominate the y-axis.
    residuals_arima = pd.DataFrame(model.resid.iloc[d:])
    residuals_arima.plot()
    plt.title(f"{col} - Residuals (ARIMA Model)")
    plt.show()

    print("--------------------------------")