In [None]:
import matplotlib.pyplot as plt

import constants.constants as cst
from src.preprocessing import preprocess_data
from src.utils.load_data import load_data
from src.utils.plot_predictions import plot_predictions

# Predictions 

## Data Loading

In [None]:
# Load the three objects returned by the project's data loader.
# NOTE(review): presumably the training set, the test set and a submission
# template — confirm against src/utils/load_data.
train, test, submission = load_data()

In [None]:
# Run the project's preprocessing on the training data and rebind `train` to the result.
# NOTE(review): because `train` is overwritten, re-running this cell feeds the
# already-preprocessed data back into preprocess_data — confirm that is harmless,
# or re-run the loading cell first.
train = preprocess_data(train)

## [statsmodels](https://www.statsmodels.org/stable/index.html)

In [None]:
from statsmodels.graphics.tsaplots import plot_pacf

### ARIMA model

In [None]:
from statsmodels.tsa.arima.model import ARIMA

In [None]:
# Series to model: the cst.O3 column of the training data, restricted to its
# last 500 rows (or all rows when there are fewer than 500).
endog = train[cst.O3].iloc[-500:]

In [None]:
# Partial autocorrelation of the modelled series for lags 0..40, drawn on an
# explicitly created axes so the title is set on a known target.
fig, ax = plt.subplots()
plot_pacf(endog, lags=40, method="ywm", ax=ax)
ax.set_title("Partial Autocorrelation Function (PACF)")
plt.show()

In [None]:
# ARIMA order used by the model cell below:
#   p - autoregressive order
#   d - degree of differencing
#   q - moving-average order
p, d, q = 2, 0, 2

In [None]:
# Build the ARIMA(p, d, q) model on the selected series, fit it, and print the
# text summary of the fitted result.
arima_order = (p, d, q)
model = ARIMA(endog, order=arima_order)
fitted_model = model.fit()
print(fitted_model.summary())

In [None]:
# Forecast one step per row of `test`.
forecast = fitted_model.get_forecast(steps=len(test))

# Name the predictions after the modelled series directly rather than renaming a
# "predicted_mean" column afterwards: a columns-rename silently does nothing if
# that name ever differs. The forecast's own index is dropped in favour of 0..n-1.
forecast_df = forecast.predicted_mean.rename(endog.name).to_frame().reset_index(drop=True)

# Attach the test dates by position. Column assignment aligns on index labels,
# so the test index is reset to 0..n-1 first; without this, a non-default index
# on `test` would produce NaN dates.
forecast_df[cst.DATE] = test[cst.RAW_DATE].reset_index(drop=True)

In [None]:
# Show the forecast frame as the cell output for a quick visual check.
forecast_df

In [None]:
# Pass the training data, the forecast frame and the modelled column name to the
# project's plotting helper.
# NOTE(review): presumably overlays the forecast on the training history — confirm
# in src/utils/plot_predictions.
plot_predictions(train, forecast_df, endog.name)

In [None]:
from statsmodels.tsa.stattools import adfuller

# 1) Model summary & params
print(fitted_model.summary())

# 2) Are parameters statistically significant?
#    Estimates and standard errors alone do not answer that, so the p-values
#    are printed as well.
print(fitted_model.params)
print(fitted_model.bse)  # std errors
print(fitted_model.pvalues)  # p-values of the parameter estimates

# 3) Residual diagnostics
fitted_model.plot_diagnostics(figsize=(10, 8))
plt.show()

# 4) Check last training values
print(endog.tail(10))

# 5) Stationarity test (Augmented Dickey-Fuller, null hypothesis: unit root).
#    A small p-value rejects the unit root, which supports fitting without
#    differencing (d = 0); a large one suggests trying d = 1.
stat, pvalue, *_ = adfuller(endog.dropna())
print("ADF statistic:", stat)
print("ADF p-value:", pvalue)
