# 04 — ARIMA Fit + Residual Extraction
Fit selected ARIMA models and compute residuals: (observed correlation - ARIMA prediction).

In [None]:
import numpy as np
import pandas as pd
import statsmodels.api as sm

# Inputs for this step: the correlation series to model and the table of
# per-series ARIMA orders (columns "series", "p", "d", "q" are read below).
corr_path = "../data/processed/rolling_corr_sample.parquet"
orders_path = "../data/processed/arima_orders_sample.csv"

corr_df = pd.read_parquet(corr_path)
orders_df = pd.read_csv(orders_path)

# Last expression of the cell: shows the first rows of the orders table.
orders_df.head()

In [None]:
# Fit + residuals for each series
def fit_arima_and_residuals(y, order):
    """Fit an ARIMA model to one series and return in-sample predictions and residuals.

    Args:
        y: Observed values (anything ``pd.Series`` accepts). Missing values
            are dropped before fitting.
        order: The ``(p, d, q)`` tuple passed to ``sm.tsa.ARIMA``.

    Returns:
        A ``(pred, resid)`` pair of ``pd.Series`` indexed like the
        non-missing part of ``y``, where ``resid = y - pred``.

    Raises:
        ValueError: If ``y`` has no non-missing observations.
    """
    y = pd.Series(y).dropna()
    if y.empty:
        raise ValueError("cannot fit ARIMA: series has no non-missing observations")

    model = sm.tsa.ARIMA(y, order=order)
    res = model.fit()

    # Predict over the default range (the whole fitted sample) instead of
    # passing index labels as start/end: statsmodels interprets integer
    # start/end as positions, so integer labels left after dropna() (e.g. an
    # index that now starts at 20) would select the wrong window.
    predicted = res.predict()
    # Re-attach y's own labels so the subtraction below aligns one-to-one.
    pred = pd.Series(
        np.asarray(predicted),
        index=y.index,
        name=getattr(predicted, "name", None),
    )
    resid = y - pred
    return pred, resid

# Collect each series' output in dicts and build the frames once at the end;
# inserting columns one at a time fragments a DataFrame when there are many
# series.
pred_cols = {}
resid_cols = {}
skipped = {}  # series name -> reason it produced no output

for _, row in orders_df.iterrows():
    series = row["series"]
    # Skip rows without a complete (p, d, q); int() cannot convert NaN.
    if pd.isna(row[["p", "d", "q"]]).any():
        skipped[series] = "no ARIMA order"
        continue
    order = (int(row["p"]), int(row["d"]), int(row["q"]))
    try:
        pred, resid = fit_arima_and_residuals(corr_df[series], order)
        pred_aligned = pred.reindex(corr_df.index)
        resid_aligned = resid.reindex(corr_df.index)
    except Exception as exc:
        # Best-effort: one failed series must not abort the batch, but the
        # failure is recorded so it is reported instead of silently dropped.
        skipped[series] = f"{type(exc).__name__}: {exc}"
        continue
    pred_cols[series] = pred_aligned
    resid_cols[series] = resid_aligned

resid_df = pd.DataFrame(resid_cols, index=corr_df.index)
pred_df = pd.DataFrame(pred_cols, index=corr_df.index)

# Keep only rows where every fitted series has a residual, and restrict the
# predictions to the same rows and columns.
resid_df = resid_df.dropna()
pred_df = pred_df.loc[resid_df.index, resid_df.columns]
print(resid_df.shape, pred_df.shape)
for name, reason in skipped.items():
    print(f"skipped {name}: {reason}")

In [None]:
# Write the residuals first, then the matching predictions, as parquet files.
for frame, out_path in (
    (resid_df, "../data/processed/arima_residuals_sample.parquet"),
    (pred_df, "../data/processed/arima_pred_sample.parquet"),
):
    frame.to_parquet(out_path)