In [9]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.api import OLS
from statsmodels.regression.linear_model import RegressionResultsWrapper
from arch import arch_model

# Names of the alpha columns to keep, one per line in the text file.
# Blank lines are skipped so they cannot become empty column labels in the
# column selection on `alphas` below.
with open("../output/significant_alphas_list.txt", "r") as f:
    SIGNIFICANT_ALPHAS_LIST: list[str] = [name for name in f.read().splitlines() if name.strip()]

# data
test_ticker: str = "BLDR"

test_returns: pd.Series = pd.read_csv("../output/stock_returns.csv", index_col=0, parse_dates=True)[test_ticker].dropna()
factor_returns: pd.DataFrame = pd.read_csv("../output/factor_returns.csv", index_col=0, parse_dates=True).drop(columns=["rf"])
alphas: pd.DataFrame = pd.read_csv(f"../output/alphas/{test_ticker}.csv", index_col=0, parse_dates=True)[SIGNIFICANT_ALPHAS_LIST]

# concat data to make sure they are aligned
# The inner join keeps only the index labels present in all three inputs.
# Column layout of concat_data is [returns | factors | alphas]; the factor
# count is captured BEFORE `factor_returns` is reassigned so the slice
# boundaries do not depend on statement order.
n_factors: int = len(factor_returns.columns)
concat_data = pd.concat([test_returns, factor_returns, alphas], axis=1, join="inner")
test_returns = concat_data.iloc[:, 0]
factor_returns = concat_data.iloc[:, 1:1 + n_factors]
alphas = concat_data.iloc[:, 1 + n_factors:]

# NOTE(review): the previous code set `test_residuals = concat_data.iloc[:, 1]`,
# which is the first factor column (no residual series is ever concatenated),
# and it shifted the factor/alpha slices right by one column as a result.
# Here the residuals are taken as the OLS residuals of the stock returns on
# the factor returns plus an intercept — presumably the intended definition;
# confirm against the rest of the notebook.
test_residuals = OLS(test_returns, sm.add_constant(factor_returns)).fit().resid


## Residual against alphas OLS

- residuals: observed at time t (dependent variable)
- alphas: observed at time t-1 (regressors, lagged one period)

## Find Best ARMA model

loop p: 0-5  
loop q: 0-5  
    fit ARMA(p,q) to residuals  
    save AIC, BIC, HQIC  
end loop  

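Select the best model based on AIC, BIC, and HQIC.

Note: the implementation in the next cell currently searches p and q over 1-3 and ranks candidates by AIC only.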

In [10]:
# import ARMA
from statsmodels.tsa.arima.model import ARIMA

def get_p_q(
    y: pd.Series,
    X: pd.DataFrame,
    p_range: range = range(1, 4),
    q_range: range = range(1, 4),
) -> tuple[int, int]:
    """
    Select the ARMA(p, q) order with the lowest AIC.

    For every (p, q) combination, fits ``ARIMA(y, order=(p, 0, q), exog=X)``
    and records the AIC of the fitted model. Combinations whose fit raises,
    or whose AIC is not finite, are skipped.

    NOTE(review): the earlier docstring said "Get p and q for GARCH model";
    the fits performed here are ARMA fits, so presumably the selected orders
    are reused for a later GARCH specification — confirm with the caller.

    Parameters
    ----------
    y : pd.Series
        Series passed to ARIMA as the endogenous variable.
    X : pd.DataFrame
        Regressors passed to ARIMA as ``exog``.
    p_range : range, optional
        AR orders to try. Must be re-iterable. Defaults to 1, 2, 3.
    q_range : range, optional
        MA orders to try. Must be re-iterable (it is walked once per p).
        Defaults to 1, 2, 3.

    Returns
    -------
    tuple[int, int]
        The ``(p, q)`` pair with the smallest AIC. On ties, the first pair
        in iteration order (p outer loop, q inner loop) wins.

    Raises
    ------
    ValueError
        If no candidate produced a finite AIC.
    """
    candidates: list[tuple[int, int, float]] = []
    for p in p_range:
        for q in q_range:
            try:
                aic = ARIMA(y, order=(p, 0, q), exog=X).fit().aic
            except Exception:
                # Best-effort search: a failed fit only drops this candidate.
                # `Exception` (not a bare except) lets KeyboardInterrupt and
                # SystemExit propagate so the search can still be aborted.
                continue
            # A NaN AIC would defeat min(): every comparison against NaN is
            # False, so a NaN seen first would be returned as the "minimum".
            if np.isfinite(aic):
                candidates.append((p, q, aic))

    if not candidates:
        raise ValueError(
            "No ARMA(p, q) candidate could be fitted with a finite AIC "
            f"(p in {list(p_range)}, q in {list(q_range)})."
        )

    # return the p and q with the lowest AIC
    best_p, best_q, _ = min(candidates, key=lambda candidate: candidate[2])
    return best_p, best_q