In [6]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))

# Maximum Likelihood Estimation of ARMA-GARCH

One possible approach of fitting an ARMA-GARCH model is to perform a maximum likelihood estimation (MLE) for the conditional mean (ARMA), then an MLE of the conditional variance (GARCH). However, joint estimation is preferred. In the first stage of ARMA estimation, there is an implicit assumption of conditional homoskedasticity. It is contradicted in the second stage when you explicitly model conditional heteroskedasticity using GARCH.

## An Example: Normal Distribution

### Theory

Conditional log-likelihood of a normal random variable:

\begin{equation}
    L_t = - \frac{1}{2} \sum_{t=1}^T \left( \ln 2 \pi + \ln \sigma_t^2 + \left( \frac{\epsilon_t}{\sigma_t} \right)^2 \right)
\end{equation}

Recall the ARMA model,

\begin{equation}
    r_t = c + \sum_{i=1}^{p} \phi_i r_{t-i} + \sum_{i=1}^{q} \theta_i \epsilon_{t-i} + a_t,
\end{equation}

and simply rearrange:

\begin{equation}
    \epsilon_t = r_t - c - \sum_{i=1}^{p} \phi_i r_{t-i} - \sum_{i=1}^{q} \theta_i \epsilon_{t-i}.
\end{equation}

Substituting the above expression for $\epsilon_t$ as well as:

\begin{equation}
    \sigma_t^2 = \omega + \sum_{i=1}^{y} \alpha_i \epsilon_{t-i}^2 + \sum_{i=1}^{z} \beta_i \sigma_{t-i}^2,
\end{equation}

into the conditional log-likelihood expression:

\begin{equation}
    L_t = - \frac{1}{2} \sum_{t=1}^T \left( \ln 2 \pi + \ln \left( \omega + \sum_{i=1}^{y} \alpha_i \epsilon_{t-i}^2 + \sum_{i=1}^{z} \beta_i \sigma_{t-i}^2 \right)
    + \frac{\left(r_t - c - \sum_{i=1}^{p} \phi_i r_{t-i} - \sum_{i=1}^{q} \theta_i \epsilon_{t-i}\right)^2}{\omega + \sum_{i=1}^{y} \alpha_i \epsilon_{t-i}^2 + \sum_{i=1}^{z} \beta_i \sigma_{t-i}^2} \right)\\
    \\
    \omega > 0, \quad \alpha, \beta \geq 0, \quad \sum_{i=1}^{y} \alpha_i + \sum_{i=1}^{z} \beta_i < 1
\end{equation}

In [148]:
import numpy as np

def arma(phi0, phi1, theta, r):
    T = len(r)
    epsilon = np.zeros(T)
    for t in range(T):
        if t == 0:
            epsilon[t] = 0
        else:
            epsilon[t] = r[t] - phi0 - phi1*r[t-1] - theta*epsilon[t-1]
        return epsilon
    

def garch(alpha0, alpha1, beta, epsilon):
    T = len(epsilon)
    sigma_2 = np.zeros(T)
    sigma_2 = np.where(sigma_2==0, 1e-99, sigma_2)
    for t in range(T):
        if t == 0:
            sigma_2[t] = alpha0 / (1 - alpha1 - beta) # at t=0, initialize as unconditional variance
        else:
            sigma_2[t] = alpha0 + alpha1*epsilon[t-1]**2 + beta*sigma_2[t-1]
    return sigma_2

def arma_garch_loglikelihood(params, returns):
    phi0, phi1, theta, alpha0, alpha1, beta = params
    epsilon = arma(phi0, phi1, theta, returns)
    sigma_2 = garch(alpha0, alpha1, beta, epsilon)
    neg_llhs = -0.5 * (np.log(2*np.pi) + np.log(sigma_2) + (epsilon**2) / sigma_2)
    neg_llh = np.sum(neg_llhs)
    return neg_llh

def constrain0(params):
    phi0, phi1, theta, alpha0, alpha1, beta = params
    return alpha0 - 1e-99

def constrain1(params):
    phi0, phi1, theta, alpha0, alpha1, beta = params
    return alpha1

def constrain2(params):
    phi0, phi1, theta, alpha0, alpha1, beta = params
    return beta

def constrain3(params):
    phi0, phi1, theta, alpha0, alpha1, beta = params
    return -(1.0000000000000000001+alpha1+beta)

In [158]:
cons = ({'type': 'ineq', 'fun': constrain0},
        {'type': 'ineq', 'fun': constrain1},
        {'type': 'ineq', 'fun': constrain2},
        {'type': 'ineq', 'fun': constrain3})
vP0 = (0.01, 0.01, 0.01, 0.01, 0.01, 0.01)
bounds = [(0.0, None) for i in range(6)]
res = minimize(arma_garch_loglikelihood, vP0, args=(scaled_returns.values),
               bounds=bounds, constraints=cons, options={'disp':True}, method='SLSQP')

  sigma_2[t] = alpha0 + alpha1*epsilon[t-1]**2 + beta*sigma_2[t-1]
  neg_llhs = -0.5 * (np.log(2*np.pi) + np.log(sigma_2) + (epsilon**2) / sigma_2)


Inequality constraints incompatible    (Exit mode 4)
            Current function value: -23484.454951896816
            Iterations: 79
            Function evaluations: 765
            Gradient evaluations: 75
