In [1]:
from datetime import datetime
import numpy as np
import pandas as pd
from scipy.stats import norm
import scipy.optimize as opt

import yfinance as yf
#from vol_model.volatility_model import VolatilityModel

In [None]:
class GARCH:
    '''
    GARCH(1,1) volatility model with optional exogenous variance regressors.

    The conditional variance follows

        sigma2[t] = omega + alpha * eps[t-1]**2 + beta * sigma2[t-1] (+ sum(gammas * x[t]))

    with eps[t] = y[t] - mu. The class contains functions for estimation
    (``train``), variance filtering (``filter``) and Gaussian VaR
    forecasting (``VaR``).

    Attributes:
        params: numpy array (omega, alpha, beta[, gamma_1, ..., gamma_k]).
        mu: constant mean subtracted from the observations (default 0).
        n_obs: number of observations used by the last ``train`` call.
        opt_result: scipy ``OptimizeResult`` of the last ``train`` call.
    '''

    def __init__(self, params=None, mu=None):
        '''
        params: optional sequence (omega, alpha, beta[, gammas...]); defaults
                to (1e-06, 0.09, 0.9).
        mu:     optional constant mean; defaults to 0.
        '''
        # Identity checks (not `!= None`): comparing an ndarray with None is
        # element-wise and cannot be used as a truth value.
        if params is not None:
            self.params = np.array(params)
        else:
            self.params = np.array([1.e-06, 0.09, 0.9])
        self.mu = 0 if mu is None else mu

    def __repr__(self):
        return "omega = {:.3g}\nalpha = {:.3g}\nbeta  = {:.3g}".format(*self.params)

    def train(self, init_params, y, x=None, callback_func=None):
        '''
        Estimate the parameters by minimizing ``log_likelihood`` with BFGS.

        init_params:   positive starting values (omega, alpha, beta[, gammas...]);
                       they are log-transformed before being handed to the optimizer.
        y:             observations (array-like or pandas Series).
        x:             optional exogenous regressors, one row per observation.
        callback_func: optional callback forwarded to ``scipy.optimize.minimize``.

        Stores the estimates in ``self.params`` and the optimizer output in
        ``self.opt_result``, and prints both. Returns None.
        '''
        # Convert once so every likelihood evaluation works on plain arrays
        y = np.asarray(y, dtype=float)
        if x is not None:
            x = np.asarray(x, dtype=float)
        self.n_obs = len(y)
        opt_result = opt.minimize(self.log_likelihood,
                                  x0=self.inv_repam(init_params),
                                  args=(y, x, True),  # (y, x, fmin=True): return the scalar objective only
                                  method='BFGS',
                                  callback=callback_func,
                                  options={'maxiter': 100})
        self.opt_result = opt_result
        self.params = self.repam(opt_result.x)
        print('\nResults of BFGS minimization\n{}\n{}'.format('-' * 28, opt_result))
        print('\nResulting params = {}'.format(self.params))

    def _variance_path(self, params, y, x=None):
        '''
        Run the conditional-variance recursion for the given (untransformed) params.

        Returns ``(eps, sigma2)`` where ``eps = y - mu`` and ``sigma2`` has
        ``len(y) + 1`` entries: ``sigma2[0]`` is the sample variance of y,
        ``sigma2[1:len(y)]`` are the filtered variances, and the last entry is
        the one-step-ahead forecast (NaN when exogenous regressors are used
        but no row of ``x`` is available for the forecast date).

        Raises ValueError for empty ``y`` or when the number of exogenous
        columns does not match the number of gamma parameters.
        '''
        omega, alpha, beta = params[0], params[1], params[2]
        # Plain float array: avoids label-based lookups when y is a pandas Series
        y_arr = np.asarray(y, dtype=float)
        t_max = len(y_arr)
        if t_max == 0:
            raise ValueError('y must contain at least one observation')
        eps = y_arr - self.mu

        exo = None
        if x is not None:
            x_arr = np.asarray(x, dtype=float)
            if x_arr.ndim == 1:
                x_arr = x_arr[:, None]
            gammas = np.asarray(params[3:], dtype=float)
            if gammas.shape[0] != x_arr.shape[1]:
                raise ValueError(
                    'expected {} exogenous coefficient(s) after (omega, alpha, beta), got {}'
                    .format(x_arr.shape[1], gammas.shape[0]))
            # exo[t] is the exogenous contribution to sigma2[t]
            exo = x_arr @ gammas

        sigma2 = np.zeros(t_max + 1)
        sigma2[0] = np.var(y_arr)
        for t in range(1, t_max):
            sigma2[t] = omega + alpha * eps[t - 1] ** 2 + beta * sigma2[t - 1]
            if exo is not None:
                sigma2[t] += exo[t]

        # One-step-ahead forecast for the period after the last observation
        forecast = omega + alpha * eps[t_max - 1] ** 2 + beta * sigma2[t_max - 1]
        if exo is None:
            sigma2[t_max] = forecast
        elif len(exo) > t_max:
            sigma2[t_max] = forecast + exo[t_max]
        else:
            sigma2[t_max] = np.nan
        return eps, sigma2

    def log_likelihood(self, params_repam, y, x=None, fmin=False):
        '''
        Objective function used for estimation.

        Takes the reparametrized numpy array log((omega, alpha, beta[, gammas...]))
        as input, stores the back-transformed values in ``self.params`` and
        evaluates

            sum_{t=1}^{T-1} (log(sigma2[t]) + (y[t] - mu)**2 / sigma2[t]) / T,

        which is proportional to the negative Gaussian log-likelihood with
        constants dropped, so smaller values mean a better fit.

        Returns the scalar objective when ``fmin`` is True, otherwise the list
        ``[objective, sigma2]`` where ``sigma2`` is the (T + 1,) numpy array of
        conditional variances (last entry: one-step-ahead forecast).
        '''
        self.params = self.repam(params_repam)
        eps, sigma2 = self._variance_path(self.params, y, x)
        t_max = len(eps)
        # The first observation only initializes the recursion, so it has no contribution
        in_sample = sigma2[1:t_max]
        avg_log_like = np.sum(np.log(in_sample) + eps[1:] ** 2 / in_sample) / t_max
        if fmin:
            return avg_log_like
        return [avg_log_like, sigma2]

    def filter(self, y):
        '''
        Return the (len(y) + 1,) array of conditional variances implied by the
        current ``self.params`` (omega, alpha, beta only; exogenous terms are
        not used). The last entry is the one-step-ahead forecast.
        '''
        _, sigma2 = self._variance_path(self.params, y)
        return sigma2

    def repam(self, params_repam):
        '''Map unconstrained optimizer values back to positive parameters.'''
        return np.exp(params_repam)

    def inv_repam(self, params):
        '''Map positive parameters to the unconstrained (log) scale.'''
        return np.log(params)

    def VaR(self, y, pct=(0.01, 0.025, 0.05)):
        '''
        Gaussian Value-at-Risk paths for the given tail probabilities.

        Returns a dict keyed by ``str(level)`` whose values are
        ``mu + norm.ppf(level) * sqrt(sigma2)`` with ``sigma2`` taken from
        ``filter(y)`` using the current ``self.params``.
        '''
        est_variance = self.filter(y)
        vol = np.sqrt(est_variance)
        return {str(level): self.mu + norm.ppf(level) * vol for level in pct}

In [4]:
# Step 1: Download S&P 500 (^GSPC) price history for 2010-01-01 to 2024-09-10
start = datetime(2010, 1, 1)
end = datetime(2024, 9, 10)
snp = yf.Ticker('^GSPC')
data = snp.history(start=start, end=end)

# Step 2: Compute log returns.
# The first row has no previous close, so its return is NaN. Column assignment
# re-aligns on the index, which is why dropping NaN before assigning has no
# effect on the frame; the NaN is removed only in the `log_returns` series.
data['Log_Returns'] = np.log(data['Close'] / data['Close'].shift(1))
log_returns = data['Log_Returns'].dropna()

data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Log_Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010-01-04 00:00:00-05:00,1116.560059,1133.869995,1116.560059,1132.98999,3991400000,0.0,0.0,
2010-01-05 00:00:00-05:00,1132.660034,1136.630005,1129.660034,1136.52002,2491020000,0.0,0.0,0.003111
2010-01-06 00:00:00-05:00,1135.709961,1139.189941,1133.949951,1137.140015,4972660000,0.0,0.0,0.000545
2010-01-07 00:00:00-05:00,1136.27002,1142.459961,1131.319946,1141.689941,5270680000,0.0,0.0,0.003993
2010-01-08 00:00:00-05:00,1140.52002,1145.390015,1136.219971,1144.97998,4389590000,0.0,0.0,0.002878


In [134]:
# Load the data
sentiment_df = pd.read_csv('../data/nyt_sentiment.csv')
sentiment_df.index = pd.DatetimeIndex(sentiment_df['adjusted_date'])

In [135]:
# Strip the timezone from the price index before joining on date
# (presumably the sentiment index is tz-naive -- verify against the CSV).
data.index = pd.DatetimeIndex(data.index.tz_localize(None))

sentiment_cols = ['mean_pos_sentiment', 'mean_neg_sentiment', 'mean_neutral_sentiment']

# Keep only dates present in both frames, then drop rows with a missing return
# (e.g. the first price row) or missing sentiment, so the model inputs below are
# NaN-free and stay row-aligned with each other.
data_with_sentiment = (
    data.join(sentiment_df, how='inner')
        .dropna(subset=['Log_Returns'] + sentiment_cols)
)

log_returns = data_with_sentiment['Log_Returns']
exo_sentiment = data_with_sentiment[sentiment_cols].to_numpy()

In [5]:
# Baseline GARCH(1,1) without exogenous regressors, fitted to log returns scaled by 100.
baseline_start = [0.5] * 3  # starting values for (omega, alpha, beta)
garch_baseline = GARCH()
garch_baseline.train(baseline_start, 100 * log_returns)

  sigma2[t] = omega + alpha * y[t - 1] ** 2 + beta * sigma2[t - 1]
  avg_log_like += (np.log (sigma2[t]) + y[t]**2 / sigma2[t]) / t_max



Results of BFGS minimization
----------------------------
  message: Optimization terminated successfully.
  success: True
   status: 0
      fun: 0.7441101867261453
        x: [-3.359e+00 -1.857e+00 -2.062e-01]
      nit: 17
      jac: [ 2.764e-06 -5.364e-07  1.013e-06]
 hess_inv: [[ 3.231e+01  1.040e+01 -3.396e+00]
            [ 1.040e+01  1.591e+01 -2.801e+00]
            [-3.396e+00 -2.801e+00  6.324e-01]]
     nfev: 96
     njev: 24

Resulting params = [0.0347648  0.15615129 0.81364313]


In [None]:
# GARCH(1,1) with the sentiment columns as exogenous variance regressors.
sentiment_start = [0.5] * 6  # (omega, alpha, beta) plus one coefficient per sentiment column
garch_with_sentiment = GARCH()
garch_with_sentiment.train(sentiment_start, 100 * log_returns, exo_sentiment)

In [6]:
from arch import arch_model

# Reference fit with the `arch` package on the same scaled returns:
# zero-mean GARCH(1,1), used to cross-check the hand-written estimates.
model = arch_model(100 * log_returns, mean='Zero', vol='GARCH', p=1, q=1)
garch_fit = model.fit(disp='off')

In [7]:
# Show the reference estimates (omega, alpha[1], beta[1]) for comparison with the hand-written GARCH fit
garch_fit.summary()

0,1,2,3
Dep. Variable:,Log_Returns,R-squared:,0.0
Mean Model:,Zero Mean,Adj. R-squared:,0.0
Vol Model:,GARCH,Log-Likelihood:,-4768.75
Distribution:,Normal,AIC:,9543.51
Method:,Maximum Likelihood,BIC:,9562.15
,,No. Observations:,3694.0
Date:,"Sat, Nov 02 2024",Df Residuals:,3694.0
Time:,13:31:21,Df Model:,0.0

0,1,2,3,4,5
,coef,std err,t,P>|t|,95.0% Conf. Int.
omega,0.0348,7.386e-03,4.714,2.427e-06,"[2.034e-02,4.930e-02]"
alpha[1],0.1563,1.910e-02,8.184,2.745e-16,"[ 0.119, 0.194]"
beta[1],0.8135,1.971e-02,41.265,0.000,"[ 0.775, 0.852]"
