In [32]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
import seaborn as sns

import scipy.stats
from scipy.optimize import minimize, least_squares
from scipy.stats import chi2, norm
import time

from tqdm import tqdm
import warnings
warnings.simplefilter("ignore")

In [33]:
 pip install numdifftools



In [34]:
import numdifftools as nd

# Stock C

In [35]:
C = pd.read_csv('StockC.dat', header=None,sep=' ', skipinitialspace=True)
C = C.apply(np.log)
C.head()

Unnamed: 0,0
0,3.853334
1,3.859677
2,3.861782
3,3.863883
4,3.861782


In [36]:
rC = (C.shift(-1) - C).dropna().values # returns for stock C
rC

array([[ 0.00634252],
       [ 0.00210526],
       [ 0.00210084],
       ...,
       [-0.009792  ],
       [ 0.009792  ],
       [-0.02092384]])

In [37]:
E_loge2 = norm.expect(lambda x: np.log(x ** 2))
TS = rC

In [38]:
# likelihood function
def LA(param):
    m,w,b,a = param
    T = len(TS)
    eta = TS - m # error term
    logst2 = np.zeros(T)
    logst2[0] = (w + a * E_loge2) / (1 - a - b) # initial value
    for t in range(1, T):
        if eta[t - 1] ** 2 == 0:
            logst2[t] = w + b * logst2[t - 1] + a * np.log(eta[t - 1] ** 2 + 1e-100)
        else:
            logst2[t] = w + b * logst2[t - 1] + a * np.log(eta[t - 1] ** 2)

    lsum=0
    for t in range(T):
        lsum += (1/2)* (logst2[t] + (eta[t] ** 2) * np.exp(-logst2[t]))
    return lsum/T


def LA_score(param): # function for score calculation
    m,w,b,a = param
    T = len(TS)
    eta = TS - m #  error term
    logst2 = np.zeros(T)
    logst2[0] = (w + a * E_loge2) / (1 - a - b) # initial value
    for t in range(1, T):
        if eta[t - 1] ** 2 == 0:
            logst2[t] = w + b * logst2[t - 1] + a * np.log(eta[t - 1] ** 2 + 1e-100)
        else:
            logst2[t] = w + b * logst2[t - 1] + a * np.log(eta[t - 1] ** 2)

    lsc=np.zeros(T)
    for t in range(T):
        lsc[t] = (1/2)* (logst2[t] + (eta[t] ** 2) / np.exp(logst2[t]))
    lsc=-lsc
    return lsc

In [39]:
# numerical optimizer
result = minimize(
    LA,
    x0 = (0.5, 0.5, 0.5, 0.5),
    method='nelder-mead', # just the first one in the documentation) I also tried couple of
    # others, but they gave too much NaNs.
    options={'xtol': 1e-20}
)

In [40]:
estimates = result.x
estimates

array([-0.00137448,  0.30304633,  0.87277186,  0.13849267])

In [41]:
# covariance matrices
s = nd.Gradient(LA_score)(est) # score function
s_derivative = nd.Hessian(LA)(est) # score derivative

V1 = np.linalg.inv(s_derivative)
V2 = np.linalg.inv((s[:, :, None] @ s[:, None, :]).mean(axis=0))
V3 = np.linalg.inv(s_derivative) @ (s[:, :, None] @ s[:, None, :]).mean(axis=0) @ np.linalg.inv(s_derivative)

In [42]:
# standard errors
se1 = np.round(np.sqrt(np.diag(V1) / len(TS)),5) # from info matrix
se2 = np.round(np.sqrt(np.diag(V2) / len(TS)),5) # from square of scores
se3 = np.round(np.sqrt(np.diag(V3) / len(TS)),5) # robust standard error estimate

# Stock T

In [43]:
TT = pd.read_csv('StockT.dat', header=None,sep=' ', skipinitialspace=True)
TT = TT.apply(np.log)
TT.head()

Unnamed: 0,0
0,3.206398
1,3.204777
2,3.203965
3,3.201119
4,3.201933


In [44]:
# returns
rT = (TT.shift(-1) - TT).dropna().values
rT

array([[-0.0016214 ],
       [-0.00081169],
       [-0.00284611],
       ...,
       [ 0.00647251],
       [-0.00647251],
       [ 0.00647251]])

In [45]:
# calculating estimates
TS = rT

def LA(param):
    m,w,b,a = param
    T = len(TS)
    eta = TS - m
    logst2 = np.zeros(T)
    logst2[0] = (w + a * E_loge2) / (1 - a - b)
    for t in range(1, T):
        if eta[t - 1] ** 2 == 0:
            logst2[t] = w + b * logst2[t - 1] + a * np.log(eta[t - 1] ** 2 + 1e-100)
        else:
            logst2[t] = w + b * logst2[t - 1] + a * np.log(eta[t - 1] ** 2)
    lsum=0
    for t in range(T):
        lsum += (1/2)* (logst2[t] + (eta[t] ** 2) * np.exp(-logst2[t]))
    return lsum/T

def LA_score(param):
    m,w,b,a = param
    T = len(TS)
    eta = TS - m
    logst2 = np.zeros(T)
    logst2[0] = (w + a * E_loge2) / (1 - a - b)
    for t in range(1, T):
        if eta[t - 1] ** 2 == 0:
            logst2[t] = w + b * logst2[t - 1] + a * np.log(eta[t - 1] ** 2 + 1e-100)
        else:
            logst2[t] = w + b * logst2[t - 1] + a * np.log(eta[t - 1] ** 2)
    lsc=np.zeros(T)
    for t in range(T):
        lsc[t] = (1/2)* (logst2[t] + (eta[t] ** 2) / np.exp(logst2[t]))
    lsc=-lsc
    return lsc

In [46]:
result = minimize(
    LA,
    x0 = (0.06, -0.02, 0.3, 0.4),
    method='nelder-mead',
    options={'xtol': 1e-20}
)

In [47]:
estimatestT = result.x
estimatestT

array([ 6.81566886e-03, -2.36932856e-04,  8.45375821e-01,  1.38800267e-01])

In [48]:
# covariance matrices
s = nd.Gradient(LA_score)(estimatestT) # score function
s_derivative = nd.Hessian(LA)(estimatestT) # score derivative

In [49]:
V1 = np.linalg.inv(s_derivative)
V2 = np.linalg.inv((s[:, :, None] @ s[:, None, :]).mean(axis=0))
V3 = np.linalg.inv(s_derivative) @ (s[:, :, None] @ s[:, None, :]).mean(axis=0) @ np.linalg.inv(s_derivative)

In [50]:
# standard errors
se1 = np.round(np.sqrt(np.diag(V1) / len(TS)),5)
se2 = np.round(np.sqrt(np.diag(V2) / len(TS)),5)
se3 = np.round(np.sqrt(np.diag(V3) / len(TS)),5)

# Comparison

In [51]:
df_C = pd.DataFrame(np.stack([estimates, se1, se2, se3]),
             columns=["m", "w", "b", "a"],
             index=["Estimates", "se1", "se2", "se3"]).T
df_T = pd.DataFrame(np.stack([est, se1, se2, se3]),
             columns=["m", "w", "b", "a"],
             index=["Estimates", "se1", "se2", "se3"]).T

In [52]:
comp = pd.concat([df_C, df_T], axis=1, keys=['C', 'T'])
comp

Unnamed: 0_level_0,C,C,C,C,T,T,T,T
Unnamed: 0_level_1,Estimates,se1,se2,se3,Estimates,se1,se2,se3
m,-0.001374,0.00357,0.00015,0.08802,-0.001374,0.00357,0.00015,0.08802
w,0.303046,,0.01198,0.0,0.303046,,0.01198,0.0
b,0.872772,0.0,0.00217,0.0,0.872772,0.0,0.00217,0.0
a,0.138493,0.00021,0.00164,0.00085,0.138493,0.00021,0.00164,0.00085


Overall, estimates alpha and beta are positive (significantly).Here SE_3 appear to be robust, but still there is difference with SE_1 and 2. Moreover, numerical method appears to be sensitive to the chosen method as well as starting points.