In [1]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import optimize

from src.HMM import *

In [46]:
# Load from csv and compute results
msci_data = pd.read_csv('MSCI World Index_11_22_21-12_31_93.csv')
returns = msci_data.set_index('Date').sort_index().pct_change().dropna().query('Date >= "1997-01-01" and Date <= "2015-12-31"')
log_returns = np.log(1 + returns.Close.values)[:,np.newaxis]

In [47]:
N_eff = 260
f = 1 - 1/N_eff
A = 1/N_eff

In [32]:
pi_params = np.array([[0.17/252,0.11/np.sqrt(252)],[-0.32/252,0.35/np.sqrt(252)]]) 
# r = np.random.randn(2,1)*0.01 + (1/2)
# Gamma = np.hstack([r,1-r])
Gamma = np.array([[0.99,0.01],[0.035,0.965]])
# delta_r = np.random.randn(1)*0.01 + (1/2)
# delta = np.array([delta_r[0], 1 - delta_r[0]])
delta = np.array([0.99, 0.01])
pi = [ NDist(p[0], p[1]) for p in pi_params ]

In [33]:
theta_0 = params_to_vec(delta, Gamma, pi)

In [6]:
weights = f**np.arange(log_returns.shape[0], 0, -1)

In [55]:
theta_hat = initialize_theta(50, theta_0, log_returns, N_eff, f, A)

In [54]:
theta_0 = params_to_vec(*estimate_parameters(log_returns[:N_eff], method='em'))

In [56]:
theta_hat

array([[0.4421498331588374],
       [0.6305362342087927],
       [0.9977263174567201],
       [0.002482471130195164],
       [0.01843801285874401],
       [0.0003182151703767247],
       [0.006633164019670593]], dtype=object)

In [57]:
for i in range(1,20):
#     w_score_hat, inf_hat, l_hat = estimate_weighted_score(theta_hat, N_eff+i, log_returns, f)
    delta_hat, Gamma_hat, pi_hat = vec_to_params(theta_hat)
    l_hat, score_hat, inf_hat = score_and_information(delta_hat, Gamma_hat, pi_hat, log_returns[:N_eff+i])
    theta_hat = theta_hat + A*np.linalg.inv(inf_hat)@score_hat
    

In [58]:
theta_hat

array([[0.4594296105964495],
       [0.6158304256844532],
       [0.997257969554121],
       [0.0008397316402982062],
       [0.018208752849881653],
       [0.0002806130417421476],
       [0.006729180559473825]], dtype=object)

In [None]:
%%time
start = 50
l = 0
deltaHat, GammaHat, piHat = vec_to_params(theta_0)
thetas = [theta_0.squeeze()]
T = N_eff

for t in range(1, T):    
    w = f**(T-t)
    l_t, score_t = calculate_score(deltaHat, GammaHat, piHat, log_returns[:t])
    l += w*l_t

    if t>1:
        score = score + (score_t - score)*w
        inf = inf + (1/t)*(score@score.T - inf)
    else:
        score = score_t
        inf = score@score.T
        theta_hat = theta_0

    if t > start:   
        theta_hat = theta_hat + A*np.linalg.inv(inf)@score
        thetas.append(theta_hat.squeeze())
        if np.isnan(theta_hat.astype(float)).any() or (theta_hat[4] < 0) or (theta_hat[6] < 0):
            print('NaN found')
        theta_hat[0] = min(max(theta_hat[0],0),1)
        theta_hat[1] = min(max(theta_hat[1],0),1)
        theta_hat[2] = min(max(theta_hat[2],0),1)
        theta_hat[3] = min(max(theta_hat[2],-1/252),1/252)
        theta_hat[4] = min(max(theta_hat[6],0.00001),1/np.sqrt(252))
        theta_hat[5] = min(max(theta_hat[2],-1/252),1/252)
        theta_hat[6] = min(max(theta_hat[6],0.00001),1/np.sqrt(252))
        deltaHat, GammaHat, piHat = vec_to_params(theta_hat.squeeze())


In [None]:
theta_hat

In [None]:
score, inf, l = estimate_weighted_score(theta_hat, N_eff + 1, log_returns, 1)

In [None]:
A*np.linalg.inv(inf)@score

In [None]:
calculate_score(deltaHat, GammaHat, piHat, log_returns[:N_eff+1])

In [None]:
theta_hat + A*np.linalg.inv(inf)@score

In [None]:
deltaHat

In [None]:
GammaHat

In [None]:
piHat

In [None]:
thetas

In [None]:
score

In [None]:
foo = lambda x: -log_likelihood_optim(x, log_returns[:N_eff])

In [None]:
result_nm = optimize.minimize(
        foo, 
        np.array(theta_0), 
        method='Nelder-Mead',
        bounds=[
                (0,1),
                (0,1),
                (0,1),
                (-0.05,0.05),
                (0.002,0.05),
                (-0.1,0.1),
                (0.002,0.05)
        ])

In [None]:
result_nm

In [None]:
result_tnc = optimize.minimize(
        foo, 
        np.array(theta_0), 
        method='TNC',
        bounds=[
                (0,1),
                (0,1),
                (0,1),
                (-0.05,0.05),
                (0.002,0.05),
                (-0.1,0.1),
                (0.002,0.05)
        ])

In [None]:
result_tnc

In [None]:
result_t_ex = optimize.minimize(
        foo, 
        np.array(theta_0), 
        method='trust-exact',
        jac = None,
        hess = optimize.BFGS
)

In [None]:
for i in range(260):
    delta, Gamma, pi = estimate_parameters(l_ret_t, theta_0=theta)
    calculate_score(delta, Gamma, pi, observations)

In [None]:
f**(1000)

In [None]:
theta_hat = params_to_vec(deltaHat, GammaHat, piHat)

In [12]:
theta_hat

array([[0.6175814182325154],
       [0.4854415167434831],
       [0.4047907098735073],
       [0.003968253968253968],
       [0.02287334416458487],
       [0.003968253968253968],
       [0.02287334416458487]], dtype=object)

In [None]:
w_score_0, inf_0, l_0 = estimate_weighted_score(theta_0, N_eff+1, log_returns, f)

In [None]:
A*np.linalg.inv(inf_0)@w_score_0

In [22]:
w_score_hat, inf_hat, l_hat = estimate_weighted_score(theta_hat, N_eff+1, log_returns, f)

In [None]:
l_0

In [None]:
l_hat

In [None]:
pd.DataFrame(inf_0)

In [None]:
pd.DataFrame(inf_hat)

In [13]:
A*np.linalg.inv(inf_0)@w_score_0

NameError: name 'inf_0' is not defined

In [23]:
A*np.linalg.inv(inf_hat)@w_score_hat

array([[-8.52283233e-03],
       [ 3.66934423e-03],
       [ 1.87652068e-03],
       [-5.22617193e-05],
       [-9.92255279e-05],
       [-8.10198130e-06],
       [-2.58281651e-06]])

In [None]:
A*np.linalg.inv(inf_hat)@w_score_hat

In [None]:
%%time
# theta_hat = theta_0
for i in range(N_eff+1,N_eff+6):
    score, inf, l = estimate_weighted_score(theta_hat, i, log_returns, f)
    theta_hat = theta_hat + A*np.linalg.inv(inf)@score

In [None]:
log_returns[:265]

In [None]:
theta_hat

In [None]:
theta_hat = theta_hat + A*np.linalg.inv(inf_hat)@w_score_hat

In [None]:
theta_hat

In [None]:
pi_params = np.array([[0.001062485, np.sqrt(9.378643e-05)],[-0.001423847, np.sqrt(2.266116e-05)]]) 
Gamma = np.array([[0.81923264,0.1807674],[0.05821518,0.9417848]])
delta = np.array([0.0008201211, 0.9991798789])
pi = [ NDist(p[0], p[1]) for p in pi_params ]

In [None]:
l, score, inf = score_and_information(delta, Gamma, pi, log_returns, weights=weights)

In [None]:
A*np.linalg.inv(inf)@score

In [None]:
theta_0 + A*np.linalg.inv(inf)@score

In [17]:
vec_to_params(theta_hat)

(array([0.61758142, 0.38241858]),
 array([[0.48544152, 0.51455848],
        [0.59520929, 0.40479071]]),
 [{
      "class" : "Distribution",
      "name" : "NormalDistribution",
      "parameters" : [
          0.003968253968253968,
          0.02287334416458487
      ],
      "frozen" : false
  },
  {
      "class" : "Distribution",
      "name" : "NormalDistribution",
      "parameters" : [
          0.003968253968253968,
          0.02287334416458487
      ],
      "frozen" : false
  }])