In [1]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
# import yfinance as yf
#  from pomegranate import HiddenMarkovModel, State, NormalDistribution, GeneralMixtureModel
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from scipy import optimize

from HMM.HMM import *

### Testing with discrete samples (known result)

In [None]:
Gamma = np.array([[0.7,0.3],[0.4,0.6]])
pi = np.array([[0.1,0.4,0.5],[0.7,0.2,0.1]])
delta = [0.6,0.4]
sequence = [0,1,0,2]

In [None]:
l = logLikelihood_discrete(delta, Gamma, pi, sequence)

In [None]:
np.exp(l)

### Load Data

In [2]:
# Using yfinance
# msci_world = yf.Ticker('URTH')
# msci_data = msci_world.history(period='max')
# msci_data.drop_duplicates(inplace=True)

# Loading from csv
msci_data = pd.read_csv('MSCI World Index_11_22_21-12_31_93.csv')

In [80]:
log_returns = np.log(1 + msci_data.set_index('Date').sort_index().pct_change().dropna().query('Date >= "1997-01-01"').Close.values)

In [4]:
N_eff = 260

In [5]:
f = 1 - 1/N_eff

In [6]:
weights = f**np.arange(N_eff, 0, -1)

In [81]:
kmeans = KMeans(n_clusters=2).fit(log_returns[:,np.newaxis])

## Cell below can be used to guess mean and std dev of 2 states

In [82]:
pi = []
for i in range(kmeans.n_clusters):
    X = log_returns[np.where(kmeans.labels_ == i)[0]]
    pi.append(NDist(kmeans.cluster_centers_[i][0],X.std()))

In [83]:
pi_params = np.array([[0.05/252,0.11/np.sqrt(252)],[-0.1/252,0.3/np.sqrt(252)]]) 

In [84]:
pi = [NDist(p[0],p[1]) for p in pi_params]

In [85]:
r = np.random.randn(2,1)*0.01 + (1/2)
Gamma = np.hstack([r,1-r])
delta = np.random.randn(2,1)*0.01 + (1/2)

In [86]:
l = log_likelihood(delta, Gamma, pi, log_returns[:N_eff], weights=weights)

In [87]:
l

8.482320928868047

In [88]:
theta_0 = [delta[0][0],Gamma[0,1],Gamma[1,0],pi_params[0][0],pi_params[0][1],pi_params[1][0],pi_params[1][1]]

In [89]:
theta_0

[0.5077055826569,
 0.5089675748916382,
 0.49371308141923975,
 0.0001984126984126984,
 0.006929348671835832,
 -0.0003968253968253968,
 0.01889822365046136]

In [93]:
foo = lambda x: -log_likelihood_optim(x,log_returns)

In [94]:
theta = optimize.minimize(foo, np.array(theta_0), method='Nelder-Mead',bounds=[(0,1),(0,1),(0,1),(-0.05,0.05),(0.002,0.05),(-0.1,0.1),(0.002,0.05)])

  _lambda[j, t] /= Lambda[t-1]


In [95]:
theta.x

array([ 1.71886280e-03,  1.00000000e+00,  1.56178938e-01,  8.75618896e-04,
        2.00000000e-03, -2.09113589e-03,  3.05494832e-03])

In [96]:
theta_0_prime = [0.7,0.99,0.95,0.001,np.sqrt(1/252)*0.11,-0.005,np.sqrt(1/252)*0.3]

In [None]:
theta_0_prime

In [99]:
foo_prime = lambda x: -log_likelihood_optim(x,log_returns[:N_eff], weights=weights)

In [100]:
theta_prime = optimize.minimize(foo_prime, np.array(theta_0),method='Nelder-Mead', bounds=[(0,1),(0,1),(0,1),(-0.05,0.05),(0.002,0.05),(-0.1,0.1),(0.002,0.05)])

In [109]:
theta_prime.x[4]**2

4e-06

In [None]:
logLikelihood_optim(theta_prime.x, log_returns[:N_eff], weights=weights)

In [None]:
msci_data.Close.pct_change()[:N_eff].hist()

### Calculating forecasts according to HMM

In [None]:
K = 100

Pi = np.eye(2)
for y_t in log_returns[:K]:
    P_t = np.diag([d1.probability(y_t),d2.probability(y_t)])
    Pi = Pi@(Gamma@P_t)

alpha = (delta.T@Pi/((delta.T@Pi).sum())).T

alphas = np.zeros((K,2))
alphas[0] = alpha.T
for i in range(1, K):
    alphas[i] = alphas[i-1]@Gamma

means = np.array([[d1.parameters[0]],[d2.parameters[0]]])
sigmas = np.array([[d1.parameters[1]],[d2.parameters[1]]])

mu = alphas@means

sigma_sqd = alphas@(means**2+sigmas**2) + mu**2

forecasted_mean = np.exp(mu+sigma_sqd/2)-1

forecasted_var = (np.exp(sigma_sqd)-1)*np.exp(2*mu+sigma_sqd)

forecasted_mean

In [None]:
for t in range(T):
    length N_eff = 260
    estimate parameters based of data 
    calculate forecast
    solve using corbin's code
    calc new portfolio