## Initial Setting

In [2]:
import numpy as np
import pandas as pd
import msm
import sklearn.metrics
from simulatedata import simulatedata
from simulatedata_initial_state import simulatedata_with_initial_prob
from tqdm.notebook import tqdm

from arch import arch_model
from random import gauss

## Data Selection

Pour DEXJPUS.csv

In [3]:
dat2 = pd.read_csv("DEXJPUS.csv")                                   
dat2 = dat2.loc[dat2.DEXJPUS != "."].DEXJPUS.astype(float)
dat2 = np.array(dat2)
dat2_rtn = dat2[0:-1]
dat2 = np.log(dat2[1:])-np.log(dat2[0:-1])
#dat2 = dat2[dat2 != 0]
dat2 = dat2[:,np.newaxis]
dat2_rtn = dat2_rtn[:,np.newaxis]
T = len(dat2)
E = np.rint(0.6*T).astype(int)
dat2E = dat2[0:E,]
dat2F = dat2[E:,]
dat2_rtnE = dat2_rtn[0:E,]
dat2_rtnF = dat2_rtn[E:,]
dat2E.shape, dat2F.shape

((4379, 1), (2919, 1))

Pour IBM

In [4]:
symbol = "IBM"
frequence = 15

pathName = 'C:\\users\\hp\\MSM_dossier_sheng\\'+symbol+'\\'+str(frequence)+'min\\'

dat2 = pd.read_csv(pathName + "merge_res.csv")
dat2 = dat2.sort_values("time",ascending=True)
dat2 = dat2.loc[dat2.close != "."].close.astype(float)
dat2 = np.array(dat2)
dat2_rtn = dat2[0:-1]
dat2 = np.log(dat2[1:])-np.log(dat2[0:-1])
#dat2 = dat2[dat2 != 0]
dat2 = dat2[:,np.newaxis]
dat2_rtn = dat2_rtn[:,np.newaxis]
T = len(dat2)
E = np.rint(0.6*T).astype(int)
dat2E = dat2[0:E,]
dat2F = dat2[E:,]
dat2_rtnE = dat2_rtn[0:E,]
dat2_rtnF = dat2_rtn[E:,]
dat2E.shape, dat2F.shape


((6334, 1), (4223, 1))

## Estimate MSM Parameter

In [5]:
data = dat2E               # Simulated dta
kbar = 4
niter = 1
temperature = 1.0
stepsize = 1.0

parameters, LL, niter, output = msm.glo_min(kbar, data, niter, temperature, stepsize)

# name parameters for later use:
b_sim = parameters[0]
m_0_sim = parameters[1]
gamma_kbar_sim = parameters[2]
sigma_sim = parameters[3]
LL_sim = LL

print("Parameters from glo_min for Simulated dataset: ", "\n"
      "kbar = ", kbar,"\n"
      'b = %.5f' % b_sim,"\n"
      'm_0 = %.5f' % m_0_sim,"\n"
      'gamma_kbar = %.5f' % gamma_kbar_sim,"\n"
      'sigma = %.5f' % (sigma_sim*np.sqrt(252)),"\n"
      'Likelihood = %.5f' % LL_sim,"\n"
      "niter = " , niter,"\n"
      "output = " , output,"\n")

Parameters from glo_min for Simulated dataset:  
kbar =  4 
b = 1.98396 
m_0 = 1.61340 
gamma_kbar = 0.47534 
sigma = 0.03948 
Likelihood = -31531.71655 
niter =  1 
output =  ['requested number of basinhopping iterations completed successfully'] 



In [6]:
b = parameters[0]
m0 = parameters[1]
gamma_kbar = parameters[2]
sigma = parameters[3]

theta_in = [b, gamma_kbar, sigma]

## Analysis MSM Binomial

### in sample

In [7]:
def r2p(p,y_pred,delta):
    P = np.zeros(y_pred.shape)
    P = P[delta:]
    for i in range(P.shape[0]):
        P[i,0] = p[i,0]
        for k in range(delta):
            P[i,0]*=np.exp(y_pred[i+k,0])
    return P

In [8]:
delta = 1
x = dat2E[:-delta,]
y = dat2E[delta:,]
p = dat2_rtnE[delta:,]
R2 = 0
MSE = 0
MAE = 0
n = 100

init = simulatedata(b,m0,gamma_kbar,sigma,kbar,delta)
for i in tqdm(range(n)):
    y_pred = np.zeros((E,1))
    for j in range(E-delta):
        if j<delta:
            y_pred[j,0] = init[j]
        else:
            y_pred[j,0] = simulatedata(b,m0,gamma_kbar,sigma,kbar,delta)[-1]
    #y_pred = simulatedata(b,m0,gamma_kbar,sigma,kbar,E)
    p_pred = r2p(p,y_pred,delta)
    y_pred = y_pred[delta:,]
    R2 += sklearn.metrics.r2_score(y,y_pred)
    MSE += sklearn.metrics.mean_squared_error(p,p_pred)
    MAE += sklearn.metrics.mean_absolute_error(p,p_pred)
    

print("The R2 value of " , delta , " day(s) forcast in log return is: " , R2/n)
print("The MSE value of " , delta , " day(s) forcast in price is: " , MSE/n)
print("The MAE value of " , delta , " day(s) forcast in price is: " , MAE/n)

  0%|          | 0/100 [00:00<?, ?it/s]

The R2 value of  1  day(s) forcast in log return is:  -3.5772598018374167
The MSE value of  1  day(s) forcast in price is:  0.33964936402270013
The MAE value of  1  day(s) forcast in price is:  0.4128470118914808


### out of sample

In [9]:
delta = 1
x = dat2F[:-delta,]
y = dat2F[delta:,]
p = dat2_rtnF[delta:,]
n=100

R2 = 0
MSE = 0
MAE = 0

for i in tqdm(range(n)):
    y_pred = simulatedata(b,m0,gamma_kbar,sigma,kbar,T-E)
    p_pred = r2p(p,y_pred,delta)
    y_pred = y_pred[delta:,]
    R2 += sklearn.metrics.r2_score(y,y_pred)
    MSE += sklearn.metrics.mean_squared_error(p,p_pred)
    MAE += sklearn.metrics.mean_absolute_error(p,p_pred)
    

print("The R2 value of " , delta , " day(s) forcast in log return is: " , R2/n)
print("The MSE value of " , delta , " day(s) forcast in price is: " , MSE/n)
print("The MAE value of " , delta , " day(s) forcast in price is: " , MAE/n)

  0%|          | 0/100 [00:00<?, ?it/s]

The R2 value of  1  day(s) forcast in log return is:  -0.5494558365052322
The MSE value of  1  day(s) forcast in price is:  0.1028215916093434
The MAE value of  1  day(s) forcast in price is:  0.2046359816322577


## Analysis MSM initial state

### in sample

In [10]:
x = dat2E[:-delta,]
probs = []

for j in tqdm(range(E)):
    probs.append(msm.g_pi_t(m0, kbar, x[:j+1,], theta_in))

  0%|          | 0/6334 [00:00<?, ?it/s]

In [11]:
delta = 1
x = dat2E[:-delta,]
y = dat2E[delta:,]
p = dat2_rtnE[delta:,]

R2 = 0
MSE = 0
MAE = 0
n = 100

init = simulatedata_with_initial_prob(b,m0,gamma_kbar,sigma,kbar,probs[0],delta)
for i in tqdm(range(n)):
    y_pred = np.zeros((E,1))
    for j in range(E-delta):
        if j<delta:
            y_pred[j,0] = init[j]
        else:
            y_pred[j,0] = simulatedata_with_initial_prob(b,m0,gamma_kbar,sigma,kbar,probs[j-delta],delta)[-1]

    p_pred = r2p(p,y_pred,delta)
    y_pred = y_pred[delta:,]
    R2 += sklearn.metrics.r2_score(y,y_pred)
    MSE += sklearn.metrics.mean_squared_error(p,p_pred)
    MAE += sklearn.metrics.mean_absolute_error(p,p_pred)


print("The R2 value of " , delta , " day(s) forcast in log return is: " , R2/n)
print("The MSE value of " , delta , " day(s) forcast in price is: " , MSE/n)
print("The MAE value of " , delta , " day(s) forcast in price is: " , MAE/n)

  0%|          | 0/100 [00:00<?, ?it/s]

The R2 value of  1  day(s) forcast in log return is:  -2.1536487705775658
The MSE value of  1  day(s) forcast in price is:  0.20463039714835507
The MAE value of  1  day(s) forcast in price is:  0.2948465898802649


### out of sample

In [12]:
x = dat2[:-delta,]
probs = []

for j in tqdm(range(T-E)):
    probs.append(msm.g_pi_t(m0, kbar, x[:E+j+1,], theta_in))

  0%|          | 0/4223 [00:00<?, ?it/s]

In [13]:
delta = 1
x = dat2F[:-delta,]
y = dat2F[delta:,]
p = dat2_rtnF[delta:,]
R2 = 0
MSE = 0
MAE = 0
n = 100

for i in tqdm(range(n)):
    y_pred = np.zeros((T-E,1))
    for j in range(T-E):
        y_pred[j,0] = simulatedata_with_initial_prob(b,m0,gamma_kbar,sigma,kbar,probs[j],1)[0]

    p_pred = r2p(p,y_pred,delta)
    y_pred = y_pred[delta:,]
    R2 += sklearn.metrics.r2_score(y,y_pred)
    print(y)
    print(y_pred)
    MSE += sklearn.metrics.mean_squared_error(p,p_pred)
    print(p)
    print(p_pred)
    MAE += sklearn.metrics.mean_absolute_error(p,p_pred)


print("The R2 value of " , delta , " day(s) forcast in log return is: " , R2/n)
print("The MSE value of " , delta , " day(s) forcast in price is: " , MSE/n)
print("The MAE value of " , delta , " day(s) forcast in price is: " , MAE/n)


  0%|          | 0/100 [00:00<?, ?it/s]

[[ 0.00709283]
 [-0.00345569]
 [ 0.00050646]
 ...
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]]
[[ 0.00026869]
 [-0.0005451 ]
 [ 0.0008031 ]
 ...
 [ 0.00324853]
 [ 0.00186668]
 [-0.01205402]]
[[118.01]
 [118.85]
 [118.44]
 ...
 [128.4 ]
 [128.4 ]
 [128.4 ]]
[[118.32151701]
 [118.88193849]
 [118.37545637]
 ...
 [127.68339409]
 [128.8177889 ]
 [128.63990573]]
[[ 0.00709283]
 [-0.00345569]
 [ 0.00050646]
 ...
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]]
[[ 1.33985300e-03]
 [-4.78943228e-07]
 [-1.81880229e-04]
 ...
 [-6.93949682e-03]
 [-4.91610113e-03]
 [ 1.57260157e-06]]
[[118.01]
 [118.85]
 [118.44]
 ...
 [128.4 ]
 [128.4 ]
 [128.4 ]]
[[118.16363864]
 [119.00934826]
 [118.43994327]
 ...
 [127.7048755 ]
 [127.51205312]
 [127.77032166]]
[[ 0.00709283]
 [-0.00345569]
 [ 0.00050646]
 ...
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]]
[[ 0.00080025]
 [ 0.00119044]
 [ 0.00126906]
 ...
 [-0.00506316]
 [ 0.00073232]
 [-0.01943238]]
[[118.01]
 [118.85]
 [118.44]
 ...
 [128.4 ]
 [128.4 ]
 [128.4 ]

[[ 0.00709283]
 [-0.00345569]
 [ 0.00050646]
 ...
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]]
[[ 4.49850335e-04]
 [-8.24483051e-05]
 [ 8.61162167e-04]
 ...
 [-5.13402022e-03]
 [-5.91837257e-03]
 [-1.07085476e-03]]
[[118.01]
 [118.85]
 [118.44]
 ...
 [128.4 ]
 [128.4 ]
 [128.4 ]]
[[117.83859369]
 [118.90347674]
 [118.43023523]
 ...
 [129.27359165]
 [127.74248111]
 [127.64232527]]
[[ 0.00709283]
 [-0.00345569]
 [ 0.00050646]
 ...
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]]
[[-0.00107801]
 [ 0.00048014]
 [-0.000515  ]
 ...
 [-0.00277312]
 [ 0.00926136]
 [-0.00097678]]
[[118.01]
 [118.85]
 [118.44]
 ...
 [128.4 ]
 [128.4 ]
 [128.4 ]]
[[116.36536271]
 [118.72194787]
 [118.49688108]
 ...
 [127.97317931]
 [128.04442408]
 [129.5946821 ]]
[[ 0.00709283]
 [-0.00345569]
 [ 0.00050646]
 ...
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]]
[[ 0.00037767]
 [-0.00010661]
 [ 0.00047559]
 ...
 [-0.00937409]
 [ 0.00923208]
 [ 0.00797028]]
[[118.01]
 [118.85]
 [118.44]
 ...
 [128.4 ]
 [128.4 ]
 [128.4 ]

[[ 0.00709283]
 [-0.00345569]
 [ 0.00050646]
 ...
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]]
[[-0.0007441 ]
 [-0.00074749]
 [ 0.00077351]
 ...
 [-0.00213233]
 [ 0.01342787]
 [ 0.00132878]]
[[118.01]
 [118.85]
 [118.44]
 ...
 [128.4 ]
 [128.4 ]
 [128.4 ]]
[[119.26249497]
 [118.76159665]
 [118.35149994]
 ...
 [128.43027929]
 [128.12650038]
 [130.1357659 ]]
[[ 0.00709283]
 [-0.00345569]
 [ 0.00050646]
 ...
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]]
[[ 1.01315089e-04]
 [ 1.23296408e-04]
 [-5.81355645e-05]
 ...
 [ 5.75096296e-03]
 [ 1.38028870e-05]
 [-2.76657694e-04]]
[[118.01]
 [118.85]
 [118.44]
 ...
 [128.4 ]
 [128.4 ]
 [128.4 ]]
[[118.24585499]
 [118.86204191]
 [118.45460413]
 ...
 [129.61883975]
 [129.14055104]
 [128.4017723 ]]
[[ 0.00709283]
 [-0.00345569]
 [ 0.00050646]
 ...
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]]
[[-0.0003376 ]
 [ 0.00061555]
 [-0.00060623]
 ...
 [-0.00177654]
 [-0.00639273]
 [-0.0141216 ]]
[[118.01]
 [118.85]
 [118.44]
 ...
 [128.4 ]
 [128.4 ]
 [128.4 ]

[[ 0.00709283]
 [-0.00345569]
 [ 0.00050646]
 ...
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]]
[[-6.92700114e-04]
 [ 2.19477508e-05]
 [ 1.14964997e-03]
 ...
 [ 1.03947745e-02]
 [-8.65691585e-03]
 [ 2.05716952e-03]]
[[118.01]
 [118.85]
 [118.44]
 ...
 [128.4 ]
 [128.4 ]
 [128.4 ]]
[[118.69229885]
 [118.7677011 ]
 [118.44259952]
 ...
 [128.28121693]
 [129.74165004]
 [127.29324944]]
[[ 0.00709283]
 [-0.00345569]
 [ 0.00050646]
 ...
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]]
[[ 8.27411028e-04]
 [-1.08089010e-03]
 [-1.95614740e-05]
 ...
 [-2.13699474e-03]
 [-9.68450722e-03]
 [-1.07825337e-02]]
[[118.01]
 [118.85]
 [118.44]
 ...
 [128.4 ]
 [128.4 ]
 [128.4 ]]
[[117.07670564]
 [118.94837849]
 [118.31204854]
 ...
 [127.74642937]
 [128.12590285]
 [127.16251118]]
[[ 0.00709283]
 [-0.00345569]
 [ 0.00050646]
 ...
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]]
[[-0.00144926]
 [ 0.00037512]
 [-0.00060393]
 ...
 [-0.01450836]
 [-0.00946755]
 [ 0.00242071]]
[[118.01]
 [118.85]
 [118.44]
 ...
 [128

[[ 0.00709283]
 [-0.00345569]
 [ 0.00050646]
 ...
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]]
[[ 0.00051205]
 [ 0.00088394]
 [ 0.00026383]
 ...
 [ 0.00164501]
 [-0.00019961]
 [ 0.00078869]]
[[118.01]
 [118.85]
 [118.44]
 ...
 [128.4 ]
 [128.4 ]
 [128.4 ]]
[[118.12361222]
 [118.91087313]
 [118.54473958]
 ...
 [127.94320844]
 [128.61139316]
 [128.37437324]]
[[ 0.00709283]
 [-0.00345569]
 [ 0.00050646]
 ...
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]]
[[ 1.07297619e-04]
 [ 5.38464905e-05]
 [ 4.62747158e-04]
 ...
 [ 1.49935456e-03]
 [-2.59726333e-03]
 [ 2.85569318e-03]]
[[118.01]
 [118.85]
 [118.44]
 ...
 [128.4 ]
 [128.4 ]
 [128.4 ]]
[[117.54837153]
 [118.86275301]
 [118.44637775]
 ...
 [127.44771331]
 [128.59266152]
 [128.06694409]]
[[ 0.00709283]
 [-0.00345569]
 [ 0.00050646]
 ...
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]]
[[-0.00031102]
 [ 0.0015525 ]
 [ 0.00088043]
 ...
 [-0.00337406]
 [ 0.00424676]
 [-0.00616415]]
[[118.01]
 [118.85]
 [118.44]
 ...
 [128.4 ]
 [128.4 ]
 [128.4 ]

## Estimate Garch paramaters

In [14]:
data = dat2E               # Simulated dta


model = arch_model(1000*data, p=1, q=1)
model_fit = model.fit()
model_fit.summary()

Iteration:      1,   Func. Count:      6,   Neg. LLF: 28287.899958017075
Iteration:      2,   Func. Count:     14,   Neg. LLF: 19324.303373785187
Iteration:      3,   Func. Count:     22,   Neg. LLF: 14469.855281829681
Iteration:      4,   Func. Count:     29,   Neg. LLF: 14215.615007455526
Iteration:      5,   Func. Count:     35,   Neg. LLF: 13723.42552997942
Iteration:      6,   Func. Count:     40,   Neg. LLF: 13723.2340074244
Iteration:      7,   Func. Count:     45,   Neg. LLF: 13723.230689853412
Iteration:      8,   Func. Count:     50,   Neg. LLF: 13723.2414166044
Iteration:      9,   Func. Count:     56,   Neg. LLF: 13723.229022445861
Iteration:     10,   Func. Count:     61,   Neg. LLF: 13723.228991777949
Iteration:     11,   Func. Count:     65,   Neg. LLF: 13723.228991777461
Optimization terminated successfully    (Exit mode 0)
            Current function value: 13723.228991777949
            Iterations: 11
            Function evaluations: 65
            Gradient evaluati

0,1,2,3
Dep. Variable:,y,R-squared:,0.0
Mean Model:,Constant Mean,Adj. R-squared:,0.0
Vol Model:,GARCH,Log-Likelihood:,-13723.2
Distribution:,Normal,AIC:,27454.5
Method:,Maximum Likelihood,BIC:,27481.5
,,No. Observations:,6334.0
Date:,"Thu, Apr 21 2022",Df Residuals:,6333.0
Time:,13:34:21,Df Model:,1.0

0,1,2,3,4,5
,coef,std err,t,P>|t|,95.0% Conf. Int.
mu,-0.0349,2.714e-02,-1.286,0.198,"[-8.811e-02,1.830e-02]"

0,1,2,3,4,5
,coef,std err,t,P>|t|,95.0% Conf. Int.
omega,2.0732,0.668,3.105,1.904e-03,"[ 0.764, 3.382]"
alpha[1],0.2301,7.597e-02,3.029,2.453e-03,"[8.123e-02, 0.379]"
beta[1],0.3849,8.402e-02,4.581,4.624e-06,"[ 0.220, 0.550]"


## Analysis Garch

### In sample

In [15]:
delta = 1
x = dat2E[:-delta,]
y = dat2E[delta:,]
p = dat2_rtnE[delta:,]
R2 = 0
MSE = 0
MAE = 0
n = 100
omega=2.07
alpha1=0.23
beta1=0.38


init = simulategarchdata(delta, omega, alpha1,0, beta1,0)[1]
init = [i/1000 for i in init]
for i in tqdm(range(n)):
    y_pred = np.zeros((E,1))
    for j in range(E-delta):
        if j<delta:
            y_pred[j,0] = init[j]
        else:
            y_pred[j,0] = simulategarchdata(delta, omega, alpha1,0, beta1,0)[1][-1]/1000
            
    #y_pred = simulatedata(b,m0,gamma_kbar,sigma,kbar,E)
    p_pred = r2p(p,y_pred,delta)
    y_pred = y_pred[delta:,]
    R2 += sklearn.metrics.r2_score(y,y_pred)
    print(y)
    print(y_pred)
    MSE += sklearn.metrics.mean_squared_error(p,p_pred)
    print(p)
    print(p_pred)
    MAE += sklearn.metrics.mean_absolute_error(p,p_pred)
    

print("The R2 value of " , delta , " day(s) forcast in log return is: " , R2/n)
print("The MSE value of " , delta , " day(s) forcast in price is: " , MSE/n)
print("The MAE value of " , delta , " day(s) forcast in price is: " , MAE/n)

NameError: name 'simulategarchdata' is not defined