## Initial Setting

In [28]:
import numpy as np
import pandas as pd
import msm
import sklearn.metrics
from simulatedata import simulatedata
from simulatedata_initial_state import simulatedata_with_initial_prob
from tqdm.notebook import tqdm

from simulategarchdata import simulategarchdata
from arch import arch_model
from random import gauss

## Data Selection

### For DEXJPUS.csv

In [170]:
# Read the DEXJPUS column, drop rows whose value is the "." placeholder,
# and parse the remaining entries as floats.
fx_raw = pd.read_csv("DEXJPUS.csv")
fx_levels = np.array(fx_raw.loc[fx_raw["DEXJPUS"] != ".", "DEXJPUS"].astype(float))

# dat2_rtn keeps the level observed at the start of each step (every level but
# the last); dat2 is the log return from that level to the next one.
# Zero returns are kept (an earlier variant of this cell filtered them out).
# Both are stored as column vectors.
dat2_rtn = fx_levels[:-1][:, None]
dat2 = np.diff(np.log(fx_levels))[:, None]

# The first 60% of the observations form the estimation window (suffix E),
# the remainder the forecast window (suffix F).
T = dat2.shape[0]
E = np.rint(0.6 * T).astype(int)
dat2E, dat2F = dat2[:E], dat2[E:]
dat2_rtnE, dat2_rtnF = dat2_rtn[:E], dat2_rtn[E:]
dat2E.shape, dat2F.shape

((4379, 1), (2919, 1))

### For IBM and BA

In [162]:
# Which instrument and bar length (in minutes) to load.
symbol = "BA"
frequence = 15

# NOTE(review): absolute, machine-specific location; consider a configurable
# data directory so the notebook runs on other machines.
pathName = 'C:\\users\\hp\\MSM_dossier_sheng\\' + symbol + '\\' + str(frequence) + 'min\\'

# Order the bars chronologically, drop rows whose close is the "." placeholder,
# and parse the remaining closes as floats.
bars_raw = pd.read_csv(pathName + "merge_res.csv").sort_values("time", ascending=True)
close_levels = np.array(bars_raw.loc[bars_raw["close"] != ".", "close"].astype(float))

# dat2_rtn keeps the close observed at the start of each step (every close but
# the last); dat2 is the log return from that close to the next one.
# Zero returns are kept (an earlier variant of this cell filtered them out).
# Both are stored as column vectors.
dat2_rtn = close_levels[:-1][:, None]
dat2 = np.diff(np.log(close_levels))[:, None]

# The first 60% of the observations form the estimation window (suffix E),
# the remainder the forecast window (suffix F).
T = dat2.shape[0]
E = np.rint(0.6 * T).astype(int)
dat2E, dat2F = dat2[:E], dat2[E:]
dat2_rtnE, dat2_rtnF = dat2_rtn[:E], dat2_rtn[E:]
dat2E.shape, dat2F.shape

((7120, 1), (4747, 1))

In [33]:
def r2p(p,y_pred,delta):
    """Compound log returns over ``delta`` consecutive steps onto starting prices.

    Row ``i`` of the result holds, in column 0,
    ``p[i, 0] * exp(y_pred[i, 0] + ... + y_pred[i + delta - 1, 0])``.

    Parameters
    ----------
    p : 2-D array
        Column 0 supplies the starting price of each row. It needs at least
        as many rows as the result.
    y_pred : 2-D numpy array
        Column 0 holds the log returns to compound.
    delta : int
        Number of consecutive returns compounded per row. With ``delta <= 0``
        nothing is compounded and column 0 is a copy of the leading prices.

    Returns
    -------
    numpy.ndarray
        Float array shaped like ``y_pred[delta:]``. Only column 0 is filled;
        any further columns stay zero.

    Raises
    ------
    IndexError
        If ``p`` has fewer rows than the result.
    """
    prices = np.asarray(p)
    P = np.zeros(y_pred.shape)[delta:]
    rows = P.shape[0]
    if rows == 0:
        return P
    if prices.shape[0] < rows:
        raise IndexError(
            "p has %d row(s) but %d are required" % (prices.shape[0], rows)
        )

    if delta > 0:
        # Sum each window of `delta` returns in one vectorized pass instead of
        # multiplying exponentials one at a time in a nested Python loop.
        # Each window is summed independently, so a non-finite return only
        # affects the rows whose window contains it.
        window_sums = np.convolve(y_pred[:, 0], np.ones(delta), mode="valid")[:rows]
        P[:, 0] = prices[:rows, 0] * np.exp(window_sums)
    else:
        P[:, 0] = prices[:rows, 0]
    return P

## Estimate GARCH parameters

In [171]:
# Fit GARCH(p=1, q=1) to the estimation-window log returns (dat2E).
# The returns are multiplied by 1000 before fitting; the simulation code
# further down divides simulated values by 1000 to undo this scaling.
data = dat2E
model = arch_model(data * 1000, p=1, q=1)
model_fit = model.fit()
model_fit.summary()

Iteration:      1,   Func. Count:      6,   Neg. LLF: 21288.275576832926
Iteration:      2,   Func. Count:     13,   Neg. LLF: 2450065.571035227
Iteration:      3,   Func. Count:     20,   Neg. LLF: 14551.875155735073
Iteration:      4,   Func. Count:     27,   Neg. LLF: 13626.99135225594
Iteration:      5,   Func. Count:     32,   Neg. LLF: 13625.47252207527
Iteration:      6,   Func. Count:     37,   Neg. LLF: 13624.65743170582
Iteration:      7,   Func. Count:     42,   Neg. LLF: 13624.684456121257
Iteration:      8,   Func. Count:     48,   Neg. LLF: 13624.63492627448
Iteration:      9,   Func. Count:     53,   Neg. LLF: 13624.634920658697
Iteration:     10,   Func. Count:     58,   Neg. LLF: 13624.634910484718
Iteration:     11,   Func. Count:     62,   Neg. LLF: 13624.634903205242
Optimization terminated successfully    (Exit mode 0)
            Current function value: 13624.634910484718
            Iterations: 15
            Function evaluations: 62
            Gradient evaluati

0,1,2,3
Dep. Variable:,y,R-squared:,0.0
Mean Model:,Constant Mean,Adj. R-squared:,0.0
Vol Model:,GARCH,Log-Likelihood:,-13624.6
Distribution:,Normal,AIC:,27257.3
Method:,Maximum Likelihood,BIC:,27282.8
,,No. Observations:,4379.0
Date:,"Sat, Apr 23 2022",Df Residuals:,4378.0
Time:,13:07:22,Df Model:,1.0

0,1,2,3,4,5
,coef,std err,t,P>|t|,95.0% Conf. Int.
mu,-0.0259,6.143e-02,-0.421,0.674,"[ -0.146,9.452e-02]"

0,1,2,3,4,5
,coef,std err,t,P>|t|,95.0% Conf. Int.
omega,0.5873,0.471,1.246,0.213,"[ -0.336, 1.511]"
alpha[1],0.1405,6.056e-02,2.320,2.033e-02,"[2.181e-02, 0.259]"
beta[1],0.8595,5.885e-02,14.605,2.622e-48,"[ 0.744, 0.975]"


## GARCH Analysis

### In sample

In [111]:
def estimators(delta, n=100, omega=5 * 10**-5, alpha1=0.0295, beta1=0.9702):
    """Print averaged in-sample metrics for simulated GARCH paths.

    For each of ``n`` repetitions a path of simulated log returns as long as
    the estimation window (``dat2E``) is built, converted to prices with
    ``r2p``, and scored. The averages of the three scores are printed.

    Parameters
    ----------
    delta : int
        Forecast horizon, in observations.
    n : int, optional
        Number of simulated paths to average over.
    omega, alpha1, beta1 : float, optional
        Parameters forwarded to ``simulategarchdata``. The defaults are the
        values that used to be hard-coded in this function.
        NOTE(review): they differ from the fit printed earlier in this
        notebook (omega 0.5873, alpha[1] 0.1405, beta[1] 0.8595); pass the
        fitted values explicitly if those are the ones intended.

    Returns
    -------
    None
        The averaged R2 (on log returns), MSE and MAE (on prices) are printed.

    Notes
    -----
    Reads the notebook globals ``dat2E`` and ``dat2_rtnE``.
    NOTE(review): ``r2p`` builds ``p_pred[i]`` from ``p[i]`` and the result is
    scored against that same ``p``, so the price metrics measure the size of
    the simulated ``delta``-step move, not the distance to a later observed
    price. Confirm this alignment is intended.
    """
    y = dat2E[delta:,]
    p = dat2_rtnE[delta:,]
    # Take the window length from the data itself so the function does not
    # depend on the separate global E staying in sync with dat2E.
    n_obs = dat2E.shape[0]
    R2 = 0
    MSE = 0
    MAE = 0

    # One simulated series seeds the first `delta` entries of every path.
    # Simulated values are divided by 1000 (the fit above was run on 1000 * returns).
    init = [value / 1000 for value in simulategarchdata(delta, omega, alpha1, 0, beta1, 0)[1]]
    for _ in tqdm(range(n)):
        y_pred = np.zeros((n_obs, 1))
        # The last `delta` rows are never written and stay at zero.
        for j in range(n_obs - delta):
            if j < delta:
                y_pred[j, 0] = init[j]
            else:
                # Fresh simulation per step; only its final value is kept.
                y_pred[j, 0] = simulategarchdata(delta, omega, alpha1, 0, beta1, 0)[1][-1] / 1000

        p_pred = r2p(p, y_pred, delta)
        y_pred = y_pred[delta:,]
        R2 += sklearn.metrics.r2_score(y, y_pred)
        MSE += sklearn.metrics.mean_squared_error(p, p_pred)
        MAE += sklearn.metrics.mean_absolute_error(p, p_pred)

    print("The R2 value of " , delta , " day(s) forcast in log return is: " , R2/n)
    print("The MSE value of " , delta , " day(s) forcast in price is: " , MSE/n)
    print("The MAE value of " , delta , " day(s) forcast in price is: " , MAE/n)

In [116]:
# Report the in-sample metrics at each forecast horizon.
for horizon in (1, 5, 10, 20, 50):
    estimators(horizon)

  0%|          | 0/100 [00:00<?, ?it/s]

The R2 value of  50  day(s) forcast in log return is:  -0.16683941273332514
The MSE value of  50  day(s) forcast in price is:  2.469958404277018
The MAE value of  50  day(s) forcast in price is:  1.2519505728942633


### Out of sample

In [172]:
def estimators_out(delta, n=100, omega=0.5873, alpha1=0.1405, beta1=0.8595):
    """Print averaged out-of-sample metrics for simulated GARCH paths.

    For each of ``n`` repetitions a path of simulated log returns as long as
    the forecast window (``dat2F``) is built, converted to prices with
    ``r2p``, and scored. The averages of the three scores are printed.

    Parameters
    ----------
    delta : int
        Forecast horizon, in observations.
    n : int, optional
        Number of simulated paths to average over.
    omega, alpha1, beta1 : float, optional
        Parameters forwarded to ``simulategarchdata``. The defaults are the
        values that used to be hard-coded in this function; they match the
        coefficients in the fit summary printed earlier in this notebook.

    Returns
    -------
    None
        The averaged R2 (on log returns), MSE and MAE (on prices) are printed.

    Notes
    -----
    Reads the notebook globals ``dat2F`` and ``dat2_rtnF``.
    NOTE(review): ``r2p`` builds ``p_pred[i]`` from ``p[i]`` and the result is
    scored against that same ``p``, so the price metrics measure the size of
    the simulated ``delta``-step move, not the distance to a later observed
    price. Confirm this alignment is intended.
    """
    y = dat2F[delta:,]
    p = dat2_rtnF[delta:,]
    # Take the window length from the data itself so the function does not
    # depend on the separate globals T and E staying in sync with dat2F.
    n_obs = dat2F.shape[0]
    R2 = 0
    MSE = 0
    MAE = 0

    # One simulated series seeds the first `delta` entries of every path.
    # Simulated values are divided by 1000 (the fit above was run on 1000 * returns).
    init = [value / 1000 for value in simulategarchdata(delta, omega, alpha1, 0, beta1, 0)[1]]
    for _ in tqdm(range(n)):
        y_pred = np.zeros((n_obs, 1))
        # The last `delta` rows are never written and stay at zero.
        for j in range(n_obs - delta):
            if j < delta:
                y_pred[j, 0] = init[j]
            else:
                # Fresh simulation per step; only its final value is kept.
                y_pred[j, 0] = simulategarchdata(delta, omega, alpha1, 0, beta1, 0)[1][-1] / 1000

        p_pred = r2p(p, y_pred, delta)
        y_pred = y_pred[delta:,]
        R2 += sklearn.metrics.r2_score(y, y_pred)
        MSE += sklearn.metrics.mean_squared_error(p, p_pred)
        MAE += sklearn.metrics.mean_absolute_error(p, p_pred)

    print("The R2 value of " , delta , " day(s) forcast in log return is: " , R2/n)
    print("The MSE value of " , delta , " day(s) forcast in price is: " , MSE/n)
    print("The MAE value of " , delta , " day(s) forcast in price is: " , MAE/n)

In [177]:
# Report the out-of-sample metrics at each forecast horizon.
for horizon in (1, 5, 10, 20, 50):
    estimators_out(horizon)

  0%|          | 0/100 [00:00<?, ?it/s]

The R2 value of  20  day(s) forcast in log return is:  -0.2471745113134063
The MSE value of  20  day(s) forcast in price is:  3.436157552720856
The MAE value of  20  day(s) forcast in price is:  1.4642224738295309
