# Hidden Markov Model

In [1]:
import pandas as pd
from datetime import datetime
from matplotlib import pyplot as plt
%matplotlib inline
from  statsmodels.tsa.stattools import adfuller,kpss, acf
import statsmodels.api
import numpy as np
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error,mean_absolute_error, mean_squared_log_error,balanced_accuracy_score
from statistics import mean
import statsmodels.api as sm
from sklearn import preprocessing
import seaborn as sns 
import gapminder
import sys
import warnings
import time
from pomegranate import *
from sklearn.metrics import accuracy_score, f1_score
import json
from math import e

#if not sys.warnoptions:
#    warnings.simplefilter("ignore")

In [2]:
def return_size(df):
    df['return']=df.loc[:,'close']-df.loc[:,'close'].shift(1)
    df=df.dropna()
    return df

def plot_return(df):
    plt.figure(figsize=(15,2))
    if df.name=='Hour':
        plt.plot(df[['date']][8:15],df[['return']][8:15] )
    else:
        plt.plot(df[['date']][:30],df[['return']][:30] )
    plt.title(str(df.name)+' Return')
    plt.xlabel('time')
    plt.ylabel('return')
    plt.subplot()
    
def plot_close(df):
    plt.figure(figsize=(15,2))
    plt.plot(df[['date']],df[['close']])
    plt.title(str(df.name)+' Price Close')
    plt.xlabel('time')
    plt.ylabel('price')
    plt.subplot()

def rename_col(df):
    df.columns=['date','time','open','high','low','close','vol']
    return df

def format_time(df):
    df['date']=df['date']+' '+df['time']
    df=df.drop(columns=['time'])
    df['date']=[datetime.strptime(i, '%d/%m/%y %H:%M:%S') for i in list(df['date'])]
    return df
def split(df):
    ind_train = int(df.shape[0] * 0.7)
    margin = int(df.shape[0] * 0.01) # as we implement embargoing
    train = df.iloc[:ind_train]
    test = df.iloc[ind_train+margin:day.shape[0]]
    return train,  test

def trend(data, step, mode):
    df = data.copy()
    df['sign'] = np.nan
    for row in range(df.shape[0]):
        if df.iloc[row, 6] > 0:
            df.iloc[row, -1] = 1
        else:
            df.iloc[row, -1] = 0
    df_incr = []
    df_decr = []
    d = 0
    k = 0
    df['sum'] = df['sign'].rolling(window = step).sum()
    while d < df.shape[0] - 1:
        if df.iloc[d,-1] == step and df.iloc[d + 1,-1] != step:
            df_incr.append(df.iloc[k+1:d+1,:])
            k = d
        elif df.iloc[d,-1] == 0 and df.iloc[d + 1,-1] != 0:
            df_decr.append(df.iloc[k+1:d+1, :])
            k = d
        d += 1
        
    df_pos_fin = df_incr[0].iloc[:,:]
    for entry in range(1,len(df_incr)):
        df_pos_fin = df_pos_fin.append(df_incr[entry])
    
    df_neg_fin = df_decr[0].iloc[:,:]
    for entry_ in range(1,len(df_decr)):
        df_neg_fin = df_neg_fin.append(df_decr[entry_])

    if mode == 'pos':
        return df_pos_fin
    elif mode == 'neg':
        return df_neg_fin
    
 def init_parameters(states):
    pi =  np.random.rand(states)
    pi = pi/pi.sum()
    end =  np.random.rand(states)
    end = end/end.sum()
    trans = np.random.rand(states, states)
    for row in range(trans.shape[0]):
        trans[row] = trans[row] / trans[row].sum()
    return pi, end, trans

def make_cv(df, step, col_n, mode):
    cv = []
    a = step - 1 
    b = 0
    if mode == 'neg':
        while a < df.shape[0]:
            if df.iloc[a,-1] == 0:
                arr = np.array(df.iloc[b:a - (step - 1), col_n])
                if arr.size > 1:
                    cv.append(arr)
                b = a + 1
            a += 1
    if mode == 'pos':
        while a < df.shape[0]:
            if df.iloc[a,-1] == step:
                arr = np.array(df.iloc[b:a - (step - 1), col_n])
                if arr.size > 1:
                    cv.append(arr) #close price
                b = a + 1
            a += 1
    return cv

def test_hmm(df_test, model_neg, model_pos, step, col_n, mode):
    try_test = make_cv(df_test, step, col_n, mode)
    lab_true = []
    lab_pred = []
    if mode == 'pos':
        for item in range(len(try_test)):
            lab_true.append('P')
    elif mode == 'neg':
        for item in range(len(try_test)):
            lab_true.append('N')
    for i in try_test:
        nprob = model_neg.forward(i)[-1]
        total_neg_prob=0
        for pr_neg in nprob:
            if pr_neg!=-np.Inf:
                total_neg_prob+=pr_neg
        pprob = model_pos.forward(i)[-1]
        total_pos_prob=0
        for pr_pos in pprob:
            if pr_pos!=-np.Inf:
                total_pos_prob+=pr_pos
        if total_neg_prob > total_pos_prob:
            lab_pred.append('N')
        else:
            lab_pred.append('P')
    
    return f1_score(lab_true, lab_pred, average='micro')

def final_check_day(mode, col_n, ind):
    if mode == 'neg':
        return test_hmm(day_neg_testsets[ind], day_neg_mod[ind], day_pos_mod[ind], day_neg_steps[ind], col_n, 'neg' )
    elif mode == 'pos':
        return test_hmm(day_pos_testsets[ind], day_neg_mod[ind],day_pos_mod[ind], day_pos_steps[ind],col_n, 'pos' )
    
def train_hmm(data, mode, col, col_number):
    n_states = []
    n_steps = []
    n_pi = []
    n_end = []
    n_trans = []
    n_dists = []
    n_prob = []
    n_mod = []
    n_cv = []
    n_testsets = []
    for step in range(3, 6):
        df = trend(data, step, mode) 
        df_train, df_test = split(df)
        for states in range(3,9):
            for try_ in range(30):
                df_pi, df_end, df_trans = init_parameters(states)
                df_data = list()
                df_data.append(np.array(df_train[col]))
                dists = list()
                mean = df_train[col].mean()#close
                std = df_train[col].std()#close
                for i in range(states):
                    dists.append(NormalDistribution(numpy.random.randint(50,100)/100 * mean, numpy.random.randint(50,100)/100 * std))
                mod = HiddenMarkovModel.from_matrix(df_trans, dists, df_pi, df_end)
                mod.fit(df_data, algorithm='baum-welch');
                n_states.append(states)
                n_steps.append(step)
                n_pi.append(df_pi)
                n_end.append(df_end)
                n_trans.append(df_trans)
                n_dists.append(dists)
                n_mod.append(mod)
                n_testsets.append(df_test)

    return n_states, n_steps, n_pi, n_end, n_trans, n_dists,  n_mod,  n_testsets

def best_model(mod_length, col_n, func):
    score = []
    for idx in range(mod_length):
        p1 = func('neg', col_n, idx)
        p2 = func('pos', col_n, idx)
        score.append(p1+p2)
    all_max = list(filter(lambda x: x> 1, score))
    max_ = max(all_max)
    max_idx = score.index(max_)
    return score[max_idx], max_idx

def emission_matr(df,cap):
    emiss = []
    for stat in df.states:
        try: 
            mean_, std_ = stat.distribution.parameters
            emiss.append('N({}, {})'.format(round(mean_, 6), round(std_,6)))
        except AttributeError:
            pass
    return print(pd.DataFrame(emiss).to_latex(caption='Emission matrix of '+cap))

def trans_matr(data, caption):
    df=pd.DataFrame(data.dense_transition_matrix())
    df = df.iloc[:-1,:-2]
    pi = df.iloc[-1,:]
    df = df.iloc[:-1,:]
    for r in range(df.shape[0]):
        df.iloc[r,:] = df.iloc[r,:]/df.iloc[r,:].sum()
    return print(df.round(6).to_latex(caption='Transition matrix of ' + caption)), print(pi.round(6).to_latex(caption=
                                                                                                           'Initial state probabilities vector of ' + caption))

#### Open and format data

In [3]:
minute=pd.read_csv('C:\\Users\\Dell\\Desktop\\coursework\\Stonks-2\\AMZN_min.csv')
minute=minute.iloc[:,2:]
minute=rename_col(minute)
minute=format_time(minute)


In [4]:
#generate hourly data
hourly = minute.copy()
hourly['year'] = [i.year for i in hourly['date']]
hourly['month'] = [i.month for i in hourly['date']]
hourly['day'] = [i.day for i in hourly['date']]
hourly['hour'] = [i.hour for i in hourly['date']]
hour_low = hourly.groupby(['year', 'month', 'day', 'hour']).min()['low']
hour_high = hourly.groupby(['year', 'month', 'day', 'hour']).max()['high']
hour_open = hourly.groupby(['year', 'month', 'day', 'hour']).first()['open']
hour_date = hourly.groupby(['year', 'month', 'day', 'hour']).first()['date']
hour_close=hourly.groupby(['year', 'month', 'day', 'hour']).last()['close']
hour_vol=hourly.groupby(['year', 'month', 'day', 'hour']).sum()['vol']
AMZNhour=pd.DataFrame([hour_open,hour_low, hour_high, hour_close, hour_vol]).transpose()
AMZNhour=pd.concat([hour_date, AMZNhour], axis=1)
#AMZNhour.to_excel('AMZNhour.xlsx')

In [51]:
#generate daily data
daily = minute.copy()
daily['year'] = [i.year for i in daily['date']]
daily['month'] = [i.month for i in daily['date']]
daily['day'] = [i.day for i in daily['date']]
daily_low = daily.groupby(['year', 'month', 'day']).min()['low']
daily_high = daily.groupby(['year', 'month', 'day']).max()['high']
daily_open = daily.groupby(['year', 'month', 'day']).first()['open']
daily_date = daily.groupby(['year', 'month', 'day']).first()['date']
daily_close=daily.groupby(['year', 'month', 'day']).last()['close']
daily_vol=daily.groupby(['year', 'month', 'day']).sum()['vol']
AMZNdaily=pd.DataFrame([daily_open,daily_low, daily_high, daily_close, daily_vol]).transpose()
AMZNdaily=pd.concat([daily_date, AMZNdaily], axis=1)
for date in range(AMZNdaily.shape[0]):
    AMZNdaily.iloc[date,0] = AMZNdaily.iloc[date,0].strftime("%d.%m.%Y %H:%M:%S")
    AMZNdaily.iloc[date,0] = str(AMZNdaily.iloc[date,0])[:10]
AMZNdaily.head(3)
#AMZNdaily.to_excel('AMZNdaily.xlsx')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,date,open,low,high,close,vol
year,month,day,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2019,2,19,19.02.2019,1606.89,1605.36,1634.0,1627.23,1815520.0
2019,2,20,20.02.2019,1630.0,1610.12,1634.93,1622.12,1678423.0
2019,2,21,21.02.2019,1619.85,1600.91,1623.56,1619.49,1881385.0


In [6]:
day=pd.read_excel('C:\\Users\\Dell\\Desktop\\coursework\\Stonks-2\\AMZNdaily.xlsx')
hour=pd.read_excel('C:\\Users\\Dell\\Desktop\\coursework\\Stonks-2\\AMZNhour.xlsx')
day['overnight'] = day.loc[:,'open'] - day.loc[:,'close'].shift(1)
day_ret = day.copy()
hour_ret = hour.copy()
minute_ret = minute.copy()
day['return']=day.loc[:,'close']-day.loc[:,'close'].shift(1)
day.loc[:,'open'] = day.loc[:,'close'].shift(1)
hour['return']=hour.loc[:,'close']-hour.loc[:,'close'].shift(1)
minute['return']=minute.loc[:,'close']-minute.loc[:,'close'].shift(1)
day.loc[:,'open'] = day.loc[:,'close'].shift(1)
day=day.iloc[1:,:]
hour=hour.iloc[1:,:]
minute=minute.iloc[1:,:]

In [5]:
day_ret['open_ret'] = day.loc[:,'open']-day.loc[:,'open'].shift(1)
day_ret['low_ret'] = day.loc[:,'low']-day.loc[:,'low'].shift(1)
day_ret['high_ret'] = day.loc[:,'high']-day.loc[:,'high'].shift(1)
day_ret['return'] = day.loc[:,'close']-day.loc[:,'close'].shift(1)
day_ret = day_ret.iloc[2:,:]

In [6]:
hour_ret['open_ret'] = hour_ret.loc[:,'open']-hour_ret.loc[:,'open'].shift(1)
hour_ret['low_ret'] = hour_ret.loc[:,'low']-hour_ret.loc[:,'low'].shift(1)
hour_ret['high_ret'] = hour_ret.loc[:,'high']-hour_ret.loc[:,'high'].shift(1)
hour_ret['return'] = hour_ret.loc[:,'close']-hour_ret.loc[:,'close'].shift(1)
hour_ret = hour_ret.iloc[1:,:]

In [7]:
minute_ret['open_ret'] = minute_ret.loc[:,'open']-minute_ret.loc[:,'open'].shift(1)
minute_ret['low_ret'] = minute_ret.loc[:,'low']-minute_ret.loc[:,'low'].shift(1)
minute_ret['high_ret'] = minute_ret.loc[:,'high']-minute_ret.loc[:,'high'].shift(1)
minute_ret['return'] = minute_ret.loc[:,'close']-minute_ret.loc[:,'close'].shift(1)
minute_ret = minute_ret.iloc[1:,:]

In [5]:
#overnight
time_m=[str(j)[-8:] for j in minute['date'].to_list()]
indices_end_m = [v for v, x in enumerate(time_m) if x == "16:00:00"]
for ind in range(0, len(indices_end_m) - 1):
    minute.iloc[indices_end_m[ind] + 1,1] = minute.iloc[indices_end_m[ind],4]
    
time_h=[str(p)[-8:] for p in hour['date'].to_list()]
indices_end_h = [y for y, u in enumerate(time_h) if u == "16:00:00"]
for ind_ in range(0, len(indices_end_h) - 1):
    hour.iloc[indices_end_h[ind_] + 1,1] = hour.iloc[indices_end_h[ind_],4]

#### EDA

In [52]:
ind = -1
print('sd deviations are: daily {},hourly {}, minute {}'.format(day_ret.iloc[-29:,ind].std(),hour_ret.iloc[-231:, ind].std(), minute_ret.iloc[-8995:,ind].std()))
print('means are: daily {},hourly {}, minute {}'.format(day_ret.iloc[-29:,ind].mean(),hour_ret.iloc[-231:, ind].mean(), minute_ret.iloc[-8995:,ind].mean()))

sd deviations are: daily 61.03828055897429,hourly 20.58191656571765, minute 3.012366934199707
means are: daily 13.513103448275858,hourly 1.696450216450216, minute 0.04356642579210671


In [None]:
plt.figure(figsize=(15,3))
plt.plot(day.iloc[-29:,0], day.iloc[-29:,-1])
plt.tight_layout()
plt.xlabel('time')
plt.ylabel('return USD');
plt.figure(figsize=(15,3))
plt.plot(hour.iloc[-174:,0], hour.iloc[-174:,-1])
plt.tight_layout()
plt.xlabel('time')
plt.ylabel('return USD');
plt.figure(figsize=(15,3))
plt.plot( minute.iloc[-8144:,0],  minute.iloc[-8144:,-2])
plt.tight_layout()
plt.xlabel('time')
plt.ylabel('return USD');

In [None]:
#correlation between lags
for j in range(1,4):
    print('DAY correlation for {} lags ={}'.format(j,day['return'].corr(day['return'].shift(j))))
print('------------------------------------------------')
for j in range(1,4):
    print('HOUR correlation for {} lags ={}'.format(j,hour['return'].corr(hour['return'].shift(j))))
print('------------------------------------------------')
for j in range(1,4):
    print('MINUTE correlation for {} lags ={}'.format(j,minute['return'].corr(minute['return'].shift(j))))

In [89]:
min_r=statsmodels.tsa.stattools.acf(np.array(minute['return'][1:]), nlags=5, unbiased=True)
day_r=statsmodels.tsa.stattools.acf(np.array(day['return'][1:]), nlags=5, unbiased=True)
hour_r=statsmodels.tsa.stattools.acf(np.array(hour['return'][1:]), nlags=5, unbiased=True)
_corr=pd.DataFrame([day_r,hour_r, min_r]).transpose()
_corr.columns=['day return', 'hour return','minute return']
_corr



Unnamed: 0,day return,hour return,minute return
0,1.0,1.0,1.0
1,-0.071925,-0.013108,-0.019161
2,0.012636,-0.066159,-0.008529
3,0.021012,0.009329,-0.015591
4,-0.009186,0.034167,-0.007527
5,0.068894,-0.004627,-0.013526


#### Test on stationarity 

In [76]:
str_='close'
#Lags chosen through AIC minimization
#All tests show stationarity
adf=adfuller(day[str_],regression='c',autolag='AIC')
print('Daily - ADF - p-value = {}, number of lags = {}'.format(adf[1],adf[2]))
kps=kpss(day[str_],regression='c')
print('Daily -KPSS - p-value = {}, number of lags = {}'.format(kps[1],kps[2]))
print('----------------------------------------------------')
adf=adfuller(hour[str_],regression='c',autolag='AIC')
print('Hourly - ADF - p-value = {}, number of lags = {}'.format(adf[1],adf[2]))
kps=kpss(hour['return'],regression='c')
print('Hourly -KPSS - p-value = {}, number of lags = {}'.format(kps[1],kps[2]))
print('----------------------------------------------------')
adf=adfuller(minute[str_][:5000],maxlag=40,regression='c',autolag='AIC')#since high computational cost
print('Minute - ADF - p-value = {}, number of lags = {}'.format(adf[1],adf[2]))
kps=kpss(minute[str_][:5000],regression='c')
print('Minute -KPSS - p-value = {}, number of lags = {}'.format(kps[1],kps[2]))

Daily - ADF - p-value = 0.9948125745488968, number of lags = 24
Daily -KPSS - p-value = 0.01, number of lags = 25
----------------------------------------------------
Hourly - ADF - p-value = 0.833106965331807, number of lags = 26
Hourly -KPSS - p-value = 0.1, number of lags = 27
----------------------------------------------------




Minute - ADF - p-value = 0.5547199418804214, number of lags = 7
Minute -KPSS - p-value = 0.01, number of lags = 32




### Data preparation

#### Split

#### HMM -- day -- univariate -- close - close -- long sequence

In [12]:
day_pos_states, day_pos_steps, day_pos_pi, day_pos_end, day_pos_trans, day_pos_dist, day_pos_mod, day_pos_testsets = train_hmm(day, 'pos', 'return', 6)
day_neg_states, day_neg_steps, day_neg_pi, day_neg_end, day_neg_trans, day_neg_dist, day_neg_mod, day_neg_testsets = train_hmm(day, 'neg',  'return', 6)

KeyboardInterrupt: 

In [181]:
day_neg_return_mod,day_neg_return_ind = best_model(len(day_neg_mod), 6, final_check_day)
print('Total accuracy: {}'.format(day_neg_return_mod))
print('Index: {}'.format(day_neg_return_ind))

Total accuracy: 1.5
Index: 174


In [183]:
print('negative: {}'.format(final_check_day('neg', 6, 174)))
print('positive: {}'.format(final_check_day('pos', 6, 174)))

negative: 0.8333333333333334
positive: 0.6666666666666666


In [190]:
with open('day_neg_mod_ret_long.json', 'w') as js:
    json.dump(day_neg_mod[174].to_json(), js)
with open('day_pos_mod_ret_long.json', 'w') as js:
    json.dump(day_pos_mod[174].to_json(), js)

#### Short return

In [164]:
day_pos_states_sh, day_pos_steps_sh, day_pos_pi_sh, day_pos_end_sh, day_pos_trans_sh, day_pos_dist_sh, day_pos_mod_sh, day_pos_testsets_sh = train_hmm(day[:800], 'pos', 'return', 6)
day_neg_states_sh, day_neg_steps_sh, day_neg_pi_sh, day_neg_end_sh, day_neg_trans_sh, day_neg_dist_sh, day_neg_mod_sh, day_neg_testsets_sh = train_hmm(day[:800], 'neg',  'return', 6)

In [168]:
def final_check_day_sh(mode, col_n, ind):
    if mode == 'neg':
        return test_hmm(day_neg_testsets_sh[ind], day_neg_mod_sh[ind], day_pos_mod_sh[ind], day_neg_steps_sh[ind], col_n, 'neg' )
    elif mode == 'pos':
        return test_hmm(day_pos_testsets_sh[ind], day_neg_mod_sh[ind],day_pos_mod_sh[ind], day_pos_steps_sh[ind],col_n, 'pos' )

In [178]:
day_neg_return_mod_sh,day_neg_return_ind_sh = best_model(len(day_neg_mod_sh), 6, final_check_day_sh)
print('Total accuracy: {}'.format(day_neg_return_mod_sh))
print('Index: {}'.format(day_neg_return_ind_sh))

Total accuracy: 2.0
Index: 413


In [176]:
print('negative: {}'.format(final_check_day_sh('neg', 6, 413)))
print('positive: {}'.format(final_check_day_sh('pos', 6, 413)))

negative: 1.0
positive: 1.0


In [189]:
with open('day_neg_mod_ret_short.json', 'w') as js:
    json.dump(day_neg_mod_sh[413].to_json(), js)
with open('day_pos_mod_ret_short.json', 'w') as js:
    json.dump(day_pos_mod_sh[413].to_json(), js)

#### Day close short

In [185]:
day_pos_states_c, day_pos_steps_c, day_pos_pi_c, day_pos_end_c, day_pos_trans_c, day_pos_dist_c, day_pos_mod_c, day_pos_testsets_c = train_hmm(day[:800], 'pos', 'close', 4)
day_neg_states_c, day_neg_steps_c, day_neg_pi_c, day_neg_end_c, day_neg_trans_c, day_neg_dist_c, day_neg_mod_c, day_neg_testsets_c = train_hmm(day[:800], 'neg',  'close', 4)

In [186]:
def final_check_day_c(mode, col_n, ind):
    if mode == 'neg':
        return test_hmm(day_neg_testsets_c[ind], day_neg_mod_c[ind], day_pos_mod_c[ind], day_neg_steps_c[ind], col_n, 'neg' )
    elif mode == 'pos':
        return test_hmm(day_pos_testsets_c[ind], day_neg_mod_c[ind],day_pos_mod_c[ind], day_pos_steps_c[ind],col_n, 'pos' )

In [187]:
day_neg_mod_c,day_neg_ind_c = best_model(len(day_neg_mod_c), 4, final_check_day_c)
print('Total accuracy: {}'.format(day_neg_mod_c))
print('Index: {}'.format(day_neg_ind_c))

Total accuracy: 2.0
Index: 437


In [188]:
print('negative: {}'.format(final_check_day_c('neg', 4, 437)))
print('positive: {}'.format(final_check_day_c('pos', 4, 437)))

negative: 1.0
positive: 1.0


In [191]:
with open('day_neg_mod_cl_short.json', 'w') as js:
    json.dump(day_neg_mod_c[437].to_json(), js)
with open('day_pos_mod_cl_short.json', 'w') as js:
    json.dump(day_pos_mod_c[437].to_json(), js)

#### Close long

In [118]:
day_pos_states_c2, day_pos_steps_c2, day_pos_pi_c2, day_pos_end_c2, day_pos_trans_c2, day_pos_dist_c2, day_pos_mod_c2, day_pos_testsets_c2 = train_hmm(day, 'pos', 'close', 4)
day_neg_states_c2, day_neg_steps_c2, day_neg_pi_c2, day_neg_end_c2, day_neg_trans_c2, day_neg_dist_c2, day_neg_mod_c2, day_neg_testsets_c2 = train_hmm(day, 'neg',  'close', 4)

In [119]:
def final_check_day_c_long(mode, col_n, ind):
    if mode == 'neg':
        return test_hmm(day_neg_testsets_c2[ind], day_neg_mod_c2[ind], day_pos_mod_c2[ind], day_neg_steps_c2[ind], col_n, 'neg' )
    elif mode == 'pos':
        return test_hmm(day_pos_testsets_c2[ind], day_neg_mod_c2[ind],day_pos_mod_c2[ind], day_pos_steps_c2[ind],col_n, 'pos' )

In [120]:
day_neg_return_mod_c2,day_neg_return_ind_c2 = best_model(len(day_neg_mod_c2), 4, final_check_day_c_long)
print('Total accuracy: {}'.format(day_neg_return_mod_c2))
print('Index: {}'.format(day_neg_return_ind_c2))

Total accuracy: 1.5714285714285714
Index: 516


In [121]:
print('negative: {}'.format(final_check_day_c_long('neg', 4, 516)))
print('positive: {}'.format(final_check_day_c_long('pos', 4, 516)))

negative: 1.0
positive: 0.5714285714285714


In [123]:
with open('day_neg_mod_close_long.json', 'w') as js:
    json.dump(day_neg_mod_c2[516].to_json(), js)
with open('day_pos_mod_close_long.json', 'w') as js:
    json.dump(day_pos_mod_c2[516].to_json(), js)

In [125]:
day_pos_states_c2[516]

8

#### Hourly long return

In [18]:
hour_pos_states, hour_pos_steps, hour_pos_pi, hour_pos_end, hour_pos_trans, hour_pos_dist, hour_pos_mod, hour_pos_testsets = train_hmm(hour.iloc[-1858:,:], 'pos', 'return', 6)
hour_neg_states, hour_neg_steps, hour_neg_pi, hour_neg_end, hour_neg_trans, hour_neg_dist, hour_neg_mod, hour_neg_testsets = train_hmm(hour.iloc[-1858:,:], 'neg',  'return', 6)

In [19]:
def final_check_hour_ret_long(mode, col_n, ind):
    if mode == 'neg':
        return test_hmm(hour_neg_testsets[ind], hour_neg_mod[ind], hour_pos_mod[ind], hour_neg_steps[ind], col_n, 'neg' )
    elif mode == 'pos':
        return test_hmm(hour_pos_testsets[ind], hour_neg_mod[ind],hour_pos_mod[ind], hour_pos_steps[ind],col_n, 'pos' )

In [22]:
hour_neg_return_mod,hour_neg_return_ind = best_model(len(hour_neg_mod), 6, final_check_hour_ret_long)
print('Total accuracy: {}'.format(hour_neg_return_mod))
print('Index: {}'.format(hour_neg_return_ind))

Total accuracy: 1.5
Index: 474


In [23]:
print('negative: {}'.format(final_check_hour_ret_long('neg', 6, 474)))
print('positive: {}'.format(final_check_hour_ret_long('pos', 6, 474)))

negative: 0.8000000000000002
positive: 0.7


In [24]:
with open('hour_neg_mod_ret_long.json', 'w') as js:
    json.dump(hour_neg_mod[474].to_json(), js)
with open('hour_pos_mod_ret_long.json', 'w') as js:
    json.dump(hour_pos_mod[474].to_json(), js)

#### Hourly long close

In [32]:
hour_pos_states_cl_l, hour_pos_steps_cl_l, hour_pos_pi_cl_l, hour_pos_end_cl_l, hour_pos_trans_cl_l, hour_pos_dist_cl_l, hour_pos_mod_cl_l, hour_pos_testsets_cl_l = train_hmm(hour.iloc[-1858,:], 'pos', 'close', 4)
hour_neg_states_cl_l, hour_neg_steps_cl_l, hour_neg_pi_cl_l, hour_neg_end_cl_l, hour_neg_trans_cl_l, hour_neg_dist_cl_l, hour_neg_mod_cl_l, hour_neg_testsets_cl_l = train_hmm(hour.iloc[-1858:,:], 'neg',  'close', 4)

In [33]:
def final_check_hour_cl_long(mode, col_n, ind):
    if mode == 'neg':
        return test_hmm(hour_neg_testsets_cl_l[ind], hour_neg_mod_cl_l[ind], hour_pos_mod_cl_l[ind], hour_neg_steps_cl_l[ind], col_n, 'neg' )
    elif mode == 'pos':
        return test_hmm(hour_pos_testsets_cl_l[ind], hour_neg_mod_cl_l[ind],hour_pos_mod_cl_l[ind], hour_pos_steps_cl_l[ind],col_n, 'pos' )

In [34]:
hour_neg_close_mod,hour_neg_close_ind = best_model(len(hour_neg_mod_cl_l), 4, final_check_hour_cl_long)
print('Total accuracy: {}'.format(hour_neg_close_mod))
print('Index: {}'.format(hour_neg_close_ind))

Total accuracy: 1.6
Index: 467


In [35]:
print('negative: {}'.format(final_check_hour_cl_long('neg', 4, 467)))
print('positive: {}'.format(final_check_hour_cl_long('pos', 4, 467)))

negative: 1.0
positive: 0.6


In [36]:
with open('hour_neg_mod_close_long.json', 'w') as js:
    json.dump(hour_neg_mod_cl_l[467].to_json(), js)
with open('hour_pos_mod_close_long.json', 'w') as js:
    json.dump(hour_pos_mod_cl_l[467].to_json(), js)

#### Hour close short

In [39]:
hour_pos_states_cl_sh, hour_pos_steps_cl_sh, hour_pos_pi_cl_sh, hour_pos_end_cl_sh, hour_pos_trans_cl_sh, hour_pos_dist_cl_sh, hour_pos_mod_cl_sh, hour_pos_testsets_cl_sh = train_hmm(hour.iloc[-800:,:], 'pos', 'close', 4)
hour_neg_states_cl_sh, hour_neg_steps_cl_sh, hour_neg_pi_cl_sh, hour_neg_end_cl_sh, hour_neg_trans_cl_sh, hour_neg_dist_cl_sh, hour_neg_mod_cl_sh, hour_neg_testsets_cl_sh = train_hmm(hour.iloc[-800:,:], 'neg',  'close', 4)

In [42]:
def final_check_hour_cl_short(mode, col_n, ind):
    if mode == 'neg':
        return test_hmm(hour_neg_testsets_cl_sh[ind], hour_neg_mod_cl_sh[ind], hour_pos_mod_cl_sh[ind], hour_neg_steps_cl_sh[ind], col_n, 'neg' )
    elif mode == 'pos':
        return test_hmm(hour_pos_testsets_cl_sh[ind], hour_neg_mod_cl_sh[ind],hour_pos_mod_cl_sh[ind], hour_pos_steps_cl_sh[ind],col_n, 'pos' )

In [43]:
hour_neg_close_mod_sh,hour_neg_close_ind_sh = best_model(len( hour_neg_mod_cl_sh), 4, final_check_hour_cl_short)
print('Total accuracy: {}'.format(hour_neg_close_mod_sh))
print('Index: {}'.format(hour_neg_close_ind_sh))

Total accuracy: 1.4666666666666668
Index: 361


In [44]:
print('negative: {}'.format(final_check_hour_cl_short('neg', 4, 361)))
print('positive: {}'.format(final_check_hour_cl_short('pos', 4, 361)))

negative: 0.6666666666666666
positive: 0.8000000000000002


In [45]:
with open('hour_neg_mod_close_short.json', 'w') as js:
    json.dump(hour_neg_mod_cl_sh[361].to_json(), js)
with open('hour_pos_mod_close_short.json', 'w') as js:
    json.dump(hour_pos_mod_cl_sh[361].to_json(), js)

##### Hour return short

In [105]:
hour_pos_states_ret_sh, hour_pos_steps_ret_sh, hour_pos_pi_ret_sh, hour_pos_end_ret_sh, hour_pos_trans_ret_sh, hour_pos_dist_ret_sh, hour_pos_mod_ret_sh, hour_pos_testsets_ret_sh = train_hmm(hour.iloc[-800:,:], 'pos', 'return', 6)
hour_neg_states_ret_sh, hour_neg_steps_ret_sh, hour_neg_pi_ret_sh, hour_neg_end_ret_sh, hour_neg_trans_ret_sh, hour_neg_dist_ret_sh, hour_neg_mod_ret_sh, hour_neg_testsets_ret_sh = train_hmm(hour.iloc[-800:,:], 'neg',  'return', 6)

In [106]:
def final_check_hour_ret_short(mode, col_n, ind):
    if mode == 'neg':
        return test_hmm(hour_neg_testsets_ret_sh[ind], hour_neg_mod_ret_sh[ind], hour_pos_mod_ret_sh[ind], hour_neg_steps_ret_sh[ind], col_n, 'neg' )
    elif mode == 'pos':
        return test_hmm(hour_pos_testsets_ret_sh[ind], hour_neg_mod_ret_sh[ind],hour_pos_mod_ret_sh[ind], hour_pos_steps_ret_sh[ind],col_n, 'pos' )

In [107]:
hour_neg_ret_mod_sh, hour_neg_ret_ind_sh = best_model(len( hour_neg_mod_ret_sh), 6, final_check_hour_ret_short)
print('Total accuracy: {}'.format(hour_neg_ret_mod_sh))
print('Index: {}'.format(hour_neg_ret_ind_sh))

Total accuracy: 1.8000000000000003
Index: 270


In [110]:
print('negative: {}'.format(final_check_hour_ret_short('neg', 6, 270)))
print('positive: {}'.format(final_check_hour_ret_short('pos', 6, 270)))
print('steps: {}'.format(hour_pos_steps_ret_sh[270]))
print('states: {}'.format(hour_pos_states_ret_sh[270]))

negative: 0.8000000000000002
positive: 1.0
steps: 4
states: 6


In [111]:
with open('hour_neg_mod_return_short.json', 'w') as js:
    json.dump(hour_neg_mod_ret_sh[270].to_json(), js)
with open('hour_pos_mod_return_short.json', 'w') as js:
    json.dump(hour_pos_mod_ret_sh[270].to_json(), js)

#### Minute return long

In [96]:
min_pos_states_ret, min_pos_steps_ret, min_pos_pi_ret, min_pos_end_ret, min_pos_trans_ret, min_pos_dist_ret, min_pos_mod_ret, min_pos_testsets_ret = train_hmm(minute.iloc[-1858:,:], 'pos', 'return', 6)
min_neg_states_ret, min_neg_steps_ret, min_neg_pi_ret, min_neg_end_ret, min_neg_trans_ret, min_neg_dist_ret, min_neg_mod_ret, min_neg_testsets_ret = train_hmm(minute.iloc[-1858:,:], 'neg',  'return', 6)

In [121]:
def final_check_min_ret_long(mode, col_n, ind):
    if mode == 'neg':
        return test_hmm(min_neg_testsets_ret[ind], min_neg_mod_ret[ind], min_pos_mod_ret[ind], min_neg_steps_ret[ind], col_n, 'neg' )
    elif mode == 'pos':
        return test_hmm(min_pos_testsets_ret[ind], min_neg_mod_ret[ind], min_pos_mod_ret[ind], min_pos_steps_ret[ind],col_n, 'pos' )

In [122]:
min_neg_ret_mod_l, min_neg_ret_ind_l = best_model(len( min_neg_mod_ret), 6, final_check_min_ret_long)
print('Total accuracy: {}'.format(min_neg_ret_mod_l))
print('Index: {}'.format(min_neg_ret_ind_l))

Total accuracy: 1.8000000000000003
Index: 469


In [102]:
print('negative: {}'.format(final_check_min_ret_long('neg', 6, 469)))
print('positive: {}'.format(final_check_min_ret_long('pos', 6, 469)))
print('steps: {}'.format(min_pos_steps_ret[469]))
print('states: {}'.format(min_pos_states_ret[469]))

negative: 0.8000000000000002
positive: 1.0
steps: 5
states: 6


In [103]:
with open('min_neg_mod_return_long.json', 'w') as js:
    json.dump(min_neg_mod_ret[469].to_json(), js)
with open('min_pos_mod_return_long.json', 'w') as js:
    json.dump(min_pos_mod_ret[469].to_json(), js)

#### Minute return short

In [15]:
min_pos_states_ret_sh, min_pos_steps_ret_sh, min_pos_pi_ret_sh, min_pos_end_ret_sh, min_pos_trans_ret_sh, min_pos_dist_ret_sh, min_pos_mod_ret_sh, min_pos_testsets_ret_sh = train_hmm(minute.iloc[-800:,:], 'pos', 'return', 6)
min_neg_states_ret_sh, min_neg_steps_ret_sh, min_neg_pi_ret_sh, min_neg_end_ret_sh, min_neg_trans_ret_sh, min_neg_dist_ret_sh, min_neg_mod_ret_sh, min_neg_testsets_ret_sh = train_hmm(minute.iloc[-800:,:], 'neg',  'return', 6)

In [123]:
def final_check_min_ret_short(mode, col_n, ind):
    if mode == 'neg':
        return test_hmm(min_neg_testsets_ret_sh[ind], min_neg_mod_ret_sh[ind], min_pos_mod_ret_sh[ind], min_neg_steps_ret_sh[ind], col_n, 'neg' )
    elif mode == 'pos':
        return test_hmm(min_pos_testsets_ret_sh[ind], min_neg_mod_ret_sh[ind], min_pos_mod_ret_sh[ind], min_pos_steps_ret_sh[ind],col_n, 'pos' )

In [124]:
min_neg_ret_mod_sh, min_neg_ret_ind_sh = best_model(len( min_neg_mod_ret_sh), 6, final_check_min_ret_short)
print('Total accuracy: {}'.format(min_neg_ret_mod_sh))
print('Index: {}'.format(min_neg_ret_ind_sh))

Total accuracy: 2.0
Index: 413


In [94]:
print('negative: {}'.format(final_check_min_ret_short('neg', 6, 413)))
print('positive: {}'.format(final_check_min_ret_short('pos', 6, 413)))
print('steps: {}'.format(min_pos_steps_ret_sh[413]))
print('states: {}'.format(min_pos_states_ret_sh[413]))

negative: 1.0
positive: 1.0
steps: 5
states: 4


In [95]:
with open('min_neg_mod_return_short.json', 'w') as js:
    json.dump( min_neg_mod_ret_sh[413].to_json(), js)
with open('min_pos_mod_return_short.json', 'w') as js:
    json.dump( min_pos_mod_ret_sh[413].to_json(), js)

##### Minute close long

In [104]:
min_pos_states_cl_l, min_pos_steps_cl_l, min_pos_pi_cl_l, min_pos_end_cl_l, min_pos_trans_cl_l, min_pos_dist_cl_l, min_pos_mod_cl_l, min_pos_testsets_cl_l = train_hmm(minute.iloc[-1858:,:], 'pos', 'close', 4)
min_neg_states_cl_l, min_neg_steps_cl_l, min_neg_pi_cl_l, min_neg_end_cl_l, min_neg_trans_cl_l, min_neg_dist_cl_l, min_neg_mod_cl_l, min_neg_testsets_cl_l= train_hmm(minute.iloc[-1858:,:], 'neg',  'close', 4)

In [112]:
def final_check_min_close_long(mode, col_n, ind):
    if mode == 'neg':
        return test_hmm(min_neg_testsets_cl_l[ind], min_neg_mod_cl_l[ind], min_pos_mod_cl_l[ind], min_neg_steps_cl_l[ind], col_n, 'neg' )
    elif mode == 'pos':
        return test_hmm(min_pos_testsets_cl_l[ind], min_neg_mod_cl_l[ind], min_pos_mod_cl_l[ind], min_pos_steps_cl_l[ind],col_n, 'pos' )

In [114]:
min_neg_cl_mod_l, min_cl_ret_ind_l = best_model(len( min_pos_mod_cl_l), 4, final_check_min_close_long)
print('Total accuracy: {}'.format(min_neg_cl_mod_l))
print('Index: {}'.format(min_cl_ret_ind_l))

Total accuracy: 1.4000000000000001
Index: 446


In [115]:
print('negative: {}'.format(final_check_min_close_long('neg', 4, 446)))
print('positive: {}'.format(final_check_min_close_long('pos', 4, 446)))
print('steps: {}'.format(min_pos_steps_cl_l[446]))
print('states: {}'.format(min_pos_states_cl_l[446]))

negative: 0.8000000000000002
positive: 0.6
steps: 5
states: 5


In [117]:
with open('min_neg_mod_close_long.json', 'w') as js:
    json.dump(min_neg_mod_cl_l[446].to_json(), js)
with open('min_pos_mod_close_long.json', 'w') as js:
    json.dump(min_pos_mod_cl_l[446].to_json(), js)

#### Minute close short

In [142]:
min_pos_states_cl_sh, min_pos_steps_cl_sh, min_pos_pi_cl_sh, min_pos_end_cl_sh, min_pos_trans_cl_sh, min_pos_dist_cl_sh, min_pos_mod_cl_sh, min_pos_testsets_cl_sh = train_hmm(minute.iloc[-800:,:], 'pos', 'close', 4)
min_neg_states_cl_sh, min_neg_steps_cl_sh, min_neg_pi_cl_sh, min_neg_end_cl_sh, min_neg_trans_cl_sh, min_neg_dist_cl_sh, min_neg_mod_cl_sh, min_neg_testsets_cl_sh= train_hmm(minute.iloc[-800:,:], 'neg',  'close', 4)

In [143]:
def final_check_min_close_short(mode, col_n, ind):
    if mode == 'neg':
        return test_hmm(min_neg_testsets_cl_sh[ind], min_neg_mod_cl_sh[ind], min_pos_mod_cl_sh[ind], min_neg_steps_cl_sh[ind], col_n, 'neg' )
    elif mode == 'pos':
        return test_hmm(min_pos_testsets_cl_sh[ind], min_neg_mod_cl_sh[ind], min_pos_mod_cl_sh[ind], min_pos_steps_cl_sh[ind],col_n, 'pos' )

In [144]:
min_neg_cl_mod_sh, min_cl_ret_ind_sh = best_model(len(min_pos_mod_cl_sh), 4, final_check_min_close_short)
print('Total accuracy: {}'.format(min_neg_cl_mod_sh))
print('Index: {}'.format(min_cl_ret_ind_sh))

Total accuracy: 1.4000000000000001
Index: 396


In [146]:
print('negative: {}'.format(final_check_min_close_short('neg', 4, 396)))
print('positive: {}'.format(final_check_min_close_short('pos', 4, 396)))
print('steps: {}'.format(min_pos_steps_cl_sh[396]))
print('states: {}'.format(min_pos_states_cl_sh[396]))

negative: 0.4000000000000001
positive: 1.0
steps: 5
states: 4


In [147]:
with open('min_neg_mod_close_short.json', 'w') as js:
    json.dump( min_neg_mod_cl_sh[396].to_json(), js)
with open('min_pos_mod_close_short.json', 'w') as js:
    json.dump( min_pos_mod_cl_sh[396].to_json(), js)

#### Multivariate day close short

In [17]:
m_day_pos_states_cl_sh, m_day_pos_steps_cl_sh, m_day_pos_pi_cl_sh, m_day_pos_end_cl_sh, m_day_pos_trans_cl_sh, m_day_pos_dist_cl_sh, m_day_pos_mod_cl_sh, m_day_pos_testsets_cl_sh = train_hmm(day[:800], 'pos', ['open','low','high','close'],[1,2,3,4])
m_day_neg_states_cl_sh, m_day_neg_steps_cl_sh, m_day_neg_pi_cl_sh, m_day_neg_end_cl_sh, m_day_neg_trans_cl_sh, m_day_neg_dist_cl_sh, m_day_neg_mod_cl_sh, m_day_neg_testsets_cl_sh = train_hmm(day[:800], 'neg', ['open','low','high','close'],[1,2,3,4])

In [18]:
def final_check_day_c_short_multi(mode, col_n, ind):
    if mode == 'neg':
        return test_hmm(m_day_neg_testsets_cl_sh[ind], m_day_neg_mod_cl_sh[ind], m_day_pos_mod_cl_sh[ind], m_day_neg_steps_cl_sh[ind], col_n, 'neg' )
    elif mode == 'pos':
        return test_hmm(m_day_pos_testsets_cl_sh[ind], m_day_neg_mod_cl_sh[ind],m_day_pos_mod_cl_sh[ind], m_day_pos_steps_cl_sh[ind],col_n, 'pos' )

In [32]:
m_day_neg_close_mod_sh,m_day_neg_close_ind_sh = best_model(len(m_day_pos_mod_cl_sh), 4, final_check_day_c_short_multi)
print('Total accuracy: {}'.format(m_day_neg_close_mod_sh))
print('Index: {}'.format(m_day_neg_close_ind_sh))

Total accuracy: 2.0
Index: 408


In [41]:
print('negative: {}'.format(final_check_day_c_short_multi('neg', 4, 408)))
print('positive: {}'.format(final_check_day_c_short_multi('pos', 4, 408)))
print('steps: {}'.format(m_day_pos_steps_cl_sh[408]))
print('states: {}'.format(m_day_pos_states_cl_sh[408]))

negative: 1.0
positive: 1.0
steps: 5
states: 4


In [42]:
with open('m_day_neg_mod_cl_short.json', 'w') as js:
    json.dump(m_day_neg_mod_cl_sh[408].to_json(), js)
with open('m_day_pos_mod_cl_short.json', 'w') as js:
    json.dump(m_day_pos_mod_cl_sh[408].to_json(), js)

#### Multivariate day close long

In [35]:
m_day_pos_states_cl_l, m_day_pos_steps_cl_l, m_day_pos_pi_cl_l, m_day_pos_end_cl_l, m_day_pos_trans_cl_l, m_day_pos_dist_cl_l, m_day_pos_mod_cl_l, m_day_pos_testsets_cl_l = train_hmm(day, 'pos', ['open','low','high','close'],[1,2,3,4])
m_day_neg_states_cl_l, m_day_neg_steps_cl_l, m_day_neg_pi_cl_l, m_day_neg_end_cl_l, m_day_neg_trans_cl_l, m_day_neg_dist_cl_l, m_day_neg_mod_cl_l, m_day_neg_testsets_cl_l = train_hmm(day, 'neg', ['open','low','high','close'],[1,2,3,4])

In [43]:
def final_check_day_c_long_multi(mode, col_n, ind):
    if mode == 'neg':
        return test_hmm(m_day_neg_testsets_cl_l[ind], m_day_neg_mod_cl_l[ind], m_day_pos_mod_cl_l[ind],  m_day_neg_steps_cl_l[ind], col_n, 'neg' )
    elif mode == 'pos':
        return test_hmm(m_day_pos_testsets_cl_l[ind], m_day_neg_mod_cl_l[ind],m_day_pos_mod_cl_l[ind],  m_day_pos_steps_cl_l[ind],col_n, 'pos' )

In [39]:
m_day_neg_close_mod_l,m_day_neg_close_ind_l = best_model(len(m_day_pos_mod_cl_l), 4, final_check_day_c_long_multi)
print('Total accuracy: {}'.format(m_day_neg_close_mod_l))
print('Index: {}'.format(m_day_neg_close_ind_l))

Total accuracy: 1.5714285714285714
Index: 387


In [44]:
print('negative: {}'.format(final_check_day_c_long_multi('neg', 4, 387)))
print('positive: {}'.format(final_check_day_c_long_multi('pos', 4, 387)))
print('steps: {}'.format(m_day_pos_steps_cl_l[387]))
print('states: {}'.format(m_day_pos_states_cl_l[387]))

negative: 1.0
positive: 0.5714285714285714
steps: 5
states: 3


In [45]:
with open('m_day_neg_mod_cl_long.json', 'w') as js:
    json.dump(m_day_neg_mod_cl_l[387].to_json(), js)
with open('m_day_pos_mod_cl_long.json', 'w') as js:
    json.dump(m_day_pos_mod_cl_l[387].to_json(), js)

#### Multivariate day return long

In [64]:
m_day_pos_states_ret_l, m_day_pos_steps_ret_l, m_day_pos_pi_ret_l, m_day_pos_end_ret_l, m_day_pos_trans_ret_l, m_day_pos_dist_ret_l, m_day_pos_mod_ret_l, m_day_pos_testsets_ret_l = train_hmm(day_ret, 'pos', ['overnight','open_ret','low_ret','high_ret','return'],[6,7,8,9,10])
m_day_neg_states_ret_l, m_day_neg_steps_ret_l, m_day_neg_pi_ret_l, m_day_neg_end_ret_l, m_day_neg_trans_ret_l, m_day_neg_dist_ret_l, m_day_neg_mod_ret_l, m_day_neg_testsets_ret_l = train_hmm(day_ret, 'neg', ['overnight','open_ret','low_ret','high_ret','return'],[6,7,8,9,10])

In [65]:
def final_check_day_ret_long_multi(mode, col_n, ind):
    if mode == 'neg':
        return test_hmm(m_day_neg_testsets_ret_l[ind], m_day_neg_mod_ret_l[ind], m_day_pos_mod_ret_l[ind],  m_day_neg_steps_ret_l[ind], col_n, 'neg' )
    elif mode == 'pos':
        return test_hmm(m_day_pos_testsets_ret_l[ind], m_day_neg_mod_ret_l[ind], m_day_pos_mod_ret_l[ind],  m_day_pos_steps_ret_l[ind],col_n, 'pos' )

In [72]:
m_day_neg_ret_mod_l,m_day_ret_ind_l = best_model(len(m_day_pos_mod_ret_l), 10, final_check_day_ret_long_multi)
print('Total accuracy: {}'.format(m_day_neg_ret_mod_l))
print('Index: {}'.format(m_day_ret_ind_l))

Total accuracy: 1.5714285714285714
Index: 479


In [70]:
print('negative: {}'.format(final_check_day_ret_long_multi('neg', 10, 479)))
print('positive: {}'.format(final_check_day_ret_long_multi('pos', 10, 479)))
print('steps: {}'.format(m_day_pos_steps_ret_l[479]))
print('states: {}'.format(m_day_pos_states_ret_l[479]))

negative: 1.0
positive: 0.5714285714285714
steps: 5
states: 6


In [73]:
with open('m_day_neg_mod_ret_long.json', 'w') as js:
    json.dump(m_day_neg_mod_ret_l[479].to_json(), js)
with open('m_day_pos_mod_ret_long.json', 'w') as js:
    json.dump(m_day_pos_mod_ret_l[479].to_json(), js)

#### Multivariate day return short

In [75]:
m_day_pos_states_ret_sh, m_day_pos_steps_ret_sh, m_day_pos_pi_ret_sh, m_day_pos_end_ret_sh, m_day_pos_trans_ret_sh, m_day_pos_dist_ret_sh, m_day_pos_mod_ret_sh, m_day_pos_testsets_ret_sh = train_hmm(day_ret[:800], 'pos', ['overnight','open_ret','low_ret','high_ret','return'],[6,7,8,9,10])
m_day_neg_states_ret_sh, m_day_neg_steps_ret_sh, m_day_neg_pi_ret_sh, m_day_neg_end_ret_sh, m_day_neg_trans_ret_sh, m_day_neg_dist_ret_sh, m_day_neg_mod_ret_sh, m_day_neg_testsets_ret_sh = train_hmm(day_ret[:800], 'neg', ['overnight','open_ret','low_ret','high_ret','return'],[6,7,8,9,10])

In [76]:
def final_check_day_ret_short_multi(mode, col_n, ind):
    if mode == 'neg':
        return test_hmm(m_day_neg_testsets_ret_sh[ind], m_day_neg_mod_ret_sh[ind], m_day_pos_mod_ret_sh[ind],  m_day_neg_steps_ret_sh[ind], col_n, 'neg' )
    elif mode == 'pos':
        return test_hmm(m_day_pos_testsets_ret_sh[ind], m_day_neg_mod_ret_sh[ind], m_day_pos_mod_ret_sh[ind],  m_day_pos_steps_ret_sh[ind],col_n, 'pos' )

In [77]:
m_day_neg_ret_mod_sh,m_day_ret_ind_sh = best_model(len(m_day_pos_mod_ret_sh), 10, final_check_day_ret_short_multi)
print('Total accuracy: {}'.format(m_day_neg_ret_mod_sh))
print('Index: {}'.format(m_day_ret_ind_sh))

Total accuracy: 1.2916666666666665
Index: 345


In [84]:
print('negative: {}'.format(final_check_day_ret_short_multi('neg', 10, 345)))
print('positive: {}'.format(final_check_day_ret_short_multi('pos', 10, 345)))
print('steps: {}'.format(m_day_pos_steps_ret_sh[345]))
print('states: {}'.format(m_day_pos_states_ret_sh[345]))

negative: 0.6666666666666666
positive: 0.625
steps: 4
states: 8


In [85]:
with open('m_day_neg_mod_ret_short.json', 'w') as js:
    json.dump(m_day_neg_mod_ret_sh[345].to_json(), js)
with open('m_day_pos_mod_ret_short.json', 'w') as js:
    json.dump(m_day_pos_mod_ret_sh[345].to_json(), js)

#### Multivariate hour close long

In [101]:
m_hour_pos_states_cl_l, m_hour_pos_steps_cl_l, m_hour_pos_pi_cl_l, m_hour_pos_end_cl_l, m_hour_pos_trans_cl_l, m_hour_pos_dist_cl_l, m_hour_pos_mod_cl_l, m_hour_pos_testsets_cl_l = train_hmm(hour.iloc[-1858:,:], 'pos', ['open','low','high','close'],[1,2,3,4])
m_hour_neg_states_cl_l, m_hour_neg_steps_cl_l, m_hour_neg_pi_cl_l, m_hour_neg_end_cl_l, m_hour_neg_trans_cl_l, m_hour_neg_dist_cl_l, m_hour_neg_mod_cl_l, m_hour_neg_testsets_cl_l = train_hmm(hour.iloc[-1858:,:], 'neg', ['open','low','high','close'],[1,2,3,4])

In [102]:
def final_check_hour_close_long_multi(mode, col_n, ind):
    if mode == 'neg':
        return test_hmm(m_hour_neg_testsets_cl_l[ind], m_hour_neg_mod_cl_l[ind], m_hour_pos_mod_cl_l[ind],  m_hour_neg_steps_cl_l[ind], col_n, 'neg' )
    elif mode == 'pos':
        return test_hmm(m_hour_pos_testsets_cl_l[ind], m_hour_neg_mod_cl_l[ind], m_hour_pos_mod_cl_l[ind],  m_hour_pos_steps_cl_l[ind],col_n, 'pos' )

In [103]:
m_hour_cl_mod_l,m_hour_cl_ind_l = best_model(len(m_hour_pos_mod_cl_l), 4, final_check_hour_close_long_multi)
print('Total accuracy: {}'.format(m_hour_cl_mod_l))
print('Index: {}'.format(m_hour_cl_ind_l))

Total accuracy: 1.6
Index: 360


In [105]:
print('negative: {}'.format(final_check_hour_close_long_multi('neg', 4, 360)))
print('positive: {}'.format(final_check_hour_close_long_multi('pos', 4, 360)))
print('steps: {}'.format(m_hour_pos_steps_cl_l[360]))
print('states: {}'.format(m_hour_pos_states_cl_l[360]))

negative: 0.6
positive: 1.0
steps: 5
states: 3


In [106]:
with open('m_hour_neg_mod_cl_long.json', 'w') as js:
    json.dump(m_hour_neg_mod_cl_l[360].to_json(), js)
with open('m_hour_pos_mod_cl_long.json', 'w') as js:
    json.dump(m_hour_pos_mod_cl_l[360].to_json(), js)

#### Multivariate hour close short

In [107]:
m_hour_pos_states_cl_sh, m_hour_pos_steps_cl_sh, m_hour_pos_pi_cl_sh, m_hour_pos_end_cl_sh, m_hour_pos_trans_cl_sh, m_hour_pos_dist_cl_l, m_hour_pos_mod_cl_sh, m_hour_pos_testsets_cl_sh = train_hmm(hour.iloc[-800:,:], 'pos', ['open','low','high','close'],[1,2,3,4])
m_hour_neg_states_cl_sh, m_hour_neg_steps_cl_sh, m_hour_neg_pi_cl_sh, m_hour_neg_end_cl_sh, m_hour_neg_trans_cl_sh, m_hour_neg_dist_cl_l, m_hour_neg_mod_cl_sh, m_hour_neg_testsets_cl_sh = train_hmm(hour.iloc[-800:,:], 'neg', ['open','low','high','close'],[1,2,3,4])

In [108]:
def final_check_hour_close_short_multi(mode, col_n, ind):
    if mode == 'neg':
        return test_hmm(m_hour_neg_testsets_cl_sh[ind], m_hour_neg_mod_cl_sh[ind], m_hour_pos_mod_cl_sh[ind],  m_hour_neg_steps_cl_sh[ind], col_n, 'neg' )
    elif mode == 'pos':
        return test_hmm(m_hour_pos_testsets_cl_sh[ind], m_hour_neg_mod_cl_sh[ind], m_hour_pos_mod_cl_sh[ind],  m_hour_pos_steps_cl_sh[ind],col_n, 'pos' )

In [109]:
m_hour_cl_mod_sh,m_hour_cl_ind_sh = best_model(len(m_hour_pos_mod_cl_sh), 4, final_check_hour_close_short_multi)
print('Total accuracy: {}'.format(m_hour_cl_mod_sh))
print('Index: {}'.format(m_hour_cl_ind_sh))

Total accuracy: 1.475
Index: 239


In [110]:
print('negative: {}'.format(final_check_hour_close_short_multi('neg', 4, 239)))
print('positive: {}'.format(final_check_hour_close_short_multi('pos', 4, 239)))
print('steps: {}'.format(m_hour_pos_steps_cl_sh[239]))
print('states: {}'.format(m_hour_pos_states_cl_sh[239]))

negative: 0.6
positive: 0.875
steps: 4
states: 4


In [111]:
with open('m_hour_neg_mod_cl_short.json', 'w') as js:
    json.dump(m_hour_neg_mod_cl_sh[239].to_json(), js)
with open('m_hour_pos_mod_cl_short.json', 'w') as js:
    json.dump(m_hour_pos_mod_cl_sh[239].to_json(), js)

#### Multivariate hour return long

In [125]:
m_hour_pos_states_ret_l, m_hour_pos_steps_ret_l, m_hour_pos_pi_ret_l, m_hour_pos_end_ret_l, m_hour_pos_trans_ret_l, m_hour_pos_dist_ret_l, m_hour_pos_mod_ret_l, m_hour_pos_testsets_ret_l = train_hmm(hour_ret.iloc[-1858:,:], 'pos', ['open_ret','low_ret','high_ret','return'],[6,7,8,9])
m_hour_neg_states_ret_l, m_hour_neg_steps_ret_l, m_hour_neg_pi_ret_l, m_hour_neg_end_ret_l, m_hour_neg_trans_ret_l, m_hour_neg_dist_ret_l, m_hour_neg_mod_ret_l, m_hour_neg_testsets_ret_l = train_hmm(hour_ret.iloc[-1858:,:], 'neg', ['open_ret','low_ret','high_ret','return'],[6,7,8,9])

In [128]:
def final_check_hour_ret_long_multi(mode, col_n, ind):
    if mode == 'neg':
        return test_hmm(m_hour_neg_testsets_ret_l[ind], m_hour_neg_mod_ret_l[ind], m_hour_pos_mod_ret_l[ind],  m_hour_neg_steps_ret_l[ind], col_n, 'neg' )
    elif mode == 'pos':
        return test_hmm(m_hour_pos_testsets_ret_l[ind], m_hour_neg_mod_ret_l[ind], m_hour_pos_mod_ret_l[ind],  m_hour_pos_steps_ret_l[ind],col_n, 'pos' )

In [134]:
m_hour_neg_ret_mod_l,m_hour_ret_ind_l = best_model(len(m_hour_pos_mod_ret_l), 9, final_check_hour_ret_long_multi)
print('Total accuracy: {}'.format(m_hour_neg_ret_mod_l))
print('Index: {}'.format(m_hour_ret_ind_l))

Total accuracy: 1.8571428571428572
Index: 510


In [135]:
print('negative: {}'.format(final_check_hour_ret_long_multi('neg', 9, 510)))
print('positive: {}'.format(final_check_hour_ret_long_multi('pos', 9, 510)))
print('steps: {}'.format(m_hour_pos_steps_ret_l[510]))
print('states: {}'.format(m_hour_pos_states_ret_l[510]))

negative: 1.0
positive: 0.8571428571428571
steps: 5
states: 8


In [136]:
with open('m_hour_neg_mod_ret_long.json', 'w') as js:
    json.dump(m_hour_neg_mod_ret_l[510].to_json(), js)
with open('m_hour_pos_mod_ret_long.json', 'w') as js:
    json.dump(m_hour_pos_mod_ret_l[510].to_json(), js)

#### Multivariate hour return short

In [132]:
m_hour_pos_states_ret_sh, m_hour_pos_steps_ret_sh, m_hour_pos_pi_ret_sh, m_hour_pos_end_ret_sh, m_hour_pos_trans_ret_sh, m_hour_pos_dist_ret_sh, m_hour_pos_mod_ret_sh, m_hour_pos_testsets_ret_sh = train_hmm(hour_ret.iloc[-800:,:], 'pos', ['open_ret','low_ret','high_ret','return'],[6,7,8,9])
m_hour_neg_states_ret_sh, m_hour_neg_steps_ret_sh, m_hour_neg_pi_ret_sh, m_hour_neg_end_ret_sh, m_hour_neg_trans_ret_sh, m_hour_neg_dist_ret_sh, m_hour_neg_mod_ret_sh, m_hour_neg_testsets_ret_sh = train_hmm(hour_ret.iloc[-800:,:], 'neg', ['open_ret','low_ret','high_ret','return'],[6,7,8,9])

In [137]:
def final_check_hour_ret_short_multi(mode, col_n, ind):
    if mode == 'neg':
        return test_hmm(m_hour_neg_testsets_ret_sh[ind], m_hour_neg_mod_ret_sh[ind], m_hour_pos_mod_ret_sh[ind],  m_hour_neg_steps_ret_sh[ind], col_n, 'neg' )
    elif mode == 'pos':
        return test_hmm(m_hour_pos_testsets_ret_sh[ind], m_hour_neg_mod_ret_sh[ind], m_hour_pos_mod_ret_sh[ind],  m_hour_pos_steps_ret_sh[ind],col_n, 'pos' )

In [138]:
m_hour_neg_ret_mod_sh,m_hour_ret_ind_sh = best_model(len(m_hour_pos_mod_ret_sh), 9, final_check_hour_ret_short_multi)
print('Total accuracy: {}'.format(m_hour_neg_ret_mod_sh))
print('Index: {}'.format(m_hour_ret_ind_sh))

Total accuracy: 2.0
Index: 535


In [139]:
print('negative: {}'.format(final_check_hour_ret_short_multi('neg', 9, 535)))
print('positive: {}'.format(final_check_hour_ret_short_multi('pos', 9, 535)))
print('steps: {}'.format(m_hour_pos_steps_ret_sh[535]))
print('states: {}'.format(m_hour_pos_states_ret_sh[535]))

negative: 1.0
positive: 1.0
steps: 5
states: 8


In [140]:
with open('m_hour_neg_mod_ret_short.json', 'w') as js:
    json.dump(m_hour_neg_mod_ret_sh[535].to_json(), js)
with open('m_hour_pos_mod_ret_short.json', 'w') as js:
    json.dump(m_hour_pos_mod_ret_sh[535].to_json(), js)

#### Multivariate minute close short

In [167]:
m_min_pos_states_cl_sh, m_min_pos_steps_cl_sh, m_min_pos_pi_cl_sh, m_min_pos_end_cl_sh, m_min_pos_trans_cl_sh, m_min_pos_dist_cl_sh, m_min_pos_mod_cl_sh, m_min_pos_testsets_cl_sh = train_hmm(minute.iloc[-800:,:], 'pos', ['open','low','high','close'],[1,2,3,4])
m_min_neg_states_cl_sh, m_min_neg_steps_cl_sh, m_min_neg_pi_cl_sh, m_min_neg_end_cl_sh, m_min_neg_trans_cl_sh, m_min_neg_dist_cl_sh, m_min_neg_mod_cl_sh, m_min_neg_testsets_cl_sh = train_hmm(minute.iloc[-800:,:], 'neg', ['open','low','high','close'],[1,2,3,4])

In [168]:
def final_check_min_close_short_multi(mode, col_n, ind):
    if mode == 'neg':
        return test_hmm(m_min_neg_testsets_cl_sh[ind], m_min_neg_mod_cl_sh[ind], m_min_pos_mod_cl_sh[ind],  m_min_neg_steps_cl_sh[ind], col_n, 'neg' )
    elif mode == 'pos':
        return test_hmm(m_min_pos_testsets_cl_sh[ind], m_min_neg_mod_cl_sh[ind], m_min_pos_mod_cl_sh[ind],  m_min_pos_steps_cl_sh[ind],col_n, 'pos' )

In [169]:
m_min_cl_mod_sh,m_min_cl_ind_sh = best_model(len(m_min_pos_mod_cl_sh), 4, final_check_min_close_short_multi)
print('Total accuracy: {}'.format(m_min_cl_mod_sh))
print('Index: {}'.format(m_min_cl_ind_sh))

Total accuracy: 1.4555555555555557
Index: 154


In [172]:
print('negative: {}'.format(final_check_min_close_short_multi('neg', 4, 154)))
print('positive: {}'.format(final_check_min_close_short_multi('pos', 4, 154)))
print('steps: {}'.format(m_min_pos_steps_cl_sh[154]))
print('states: {}'.format(m_min_pos_states_cl_sh[154]))

negative: 0.5555555555555556
positive: 0.9
steps: 3
states: 8


In [173]:
with open('m_min_neg_mod_cl_short.json', 'w') as js:
    json.dump(m_min_neg_mod_cl_sh[154].to_json(), js)
with open('m_min_pos_mod_cl_short.json', 'w') as js:
    json.dump(m_min_pos_mod_cl_sh[154].to_json(), js)

#### Multivariate minute close long

In [174]:
m_min_pos_states_cl_l, m_min_pos_steps_cl_l, m_min_pos_pi_cl_l, m_min_pos_end_cl_l, m_min_pos_trans_cl_l, m_min_pos_dist_cl_l, m_min_pos_mod_cl_l, m_min_pos_testsets_cl_l = train_hmm(minute.iloc[-1858:,:], 'pos', ['open','low','high','close'],[1,2,3,4])
m_min_neg_states_cl_l, m_min_neg_steps_cl_l, m_min_neg_pi_cl_l, m_min_neg_end_cl_l, m_min_neg_trans_cl_l, m_min_neg_dist_cl_l, m_min_neg_mod_cl_l, m_min_neg_testsets_cl_l = train_hmm(minute.iloc[-1858:,:], 'neg', ['open','low','high','close'],[1,2,3,4])

In [175]:
def final_check_min_close_long_multi(mode, col_n, ind):
    if mode == 'neg':
        return test_hmm(m_min_neg_testsets_cl_l[ind], m_min_neg_mod_cl_l[ind], m_min_pos_mod_cl_l[ind],  m_min_neg_steps_cl_l[ind], col_n, 'neg' )
    elif mode == 'pos':
        return test_hmm(m_min_pos_testsets_cl_l[ind], m_min_neg_mod_cl_l[ind], m_min_pos_mod_cl_l[ind],  m_min_pos_steps_cl_l[ind],col_n, 'pos' )

In [176]:
m_min_cl_mod_l,m_min_cl_ind_l = best_model(len(m_min_pos_mod_cl_l), 4, final_check_min_close_long_multi)
print('Total accuracy: {}'.format(m_min_cl_mod_l))
print('Index: {}'.format(m_min_cl_ind_l))

Total accuracy: 1.3
Index: 391


In [177]:
print('negative: {}'.format(final_check_min_close_long_multi('neg', 4, 391)))
print('positive: {}'.format(final_check_min_close_long_multi('pos', 4, 391)))
print('steps: {}'.format(m_min_pos_steps_cl_l[391]))
print('states: {}'.format(m_min_pos_states_cl_l[391]))

negative: 0.9
positive: 0.4000000000000001
steps: 5
states: 4


In [179]:
with open('m_min_neg_mod_cl_long.json', 'w') as js:
    json.dump(m_min_neg_mod_cl_l[391].to_json(), js)
with open('m_min_pos_mod_cl_long.json', 'w') as js:
    json.dump(m_min_pos_mod_cl_l[391].to_json(), js)

#### Multivariate minute return long

In [23]:
m_min_pos_states_ret_l, m_min_pos_steps_ret_l, m_min_pos_pi_ret_l, m_min_pos_end_ret_l, m_min_pos_trans_ret_l, m_min_pos_dist_ret_l, m_min_pos_mod_ret_l, m_min_pos_testsets_ret_l = train_hmm(minute_ret.iloc[-1858:,:], 'pos', ['open_ret','low_ret','high_ret','return'],[6,7,8,9])
m_min_neg_states_ret_l, m_min_neg_steps_ret_l, m_min_neg_pi_ret_l, m_min_neg_end_ret_l, m_min_neg_trans_ret_l, m_min_neg_dist_ret_l, m_min_neg_mod_ret_l, m_min_neg_testsets_ret_l = train_hmm(minute_ret.iloc[-1858:,:], 'neg', ['open_ret','low_ret','high_ret','return'],[6,7,8,9])

In [24]:
def final_check_min_ret_long_multi(mode, col_n, ind):
    if mode == 'neg':
        return test_hmm(m_min_neg_testsets_ret_l[ind], m_min_neg_mod_ret_l[ind], m_min_pos_mod_ret_l[ind],  m_min_neg_steps_ret_l[ind], col_n, 'neg' )
    elif mode == 'pos':
        return test_hmm(m_min_pos_testsets_ret_l[ind], m_min_neg_mod_ret_l[ind], m_min_pos_mod_ret_l[ind],  m_min_pos_steps_ret_l[ind],col_n, 'pos' )

In [25]:
m_min_ret_mod_l, m_min_ret_ind_l = best_model(len(m_min_pos_mod_ret_l), 9, final_check_min_ret_long_multi)
print('Total accuracy: {}'.format(m_min_ret_mod_l))
print('Index: {}'.format(m_min_ret_ind_l))

Total accuracy: 1.393939393939394
Index: 482


In [26]:
print('negative: {}'.format(final_check_min_ret_long_multi('neg', 9, 482)))
print('positive: {}'.format(final_check_min_ret_long_multi('pos', 9, 482)))
print('steps: {}'.format(m_min_pos_steps_ret_l[482]))
print('states: {}'.format(m_min_pos_states_ret_l[482]))

negative: 0.6666666666666666
positive: 0.7272727272727273
steps: 5
states: 7


In [29]:
with open('m_min_neg_mod_ret_long.json', 'w') as js:
    json.dump(m_min_neg_mod_ret_l[482].to_json(), js)
with open('m_min_pos_mod_ret_long.json', 'w') as js:
    json.dump(m_min_pos_mod_ret_l[482].to_json(), js)

#### Multivariate minute return short

In [30]:
m_min_pos_states_ret_sh, m_min_pos_steps_ret_sh, m_min_pos_pi_ret_sh, m_min_pos_end_ret_sh, m_min_pos_trans_ret_sh, m_min_pos_dist_ret_sh, m_min_pos_mod_ret_sh, m_min_pos_testsets_ret_sh = train_hmm(minute_ret.iloc[-800:,:], 'pos', ['open_ret','low_ret','high_ret','return'],[6,7,8,9])
m_min_neg_states_ret_sh, m_min_neg_steps_ret_sh, m_min_neg_pi_ret_sh, m_min_neg_end_ret_sh, m_min_neg_trans_ret_sh, m_min_neg_dist_ret_sh, m_min_neg_mod_ret_sh, m_min_neg_testsets_ret_sh = train_hmm(minute_ret.iloc[-800:,:], 'neg', ['open_ret','low_ret','high_ret','return'],[6,7,8,9])

In [31]:
def final_check_min_ret_short_multi(mode, col_n, ind):
    if mode == 'neg':
        return test_hmm(m_min_neg_testsets_ret_sh[ind], m_min_neg_mod_ret_sh[ind], m_min_pos_mod_ret_sh[ind],  m_min_neg_steps_ret_sh[ind], col_n, 'neg' )
    elif mode == 'pos':
        return test_hmm(m_min_pos_testsets_ret_sh[ind], m_min_neg_mod_ret_sh[ind], m_min_pos_mod_ret_sh[ind],  m_min_pos_steps_ret_sh[ind],col_n, 'pos' )

In [32]:
m_min_ret_mod_sh, m_min_ret_ind_sh = best_model(len(m_min_pos_mod_ret_sh), 9, final_check_min_ret_short_multi)
print('Total accuracy: {}'.format(m_min_ret_mod_sh))
print('Index: {}'.format(m_min_ret_ind_sh))

Total accuracy: 1.625
Index: 177


In [33]:
print('negative: {}'.format(final_check_min_ret_short_multi('neg', 9, 177)))
print('positive: {}'.format(final_check_min_ret_short_multi('pos', 9, 177)))
print('steps: {}'.format(m_min_pos_steps_ret_sh[177]))
print('states: {}'.format(m_min_pos_states_ret_sh[177]))

negative: 1.0
positive: 0.625
steps: 3
states: 8


In [35]:
with open('m_min_neg_mod_ret_short.json', 'w') as js:
    json.dump(m_min_neg_mod_ret_sh[177].to_json(), js)
with open('m_min_pos_mod_ret_short.json', 'w') as js:
    json.dump(m_min_pos_mod_ret_sh[177].to_json(), js)