In [381]:
import warnings
import time
import datetime
import sys
import numpy as np

from matplotlib import cm, pyplot as plt
import matplotlib.dates as dates
import hmmlearn
from matplotlib.lines import Line2D
from hmmlearn import hmm

import random
import pandas as pd
from scipy import stats
from sklearn import preprocessing
from sklearn.metrics import confusion_matrix

pd.options.mode.use_inf_as_na = True

In [54]:
df = pd.read_csv('SP500.csv')

In [55]:
date_list = pd.to_datetime(df['Date'])
volume_list = df['Volume']
open_list = df['Volume']
close_list = df['Close']

In [56]:
# feature 1
# everyday's closing prices' difference in percentage 
feature_1=np.diff(close_list)/close_list[:-1]

In [57]:
rise_or_drop = []

for i in range(0,17319):
    if feature_1[i] > 0: 
        rise_or_drop.append(1)
    else: rise_or_drop.append(0)

In [58]:
# feature 2
# everyday's volume
feature_2=np.diff(volume_list)/volume_list[:-1]

In [59]:
more_or_less = []

for i in range(0,17319):
    if feature_2[i] > 0: 
        more_or_less.append(1)
    else: more_or_less.append(0)

In [201]:
x = pd.DataFrame(list(zip(rise_or_drop,more_or_less)), 
             columns = ['Rise or Drop','More or Less'],
            index = np.arange(1,17320,1))

In [207]:
x.tail()

Unnamed: 0,Rise or Drop,More or Less
17315,0,1
17316,1,0
17317,0,1
17318,0,0
17319,1,1


In [221]:
predict_10_year= x[13669:17320]

In [229]:
predict_10_year = predict_10_year.set_index(np.arange(1,3651,1))
predict_10_year.tail()

Unnamed: 0,Rise or Drop,More or Less
3646,0,1
3647,1,0
3648,0,1
3649,0,0
3650,1,1


In [248]:
len(rise_or_drop[13719:17318])

3599

In [420]:
# build HMM model
from hmmlearn.hmm import MultinomialHMM
model_10=hmm.MultinomialHMM(n_components=10, tol=1e-12, n_iter=10000)

In [421]:
model_10 = model_10.fit(x)

In [444]:
# 10 hidden states
# set a week as a observation window
past_likelihood = []
K = 5

for i in range(0,17319 - K,1):
    past_likelihood = np.append(past_likelihood, model_10.score((x[i:i + K])))

In [453]:
past_likelihood

array([ -5.42549647,  -6.93545919,  -6.7724909 , ..., -14.73354288,
       -14.50283148, -13.41886374])

In [446]:
likelihood_diff_idx_1 = []
past_similar = []


for i in range(3650 - K - 1,0,-1):
    curr_likelihood = model_10.score(predict_10_year[i:i+K])
    likelihood_diff_idx = np.argmin(np.absolute(past_likelihood[0:13719] - curr_likelihood))
    likelihood_diff_idx_1.append(likelihood_diff_idx+K+1)
    past_similar.append(rise_or_drop[likelihood_diff_idx+K+1])

In [431]:
len(past_similar)

3599

In [447]:
rise_or_drop_2 = list(past_similar)
rise_or_drop_2

[1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,


In [476]:
cm = confusion_matrix(rise_or_drop_1[0:30],rise_or_drop_2[0:30])

In [477]:
accuracy1=(cm[0,0]+cm[1,1])/sum(sum(cm))
sensitivity1 = cm[0,0]/(cm[0,0]+cm[0,1])
specificity1 = cm[1,1]/(cm[1,0]+cm[1,1])

print("The Score of Training set by 10-HMM prediction is ", accuracy1)
print("The Sensitivity of Training set by 10-HMM prediction is ", sensitivity1)
print("The Specificity of Training set by 10-HMM prediction is ", specificity1)

The Score of Training set by 10-HMM prediction is  0.7
The Sensitivity of Training set by 10-HMM prediction is  0.6190476190476191
The Specificity of Training set by 10-HMM prediction is  0.8888888888888888


In [96]:
# likelihood_diff_idx_1 = []
# # past_similar = 

# for i in range(0,17314,1):
#     curr_likelihood = model_10.score(x[i:i+5])
#     likelihood_diff_idx = np.argmin(np.absolute((past_likelihood[:i]+past_likelihood[i+1:]) - curr_likelihood))
#     likelihood_diff_idx_1.append(likelihood_diff_idx)
# #     past_similar.append(rise_or_drop[likelihood_diff_idx + 6])

ValueError: operands could not be broadcast together with shapes (0,) (17313,) 