In [2]:
import keras
import pandas as pd
import numpy as np
import math
from matplotlib import pyplot as plt
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Activation, LSTM, Input, CuDNNLSTM
from keras.optimizers import Adam
from scipy import stats
from sklearn.metrics import confusion_matrix
import tensorflow as tf
# np.random.seed(1234)
# tf.random.set_seed(1234)

In [6]:
lookback = 50
filepath = 'KOSPI_200_lg20_lb50_la20_S+labels.csv'
data=pd.read_csv(filepath, names=range(1,lookback+3))
data.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,43,44,45,46,47,48,49,50,51,52
0,72.156843,72.679548,73.137053,73.536925,73.875597,74.138146,74.294249,74.335228,74.279909,74.152373,...,66.118288,66.771242,67.471419,68.1828,68.890996,69.587529,70.271491,70.940678,71.572341,73.2
1,68.11024,67.460332,66.856603,66.305601,65.796264,65.32746,64.891928,64.501064,64.176832,63.929009,...,75.78877,75.931664,76.052782,76.151534,76.238121,76.311848,76.36982,76.405812,76.419163,66.31
2,65.414463,65.788523,66.238216,66.744549,67.30179,67.904195,68.543535,69.218359,69.914829,70.611722,...,75.004865,74.708214,74.396448,74.086279,73.784942,73.488658,73.191799,72.879598,72.558549,69.83
3,75.491594,75.664301,75.824537,75.960697,76.071852,76.160932,76.238881,76.306062,76.360876,76.396083,...,68.450306,68.118999,67.806499,67.526319,67.274883,67.05261,66.871211,66.740938,66.681958,58.36
4,75.535799,75.280786,75.004863,74.707916,74.396235,74.085912,73.784564,73.488371,73.191601,72.879571,...,69.726275,70.026747,70.302626,70.534523,70.701968,70.798931,70.814282,70.738479,70.577335,64.54


In [7]:
stockdata = np.array(data)
print(stockdata.shape)

(238, 52)


In [8]:
def create_dataset(StockData, Ratio_train=0.7,Standardization=False):
    from sklearn.preprocessing import minmax_scale
    StockData = minmax_scale(StockData, axis=1, copy=True,feature_range=(1.0E-5,1))
    Input_arr, Label_arr = [], [],
    nb_data, LookBack = StockData.shape[0],StockData.shape[1]-2
    for i in range(nb_data):
        temp=[]
        for j in range(LookBack,0,-1):
            temp.append((StockData[i-2]-StockData[i,LookBack-j])/StockData[i,LookBack-j])
        Input_arr.append(temp)
        Label_arr.append((StockData[i,-1]-StockData[i,-2])/StockData[i,-2])
    
    Input_arr = np.array(Input_arr)
    Label_arr = np.array(Label_arr)
    nb_train = int(nb_data*Ratio_train)
    Input_train, Label_train = Input_arr[:nb_train], Label_arr[:nb_train]
    Input_test, Label_test = Input_arr[nb_train:], Label_arr[nb_train:]
    
    if (Standardization):
        Input_train=stats.zscore(np.array(Input_train).reshape(-1, 1))
        Label_train=stats.zscore(np.array(Label_train).reshape(-1, 1))
        Input_test=stats.zscore(np.array(Input_test).reshape(-1, 1))
        Label_test=stats.zscore(np.array(Label_test).reshape(-1, 1))
    
    Input_train, Label_train=np.array(Input_train).reshape(-1,LookBack,1), np.array(Label_train).flatten()
    Input_test, Label_test=np.array(Input_test).reshape(-1,LookBack,1), np.array(Label_test).flatten()
    Input_data, Label_data=np.array(Input_arr).reshape(-1,LookBack,1), np.array(Label_arr).flatten()
    
    return Input_data, Label_data, Input_train, Label_train, Input_test, Label_test


In [9]:
standardization=False
ratio = 0.7
input_data, label_data, input_train, label_train, input_test, label_test = create_dataset(stockdata,Ratio_train=ratio,Standardization=standardization)
print('shape of data set is ',input_data.shape, label_data.shape)
print('shape of training set is ',input_train.shape, label_train.shape)
print('shape of test set is ',input_test.shape, label_test.shape)

shape of data set is  (12376, 50, 1) (238,)
shape of training set is  (8632, 50, 1) (166,)
shape of test set is  (3744, 50, 1) (72,)


In [10]:
# Run, if Classification
def trans_binary(data):
    binary=[]
    for i in range(len(data)):
        binary.append( (lambda p: 1 if p>=0 else 0)(data[i]) )
    return np.array(binary)
label_data_binary=trans_binary(label_data)
label_train_binary=trans_binary(label_train)
label_test_binary=trans_binary(label_test)
print(label_train_binary[:11])

[1 0 0 0 0 1 1 1 1 1 1]


In [11]:
class LossHistory(keras.callbacks.Callback):
    def init(self):
        self.loss = []
        self.accuracy = []
        self.val_loss = []
        self.val_accuracy = []
        
        
    def on_epoch_end(self, batch, logs={}):
        self.loss.append(logs.get('loss'))
        self.accuracy.append(logs.get('acc'))
        self.val_loss.append(logs.get('val_loss'))
        self.val_accuracy.append(logs.get('val_acc'))

history = LossHistory()
history.init()

In [14]:
# Model with Classification
nb_LSTM1 = 20
nb_LSTM2 = 15
nb_LSTM3 = 10
dropout=0.3
epoch_train = 100
epoch_test = 10

model = Sequential()
model.add(LSTM(nb_LSTM1, batch_input_shape=(1, lookback, 1),stateful=True,return_sequences=True))
model.add(Dropout(dropout))
model.add(LSTM(nb_LSTM2,stateful=True,return_sequences=True))
model.add(Dropout(dropout))
model.add(LSTM(nb_LSTM3,stateful=True))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy',optimizer=Adam(lr=0.00001),metrics=['accuracy'])


for epoch_idx in range(epoch_train):
    print ('epochs : ' + str(epoch_idx+1) + ' / ' + str(epoch_train))
    model.fit(input_train, label_train_binary, epochs=1, batch_size=1, verbose=1, shuffle=False,
              validation_data=(input_test, label_test_binary), callbacks=[history])
    model.reset_states()

In [None]:
plt.plot(history.loss, label='train')
plt.plot(history.val_loss, label='test')
plt.legend()
plt.show()

In [None]:
plt.plot(history.accuracy, label='train')
plt.plot(history.val_accuracy, label='test')
plt.legend()
plt.show()

In [None]:
Loss, Accuray = model.evaluate(input_test,label_test_binary,batch_size=1)
print('ACCURACY IS ',Accuray*100,'%')

In [None]:
# prediction=[]
# prediction_binary=[]
# for i in range(input_test.shape[0]):
#   p=model.predict(input_test[i:(i+1)],batch_size=1)
#   prediction.append(p[0,0])
#   p=(lambda p: 1 if p>=0.5 else 0)(p[0])
#   prediction_binary.append(p)
#   print('prediction : ',p,' actual: ',label_test_binary[i])
#   for epoch_idx in range(epoch_test):
#     model.fit(input_data[:input_train.shape[0]+(i+1)],label_data_binary[:input_train.shape[0]+(i+1)],batch_size=1,epochs=1,verbose=0,shuffle=False)
#     model.reset_states()
#   print(i+1,'th update /',input_test.shape[0])

# score=[1 if a==b else 0 for a,b in zip(prediction_binary, label_test_binary)]
# print('ACCURACY IS ', sum(score)/len(score)*100,'%')
# print(prediction)

In [None]:
# matrix = confusion_matrix( label_test_binary, prediction_binary)
# print('Confusion Matrix')
# print('     Predicted    Positive      Negative')
# print('Actual')
# print('Positive           ',matrix[1][1],'          ',matrix[1][0] )
# print('Negative           ',matrix[0][1],'          ',matrix[0][0] )

In [None]:
# Model with Regression
# nb_LSTM1 = 50
# nb_LSTM2 = 20
# dropout=0.3
# epoch = 50

# model = Sequential()
# model.add(LSTM(nb_LSTM1, batch_input_shape=(1, lookback, 1),stateful=True,return_sequences=True))
# model.add(Dropout(dropout))
# model.add(LSTM(nb_LSTM2, batch_input_shape=(1, nb_LSTM1, 1),stateful=True))
# model.add(Dense(1, activation='sigmoid'))
# model.compile(loss='binary_crossentropy',optimizer='rmsprop',metrics=['accuracy']) 
# print(model.summary())

# history = model.fit(input_train,label_train_binary, batch_size=1, epochs=epoch, verbose=2,shuffle=False)
