In [1]:
# multi-class classification with Keras
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

import pickle


Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# convert series to supervised learning
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j + 1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j + 1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j + 1, i)) for j in range(n_vars)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg

In [3]:
# load dataset
dataset = pd.read_csv("convertcsv_label.csv", header=0, index_col=0)
dataset = dataset.drop(['Date', 'Date.1', 'Date.2', 'Date.3'], axis=1)
values = dataset.values
# ensure all data is float
values = values.astype('float32')

for i in range(1, 1914):
    for j in range(1, 50):
        values[i][j] -= values[0][j]
for k in range(1, 50):
    values[0][k] = 0

days = 20
features = 57
obs = days * features

# Normalize or not Normalize
# scaler = MinMaxScaler(feature_range=(0, 1))
# values = scaler.fit_transform(values)


reframed = series_to_supervised(values, days, 1)
reframed = reframed.values
np.savetxt("foo2.csv", reframed, delimiter=",")
print(reframed.shape)
Y = reframed[:, -features]
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)
dummy_y = np_utils.to_categorical(encoded_Y)

print(dummy_y)
# print(values)
np.savetxt("foo.csv", dummy_y, delimiter=",")

(1919, 1197)
[[0. 1. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 ...
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]]


In [4]:
"""
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

You can change the n_train_hours from 0.85 * 1938 or 0.9 or 0.8, whatever you like

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

"""
n_train_hours = 1647  # 0.8*1938
train_X = reframed[:n_train_hours, :obs]  # 1647, 1140
train_Y = dummy_y[:n_train_hours, :]
test_X = reframed[n_train_hours:, :obs]  # 248, 1140
test_Y = dummy_y[n_train_hours:, :]

train_X = train_X.reshape((train_X.shape[0], days, features))
test_X = test_X.reshape((test_X.shape[0], days, features))
print(test_X.shape)
# np.savetxt("foo.csv", train_X, delimiter=",")

edit = []

# covert 3d to one domension

for i in dummy_y:
    for k,v in enumerate(i):
        if (v == 1):
            edit.append(k)


train_Y = edit[:n_train_hours]
test_Y = edit[n_train_hours:]



(272, 20, 57)


In [5]:
import collections
collections.Counter(edit)

from sklearn.utils import class_weight

# oversampling the buy and sell signal
class_weights = class_weight.compute_class_weight('balanced',
                                                 np.unique(train_Y),
                                                 train_Y)


print(class_weights)

[5.17924528 0.38178025 5.33009709]




In [6]:
model = Sequential()
model.add(LSTM(128, input_shape=(train_X.shape[1], train_X.shape[2]), return_sequences=True))
model.add(Dropout(0.1))
model.add(LSTM(256, return_sequences=False))
model.add(Dropout(0.1))
model.add(Dense(units=3))
model.add(Activation('sigmoid'))
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=['accuracy'])
model.fit(train_X, train_Y, epochs=50, batch_size=44, validation_data=(test_X, test_Y), verbose=2, class_weight = class_weights)
# test_mse = model.evaluate(test_X, test_Y, verbose=1)
predicted_values = model.predict(test_X)
# print(predicted_values)

predict_from_test = pd.DataFrame(predicted_values, columns=['sell', 'stay', 'buy'])
predict_from_test.to_csv("prediction.csv")


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

Train on 1647 samples, validate on 272 samples
Epoch 1/50
 - 5s - loss: 0.5102 - accuracy: 0.8658 - val_loss: 0.5346 - val_accuracy: 0.8603
Epoch 2/50
 - 5s - loss: 0.4754 - accuracy: 0.8731 - val_loss: 0.5116 - val_accuracy: 0.8603
Epoch 3/50
 - 4s - loss: 0.4728 - accuracy: 0.8731 - val_loss: 0.5141 - val_accuracy: 0.8603
Epoch 4/50
 - 5s - loss: 0.4696 - accuracy: 0.8731 - val_loss: 0.5131 - val_accuracy: 0.8603
Epoch 5/50
 - 5s - loss: 0.4737 - accuracy: 0.8731 - val_loss: 0.5252 - val_accuracy: 0.8603
Epoch 6/50
 - 5s - loss: 0.4734 - accuracy: 0.8731 - val_loss: 0.5079 - val_accuracy: 0.8603
Epoch 7/50
 - 5s - loss: 0.4700 - accuracy: 0.8731 - val_loss: 0.5113 - val_accuracy: 0.8603
Epoch 8/50
 - 5s - loss: 0.4679 - accuracy: 0.8731 - val_loss: 0.5138 - val_accuracy: 0.8603
Epoch 9/50
 - 5s - loss: 0.4698 - accuracy: 0.8731 - val_loss: 0.5106 - val_accuracy: 0.8603
Epoch 10/50
 - 5s - l

In [7]:
!pip list

Package                            Version            
---------------------------------- -------------------
absl-py                            0.10.0             
alabaster                          0.7.12             
anaconda-client                    1.7.2              
anaconda-navigator                 1.9.12             
anaconda-project                   0.8.3              
argh                               0.26.2             
asn1crypto                         1.3.0              
astor                              0.8.1              
astroid                            2.3.3              
astropy                            4.0                
atomicwrites                       1.3.0              
attrs                              19.3.0             
autopep8                           1.4.4              
Babel                              2.8.0              
backcall                           0.1.0              
backports.functools-lru-cache      1.6.1              
backports.

In [17]:
# Pickle the best model
tuple_objects = (model, train_X, train_Y)
pickle.dump(tuple_objects, open("picklefile.pkl", 'wb'))

In [19]:
with open('51_profit_last272.pkl', 'rb') as f:
    data = pickle.load(f)
predicted_values = data[0].predict(test_X)
predict_from_test = pd.DataFrame(predicted_values, columns=['sell', 'stay', 'buy'])
predict_from_test.to_csv("pickle_prediction.csv")   

(<keras.engine.sequential.Sequential at 0x1b12a3a3808>,
 array([[[ 1.        ,  0.        ,  0.        , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  2.37      ,  1.5       , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  2.9099998 ,  1.6000004 , ...,  0.        ,
           0.        ,  0.        ],
         ...,
         [ 1.        , -1.0100002 , -1.3499994 , ...,  0.        ,
           0.        ,  1.        ],
         [ 1.        , -1.0100002 , -1.3499994 , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  1.0499992 , -0.19999981, ...,  0.        ,
          -1.        , -1.        ]],
 
        [[ 0.        ,  2.37      ,  1.5       , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  2.9099998 ,  1.6000004 , ...,  0.        ,
           0.        ,  0.        ],
         [ 0.        ,  1.8599997 ,  0.95000076, ...,  0.        ,
           0.        ,  0.  