In [None]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.recurrent import LSTM
from keras.optimizers import SGD
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from keras.utils import np_utils
from sklearn.metrics import precision_recall_fscore_support
from keras.callbacks import TensorBoard
import tensorflow as tf

# Project-local modules are imported by first changing the working directory
# into the folder that holds them.
# NOTE(review): this presumably relies on the current directory being on
# sys.path — confirm.
os.chdir('../Utils/')
import featureGenerator
# Star import: helper names used later without a module prefix (e.g.
# mergeOrderBookDays, createFeatures) presumably come from here — verify.
from featureGenerator import *
# This is the last chdir in the visible notebook, so the relative paths used
# by later cells are resolved from this '../src/' directory.
os.chdir('../src/')
import orderbook_lstm
from orderbook_lstm import OrderBookLSTM

# Generate Features and Response Vars

In [None]:
# File locations, all inside the shared project data directory.
data_dir = '../../ProjectData/'
in_path = os.path.join(data_dir, 'msft-orderbook.csv')
out_path = os.path.join(data_dir, 'msft-orderbook-all.csv')
out_path2 = os.path.join(data_dir, 'msft-orderbook-final.csv')

# Hand the data directory, the merged-file path and the ticker list to the
# project's merge helper.
mergeOrderBookDays(data_dir, out_path, ['msft'])

In [None]:
# Call the project's createFeatures helper with the merged CSV path and the
# final CSV path, requesting the 'Classification' response type.
# NOTE(review): presumably this writes the file at out_path2 that is read
# back on the next line — confirm against featureGenerator.
createFeatures(out_path, out_path2, response_type = 'Classification')
# Load the file at out_path2 and preview its first rows.
data = pd.read_csv(out_path2)
data.head()

In [None]:
# Reload the feature file and drop the two columns that are not fed to the
# model.
data = pd.read_csv(out_path2)
data = data.drop(['datetime', 'direct.last_SRO'], axis = 1)

# normalize the dataset
# Every column except the 'Response' label is min-max scaled. The scaler is
# fitted on the leading 67% of rows only (the same fraction the train/test
# split cell uses) and then applied to all rows, so the test rows' min/max
# never influence the scaling seen during training. As a consequence, scaled
# test values may fall slightly outside [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
cols_to_normalize = [col for col in data.columns if col != 'Response']
n_fit_rows = int(len(data) * 0.67)  # must match the train/test split fraction
scaler.fit(data[cols_to_normalize].iloc[:n_fit_rows])
data[cols_to_normalize] = scaler.transform(data[cols_to_normalize])

# Plain float32 matrix used by the windowing code.
# NOTE(review): create_dataset reads labels from the LAST column, so this
# assumes 'Response' is the last column of `data` — confirm.
dataset = data.values
dataset = dataset.astype('float32')


# Train/Test Split

In [None]:
# Order-preserving hold-out split: the leading 67% of the rows become the
# training set and the remaining rows the test set (no shuffling).
train_size = int(0.67 * len(dataset))
test_size = len(dataset) - train_size
train = dataset[:train_size]
test = dataset[train_size:]
print(len(train), len(test))

In [None]:
# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=5, nb_classes=3):
    """Slice a 2-D array into look-back windows with one-hot labels.

    Window ``i`` contains rows ``i .. i + look_back - 1`` with every column
    (the label column included). Its label is read from the last column of
    row ``i + look_back + 1``, so one row lies between the end of a window
    and the row that labels it.
    NOTE(review): confirm that this one-row gap is intended.

    Parameters
    ----------
    dataset : 2-D numpy array whose last column holds integer class ids.
    look_back : int
        Number of consecutive rows per window.
    nb_classes : int
        Width of the one-hot label vectors. Defaults to 3, the value that
        used to be hard-coded here.

    Returns
    -------
    (dataX, dataY)
        ``dataX`` has shape ``(n, look_back, n_columns)`` and ``dataY`` has
        shape ``(n, nb_classes)``, where
        ``n = max(len(dataset) - look_back - 1, 0)``.
    """
    n_columns = dataset.shape[1]
    n_windows = max(len(dataset) - look_back - 1, 0)

    labels = dataset[look_back + 1:, n_columns - 1].astype(int).reshape(-1)
    dataY = get_one_hot(labels, nb_classes)

    if n_windows == 0:
        # Too few rows for a single window: return a correctly shaped empty
        # array instead of a shapeless one.
        dataX = np.empty((0, look_back, n_columns), dtype=dataset.dtype)
    else:
        dataX = np.array([dataset[i:i + look_back, :] for i in range(n_windows)])
    return dataX, np.array(dataY)

In [None]:
# Convert response variable to one-hot vectors
def get_one_hot(targets, nb_classes):
    """Return one one-hot row per entry of ``targets``.

    ``targets`` is flattened first, so the result always has shape
    ``(number_of_targets, nb_classes)``. Each entry must be an integer class
    id, because it is used as a row index into an identity matrix of size
    ``nb_classes``.
    """
    class_ids = np.asarray(targets).ravel()
    identity = np.eye(nb_classes)
    return identity[class_ids]

In [None]:
# Number of consecutive rows per sample; the same window length is used for
# the training and the test set.
look_back = 10
trainX, trainY = create_dataset(train, look_back=look_back)
testX, testY = create_dataset(test, look_back=look_back)

# Show the shapes of the windowed training inputs and their one-hot labels.
print(trainX.shape)
print(trainY.shape)

# Train Model

In [None]:
# Reset TensorFlow's global default graph before building the model.
tf.reset_default_graph()

# Model hyper-parameters. The input dimensions are read from the windowed
# training data so they always agree with `look_back` and the number of
# columns actually present (10 and 69 for the current data).
timesteps, n_features = trainX.shape[1:]
n_neurons = 100
n_classes = 3
# Matches the value that was actually passed to the constructor before
# (the named variable previously said 1 while the call passed 2).
n_hidden = 2
# Not passed to the constructor below, so OrderBookLSTM keeps using its own
# default for dropout exactly as before.
dropout = None

# Positional order: timesteps, neurons, input shape, classes, hidden layers.
lstm = OrderBookLSTM(timesteps, n_neurons, (timesteps, n_features), n_classes, n_hidden)

In [None]:
# Obtain the model object from the OrderBookLSTM wrapper; `mod` is what the
# later cells call fit() and predict() on.
mod = lstm.get_model()

In [None]:
import scipy
import scipy.stats as stats
# Smallest value found in the last column of the training windows and how
# often it occurs, shown as [value, count]. The last column (index 68 for the
# 69-column data) is the one create_dataset reads the labels from.
# np.unique(..., return_counts=True) is the documented replacement for the
# deprecated scipy.stats.itemfreq and yields the same [value, count] rows.
label_values, label_counts = np.unique(trainX[:, :, -1], return_counts=True)
np.array([label_values, label_counts]).T[0]

In [None]:
# NOTE(review): bare scratch literal; nothing in the visible notebook reads
# this value — confirm whether this cell can be deleted.
1.4e6

In [None]:
# Number of rows per class label over the whole data set (train and test
# together). 'Response' is excluded from scaling, so these are raw labels.
data['Response'].value_counts()

In [None]:
# Per-class loss weights (tunable): classes 1 and 2 each count ten times as
# much as class 0.
class_weight = {0: 1., 1: 10., 2: 10.}

# Train for 10 epochs in mini-batches of 128 with progress output.
# A TensorBoard callback (log_dir='Logs/', write_graph=True) can be supplied
# through `callbacks=` when graph/metric logging is wanted.
mod.fit(trainX,
        trainY,
        epochs=10,
        batch_size=128,
        verbose=1,
        class_weight=class_weight)
    

# Make Predictions and get Metrics

In [None]:
# Training Error
# Predicted class per training window = index of the largest model output.
preds_training = mod.predict(trainX).argmax(axis=-1)
# A notebook cell only displays its LAST expression, so the predicted-class
# distribution has to be printed explicitly or it is silently discarded.
print(pd.Series(preds_training).value_counts())
# Per-class precision, recall, F-score and support; the true labels are
# recovered from the one-hot trainY with argmax.
precision_recall_fscore_support(np.argmax(trainY, axis=1), preds_training)

In [None]:
# Validation Error
# Predicted class per test window = index of the largest model output.
preds = mod.predict(testX).argmax(axis=-1)
# A notebook cell only displays its LAST expression, so the predicted-class
# distribution has to be printed explicitly or it is silently discarded.
print(pd.Series(preds).value_counts())
# Per-class precision, recall, F-score and support; the true labels are
# recovered from the one-hot testY with argmax.
precision_recall_fscore_support(np.argmax(testY, axis=1), preds)

# Ignore: scratch experiment (standalone Keras LSTM baseline, not part of the main pipeline)

In [None]:
# Class weights to change
# NOTE: this rebinds the notebook-wide `class_weight` used by the main fit
# cell above.
class_weight = {0 : 1.,
    1: 15.,
    2: 18.} 

# create and fit the LSTM network
# The input shape is taken from the windowed training data. The previously
# hard-coded (5, 69) did not match the 10-step windows built with
# look_back = 10, so fit() rejected trainX.
model = Sequential()
model.add(LSTM(10, input_shape=trainX.shape[1:], return_sequences=False))
# Three-way softmax output, one unit per class of the one-hot labels.
model.add(Dense(3, activation = 'softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics = ['accuracy'])
model.fit(trainX, trainY, 
          epochs=2,  
          batch_size=10, 
          #verbose=2, 
          class_weight = class_weight,
          callbacks=[TensorBoard(log_dir='Logs/testlog', write_graph=True)])

In [None]:
# Predicted class per test window (index of the largest model output),
# followed by how many windows were assigned to each class.
preds = np.argmax(model.predict(testX), axis=-1)
pd.Series(preds).value_counts()

In [None]:
# Per-class precision, recall, F-score and support for `preds` against the
# true test labels, which are recovered from the one-hot testY with argmax.
precision_recall_fscore_support(np.argmax(testY, axis=1), preds)