In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras import Input
from tensorflow.keras.layers import LSTM
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model

In [2]:
physical_devices = tf.config.experimental.list_physical_devices('GPU')
assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
config = tf.config.experimental.set_memory_growth(physical_devices[0], True)

In [3]:
saved_model = load_model('my_model_ethbtc_v1.1_2021-01-18_2206742115_5m.h5')

In [4]:
data_path = 'BinanceLOB/binance_dataset_2021-01-19_2206742115.csv'

In [5]:
new_model_name = 'my_model_ethbtc_v1.1_2021-01-19_2206742115.h5'

In [6]:
lob_list = []

for side in ['bid','ask']:
    for i in range(100):
        lob_list.append(side + 'price' + str(i+1))
        lob_list.append(side + 'size' + str(i+1))
header_list = ['timestamp','last'];
header_list.extend(lob_list)

In [7]:
# Import data
binance_ethbtc = pd.read_csv(data_path, names=header_list, index_col='timestamp')

In [8]:
new_lob_order = []
for i in range(100):
    for side in ['bid','ask']:
        new_lob_order.append(side + 'price' + str(i+1))
        new_lob_order.append(side + 'size' + str(i+1))
new_lob_order = ['last'] + new_lob_order

In [9]:
binance_ethbtc = binance_ethbtc[new_lob_order]

In [10]:
def get_model_data(data, sample_size=600, feature_num=200):
    data = data.values
    shape = data.shape
    X = np.zeros((shape[0]-sample_size, sample_size, feature_num))
    Y = np.zeros(shape=(shape[0]-sample_size, 1))
    for i in range(shape[0]-sample_size):
        X[i] = data[i:i+sample_size,0:feature_num]# take the first feature_num columns as features
        Y[i] = data[i+sample_size-1,-1:]# take the last one column as labels
    X = X.reshape(X.shape[0], sample_size, feature_num, 1)# add the 4th dimension: 1 channel
    
    return X,Y

In [11]:
def define_y_labels(y, prediction_period, band_size = 0.001):
    bins = [-np.inf, -band_size, band_size, np.inf]
    names = [0, 1, 2]
    y_labels = pd.cut(y.pct_change(periods=prediction_period), bins, labels=names)
    return y_labels

In [12]:
datasample_period = 600
feature_columns = 40
prediction_period = 60

In [13]:
binance_ethbtc['delta_cat'] = define_y_labels(binance_ethbtc['last'], prediction_period)
weight_array = binance_ethbtc.groupby('delta_cat').count()['last']
weight_sum = weight_array.sum()

In [14]:
class_weight = {0: 1- weight_array[0]/weight_sum, 1: 1- weight_array[1]/weight_sum, 2: 1-weight_array[2]/weight_sum}

In [15]:
#split training and validation dataset
train_X, train_Y = get_model_data(binance_ethbtc.drop(['last'], axis=1), datasample_period, feature_columns)

MemoryError: Unable to allocate 15.3 GiB for an array with shape (85772, 600, 40) and data type float64

In [None]:
train_y = to_categorical(train_Y[:])

In [None]:
saved_model.fit(train_X, train_y, epochs=100, batch_size=32, class_weight=class_weight)

In [None]:
saved_model.save(new_model_name)