In [1]:
import os

# Expose only GPU index 1 to this process. This is set before TensorFlow is
# imported further down so the choice is already in place when the framework
# enumerates devices.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras import Input
from tensorflow.keras.layers import LSTM
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model

In [3]:
# Fail fast when no GPU is visible, then ask TensorFlow to allocate GPU memory
# on demand instead of reserving it all up front.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
# set_memory_growth() has no return value, so nothing is bound to its result.
# Apply it to every visible GPU so the setting is uniform across devices.
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

In [5]:
# --- Evaluation settings -----------------------------------------------------
# These presumably mirror the values encoded in the checkpoint directory name
# ("...300_30_0.001") -- confirm against the training run.
datasample_period = 300   # rows per input window (passed as sample_size)
feature_columns = 40      # feature columns per row (passed as feature_num)
prediction_period = 30    # not referenced by any cell visible in this notebook
band_size = 0.001         # relative-change threshold separating down/level/up

# NOTE(review): the evaluation file lives under ./datasets/train/ -- confirm it
# was not part of the data the checkpoint was trained on.
test_path = './datasets/train/binance_dataset_2021-02-10_2420569459.npy'

# --- Trained model under evaluation ------------------------------------------
predict_model = load_model('./models/2021-01-19_02-10-300_30_0.001/ckpt-loss=0.38-epoch=0100/')

In [8]:
def get_model_data(data, sample_size, feature_num, band_size):
    """Build sliding-window model inputs and one-hot direction labels.

    Window ``i`` covers rows ``i .. i + sample_size - 1`` of ``data``. Column 0
    is the reference series used for labelling; columns ``1 .. feature_num``
    are the features.

    The label of a window is derived from the relative change of column 0
    between the window's first and last row, divided by the last value:

    * ``0`` -- change below ``-band_size`` (down)
    * ``1`` -- change within ``[-band_size, band_size]`` (level)
    * ``2`` -- change above ``band_size`` (up)

    NOTE(review): the label is computed inside the input window itself rather
    than over a future horizon -- confirm this matches how the evaluated model
    was trained.

    Args:
        data: 2-D NumPy array with at least ``feature_num + 1`` columns.
        sample_size: number of consecutive rows per window (>= 1).
        feature_num: number of feature columns taken, starting at column 1.
        band_size: relative-change threshold separating level from up/down.

    Returns:
        Tuple ``(X, y_labels)`` where ``X`` is a float16 array of shape
        ``(n, sample_size, feature_num, 1)`` and ``y_labels`` is the one-hot
        encoding of the labels with shape ``(n, 3)``;
        ``n = data.shape[0] - sample_size``.

    Raises:
        ValueError: if ``sample_size`` is smaller than 1 or ``data`` has fewer
            than ``sample_size`` rows.
    """
    if sample_size < 1:
        raise ValueError("sample_size must be at least 1")
    n_samples = data.shape[0] - sample_size
    if n_samples < 0:
        raise ValueError(
            "data has %d rows, fewer than sample_size=%d"
            % (data.shape[0], sample_size)
        )

    # Features: columns 1..feature_num (column 0 is reserved for labelling).
    # float16 keeps the window tensor small at the cost of precision.
    X = np.zeros((n_samples, sample_size, feature_num), dtype=np.float16)
    for i in range(n_samples):
        X[i] = data[i:i + sample_size, 1:feature_num + 1]
    # Add the 4th dimension: a single channel.
    X = X.reshape(n_samples, sample_size, feature_num, 1)

    # Relative change of column 0 from the first to the last row of each
    # window, computed for all windows at once.
    first_value = data[:n_samples, 0]
    last_value = data[sample_size - 1:sample_size - 1 + n_samples, 0]
    delta_last = (last_value - first_value) / last_value

    # Default to "level" (1); NaN deltas fail both comparisons and stay level.
    labels = np.ones(n_samples, dtype=np.int64)
    labels[delta_last < -band_size] = 0
    labels[delta_last > band_size] = 2

    # Fix the class count explicitly so the encoding always has 3 columns,
    # even when a class is absent from this dataset.
    y_labels = to_categorical(labels, num_classes=3)

    return X, y_labels

In [9]:
# Read the raw evaluation array from disk, then slice it into model-ready
# windows (test_X) and one-hot labels (test_Y).
test_data = np.load(test_path)
test_X, test_Y = get_model_data(
    test_data,
    sample_size=datasample_period,
    feature_num=feature_columns,
    band_size=band_size,
)

In [11]:
# Score the loaded checkpoint on the windowed data; the displayed list is the
# model's loss followed by its compiled metric(s) -- presumably accuracy,
# confirm against the training script.
predict_model.evaluate(test_X, test_Y)



[0.43208572268486023, 0.8089261054992676]

In [12]:
# One row per window; columns 0, 1, 2 hold the model's outputs for the label
# classes 0 (down), 1 (level) and 2 (up) -- presumably class probabilities.
df = pd.DataFrame(predict_model.predict(test_X))

In [22]:
# Recover the integer class (0/1/2) from the one-hot labels, i.e. undo
# to_categorical, and store it next to the predictions.
df['Y'] = np.argmax(test_Y, axis=1)

In [33]:
# Summary statistics of the three model outputs and of the true label column.
df.describe()

Unnamed: 0,0,1,2,Y
count,68675.0,68675.0,68675.0,68675.0
mean,0.3552453,0.381824,0.2629305,0.999985
std,0.3951238,0.335083,0.3656421,0.773643
min,1.757327e-09,4.9e-05,4.34082e-10,0.0
25%,0.003303,0.045818,0.0007006086,0.0
50%,0.1319374,0.314769,0.03116319,1.0
75%,0.7873277,0.686489,0.5099274,2.0
max,0.9999182,0.98637,0.9999512,2.0


In [27]:
# True label is down (Y == 0) and the model's class-0 output is at least 0.5:
# down moves the model recognised. Note the 0.5 cut-off is not the same rule as
# picking the largest of the three outputs.
df[(df['Y']==0)&(df[0]>=0.5)].count()

0    18560
1    18560
2    18560
Y    18560
dtype: int64

In [28]:
# True label is down (Y == 0) but the class-0 output is below 0.5: down moves
# the model missed (these are misses, not false "down" calls).
df[(df['Y']==0)&(df[0]<0.5)].count()

0    1992
1    1992
2    1992
Y    1992
dtype: int64

In [29]:
# True label is level (Y == 1) and the model's class-1 output is at least 0.5:
# level windows the model recognised.
df[(df['Y']==1)&(df[1]>=0.5)].count()

0    20492
1    20492
2    20492
Y    20492
dtype: int64

In [30]:
# True label is level (Y == 1) but the class-1 output is below 0.5: level
# windows the model missed (these are misses, not false "level" calls).
df[(df['Y']==1)&(df[1]<0.5)].count()

0    7080
1    7080
2    7080
Y    7080
dtype: int64

In [31]:
# True label is up (Y == 2) and the model's class-2 output is at least 0.5:
# up moves the model recognised.
df[(df['Y']==2)&(df[2]>=0.5)].count()

0    15952
1    15952
2    15952
Y    15952
dtype: int64

In [32]:
# True label is up (Y == 2) but the class-2 output is below 0.5: up moves the
# model missed (these are misses, not false "up" calls).
df[(df['Y']==2)&(df[2]<0.5)].count()

0    4599
1    4599
2    4599
Y    4599
dtype: int64