In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [2]:
import tensorflow as tf

from tensorflow.keras import datasets, models, optimizers
from tensorflow.keras.layers import TimeDistributed, Conv2D, Flatten, Dense, LSTM, ConvLSTM2D, MaxPool2D, Dropout, Conv1D
import matplotlib.pyplot as plt

import numpy as np 
import pandas as pd

In [3]:
tf.test.is_built_with_cuda()

True

In [4]:
tf.compat.v1.enable_eager_execution()

In [5]:
inputs_train = np.load("inputs_train.npy")
inputs_test = np.load("inputs_test.npy")

In [6]:
inputs_train = np.transpose(inputs_train, (0, 2,3,1))
inputs_test = np.transpose(inputs_test, (0, 2,3,1))

In [7]:
inputs_train = inputs_train.reshape(inputs_train.shape[:2] + (inputs_train.shape[2]*inputs_train.shape[3],))
inputs_test = inputs_test.reshape(inputs_test.shape[:2] + (inputs_test.shape[2]*inputs_test.shape[3],))

In [8]:
train = pd.read_csv('train_timeframes.csv').iloc[59:]
test = pd.read_csv('test_timeframes.csv').iloc[59:]

In [9]:
df = [train, test]

In [10]:
img = inputs_train[0,:,:]

In [11]:
inputs_train.shape, train.shape

((73695, 60, 20), (73695, 23))

In [12]:
inputs_test.shape, test.shape

((24365, 60, 20), (24365, 23))

In [13]:
idx = 0
batch_size = 32
stop_limit = 0.004
price_limit = 0.004
margin_size = 50
time_limit = 60

In [14]:
model = models.Sequential()
# model.add(LSTM(64, input_shape = img.shape, return_sequences=True, dropout=0.2))
model.add(LSTM(32, input_shape = img.shape, dropout=0.2))
# model.add(LSTM(32, dropout=0.2))
model.add(Dense(3, activation = "sigmoid"))

In [15]:
# model = models.Sequential()

# model.add(Conv2D(32, (5, 1),padding ='Same', activation='relu', input_shape = img.shape))
# model.add(Conv2D(32, (5, 1),padding = 'Same', activation ='relu'))
# model.add(MaxPool2D((2, 1)))
# model.add(Dropout(0.25))

# model.add(Conv2D(64,(3, 1),padding = 'Same', activation ='relu'))
# model.add(Conv2D(64, (3, 1),padding = 'Same', activation ='relu'))
# model.add(MaxPool2D(pool_size=(2, 1), strides=(2, 1)))
# model.add(Dropout(0.25))

# model.add(Flatten())
# model.add(Dense(256, activation = "relu"))
# model.add(Dropout(0.5))
# model.add(Dense(3, activation = "sigmoid"))

In [16]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 32)                6784      
_________________________________________________________________
dense (Dense)                (None, 3)                 99        
Total params: 6,883
Trainable params: 6,883
Non-trainable params: 0
_________________________________________________________________


In [17]:
def custom_loss_wrapper(idx, stop_limit, price_limit, margin_size, time_limit):    
    def custom_loss(y_true, y_pred):
#         print()
        margin = ((y_pred[:,:1] - 0.5) * 2)
        margin *= margin_size
        direction = tf.sign(margin)
        stop_loss = y_pred[:,1:2] * direction * -1 * stop_limit
        price_target = y_pred[:,2:3] * direction * 1 * price_limit
        
        idx = y_true[0][1].numpy()
        df_name = y_true[0][0].numpy()
        batch_size = y_pred.shape[0]
        closes = []
        spread = []
        i = tf.constant(0)
        while_condition = lambda i: tf.less(i, tf.constant(batch_size))
        def body(i):
            closes.append(df[df_name].iloc[int(idx+i.numpy()):int(idx+i.numpy()+time_limit)]['close_1min'].tolist())
            spread.append(df[df_name].iloc[int(idx+i.numpy()):int(idx+i.numpy()+time_limit)]['spread'].tolist())
            return [tf.add(i, 1)]
        r = tf.while_loop(while_condition, body, [i], parallel_iterations=batch_size, swap_memory=True)
        closes = tf.convert_to_tensor(closes)
        spread = tf.convert_to_tensor(spread)
        close1 = closes[:,:1]
        close2 = tf.zeros_like(closes[:,:1])
        lower_bound = tf.minimum(stop_loss, price_target)
        upper_bound = tf.maximum(stop_loss, price_target)
        i = tf.constant(0)
        while_condition = lambda i: tf.logical_and(tf.less(i, tf.constant(closes.shape[1])), tf.math.count_nonzero(close2).numpy() == close2.shape[0])
        def body(i):
            diff = (closes[:,i:i+1] - closes[:,:1]) * direction
            cond = tf.logical_or(tf.less(diff, lower_bound), tf.greater(diff, upper_bound))
            close2 = tf.where(tf.logical_and(tf.equal(close2, 0), cond), closes[:,i:i+1], close2)
            return [tf.add(i, 1)]
        r = tf.while_loop(while_condition, body, [i], parallel_iterations=closes.shape[1], swap_memory=True)
            
        close2 = tf.where(tf.equal(close2, 0), closes[:,-1:], close2)

        diff = close2 - close1
        profit = 100 * (margin * (diff - (spread*direction))) /  close1
        return -profit
    return custom_loss

In [18]:
def get_profit(data, y_preds, margin_lower_limit=1):
    idx = 0
    profits = []
    while idx < len(y_preds) - 1:
        margin = ((y_preds[idx][0] - 0.5) * 2)
        margin *= margin_size
        if abs(margin) < margin_lower_limit:
            idx += 1
        else:
            direction = np.sign(margin)
            stop_loss = y_preds[idx][1] * direction * -1 * stop_limit
            price_target = y_preds[idx][2] * direction * 1 * price_limit

            df_name = 1
            start_idx = idx
            close1 =  data.iloc[idx]['close_1min']
            idx += 1
            while idx < len(df[df_name]) - 1 and idx - start_idx < time_limit:
                close2 = df[df_name].iloc[idx]['close_1min']
                diff = (close2 - close1) 
                spread = df[df_name].iloc[idx]['spread']
                if min(stop_loss, price_limit) < diff * direction < max(stop_loss, price_limit):
                    break
                idx += 1

            profits.append(100 * (margin * (diff - (spread*direction))) /  close1)
    return np.sum(profits)

# data = df[]

In [27]:
from keras.callbacks import Callback

class IntervalEvaluation(Callback):
    def __init__(self, validation_data=(), interval=10):
        super(Callback, self).__init__()

        self.interval = interval
        self.X_val, self.y_val = validation_data

    def on_epoch_end(self, epoch, logs={}):
        if epoch % self.interval == 0:
            y_pred = self.model.predict(self.X_val, verbose=0)
#             score = np.mean(custom_metric_wrapper(idx, stop_limit, price_limit, margin_size, time_limit)(tf.convert_to_tensor(self.y_val), tf.convert_to_tensor(y_pred)))
            profits = get_profit(df[1], y_pred)
            profits_0 = get_profit(df[1], y_pred, 0)
            print(np.min(y_pred, axis=0))
            print(np.mean(y_pred, axis=0))
            print(np.max(y_pred, axis=0))
            print("interval evaluation - epoch: {:d} - score: {:.6f} -- score_0: {:.6f}".format(epoch, profits, profits_0))
           
buffer = (batch_size+time_limit) + 1000
ival = IntervalEvaluation(validation_data=(inputs_test[:-buffer], np.array([[1, i] for i in range(len(test)-buffer)])), interval=5)

In [20]:
model.compile(
    optimizer=optimizers.Adam(learning_rate=0.001),
    loss=custom_loss_wrapper(idx, stop_limit, price_limit, margin_size, time_limit),
#     metrics=[custom_metric_wrapper(idx, stop_limit, price_limit, margin_size, time_limit)],
    run_eagerly=True
)

In [None]:
history = model.fit(inputs_train[:-buffer], np.array([[0, i] for i in range(len(train)-buffer)]), 
#                     validation_data=(inputs_test[:-buffer], np.array([[1, i] for i in range(len(test)-buffer)])),
                    epochs=1000, shuffle=False,
                    batch_size=batch_size,
                    callbacks=[ival],
                   use_multiprocessing=True, verbose=1, 
#                     validation_freq=10,
                    workers=32,
                   initial_epoch=2,
                   )

Epoch 3/1000
[0.49878776 0.48543292 0.482004  ]
[0.5415889  0.5277125  0.49509847]
[0.61667484 0.564729   0.52360046]
interval evaluation - epoch: 2 - score: -991.136206 -- score_0: -991.148436
Epoch 4/1000
[0.4960488  0.49163377 0.49119744]
[0.5358718  0.52357805 0.5003031 ]
[0.6208601  0.5635424  0.52636087]
interval evaluation - epoch: 3 - score: -854.962860 -- score_0: -859.130835
Epoch 5/1000
[0.49220797 0.5020343  0.49608016]
[0.53206545 0.5245392  0.50481796]
[0.62297535 0.56691504 0.5345451 ]
interval evaluation - epoch: 4 - score: -758.252598 -- score_0: -761.920140
Epoch 6/1000

In [24]:
y_preds = model.predict(inputs_test[:-buffer])

In [26]:
np.min(y_preds, axis=0), np.mean(y_preds, axis=0), np.max(y_preds, axis=0)

(array([0.5053732, 0.4675086, 0.4687059], dtype=float32),
 array([0.5558962 , 0.53147197, 0.48632494], dtype=float32),
 array([0.6228617 , 0.5674309 , 0.51639897], dtype=float32))

In [59]:
profits_0 = get_profit(df[1], y_preds, 49.8)

-2.8314004119648235
-3.9286273011275643
-5.778957895504035


In [60]:
np.sum(profits_0)

-12.538985608596423

In [35]:
model.predict(inputs_test[:10])

array([[0.4145389 , 0.49969864, 0.485943  ],
       [0.38938323, 0.5155748 , 0.48543483],
       [0.380811  , 0.525066  , 0.48127612],
       [0.35313964, 0.5333326 , 0.47300786],
       [0.33992964, 0.53893375, 0.46911928],
       [0.33632183, 0.5414081 , 0.46604148],
       [0.35559687, 0.5366863 , 0.46333343],
       [0.33308685, 0.5325679 , 0.45460093],
       [0.29795402, 0.5304277 , 0.4447381 ],
       [0.322084  , 0.5295993 , 0.44771975]], dtype=float32)