In [190]:
from pymongo import MongoClient
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Input, Dense, GRU, Embedding
from tensorflow.python.keras.optimizers import RMSprop
from tensorflow.python.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard, ReduceLROnPlateau
from scipy.ndimage.filters import maximum_filter1d, minimum_filter1d
from sklearn.model_selection import train_test_split
import tensorflow as tf
%matplotlib inline
import matplotlib.pyplot as plt

import numpy as np

In [106]:
def max_filter1d_valid(a, W):
    """Sliding-window maximum of a 1-D array, keeping only complete windows.

    Args:
        a: 1-D array-like of values.
        W: Window length in samples (integer >= 1; scipy rejects smaller
            sizes).

    Returns:
        numpy array of length ``max(len(a) - W + 1, 0)`` whose ``i``-th entry
        is ``max(a[i:i + W])``.
    """
    filtered = maximum_filter1d(a, size=W, mode="constant")
    # With the default origin, scipy's window reaches W // 2 samples to the
    # left and (W - 1) // 2 samples to the right of each output position.
    # For even W the two differ, so each edge is trimmed by its own amount.
    left = W // 2
    right = (W - 1) // 2
    # Use an explicit stop index: a "-right" slice would yield an empty array
    # whenever right == 0 (W of 1 or 2).
    stop = max(left, filtered.shape[0] - right)
    return filtered[left:stop]

def min_filter1d_valid(a, W):
    """Sliding-window minimum of a 1-D array, keeping only complete windows.

    Args:
        a: 1-D array-like of values.
        W: Window length in samples (integer >= 1; scipy rejects smaller
            sizes).

    Returns:
        numpy array of length ``max(len(a) - W + 1, 0)`` whose ``i``-th entry
        is ``min(a[i:i + W])``.
    """
    filtered = minimum_filter1d(a, size=W)
    # With the default origin, scipy's window reaches W // 2 samples to the
    # left and (W - 1) // 2 samples to the right of each output position.
    # For even W the two differ, so each edge is trimmed by its own amount.
    left = W // 2
    right = (W - 1) // 2
    # Use an explicit stop index: a "-right" slice would yield an empty array
    # whenever right == 0 (W of 1 or 2).
    stop = max(left, filtered.shape[0] - right)
    return filtered[left:stop]

In [59]:
# MongoDB server on the local machine, reached without TLS.
url = '127.0.0.1'
client = MongoClient(url, ssl=False)

# Database handle (kept under the name `connection`).
connection = client['binance_coins']

# The collection is named after the trading symbol: <coin> followed by "BTC".
coin = "ETH"
symbol = "{}BTC".format(coin)
coins_collection = connection[symbol]

# Materialise every document of that collection as a list of dicts.
res = list(coins_collection.find())

In [198]:
highs = []
lows = []

# Feature columns are the document fields minus Mongo's '_id'. Building a new
# list (rather than calling .remove() on the result of .keys()) also works
# when .keys() returns a view instead of a list.
keys_list = [k for k in res[0].keys() if k != '_id']

# One row per document, one column per field, in keys_list order.
x_data = np.array([[r[k] for k in keys_list] for r in res])

prediction_period_minutes = 24 * 60

# Show which column index each field ended up at.
for i, key in enumerate(keys_list):
    print("{} {}".format(i, key))

# x_max_data = max_filter1d_valid(x_data[:, 0], prediction_period_minutes)
# x_min_data = min_filter1d_valid(x_data[:, 6], prediction_period_minutes)


# Target for row i: the 'Open' values of rows i .. i + num_future_steps - 1.
# The column is looked up by name because the position of a field in
# keys_list follows dict key order, which is not guaranteed to be stable.
num_future_steps = 40
open_col = keys_list.index('Open')
y_data = np.array([x_data[i:i + num_future_steps, open_col]
                   for i in range(len(x_data) - num_future_steps)])

# Drop the trailing rows that have no complete target window.
x_data = x_data[:len(y_data)]

# shuffle=False keeps the rows in the order returned by the query (assumed
# chronological -- TODO confirm). batch_generator slices contiguous windows
# out of the training arrays and feeds them to a recurrent model as
# sequences, so a shuffled split would turn every window into random,
# unordered rows.
X_train, X_test, y_train, y_test = train_test_split(
    x_data, y_data, test_size=0.33, shuffle=False)

for split in (X_train, X_test, y_train, y_test):
    print(split.shape)

num_train = X_train.shape[0]
num_test = X_test.shape[0]
num_y_signals = y_train.shape[1]
num_x_signals = X_train.shape[1]

# The whole test split is passed to Keras as one batch holding a single long
# sequence, hence the extra leading axis of size 1.
validation_data = (np.expand_dims(X_test, axis=0),
                   np.expand_dims(y_test, axis=0))

0 High
1 Open
2 Taker buy base asset volume
3 Volume
4 Open time
5 Number of trades
6 Low
7 Quote asset volume
8 Close
9 Ignore
10 Taker buy quote asset volume
11 Close time
(57921, 12)
(28529, 12)
(57921, 40)
(28529, 40)


In [181]:
batch_size = 256
sequence_length = 1000

def batch_generator(batch_size, sequence_length, X, Y):
    """
    Generator function for creating random batches of training-data.

    Each batch element is a window of `sequence_length` consecutive rows,
    taken from X and Y at the same randomly chosen start row.

    Args:
        batch_size: Number of windows per batch.
        sequence_length: Number of consecutive rows per window.
        X: 2-D array of input signals, one row per sample.
        Y: 2-D array of output signals, row-aligned with X.

    Yields:
        Tuple (x_batch, y_batch) of freshly allocated float32 arrays with
        shapes (batch_size, sequence_length, X.shape[1]) and
        (batch_size, sequence_length, Y.shape[1]). The generator never ends.

    Raises:
        ValueError: On first use, if sequence_length is not within
            1..X.shape[0] or Y has fewer rows than X.
    """
    num_samples = X.shape[0]
    if sequence_length < 1 or sequence_length > num_samples:
        raise ValueError(
            "sequence_length must be between 1 and the number of samples "
            "({}), got {}".format(num_samples, sequence_length))
    if Y.shape[0] < num_samples:
        raise ValueError(
            "Y has fewer rows ({}) than X ({})".format(Y.shape[0], num_samples))

    # Number of distinct window start rows. The "+ 1" makes the last full
    # window (starting at num_samples - sequence_length) reachable and lets a
    # series of exactly sequence_length rows be used.
    num_starts = num_samples - sequence_length + 1

    x_shape = (batch_size, sequence_length, X.shape[1])
    y_shape = (batch_size, sequence_length, Y.shape[1])

    # Infinite loop.
    while True:
        # New arrays every iteration, so batches already handed to the
        # consumer are never overwritten.
        x_batch = np.zeros(shape=x_shape, dtype=np.float32)
        y_batch = np.zeros(shape=y_shape, dtype=np.float32)

        # Fill the batch with random sequences of data.
        for i in range(batch_size):
            # Random start row; randint's upper bound is exclusive.
            idx = np.random.randint(num_starts)

            # Copy the sequences of data starting at this index.
            x_batch[i] = X[idx:idx+sequence_length]
            y_batch[i] = Y[idx:idx+sequence_length]

        yield (x_batch, y_batch)

In [182]:

# Endless stream of random (x_batch, y_batch) pairs cut from the training split.
generator = batch_generator(
    batch_size=batch_size,
    sequence_length=sequence_length,
    X=X_train,
    Y=y_train,
)

In [183]:
# Pull a single batch from the generator. Note that this advances the
# generator and numpy's global random state, which batch_generator draws from.
x_batch, y_batch = next(generator)

In [185]:
# One GRU layer that emits an output at every time step, followed by a dense
# layer applied per time step that maps the 512 GRU units to num_y_signals
# values. The time dimension is left as None so sequences of any length fit.
# NOTE(review): the sigmoid limits every prediction to (0, 1), while the
# targets are the raw 'Open' column values -- confirm they always fall inside
# that range, or rescale them.
model = Sequential([
    GRU(units=512, return_sequences=True, input_shape=(None, num_x_signals,)),
    Dense(num_y_signals, activation='sigmoid'),
])



In [191]:
warmup_steps = 50

def loss_mse_warmup(y_true, y_pred):
    """
    Calculate the Mean Squared Error between y_true and y_pred,
    but ignore the beginning "warmup" part of the sequences.

    y_true is the desired output.
    y_pred is the model's output.

    Both tensors are indexed as [batch, time, signal]. The first
    `warmup_steps` time steps are dropped before the error is computed.
    Returns a scalar tensor: the mean of the squared differences over every
    remaining element. If a sequence has `warmup_steps` or fewer time steps
    the slices are empty and the mean is NaN.
    """

    # Ignore the "warmup" parts of the sequences
    # by taking slices of the tensors.
    y_true_slice = y_true[:, warmup_steps:, :]
    y_pred_slice = y_pred[:, warmup_steps:, :]

    # Element-wise squared error; same shape as the slices.
    squared_error = tf.square(y_true_slice - y_pred_slice)

    # Reduce over all axes explicitly so the result is a single scalar.
    # The error is spelled out with basic ops rather than
    # tf.losses.mean_squared_error: in TF 1.x that helper already returns a
    # reduced scalar (not a per-element tensor) and registers it in the
    # graph's losses collection.
    return tf.reduce_mean(squared_error)

In [192]:
# RMSprop with an initial learning rate of 1e-3.
optimizer = RMSprop(lr=1e-3)

In [193]:
# Attach the warmup-aware MSE loss and the optimizer, then print the layer table.
model.compile(optimizer=optimizer, loss=loss_mse_warmup)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru_2 (GRU)                  (None, None, 512)         806400    
_________________________________________________________________
dense_2 (Dense)              (None, None, 40)          20520     
Total params: 826,920
Trainable params: 826,920
Non-trainable params: 0
_________________________________________________________________


In [196]:
# File that receives the weights of the best model seen so far.
path_checkpoint = '23_checkpoint.keras'

# Save weights only, and only when the validation loss improves.
callback_checkpoint = ModelCheckpoint(
    filepath=path_checkpoint,
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

# Stop training once the validation loss has not improved for 5 epochs.
callback_early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    verbose=1,
)

# TensorBoard logs, without histograms or the graph definition.
callback_tensorboard = TensorBoard(
    log_dir='./23_logs/',
    histogram_freq=0,
    write_graph=False,
)

# Multiply the learning rate by 0.1 as soon as an epoch fails to improve the
# validation loss (patience=0), never going below 1e-4.
callback_reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=0,
    min_lr=1e-4,
    verbose=1,
)

callbacks = [
    callback_early_stopping,
    callback_checkpoint,
    callback_tensorboard,
    callback_reduce_lr,
]

In [None]:
%%time
# Train from the random-batch generator. Because the generator never ends, an
# "epoch" is defined as 100 batches; validation uses the fixed validation_data
# tuple.
model.fit_generator(
    generator=generator,
    steps_per_epoch=100,
    epochs=20,
    validation_data=validation_data,
    callbacks=callbacks,
)

Epoch 1/20
