Module dependencies

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.contrib.keras as k
import matplotlib.pyplot as plt

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

This is a custom module that contains the class used for prediction.

In [4]:
import predictors

# Introduction

Problem: Should I buy or sell?

In this model we predict whether we should buy or sell BTC. We assume that the sequence of closing prices over the last 30 minutes can determine the price movement over the next 5 minutes. To learn this, we apply a Recurrent Neural Network (RNN) to the sequence. The input is a sequence of 30 prices and the output is the probability that the mean price of the next five minutes is greater than the mean price of the last 30 minutes. If this probability is greater than 50%, the order is to buy; otherwise we should sell.

We use an RNN because it is good at finding patterns in sequences, learning what to forget and what to retain.

# Data Step - [Sample,Time,Features]

To model this type of sequence, we have to shape the data so that each row is a sequence of 30 prices.

In [5]:
# Alternative source, kept for reference: three 15-second-resolution extracts
# concatenated into a single frame.
#data1 = pd.read_csv('data/BTC-USD_2017-04-01_2017-07-21_15sec.csv')
#data2 = pd.read_csv('data/BTC-USD_2016-06-01_2017-01-01_15sec.csv')
#data3 = pd.read_csv('data/BTC-USD_2017-01-01_2017-04-01_15sec.csv')
#data = pd.concat([data1,data2,data3],axis=0)
#del data1,data2, data3

# Active source: a single CSV resolved relative to the working directory
# (the file name indicates 1-minute resolution).
PRICES_CSV = 'BTC-USD_2015-01-14_2017-07-21_1min.csv'
data = pd.read_csv(PRICES_CSV)

After reading the data, we need a tensor of shape [sample, window, features],
so for this first model the expected tensor is going to be [n_rows, 30, 1].

In [6]:
# Chronological order: by calendar date first, then by time within the day.
data = data.sort_values(by=['date', 'time'])

For now we are going to assume that the time interval of the data is 1 minute for all observations. In the next section we will build the right shape of the data.

In [7]:
# Sorted, de-duplicated list of the dates present in the data.
trading_days = np.unique(data['date'])

To normalise the data, we compute a ratio against a fixed base price: the initial price of each sequence is used as the base price.

In [8]:
def normalise_seq(time_series):
    """Express every series as a relative change from its own first value.

    Each element ``p_i`` of a series is mapped to ``p_i / p_0 - 1``, where
    ``p_0`` is the first element of that same series, so every normalised
    series starts at 0.

    Parameters
    ----------
    time_series : array-like
        Rectangular collection of series, indexed as ``[series, step]`` or
        ``[series, step, feature]``. All series must have the same length.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as the input. Empty input is
        returned as an empty float array.

    Notes
    -----
    Division follows numpy semantics: a base value of 0 produces ``inf`` or
    ``nan`` (with a RuntimeWarning) rather than raising.
    """
    series = np.asarray(time_series, dtype=float)
    if series.size == 0:
        # Nothing to normalise; also avoids indexing a 1-D empty array below.
        return series
    # Keep the step axis (length 1) so the base broadcasts over every step.
    base = series[:, :1]
    return series / base - 1

As we said earlier, X has to be 30 minutes of closing prices and Y has to be the comparison between the mean closing price of those 30 minutes and the mean of the next five minutes.

In [9]:
# Number of consecutive prices fed to the model as one input sequence.
STEPS = 30
# Number of prices following the input sequence that are used to build the label.
FORWARD = 5

In [None]:
# Length of one extracted window: the model input plus the look-ahead part.
window = STEPS + FORWARD

sequence = []
# Windows are extracted day by day, so no window spans two different dates.
for d in trading_days:
    daily = data.loc[data.date == d, :]
    daily = np.array(daily.drop(['date', 'time'], axis=1))
    # Closing price: 4th remaining column, kept 2-D as [rows, 1]
    # (column position is assumed from the CSV layout - TODO confirm).
    daily = daily[:, 3:4]
    # Slide a window of `window` rows over the day. The "+ 1" keeps the last
    # full window (n rows hold n - window + 1 windows); days shorter than one
    # window yield an empty range and are skipped.
    for w in range(daily.shape[0] - window + 1):
        sequence.append(daily[w:w + window])

In [None]:
# Shift every price by +1 before normalising
# (presumably to guard against a zero base price - TODO confirm).
shifted_prices = np.array(sequence) + 1
sequence = normalise_seq(shifted_prices)

In [None]:
# Inputs: every step of each window except the last FORWARD ones.
x_train = sequence[:, :-FORWARD, :]
# Look-ahead part: the last FORWARD steps. The trailing colon matters;
# `-FORWARD` alone would select a single step instead of a range.
y_train = sequence[:, -FORWARD:, :]
# Label is 1 when the mean of the look-ahead steps exceeds the mean of the
# input steps, else 0. Both means reduce the step axis, giving [n, 1] arrays.
p_train = (np.mean(y_train, axis=1) > np.mean(x_train, axis=1)).astype(int)
# One-hot encode; num_classes is pinned so the output always has two columns,
# even if only one class happens to be present.
p_train = k.utils.to_categorical(p_train, num_classes=2)

# Copy the slice so x_train no longer shares memory with `sequence`.
x_train = np.array(x_train)

The overall probability that the mean of the last 30 minutes is lower than the mean of the next 5 minutes is, in general, 54%.

In [None]:
# Share of samples in each one-hot class column.
p_train.mean(axis=0)

We are only going to use 60% of the sequences for training; the remaining 40% is held out for testing.

In [None]:
SPLIT_SEED = 7614
# Seed the global numpy RNG for any later code that draws from it.
np.random.seed(SPLIT_SEED)
# Pass the seed explicitly so the split does not depend on global RNG state.
# NOTE: this cell overwrites x_train / p_train, so re-running it without
# re-running the cells above splits the already-reduced training set again.
x_train, x_test, p_train, p_test = train_test_split(
    x_train, p_train, test_size=0.4, random_state=SPLIT_SEED
)

# Training Step - Keras, Tensorflow, RNN

TensorFlow is the backend for Keras.

## Categorical Model

In [None]:
# Input: each sample has the shape of x_train minus its leading (sample) axis.
inputs = k.layers.Input(shape=x_train.shape[1:])

net = inputs

# First hidden layer: LSTM that returns only its final output
# (return_sequences=False), followed by dropout.
net = k.layers.LSTM(64, activation="elu", recurrent_dropout=0.4, return_sequences=False)(net)
net = k.layers.Dropout(0.1)(net)

# Second hidden layer: fully connected -> batch normalisation -> ELU.
# The dense layer has no bias term (use_bias is a boolean flag).
net = k.layers.Dense(32, activation=None, use_bias=False)(net)
net = k.layers.BatchNormalization()(net)
net = k.layers.Activation('elu')(net)

# Output layer: fully connected -> batch normalisation -> softmax over 2 classes.
net = k.layers.Dense(2, activation=None, use_bias=False)(net)
net = k.layers.BatchNormalization()(net)
net = k.layers.Activation('softmax')(net)
output = net

In [None]:
# Wrap the layer graph, from input tensor to softmax output, in a trainable model.
model = k.models.Model(inputs=inputs, outputs=output)

In [None]:
# Adam with its default settings; cross-entropy matches the one-hot labels.
adam = k.optimizers.Adam()
model.compile(
    loss=k.losses.categorical_crossentropy,
    optimizer=adam,
    metrics=["accuracy"],
)

In [None]:
# Train for 5 epochs in batches of 512, holding out 25% of the training
# data for validation.
model.fit(
    x=x_train,
    y=p_train,
    batch_size=512,
    epochs=5,
    validation_split=0.25,
    shuffle=True,
)

In [None]:
# Loss and accuracy on the held-out test split.
model.evaluate(x=x_test, y=p_test)

In [None]:
# Persist the trained model as a single HDF5 file; the target directory is
# not created here.
MODEL_PATH = "models/categorical_model_v00.h5"
model.save(MODEL_PATH)

# Prediction - Buy or Sell?

For the prediction we use a class that loads the model, feeds it the input tensor, and returns the response: Buy or Sell.

In [None]:
# Predictor object from the project-local `predictors` module.
# NOTE(review): which saved model file it loads is not visible here - confirm
# it matches the model saved by this notebook.
predictor = predictors.RnnPredictor()

In [None]:
# Rows of the single day used for the backtest below.
BACKTEST_DAY = "2017-07-20"
day_test = data.loc[data['date'] == BACKTEST_DAY]

In [None]:
# Drop the date/time columns, then keep the 4th remaining column as a
# 2-D [rows, 1] array.
day_values = day_test.drop(["date", "time"], axis=1)
day_vector = np.array(day_values)[:, 3:4]

This is a backtest of how the model would behave over one day of trading.

In [None]:
# Two parallel series, one entry per observation: the price where the
# corresponding signal was emitted, NaN everywhere else.
buy = []
sell = []

for row in day_vector:
    price = row[0]

    # Feed the newest price to the predictor, then ask for its decision.
    predictor.recive(price)
    signal = predictor.publish()

    # Any response other than "BUY"/"SELL" leaves NaN in both series.
    buy.append(price if signal == "BUY" else np.nan)
    sell.append(price if signal == "SELL" else np.nan)

Green is Buy and Red is Sell

In [None]:
%matplotlib inline
plt.plot(sell,"r")
plt.plot(buy,"g")
plt.show()