In [1]:
# Import packages
import numpy as np
import pandas as pd
import sys
from datetime import datetime, timedelta
import time
sys.path.append('../..')
import pickle

import h5py
from keras import optimizers
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, MaxPooling2D, Conv2D
from keras.callbacks import ModelCheckpoint, Callback
import matplotlib.pyplot as plt
import missingno as msno

from Classifier.data_processing import processor

Using TensorFlow backend.


## Define Model and Data Collection Function

In [2]:
# Define prediction model
class LSTM_net:
    """
    Stacked LSTM regressor.

    Two 256-unit LSTM layers followed by a single linear output unit,
    compiled with the Adam optimiser and a mean-squared-error loss.

    :input_size: value passed as `input_shape` to the first LSTM layer
    :learning_rate: passed as the first argument to the Adam optimiser
    """
    def __init__(self, input_size, learning_rate):
        self.learning_rate = learning_rate
        self.input_size = input_size
        self.build_model()

    def build_model(self):
        """Create and compile the Keras model; the result is stored in `self.model`."""
        net = Sequential()
        self.model = net

        # The first LSTM returns the full sequence so that the second LSTM
        # receives one vector per timestep. No Dropout layers are used.
        stack = (
            LSTM(256, return_sequences=True, input_shape=self.input_size),
            LSTM(256),
            Dense(1, activation='linear'),
        )
        for layer in stack:
            net.add(layer)

        # NOTE(review): 'accuracy' is tracked alongside an 'mse' loss on a
        # linear output; confirm it is the metric that is actually wanted.
        adam = optimizers.Adam(self.learning_rate)
        net.compile(optimizer=adam, loss='mse', metrics=['accuracy'])

In [5]:
def data_feed(interval, sequence_length, x_mean, x_std,
              target="Btcusd_kraken_close", download=None):
    """
    Function to download most recent data, scrub and normalise it, and return the last time window for the LSTM
    :interval: Time period in minutes between datapoints. Needs to be same as for original training data
    :sequence_length: Number of intervals of "memory" for LSTM network. The returned window has length=sequence_length
    :x_mean: training/validation data mean values for normalisation
    :x_std: training/validation data standard deviation values for normalisation
    :target: Name of the price column whose latest value is returned for logging. Must be the same as for training
    :download: Optional callable(start_time, end_time, interval) returning a DataFrame.
               Defaults to processor.historical_download
    Returns tuple of np.array data for last time window. Shape of (1, sequence_length, input_size), and current price for logging purposes
    Raises ValueError if fewer than sequence_length + 1 rows are downloaded
    """
    if download is None:
        download = processor.historical_download

    # Download data for the most recent period, ending at the top of the current hour
    end_time = datetime.now().replace(microsecond=0, second=0, minute=0)
    # One interval more than the window, because the first growth rate is always NaN
    start_time = end_time - timedelta(minutes=interval * (sequence_length + 1))
    data = download(start_time, end_time, interval)

    # sequence_length growth rates need sequence_length + 1 raw observations
    if len(data) < sequence_length + 1:
        raise ValueError(
            "data_feed needs at least {} rows to build a window of length {}, got {}".format(
                sequence_length + 1, sequence_length, len(data)))

    # Convert to float and interpolate any missing values
    data = data.astype('float64').interpolate()

    # Positional access: plain [-1] on a Series is a label lookup and fails on an integer index
    current_price = data[target].iloc[-1]

    # Convert to growth rates and np.arrays
    growth = data.pct_change()
    x = np.array(growth[1:])  # First value removed. Will always be NaN because growth rates

    # Normalise data
    x = (x - x_mean) / x_std

    # Only the most recent window is needed: (sequence_length, features) -> (1, sequence_length, features)
    input_data = x[-sequence_length:][np.newaxis, ...]
    return input_data, current_price

## Initialise Model

In [3]:
# Global settings shared by the prediction cells
learning_rate = 0.00001  # Only needed to define optimiser. Not used in prediction
sequence_length = 4  # number of intervals in each window fed to the LSTM
interval = 1440  # minutes between datapoints (1440 minutes = 1 day)
# Passed to LSTM_net, which uses it as the first layer's input_shape
input_size = (sequence_length, 35)

# Build the network, then restore the weights with the best validation loss
LSTM_network = LSTM_net(input_size=input_size, learning_rate=learning_rate)
LSTM_network.model.load_weights('saved_models/LSTM_weights.hdf5')

## Set up trading script

In [11]:
# When False, the prediction cell only logs the decision and places no order
live_trading = False

# Load normalisation data
# NOTE(review): pickle.load can execute arbitrary code - only load a file written by a trusted training run
with open('pickles/normalisation.pickle', 'rb') as f:
    x_mean, x_std, y_mean, y_std = pickle.load(f)

# Load latest trade positions from log
try:
    log = pd.read_csv('trade_log.csv')
    # .iloc[-1] selects the last ROW; log[-1] would look up a COLUMN labelled -1
    last_entry = log.iloc[-1]
    cash, position = last_entry['Cash_Position'], last_entry['BTC_Position']
except (FileNotFoundError, pd.errors.EmptyDataError, IndexError, KeyError):
    # No usable log yet (missing file, empty file, no rows or missing columns):
    # start from the default paper-trading portfolio
    cash = 1000
    position = 0

SyntaxError: invalid syntax (<ipython-input-11-44e50a4ed082>, line 12)

In [10]:
# Run prediction script
Date = datetime.now()
input_data, current_price = data_feed(interval, sequence_length, x_mean, x_std)
raw_prediction = LSTM_network.model.predict(input_data)
# Undo the target normalisation, then apply the growth rate to the latest price
expected_growth = raw_prediction.item() * y_std + y_mean
predicted_price = current_price * (1 + expected_growth)

# Simple trading algorithm: go all-in on a predicted rise, all-out on a predicted fall.
# Defaults cover the cases where there is nothing to trade (zero predicted growth,
# or already fully in / out of the market).
fee_multiplier = 0.997  # 0.997 to account for fees
action = "hold"
volume = 0
if expected_growth > 0 and cash > 0:
    action = "buy"
    volume = cash / current_price * fee_multiplier
    # Add to the existing holding instead of overwriting it
    position = position + volume
    cash = 0
elif expected_growth < 0 and position > 0:
    action = "sell"
    volume = position
    # Add the proceeds to existing cash instead of overwriting it
    cash = cash + position * current_price * fee_multiplier
    position = 0

if live_trading and action != "hold":
    import config
    # call Kraken API to make trades here
    # NOTE(review): krakenex and KrakenAPI are not imported anywhere in the visible
    # notebook - they must be imported before live_trading is enabled.
    api = krakenex.API(config.key, config.secret)
    k = KrakenAPI(api)
    k.add_standard_order('XXBTZUSD', action, 'market', volume)

# Reporting and logging
portfolio_value = cash + position * current_price
print("{}: {}. Price expected to change from {} to {}. Portfolio value of {}".format(Date, action, current_price, predicted_price, portfolio_value))
# A single row must be passed as a list of lists to line up with the seven column names
# NOTE(review): to_csv overwrites trade_log.csv, so the file only ever holds the latest entry
log = pd.DataFrame([[Date, action, current_price, predicted_price, cash, position, portfolio_value]], columns=["Date", "Action", "Current Price", "Predicted_Price", "Cash_Position", "BTC_Position", "Portfolio_Value"])
log.to_csv('trade_log.csv', encoding='utf-8', index=True)


time period from 2018-03-14 06:02:45.139251 to 1521018656.905364
time period from 2018-03-14 16:10:56.905364 to 1521045551.6563807
time period from 2018-03-14 23:39:11.656381 to 1521074813.1703522
time period from 2018-03-15 07:46:53.170352 to 1521104017.994906
time period from 2018-03-15 15:53:37.994906 to 1521133196.1067057
time period from 2018-03-15 23:59:56.106706 to 1521162276.3090248
time period from 2018-03-16 08:04:36.309025 to 1521193983.5122778
time period from 2018-03-16 16:53:03.512278 to 1521227434.3747156
time period from 2018-03-17 02:10:34.374716 to 1521257665.5815232
time period from 2018-03-17 10:34:25.581523 to 1521289743.3477752
time period from 2018-03-17 19:29:03.347775 to 1521317155.4914277
time period from 2018-03-18 03:05:55.491428 to 1521345306.8478117
time period from 2018-03-18 10:55:06.847812 to 1521373829.0314195
time period from 2018-03-18 18:50:29.031420 to 1521399029.0
time period from 2018-03-19 01:50:29 to 1521424229.0


  return super(DataFrameGroupBy, self).aggregate(arg, *args, **kwargs)


time period from 2018-03-14 03:43:21.754849 to 1521000398.8961601
call rate limiter exceeded (counter=21, limit=20) 
 sleeping for 5 seconds
time period from 2018-03-14 11:06:38.896160 to 1521026753.2895474
call rate limiter exceeded (counter=20, limit=20) 
 sleeping for 5 seconds
time period from 2018-03-14 18:25:53.289547 to 1521053897.6503463
time period from 2018-03-15 01:58:17.650346 to 1521079963.5032141
call rate limiter exceeded (counter=20, limit=20) 
 sleeping for 5 seconds
time period from 2018-03-15 09:12:43.503214 to 1521107065.0466795
time period from 2018-03-15 16:44:25.046679 to 1521133806.8125281
call rate limiter exceeded (counter=21, limit=20) 
 sleeping for 5 seconds
time period from 2018-03-16 00:10:06.812528 to 1521161050.4679766
call rate limiter exceeded (counter=20, limit=20) 
 sleeping for 5 seconds
time period from 2018-03-16 07:44:10.467977 to 1521188998.7359085
time period from 2018-03-16 15:29:58.735909 to 1521215493.791128
call rate limiter exceeded (coun

ConnectionError: ('Connection aborted.', OSError("(54, 'ECONNRESET')",))