Helper functions to download trading data

In [6]:
# Import packages
import pandas
import time
import requests
from datetime import datetime, timedelta

# GDAX download functions

def date_to_iso8601(date):
    return '{year}-{month:02d}-{day:02d}T{hour:02d}:{minute:02d}:{second:02d}'.format(
      year=date.year,
      month=date.month,
      day=date.day,
      hour=date.hour,
      minute=date.minute,
      second=date.second)


def gdax_request_trade_slice(product_id, start, end, granularity):
    """
    Single HTTP request function with error catching and management for server error responses
    Response is in the format: [[time, low, high, open, close, volume], ...]
    """
    # Allow 3 retries (we might get rate limited).
    retries = 3
    
    # Change dates to iso8601 format as specified
    iso_start = date_to_iso8601(start)
    iso_end = date_to_iso8601(end)

    # Set uri
    uri = 'https://api.gdax.com/products/{currency_pair}/candles'.format(currency_pair=product_id)
    
    for retry_count in range(0, retries):
        response = requests.get(uri, {
          'start': iso_start,
          'end': iso_end,
          'granularity': granularity * 60 # Converting to seconds for API
        })
        if response.status_code != 200:
            if retry_count + 1 == retries:
                raise Exception('Failed to get exchange data for ({}, {})! Error message: {}'.format(start, end, response.text))
            else:
                # Exponential back-off.
                time.sleep(1.5 ** retry_count)
        else:
            # Sort the historic rates (in ascending order) based on the timestamp.
            result = sorted(response.json(), key=lambda x: x[0])
            return result

        
def gdax_request_order_book(product_id, level):
    """
    Returns the current order book for a currency pair. 
    :Level: Granularity level 1, 2, or 3 as defined by API. Default = 1, preferred for my usage is 2.
    Level 2 response example: 
    {
        "sequence": "3",
        "bids": [
            [ price, size, num-orders ],
            [ "295.96", "4.39088265", 2 ],
            ...
        ],
        "asks": [
            [ price, size, num-orders ],
            [ "295.97", "25.23542881", 12 ],
            ...
        ]
    }
    """
    # Allow 3 retries (we might get rate limited).
    retries = 3

    # Set uri
    uri = 'https://api.gdax.com/products/{currency_pair}/book'.format(currency_pair=product_id)
    
    for retry_count in range(0, retries):
        response = requests.get(uri, {
          'level': level
        })
        if response.status_code != 200 or not len(response.json()):
            if retry_count + 1 == retries:
                raise Exception('Failed to get order book data! Error message: {}'.format(response.text))
            else:
                # Exponential back-off.
                time.sleep(1.5 ** retry_count)
        else:
            return result

        
def gdax_trade_downloader(currency_pair, start, end, interval):
    """
    Breaks up gdax trade data requests into chunks of 200 candlesticks to download in 1 second intervals, to comply with GDAX API rules
    :currency_pair: string with requested crypto-fiat pair
    :start: start of time period as datetime object
    :end: end of time period as datetime object
    :interval: candlestick intervals in ninutes
    Returns an array with rows of candlestick data in the following format: [timestamp, low, high, open, close, volume]    
    """      
    data = [] # Empty list to append data 
    delta = timedelta(minutes=interval * 200) # 200 intervals per request
    slice_start = start
    while slice_start != end:
        slice_end = min(slice_start + delta, end)
        print("downloading {} data from {} to {}".format(currency_pair, slice_start, slice_end))
        data += gdax_request_trade_slice(
                product_id=currency_pair,
                start=slice_start,
                end=slice_end,
                granularity=interval
        )
        slice_start = slice_end
        time.sleep(0.5)

    data_frame = pandas.DataFrame(data=data, columns=['time', 'low', 'high', 'open', 'close', 'volume'])
    data_frame.set_index('time', inplace=True)
    return data_frame


def gdax_order_book_downloader(currency_pair, interval, start_time, end_time):
    """
    Script to download order book at regular intervals. Will work out how to use this data with deep learning network later :)
    :currency_pair: string with requested crypto-fiat pair
    :interval: request intervals in minutes
    :start_time: datetime object - the time at which this operation should start requesting data
    :end_time: datetime object - the time at which this operation should finish
    Returns something...
    """
    data = [] # Empty list to append data
    
    # Calculate time to next interval
    time_to_start =  min(start_time - datetime.datetime.now(),0) # Start when specified, or now if specified time is in the past
    time.sleep(time_to_start)
    
    while datetime.datetime.now() < end_time:
        data += gdax_request_order_book(
                product_id=currency_pair,
                level=2
        )
        time.sleep(interval*60) # sleep takes time in seconds
    
    return data
    
    # Need to work out what I want to do with this. Order book has 2 pieces of important information. Presence of any walls, and overall size    
    
    

In [153]:
# Kraken API
import numpy as np
import krakenex
import pytz
from pykrakenapi import KrakenAPI

def datetime_to_interval(dt, interval): # Round a datetime DOWN to the nearest interval
    interval = datetime(dt.year, dt.month, dt.day, dt.hour, dt.minute // interval * interval)
    return interval

def kraken_request_trade_slice(currency_pair, start, interval):    
    """
    Initializes Kraken API from pykrakenapi library and calls get_trade function. Batches trade data into OHLC (candles) 
    :currency_pair: string with requested crypto-fiat pair
    :start: start of time period as datetime object, defaults to earliest if not passed through
    :interval: candlestick intervals in ninutes
    Returns an array with rows of candlestick data in the following format: ['time', 'low', 'high', 'open', 'close', 'volume']    
    """     
    api = krakenex.API()
    k = KrakenAPI(api)
    timestamp = start.replace(tzinfo=pytz.utc).timestamp()
    trades, last = k.get_recent_trades(currency_pair, timestamp, ascending=True)
    
    return trades, last/1000000000

def kraken_to_ohlc(trades):
    """
    Groups and processes individual trade data to return OHLC (candle) data
    :trades: trades pandas dataframe as returned by kraken_trade_downloader
    Returns an array with rows of candlestick data in the following format: ['time', 'low', 'high', 'open', 'close', 'volume']    
    """
    # Converts unix timestamp data into candle interval periods. Period time corresponds to beginning of period
    trades['datetime'] = pandas.to_datetime(trades['time'], unit='s') # Reformat unix timestamp as datetime
    trades['period'] = trades['datetime'].map(lambda x: datetime_to_interval(x, interval))
    
    # Group trades into periods to aggregate volume and get low/high price
    trade_agg = trades.groupby('period')
    trade_agg = trade_agg.agg({
        'price': {'low': np.min, 'high': np.max},
        'volume': np.sum
    })
    
    # Create a fresh pandas dataframe and copies data from aggregated trades dataframe above
    ohlc = pandas.DataFrame(index=trade_agg.index.values, columns=['low', 'high', 'open', 'close', 'volume'])
    ohlc['low'] = trade_agg['price']['low']
    ohlc['high'] = trade_agg['price']['high']
    ohlc['volume'] = trade_agg['volume']['sum']
    
    # Iterate through each candle period and searches original trades dataframe for the first and last price, then set that
    for i, row in ohlc.iterrows():
        selection = trades.loc[kraken_data['period'] == i] # Return all rows for the current period
        first_trade_index = selection['time'].idxmax()
        first_trade_price = selection.loc[first_trade_index]['price']
        ohlc.at[i, 'open'] = first_trade_price
        last_trade_index = selection['time'].idxmin()
        last_trade_price = selection.loc[last_trade_index]['price']
        ohlc.at[i, 'close'] = last_trade_price

    return ohlc



In [156]:
interval = 5
currency_pair = 'XETHZUSD'
start = datetime(2015,5,1)
end = datetime(2017,5,2)

def kraken_trade_downloader(currency_pair, start, end, interval):
    slice_start = start
    end_timestamp = end.replace(tzinfo=pytz.utc).timestamp()
    data, last = kraken_request_trade_slice(currency_pair, start, interval)
    while last < end_timestamp:        
        slice_start = datetime.fromtimestamp(last)
        new_data, last = kraken_request_trade_slice(currency_pair, slice_start, interval)
        data.append(new_data)
        time.sleep(0.5)
        print("time period ending {}".format(last))
    return data

kraken_trade_downloader(currency_pair, start, end, interval)

time period ending 1440760913.8417077
time period ending 1440760913.8417077
time period ending 1440760913.8417077
time period ending 1440760913.8417077
time period ending 1440760913.8417077
time period ending 1440760913.8417077
time period ending 1440760913.8417077
time period ending 1440760913.8417077
time period ending 1440760913.8417077
time period ending 1440760913.8417077
time period ending 1440760913.8417077
time period ending 1440760913.8417077


KeyboardInterrupt: 

In [35]:
start = datetime(2015,5,1)
end = datetime(2015,5,2)

# Get GDAX data
gdax_pairs = ['BTC-USD']

for pair in gdax_pairs:    
    data = gdax_trade_downloader(pair, start, end, 5)

    print(data)
# Get Kraken data
#kraken_pairs = ['XETHZUSD',
               #'XETHZEUR',
               #'XXBTZUSD',
               #'XXBTZEUR',
               #'XLTCZUSD',
               #'XLTCZEUR']

#for pair in kraken_pairs:
#    print(pair)
#    data, last = kraken_trade_downloader(pair, start, 5)
#    print(data.head())

    

downloading BTC-USD data from 2015-05-01 00:00:00 to 2015-05-01 16:40:00
downloading BTC-USD data from 2015-05-01 16:40:00 to 2015-05-02 00:00:00
               low    high    open   close     volume
time                                                 
1430438400  236.42  236.78  236.78  236.50   1.626472
1430438700  236.01  236.49  236.49  236.01   2.784245
1430439000  236.00  236.10  236.01  236.00  73.308585
1430439300  236.00  236.04  236.04  236.04   6.197529
1430439600  235.79  236.00  236.00  235.80  15.201524
1430439900  235.73  235.82  235.80  235.74  13.571108
1430440200  235.74  235.78  235.74  235.74   4.429600
1430440500  235.74  235.80  235.75  235.77   1.208400
1430440800  235.68  235.83  235.76  235.71   5.973491
1430441100  235.75  235.92  235.75  235.90   0.107201
1430441400  235.91  236.21  235.92  236.21  22.815325
1430441700  236.27  236.40  236.28  236.40   1.700810
1430442000  236.42  238.09  236.42  236.99   5.950986
1430442300  236.96  237.19  236.96  237.18  

Helper functions to download sentiment data

Data pre-processing

In [None]:
# Calculate growth rates. Growth rate prediction is the target

Define RNN

In [None]:
#Basic RNN first
#Then with multiple types of data, try a convnet + RNN

Train RNN

Define test function

In [4]:
data = gdax_request_trade_slice(
                product_id='ETH-USD',
                start=start,
                end=end,
                granularity=5)

print(data.head())

Exception: Failed to get exchange data for (2014-01-01 00:00:00, 2014-01-01 08:20:00)! Error message: []

In [11]:
start = datetime(2014,1,1,0,0)
end = datetime(2014,1,1,8,20)
iso_start = date_to_iso8601(start)
iso_end = date_to_iso8601(end)


data = requests.get('https://api.gdax.com/products/ETH-USD/candles', {'start': iso_start,
                                                                     'end': iso_end,
                                                                     'granularity': 5 * 60})
print(data.json())

[]


In [3]:
start = datetime(2014,1,1)
end = datetime(2017,12,31)
pair = 'ETH-USD'
start, end = test(pair, start, end, 5)
print(start, end)

2014-01-01 00:00:00 2014-01-01 08:20:00


In [10]:
start = datetime(2017,1,1,0,0)
end = datetime(2017,1,1,8,20)

data = gdax_request_trade_slice('ETH-USD', start, end, 5)
print(data)

[[1483228800, 8.11, 8.16, 8.16, 8.11, 107.89282299999999], [1483229100, 8.11, 8.14, 8.11, 8.14, 237.27294536], [1483229400, 8.14, 8.16, 8.14, 8.16, 33.25517664], [1483229700, 8.16, 8.21, 8.16, 8.21, 238.18848347], [1483230000, 8.22, 8.22, 8.22, 8.22, 1.8684608299999999], [1483230300, 8.22, 8.24, 8.22, 8.24, 12.5625353], [1483230600, 8.24, 8.24, 8.24, 8.24, 12.20208], [1483230900, 8.24, 8.24, 8.24, 8.24, 187.10167799999996], [1483231200, 8.24, 8.27, 8.24, 8.27, 38.33348796], [1483231500, 8.25, 8.27, 8.27, 8.26, 25.40013], [1483231800, 8.25, 8.27, 8.25, 8.26, 60.11463923000001], [1483232100, 8.25, 8.26, 8.26, 8.26, 23.564242], [1483232400, 8.26, 8.29, 8.26, 8.26, 221.64923731], [1483232700, 8.27, 8.29, 8.29, 8.29, 26.111549999999998], [1483233000, 8.27, 8.29, 8.29, 8.29, 23.138039999999997], [1483233300, 8.27, 8.29, 8.27, 8.28, 75.39184], [1483233600, 8.29, 8.29, 8.29, 8.29, 55.163439], [1483233900, 8.28, 8.29, 8.28, 8.29, 150.42397000000003], [1483234200, 8.28, 8.29, 8.29, 8.28, 6.85839