In [1]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine

In [None]:
engine = create_engine("postgresql://localhost:5432/gdax")
df = pd.read_sql_query("select * from orderbook where pair_string LIKE 'ETH-USD' order by created_at asc limit 50000", con=engine)

In [230]:
def convertBookNumbersToFloat(book):
    return [[float(order_detail) for order_detail in order] for order in book]

def getMarketPrices(n):
    return [np.average([float(n['bids'][0][0]) , float(n['asks'][0][0])])]     

def getMarketPricesAndSize(n):
    return [np.average([float(n['bids'][0][0]) , float(n['asks'][0][0])]), np.average([float(n['bids'][0][1]) , float(n['asks'][0][1])])] 
asks = df['data'].apply(lambda n: n['asks'])
asks = asks.apply(convertBookNumbersToFloat)

bids = df['data'].apply(lambda n: n['bids'])
bids = bids.apply(convertBookNumbersToFloat)

marketPrices = df['data'].apply(getMarketPrices)

Example format

df.iloc[0][2]['asks'][0]

[ price, size, num-orders ],

['894.89', '29.7307291', 22]

In [231]:
np.average([1,2])

1.5

In [232]:
def getSplitVWAP(book):
    return [getBookVWAPAndSize(bucket) for bucket in np.split(np.array(book), 5)]

def getBookVWAP(book):
    prices = np.array([order[0] for order in book])
    sizes = np.array([order[1] for order in book])
    return [(prices * sizes).sum() / sizes.sum()]

def getBookVWAPAndSize(book):
    prices = np.array([order[0] for order in book])
    sizes = np.array([order[1] for order in book])
    return [(prices * sizes).sum() / sizes.sum(), sizes.sum()]

In [233]:
asks_VWAP = pd.Series.apply(asks, getSplitVWAP)
bids_VWAP = pd.Series.apply(bids, getSplitVWAP)

In [234]:
asks_VWAP[0]

[[458.13537431175388, 32.107203750000004],
 [458.83672508161715, 77.286264999999986],
 [459.38518398425651, 89.594676010000001],
 [459.64004643709382, 34.854033000000001],
 [459.93487116512142, 112.68685438]]

In [235]:
bids_VWAP[0]

[[457.83894150120994, 83.911493940000014],
 [457.35266750212838, 65.909963380000008],
 [456.93632289319402, 193.24693372000002],
 [456.81556623679342, 12.50021269],
 [456.20939390966862, 91.77681595]]

In [236]:
marketPrices[0]

[458.09500000000003]

In [237]:
data = np.array(np.array(bids_VWAP + asks_VWAP).tolist())

In [238]:
np.zeros((1, 10,2))

array([[[ 0.,  0.],
        [ 0.,  0.],
        [ 0.,  0.],
        [ 0.,  0.],
        [ 0.,  0.],
        [ 0.,  0.],
        [ 0.,  0.],
        [ 0.,  0.],
        [ 0.,  0.],
        [ 0.,  0.]]])

In [239]:
np.array(data).shape

(1000, 10, 2)

In [252]:
time_series = data
time_series.shape = (200,5,10,2)
# sliced_prices = np.array(marketPrices.diff()[0:-1:5].tolist())
sliced_prices = np.diff(np.array([n[0] for n in marketPrices]))[0:-1:5]

In [253]:
print(np.array(time_series[0]).shape)
print(np.array(time_series[0][0]).shape)
print(sliced_prices.shape)
print(np.zeros((2, 3, 4)))

(5, 10, 2)
(10, 2)
(200,)
[[[ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]]

 [[ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]]]


In [320]:
from keras.models import Sequential
from keras.layers import Dense, Activation, LSTM, Conv1D, Flatten, TimeDistributed

In [339]:
model = Sequential()
model.add(TimeDistributed(Conv1D(2, 2, activation='relu'), input_shape=(5, 10, 2)))
model.add(Flatten())
model.add(Dense(50))
model.add(Dense(1))
model.compile(loss='sparse_categorical_crossentropy',
                  optimizer='Adam',
                  metrics=['accuracy'])

In [340]:
model.fit(time_series, sliced_prices, batch_size=100, epochs=5, validation_split=0.25)

Train on 150 samples, validate on 50 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x12ce3eda0>

In [341]:
time_series[0]

array([[[ 457.8389415 ,   83.91149394],
        [ 457.3526675 ,   65.90996338],
        [ 456.93632289,  193.24693372],
        [ 456.81556624,   12.50021269],
        [ 456.20939391,   91.77681595],
        [ 458.13537431,   32.10720375],
        [ 458.83672508,   77.286265  ],
        [ 459.38518398,   89.59467601],
        [ 459.64004644,   34.854033  ],
        [ 459.93487117,  112.68685438]],

       [[ 457.80927066,   93.43449394],
        [ 457.36748346,   79.32096338],
        [ 456.92793639,  194.58905458],
        [ 456.62234051,    6.31469183],
        [ 456.14901099,  138.28840596],
        [ 458.09042019,   33.92031475],
        [ 458.82494187,   76.255401  ],
        [ 459.38246061,   89.59467601],
        [ 459.77209634,   87.18494639],
        [ 460.02141754,  120.31227599]],

       [[ 457.80927066,   93.43449394],
        [ 457.3528221 ,   65.98096338],
        [ 456.92866714,  194.55893372],
        [ 456.62384372,    6.30341269],
        [ 456.20299825,   92.6399059

In [342]:
sliced_prices

array([-0.03 ,  0.   ,  0.07 ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,
        0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,
        0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,
        0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,
        0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.18 ,  0.   ,
        0.075,  0.   ,  0.075,  0.31 ,  0.   ,  0.   ,  0.   ,  0.   ,
        0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,
        0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,
        0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,
        0.   ,  0.   ,  0.   , -0.535, -0.27 ,  0.   ,  0.045, -0.21 ,
        0.   ,  0.   ,  0.   ,  0.155, -0.02 , -0.015, -0.005, -0.01 ,
       -0.01 , -0.08 ,  0.   ,  0.   ,  0.   ,  0.005,  0.   ,  0.   ,
        0.   ,  0.   , -0.01 , -0.19 ,  0.   ,  0.   ,  0.   ,  0.   ,
        0.   ,  0.285,  0.   ,  0.   ,  0.   ,  0.03 ,  0.   ,  0.   ,
      

In [343]:
model.predict(np.array([time_series[5]]))

array([[ 167.43008423]], dtype=float32)

In [344]:
for i in range(100):
    print(sliced_prices[i], ' ', model.predict(np.array([time_series[i]])))

-0.03   [[ 164.51344299]]
0.0   [[ 177.60664368]]
0.07   [[ 192.1328125]]
0.0   [[ 158.41691589]]
0.0   [[ 122.04936218]]
0.0   [[ 167.43008423]]
0.0   [[ 133.40760803]]
0.0   [[ 105.81419373]]
0.0   [[ 123.65576935]]
0.0   [[ 133.97065735]]
0.0   [[ 101.76141357]]
0.0   [[ 122.21348572]]
0.0   [[ 122.39907837]]
0.0   [[ 225.26782227]]
0.0   [[ 226.13009644]]
0.0   [[ 230.81289673]]
0.0   [[ 237.57720947]]
0.0   [[ 220.40016174]]
0.0   [[ 159.64590454]]
0.0   [[ 234.07637024]]
0.0   [[ 166.99372864]]
0.0   [[ 243.46543884]]
0.0   [[ 255.29115295]]
0.0   [[ 109.79023743]]
0.0   [[ 92.7361908]]
0.0   [[ 256.7333374]]
0.0   [[ 112.68661499]]
0.0   [[ 162.94421387]]
0.0   [[ 166.73878479]]
0.0   [[ 158.30654907]]
0.0   [[ 174.09843445]]
0.0   [[ 156.72825623]]
0.0   [[ 163.75561523]]
0.0   [[ 148.51081848]]
0.0   [[ 183.58041382]]
0.0   [[ 121.11057281]]
0.0   [[ 185.29943848]]
0.0   [[ 180.99441528]]
0.18   [[ 124.74798584]]
0.0   [[ 116.48123169]]
0.075   [[ 195.87663269]]
0.0   [[ 83.96