In [422]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
from sklearn.preprocessing import normalize

In [345]:
# Load the 50,000 earliest ETH-USD order-book rows from the local Postgres database.
orderbook_query = "select * from orderbook where pair_string LIKE 'ETH-USD' order by created_at asc limit 50000"
engine = create_engine("postgresql://localhost:5432/gdax")
df = pd.read_sql_query(orderbook_query, con=engine)

In [379]:
def convertBookNumbersToFloat(book):
    """Return a new nested list with every field of every order cast to float.

    ``book`` is an iterable of orders; each order is an iterable of values
    that ``float`` accepts (numeric strings or numbers).
    """
    converted = []
    for order in book:
        converted.append(list(map(float, order)))
    return converted

def getMarketPrices(n):
    """Return the mid price of snapshot ``n`` wrapped in a one-element list.

    The mid price is the mean of the first bid's price and the first ask's
    price (``n['bids'][0][0]`` and ``n['asks'][0][0]``), each cast to float.
    """
    first_bid_price = float(n['bids'][0][0])
    first_ask_price = float(n['asks'][0][0])
    mid_price = np.average([first_bid_price, first_ask_price])
    return [mid_price]

def getMarketPricesAndSize(n):
    """Return ``[mid_price, mid_size]`` for snapshot ``n``.

    Both values are plain means over the first bid entry and the first ask
    entry: index 0 of each entry is averaged for the price, index 1 for the
    size. All four inputs are cast to float first.
    """
    first_bid, first_ask = n['bids'][0], n['asks'][0]
    mid_price = np.average([float(first_bid[0]), float(first_ask[0])])
    mid_size = np.average([float(first_bid[1]), float(first_ask[1])])
    return [mid_price, mid_size]
# Pull each side of the book out of every snapshot and cast its entries to floats.
asks = df['data'].apply(lambda n: convertBookNumbersToFloat(n['asks']))

bids = df['data'].apply(lambda n: convertBookNumbersToFloat(n['bids']))

In [380]:
# getMarketPrices wraps each mid price in a one-element list; unwrap it so
# marketPrices is a Series of floats, which the later np.diff call requires.
marketPrices = df['data'].apply(lambda n: getMarketPrices(n)[0])

Example format of a single order-book level (price and size are strings, num-orders is an integer):

df.iloc[0][2]['asks'][0]

[ price, size, num-orders ],

['894.89', '29.7307291', 22]

In [390]:
# Scratch check of np.average on two values (the same call used for the mid price).
np.average([1,2])


In [451]:
def getSplitVWAP(book, n_buckets=5):
    """Split ``book`` into consecutive equal-sized buckets and return each bucket's VWAP.

    Parameters
    ----------
    book : sequence of orders
        Each order is indexable, with the price at index 0 and the size at
        index 1.
    n_buckets : int, default 5
        Number of equal sections to cut the book into. The default keeps the
        previous hard-coded behaviour.

    Returns
    -------
    list
        One ``getBookVWAP`` result per bucket, in book order.

    Raises
    ------
    ValueError
        Raised by ``np.split`` when ``len(book)`` is not divisible by
        ``n_buckets``.
    """
    buckets = np.split(np.array(book), n_buckets)
    return [getBookVWAP(bucket) for bucket in buckets]

def getBookVWAP(book):
    """Return the volume-weighted average price of ``book``.

    Each order contributes its price (index 0) weighted by its size
    (index 1); the result is ``sum(price * size) / sum(size)``.
    """
    prices = np.array([level[0] for level in book])
    sizes = np.array([level[1] for level in book])
    total_size = sizes.sum()
    traded_value = (prices * sizes).sum()
    return traded_value / total_size

def getBookVWAPAndSize(book):
    """Return ``[vwap, total_size]`` for ``book``.

    ``vwap`` is ``sum(price * size) / sum(size)`` using index 0 of each order
    as the price and index 1 as the size; ``total_size`` is ``sum(size)``.
    """
    prices = np.array([level[0] for level in book])
    sizes = np.array([level[1] for level in book])
    traded_value = (prices * sizes).sum()
    vwap = traded_value / sizes.sum()
    return [vwap, sizes.sum()]

In [452]:
# Bucketed VWAPs for every snapshot, one list of bucket values per row and per side.
asks_VWAP = asks.apply(getSplitVWAP)
bids_VWAP = bids.apply(getSplitVWAP)

In [494]:
def bucketDiffs(orders):
    """Return the element-wise change between each pair of consecutive rows.

    ``orders`` is first materialised as a NumPy array through a ``tolist``
    round-trip, so a Series of equal-length lists becomes a 2-D array.
    The result is a list with ``len(orders) - 1`` entries, where entry ``k``
    is ``orders[k + 1] - orders[k]``; fewer than two rows yields ``[]``.
    """
    rows = np.array(np.array(orders).tolist())
    return [rows[k] - rows[k - 1] for k in range(1, len(rows))]

In [495]:
def normalizeDiffs(orders):
    """Return the row-to-row differences of ``orders`` via ``bucketDiffs``.

    Despite the name, no scaling or normalization is applied: the result is
    exactly ``bucketDiffs(orders)``.
    """
    # An unused 2-D copy of ``orders`` used to be built here; bucketDiffs does
    # that conversion itself, so the extra copy was pure overhead.
    return bucketDiffs(orders)

In [496]:
# Row-to-row VWAP differences for each side of the book. No scaling is applied:
# a commented-out np.fabs / np.amax experiment on the ask diffs used to sit here.
normalized_asks, normalized_bids = map(normalizeDiffs, (asks_VWAP, bids_VWAP))

In [497]:
# Peek at the first ten bid-side difference rows.
normalized_bids[:10]

[array([-0.02967084,  0.01481596, -0.00838651, -0.19322573, -0.06038292]),
 array([ 0.        , -0.01466136,  0.00073075,  0.00150321,  0.05398726]),
 array([ 0.05834832,  0.0211964 ,  0.00532813, -0.0615994 , -0.05398726]),
 array([ 0.,  0.,  0.,  0.,  0.]),
 array([ 0.,  0.,  0.,  0.,  0.]),
 array([-0.01558439, -0.02140146, -0.00148909, -0.01559691, -0.00780259]),
 array([ 0.        ,  0.        ,  0.0002227 , -0.00040388,  0.        ]),
 array([ 0.,  0.,  0.,  0.,  0.]),
 array([  1.47481772e-01,  -1.03985131e-02,   2.22409793e-05,
          4.03884893e-04,   5.70890381e-02]),
 array([ 0.01579394,  0.01845683,  0.00047005,  0.        ,  0.        ])]

In [498]:
# First mid price in the series.
marketPrices[0]

458.09500000000003

In [546]:
# Put the ask and bid difference rows side by side: one row per step, ask
# buckets first, then bid buckets. np.hstack already returns an ndarray, so the
# former np.array(...tolist()) round-trip was dropped as redundant.
data = np.hstack((normalized_asks, normalized_bids))

In [547]:
# First combined feature row (ask bucket diffs followed by bid bucket diffs).
data[0]

array([-0.04495413, -0.01178321, -0.00272338,  0.1320499 ,  0.08654638,
       -0.02967084,  0.01481596, -0.00838651, -0.19322573, -0.06038292])

In [561]:
# (rows, features) of the combined feature matrix.
data.shape

(49999, 10)

In [596]:
def sliding_window(arr, window):
    """Return consecutive overlapping slices of ``arr`` of length ``window``.

    Slice ``k`` is ``arr[k:k + window]`` for ``k`` in
    ``range(len(arr) - window)``. The slice that would end exactly at
    ``len(arr)`` is therefore not produced, and an input no longer than
    ``window`` yields an empty list.
    """
    return [arr[start:start + window] for start in range(len(arr) - window)]

In [604]:
# time_series has to exist before it is inspected: this cell used to print
# time_series.shape one line before assigning it, which raises NameError on a
# fresh kernel.
time_series = np.array(data)
print(data.shape)
print(int(time_series.shape[0]/5))
print(time_series.shape)
# Windows of 5 consecutive rows; the final window is dropped, which in the
# recorded run leaves the same number of samples as target_prices.
sliding_buckets = np.array(sliding_window(time_series, 5)[:-1])
print(sliding_buckets.shape)

(49999, 10)
9999
(49999, 10)
(49993, 5, 10)


In [605]:
# get market prices

In [606]:
# A first assignment (every 5th value of marketPrices.diff()) used to precede
# this and was overwritten immediately, so only the effective computation stays.
# Flatten first so this works whether marketPrices holds floats or one-element lists.
price_changes = np.diff(np.array(marketPrices.tolist(), dtype=float).ravel())
# sklearn's normalize expects a 2-D array: pass the whole series as a single
# row so every change is divided by the L2 norm of the full series.
# NOTE(review): with the [6:] offset, target i is the price change between the
# first and second snapshots after window i's last row — confirm that horizon.
target_prices = normalize(price_changes.reshape(1, -1)).ravel()[6:]



In [607]:
# Number of targets; model.fit needs this to equal the number of windows.
target_prices.shape

(49993,)

In [608]:
from keras.models import Sequential
from keras.layers import Dense, Activation, LSTM, Conv1D, Flatten, TimeDistributed

In [619]:
# LSTM regressor: each sample is a window of 5 time steps x 10 features and the
# network emits a single real value.
model = Sequential()
model.add(LSTM(32, input_shape=(5, 10)))
# NOTE(review): these Dense layers have no activation, so stacked together they
# amount to one linear map — confirm whether non-linearities were intended.
model.add(Dense(50))
model.add(Dense(5))
model.add(Dense(1))
# The target is continuous, so classification accuracy is not meaningful here;
# report mean absolute error alongside the MSE loss instead.
model.compile(loss='mean_squared_error',
              optimizer='Adam',
              metrics=['mae'])

In [620]:
# Train for 5 epochs in batches of 100, holding out 25% of the samples for validation.
model.fit(
    sliding_buckets,
    target_prices,
    batch_size=100,
    epochs=5,
    validation_split=0.25,
)

Train on 37494 samples, validate on 12499 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x19d63a128>

In [621]:
# First training window: 5 consecutive rows of the 10 combined features.
sliding_buckets[0]

array([[-0.04495413, -0.01178321, -0.00272338,  0.1320499 ,  0.08654638,
        -0.02967084,  0.01481596, -0.00838651, -0.19322573, -0.06038292],
       [-0.00281333, -0.04237785, -0.03905999, -0.19220934, -0.08528923,
         0.        , -0.01466136,  0.00073075,  0.00150321,  0.05398726],
       [ 0.01243023,  0.03390732,  0.0187935 ,  0.19219974,  0.08612759,
         0.05834832,  0.0211964 ,  0.00532813, -0.0615994 , -0.05398726],
       [ 0.        ,  0.03146324,  0.05627564,  0.07215678,  0.1123457 ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        , -0.03146324, -0.05627564, -0.07215678, -0.1123457 ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ]])

In [622]:
# Scaled price-change targets passed to model.fit.
target_prices

array([ 0.        ,  0.        ,  0.0003003 , ..., -0.00120121,
       -0.00045045,  0.00180181])

In [623]:
# Predict for the window at index 5; slicing keeps the leading batch axis of length 1.
model.predict(sliding_buckets[5:6])

array([[-0.00141575]], dtype=float32)

In [625]:
# Run one batched predict for the first 100 windows instead of 100 separate
# model.predict calls, then print each prediction next to its target.
n_preview = 100
preview_predictions = model.predict(sliding_buckets[:n_preview])
for i in range(n_preview):
    # Slice (rather than index) so each prediction still prints as [[value]].
    print('predicted: ', preview_predictions[i:i + 1], ' | actual: ', target_prices[i], ' ', )

predicted:  [[-0.00174422]]  | actual:  0.0  
predicted:  [[-0.0019192]]  | actual:  0.0  
predicted:  [[-0.00160535]]  | actual:  0.000300301847323  
predicted:  [[-0.00167266]]  | actual:  0.0  
predicted:  [[-0.00168831]]  | actual:  0.00210211293127  
predicted:  [[-0.00141575]]  | actual:  0.0  
predicted:  [[-0.00149097]]  | actual:  0.0  
predicted:  [[-0.00181654]]  | actual:  0.0  
predicted:  [[-0.00155832]]  | actual:  0.0  
predicted:  [[-0.00145115]]  | actual:  0.0  
predicted:  [[-0.00140333]]  | actual:  0.0  
predicted:  [[-0.0016278]]  | actual:  0.0  
predicted:  [[-0.00167754]]  | actual:  0.0  
predicted:  [[-0.00196853]]  | actual:  0.0  
predicted:  [[-0.00214739]]  | actual:  0.0  
predicted:  [[-0.00212487]]  | actual:  0.0  
predicted:  [[-0.00176774]]  | actual:  0.0  
predicted:  [[-0.00161035]]  | actual:  0.0  
predicted:  [[-0.00185407]]  | actual:  0.0  
predicted:  [[-0.00172677]]  | actual:  0.0  
predicted:  [[-0.00205392]]  | actual:  0.0  
predicted