In [422]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
from sklearn.preprocessing import normalize

In [345]:
engine = create_engine("postgresql://localhost:5432/gdax")
df = pd.read_sql_query("select * from orderbook where pair_string LIKE 'ETH-USD' order by created_at asc limit 50000", con=engine)

In [379]:
def convertBookNumbersToFloat(book):
    return [[float(order_detail) for order_detail in order] for order in book]

def getMarketPrices(n):
    return [np.average([float(n['bids'][0][0]) , float(n['asks'][0][0])])]

def getMarketPricesAndSize(n):
    return [np.average([float(n['bids'][0][0]) , float(n['asks'][0][0])]), np.average([float(n['bids'][0][1]) , float(n['asks'][0][1])])] 
asks = df['data'].apply(lambda n: n['asks'])
asks = asks.apply(convertBookNumbersToFloat)

bids = df['data'].apply(lambda n: n['bids'])
bids = bids.apply(convertBookNumbersToFloat)

In [380]:
marketPrices = df['data'].apply(getMarketPrices)

Example format

df.iloc[0][2]['asks'][0]

[ price, size, num-orders ],

['894.89', '29.7307291', 22]

In [390]:
np.average([1,2])


In [451]:
def getSplitVWAP(book):
    return [getBookVWAP(bucket) for bucket in np.split(np.array(book), 5)]

def getBookVWAP(book):
    prices = np.array([order[0] for order in book])
    sizes = np.array([order[1] for order in book])
    return (prices * sizes).sum() / sizes.sum()

def getBookVWAPAndSize(book):
    prices = np.array([order[0] for order in book])
    sizes = np.array([order[1] for order in book])
    return [(prices * sizes).sum() / sizes.sum(), sizes.sum()]

In [452]:
asks_VWAP = pd.Series.apply(asks, getSplitVWAP)
bids_VWAP = pd.Series.apply(bids, getSplitVWAP)

In [494]:
def bucketDiffs(orders):
    orders = np.array(np.array(orders).tolist())
    diffs = []
    for i in range(1,len(orders)):
        diffs.append(orders[i]-orders[i-1])
    return diffs

In [495]:
def normalizeDiffs(orders):
#     asks_with_shape = np.array(np.array(asks_VWAP).tolist())
#     asks_diff = np.diff(asks_with_shape, axis=1)
#     asks_diff[0]
    with_shape = np.array(np.array(orders).tolist())
    diff = bucketDiffs(orders)
    return diff

In [496]:
# a_abs = np.fabs(asks_diff)
# print(a_abs)
# # [[ 1.  2.]
# #  [ 3.  4.]
# #  [ 2.  2.]
# #  [ 0.  1.]
# #  [ 1.  3.]]

# a_max = np.amax(a_abs)
# a_max
normalized_asks = normalizeDiffs(asks_VWAP)
normalized_bids = normalizeDiffs(bids_VWAP)

In [497]:
normalized_bids[:10]

[array([-0.02967084,  0.01481596, -0.00838651, -0.19322573, -0.06038292]),
 array([ 0.        , -0.01466136,  0.00073075,  0.00150321,  0.05398726]),
 array([ 0.05834832,  0.0211964 ,  0.00532813, -0.0615994 , -0.05398726]),
 array([ 0.,  0.,  0.,  0.,  0.]),
 array([ 0.,  0.,  0.,  0.,  0.]),
 array([-0.01558439, -0.02140146, -0.00148909, -0.01559691, -0.00780259]),
 array([ 0.        ,  0.        ,  0.0002227 , -0.00040388,  0.        ]),
 array([ 0.,  0.,  0.,  0.,  0.]),
 array([  1.47481772e-01,  -1.03985131e-02,   2.22409793e-05,
          4.03884893e-04,   5.70890381e-02]),
 array([ 0.01579394,  0.01845683,  0.00047005,  0.        ,  0.        ])]

In [498]:
marketPrices[0]

458.09500000000003

In [546]:
data = np.array(np.hstack((normalized_asks, normalized_bids)).tolist())

In [547]:
data[0]

array([-0.04495413, -0.01178321, -0.00272338,  0.1320499 ,  0.08654638,
       -0.02967084,  0.01481596, -0.00838651, -0.19322573, -0.06038292])

In [561]:
np.array(data).shape

(49999, 10)

In [596]:
def sliding_window(arr, window):
    windows = []
    for i in range(window,len(arr)):
        windows.append(arr[i-window:i])
    return windows

In [604]:
print(data.shape)
print(int(time_series.shape[0]/5))
time_series = np.array(data.tolist())
print(time_series.shape)
sliding_buckets = np.array(np.array(sliding_window(time_series, 5)[:-1]).tolist())
print(sliding_buckets.shape)

(49999, 10)
9999
(49999, 10)
(49993, 5, 10)


In [605]:
# get market prices

In [606]:
target_prices = np.array(marketPrices.diff()[0:-1:5].tolist())
target_prices = normalize(np.diff(np.array(marketPrices))).ravel()[6:]



In [607]:
# sliced_prices = sliding_window(diff_prices, 5)
# sliced_prices[0]
np.array(target_prices).shape

(49993,)

In [608]:
from keras.models import Sequential
from keras.layers import Dense, Activation, LSTM, Conv1D, Flatten, TimeDistributed

In [612]:
model = Sequential()
model.add(LSTM(4, input_shape=(5, 10)))
model.add(Dense(1))
model.compile(loss='mean_squared_error',
                  optimizer='Adam',
                  metrics=['accuracy'])

In [None]:
model.fit(sliding_buckets, target_prices, batch_size=100, epochs=5, validation_split=0.25)

In [414]:
time_series[0]

array([[[ 457.8389415 ,   83.91149394],
        [ 457.3526675 ,   65.90996338],
        [ 456.93632289,  193.24693372],
        [ 456.81556624,   12.50021269],
        [ 456.20939391,   91.77681595],
        [ 458.13537431,   32.10720375],
        [ 458.83672508,   77.286265  ],
        [ 459.38518398,   89.59467601],
        [ 459.64004644,   34.854033  ],
        [ 459.93487117,  112.68685438]],

       [[ 457.80927066,   93.43449394],
        [ 457.36748346,   79.32096338],
        [ 456.92793639,  194.58905458],
        [ 456.62234051,    6.31469183],
        [ 456.14901099,  138.28840596],
        [ 458.09042019,   33.92031475],
        [ 458.82494187,   76.255401  ],
        [ 459.38246061,   89.59467601],
        [ 459.77209634,   87.18494639],
        [ 460.02141754,  120.31227599]],

       [[ 457.80927066,   93.43449394],
        [ 457.3528221 ,   65.98096338],
        [ 456.92866714,  194.55893372],
        [ 456.62384372,    6.30341269],
        [ 456.20299825,   92.6399059

In [415]:
sliced_prices

array([-0.0021495 ,  0.        ,  0.0050155 , ..., -0.00322425,
        0.        ,  0.00394075])

In [416]:
model.predict(np.array([time_series[5]]))

array([[ 134.60336304]], dtype=float32)

In [417]:
for i in range(100):
    print(sliced_prices[i], ' ', model.predict(np.array([time_series[i]])))

-0.00214950097888   [[ 136.83778381]]
0.0   [[ 116.76737976]]
0.00501550228405   [[ 80.7066803]]
0.0   [[ 94.66082764]]
0.0   [[ 133.93530273]]
0.0   [[ 134.60336304]]
0.0   [[ 129.40983582]]
0.0   [[ 119.22134399]]
0.0   [[ 103.34341431]]
0.0   [[ 134.41841125]]
0.0   [[ 111.44776917]]
0.0   [[ 112.42054749]]
0.0   [[ 112.60679626]]
0.0   [[ 98.43779755]]
0.0   [[ 77.46728516]]
0.0   [[ 132.86380005]]
0.0   [[ 140.58605957]]
0.0   [[ 126.48045349]]
0.0   [[ 84.05243683]]
0.0   [[ 69.62892914]]
0.0   [[ 69.06201172]]
0.0   [[-8.37551498]]
0.0   [[ 55.20742798]]
0.0   [[ 91.3833313]]
0.0   [[ 25.36014557]]
0.0   [[-25.42673492]]
0.0   [[ 87.5565033]]
0.0   [[ 131.75802612]]
0.0   [[ 114.25089264]]
0.0   [[ 99.58597565]]
0.0   [[ 131.56307983]]
0.0   [[ 89.49495697]]
0.0   [[ 5.93018723]]
0.0   [[ 55.64246368]]
0.0   [[ 60.6386261]]
0.0   [[ 122.99694061]]
0.0   [[ 27.45167923]]
0.0   [[-1.36375427]]
0.0128970058733   [[ 103.92967224]]
0.0   [[ 43.5026474]]
0.00537375244719   [[ 70.58277