In [None]:
from deep_orderbook.replayer import Replayer
from deep_orderbook.shapper import BookShapper
from deep_orderbook.datafeed import DataFeed

import numpy as np
from pylab import rcParams

In [None]:
import matplotlib.pyplot as plt
from IPython.display import clear_output
# Default figure size (width, height) applied to every figure that does not
# pass its own figsize.
rcParams['figure.figsize'] = 20, 4

In [None]:
# Symbols replayed together further down; PAIR is the single market used by
# the single-pair cells (index 1 -> "BTCUSDT").
MARKETS = ["ETHBTC", "BTCUSDT", "ETHUSDT", "BNBBTC", "BNBETH", "BNBUSDT"]
PAIR = MARKETS[1]
PAIR

In [None]:
# aioitertools: itertools-style helpers that work with asyncio iterables
import asyncio
import itertools
import aioitertools

In [None]:
# Shared objects used by most cells below: a BookShapper (built through its
# awaitable factory), a Replayer rooted at ../data/crypto, and a replay
# stream for PAIR that feeds the shapper.
shapper = await BookShapper.create()
file_replayer = Replayer('../data/crypto', date_regexp='20')
areplay = file_replayer.replayL2_async(pair=PAIR, shapper=shapper)

In [None]:
# Smoke test of the file generator: pull at most 10 items and discard them.
# After the loop `i` holds the number of items consumed (10 unless the
# generator ran out earlier).
file_gen = file_replayer.file_generator(pair=PAIR)

i = 0
async for _ in file_gen:
    i += 1
    if i >= 10:
        break

In [None]:
import sys
# Version string of the Python interpreter running this kernel.
sys.version

In [None]:
import tensorflow as tf
# Record the TensorFlow version the rest of the notebook runs against.
print(tf.__version__)

In [None]:
# Peek at the first item produced by training_files for PAIR; gen() below
# unpacks each item into three file names.
g = file_replayer.training_files(PAIR, side_bips=8, side_width=32)
next(g)

In [None]:
def gen(frac_from=0.0, frac_to=1.0):
    """Yield arrays loaded from a fractional slice of the training files.

    All (fn_bs, fn_ps, fn_ts) file-name triplets for PAIR are listed first;
    only those with index in [int(n * frac_from), int(n * frac_to)) are
    loaded. Each triplet is printed before loading.

    Yields:
        A 3-tuple ``(np.load(fn_bs), np.load(fn_ts), np.load(fn_ps))``. Note
        that the last two are swapped relative to the file-name order; the
        three arrays are asserted to share the same first dimension.
    """
    triplets = list(file_replayer.training_files(PAIR, side_bips=8, side_width=32))
    total = len(triplets)
    first, last = int(total * frac_from), int(total * frac_to)
    for fn_bs, fn_ps, fn_ts in triplets[first:last]:
        print(fn_bs, fn_ps, fn_ts)
        # Locals are named after the file each array really comes from.
        from_bs = np.load(fn_bs)
        from_ts = np.load(fn_ts)
        from_ps = np.load(fn_ps)
        n_rows = from_bs.shape[0]
        assert n_rows == from_ts.shape[0]
        assert n_rows == from_ps.shape[0]
        yield from_bs, from_ts, from_ps
        
# Try both ends of a 75% / 25% split of the file list: load the first item of
# the leading part, then show the array shapes of the first item of the
# trailing part.
g = gen(frac_to=0.75)
next(g)
g = gen(frac_from=0.75)
[a.shape for a in next(g)]

In [None]:
import functools
# DataFeed over the same directory and side_bips/side_width as the replayer
# cells; `data` is also read as a global by code further down.
data = DataFeed('../data/crypto', symbol=PAIR, side_bips=8, side_width=32, date_regexp='*')

def alpha(arr_time2level):
    """Return ``10 / (1 + arr_time2level)``, element-wise for arrays.

    The result is 10 where the input is 0 and decreases towards 0 as the
    input grows.
    """
    denominator = 1 + arr_time2level
    return 10 / denominator
def flow(self, split, batch_size, sample_length, seed):
    """Build a (train, validation) pair of tf.data datasets from a feed.

    Args:
        self: feed object providing ``raw_numpy_gen``, ``batch_length``,
            ``widthbooks`` and ``chanbooks`` (called below as ``flow(data, ...)``).
        split: one-element sequence; ``split[0]`` is the fraction passed as
            ``frac_to`` for training and ``frac_from`` for validation.
        batch_size: batch size of the returned datasets.
        sample_length: length handed to ``batch_length`` and used as the
            second dimension of every declared tensor shape.
        seed: forwarded to ``raw_numpy_gen``; when truthy it also enables
            shuffling, so ``None`` or ``0`` leaves the order untouched.

    Returns:
        ``(train_dataset, valid_dataset)``.
    """
    assert len(split) == 1

    def batched(**frac):
        """Return a zero-argument generator function for one side of the split."""
        def raw_gen():
            for fn_bs, fn_ps, fn_ts in self.raw_numpy_gen(seed=seed, **frac):
                # Use the feed that was passed in, not the notebook-level
                # `data` global, so the function works for any feed object.
                yield (
                    self.batch_length(fn_bs, sample_length),
                    self.batch_length(fn_ps, sample_length),
                    # The third array goes through alpha() before batching.
                    self.batch_length(alpha(fn_ts), sample_length),
                )
        return raw_gen

    def make_dataset(raw_gen, name='dataset'):
        ds = tf.data.Dataset.from_generator(
            raw_gen,
            (tf.float32, tf.float32, tf.float32),
            (
                tf.TensorShape([None, sample_length, self.widthbooks, self.chanbooks]),
                tf.TensorShape([None, sample_length, self.widthbooks, 1]),
                tf.TensorShape([None, sample_length, 2, 3]),
            ),
        )
        # Each generator item carries a leading batch axis; flatten it so the
        # dataset can be re-batched to `batch_size`.
        ds = ds.unbatch()
        if seed:
            shuffle_size = 17
            print('shuffle_size', shuffle_size)
            ds = ds.shuffle(shuffle_size, seed=seed)
        return ds.batch(batch_size)

    train_gen = batched(frac_to=split[0])
    valid_gen = batched(frac_from=split[0])
    return make_dataset(train_gen), make_dataset(valid_gen)

# NOTE(review): the result of the local flow() prototype is immediately
# overwritten by DataFeed.data_flow on the next line (with a different
# split), so only the second pair of datasets is used afterwards.
tds, vds = flow(data, split=[0.85], batch_size=1, sample_length=8192, seed=0)
tds, vds = data.data_flow(split=[0.97], batch_size=1, sample_length=8192, seed=0)
tds

In [None]:
tnp = tds.as_numpy_iterator()
vnp = vds.as_numpy_iterator()
# Print the three array shapes of the first training batch, then of the first
# validation batch; a, b, c are left holding the validation batch.
for _batches in (tnp, vnp):
    a, b, c = next(_batches)
    print(a.shape, b.shape, c.shape, sep='\n')

In [None]:
# Run through the rest of the training iterator to check every batch can be
# produced. This leaves `tnp` exhausted and a, b, c bound to the last batch.
for a,b,c in tnp:
    pass

In [None]:
# The loop in the previous cell iterates `tnp` to exhaustion, so next(tnp)
# would raise StopIteration; start a fresh pass over the training dataset.
tnp = tds.as_numpy_iterator()
# Shapes of one training batch, then of the next validation batch; a, b, c are
# left holding the validation batch, which the plotting cell below uses.
for _batches in (tnp, vnp):
    a, b, c = next(_batches)
    print(a.shape, b.shape, c.shape, sep='\n')

In [None]:
import datetime
import matplotlib.dates as mdates
def batch_to_plot(b_prices, b_books, b_labels, b_outputs, nth=0, max_points=512):
    """Plot a window of at most ``max_points`` steps from sample ``nth`` of a batch.

    The window is placed around the time step where ``b_outputs[nth]``,
    averaged over axes 2 and 3, is largest, and is shifted as needed to stay
    inside ``[0, b_outputs.shape[1])``. All four arrays are sliced with the
    same window on axis 1 and handed to ``image_grid``.

    Returns:
        Whatever ``image_grid`` returns for the selected window.
    """
    half_before = max_points // 2
    half_after = half_before + max_points % 2
    n_steps = b_outputs.shape[1]

    # Step with the strongest mean output for the chosen sample.
    activity = np.mean(b_outputs, axis=(2, 3))
    peak = np.argmax(activity[nth])

    # Keep the centre far enough from both ends for a full-width window.
    center = max(min(peak, n_steps - half_after), half_before)
    window = slice(max(0, center - half_before), min(n_steps, center + half_after))

    return image_grid(
        b_prices[nth, window],
        b_books[nth, window],
        b_labels[nth, window],
        b_outputs[nth, window],
    )

def image_grid(prices_dts, books, labels, outputs):
    """Render a six-row diagnostic figure for one window of data and return it.

    Rows, top to bottom: price columns 1 and 2 against time, absolute value
    of book channel 0, book channels 0-2 as an RGB image, channel 0 of
    ``labels``, channel 0 of ``outputs``, and the simulated PnL / position
    curves from ``simul_pnl``.

    Args (layouts as implied by the indexing below):
        prices_dts: indexed ``[t, 0, :]`` for prices and ``[t, 1, :]`` for
            the timestamp, whose element 0 is used as a day count and
            element 1 as seconds. ``[t, 0, 0]`` and ``[t, 1, 2]`` are passed
            to ``simul_pnl`` as the low/high trade columns.
        books: indexed ``[t, level, channel]``, channels 0-2 are used.
        labels, outputs: indexed ``[t, level, channel]``, channel 0 is shown.

    Returns:
        The matplotlib Figure containing the six subplots.
    """
    figure = plt.figure(figsize=(13, 12))
    numsubplot = 6
    prices = prices_dts[:, 0, :]
    low_trade = prices_dts[:, 0, 0]
    high_trade = prices_dts[:, 1, 2]
    lowhigh_trade = np.stack([low_trade, high_trade], axis=-1)

    dts = prices_dts[:, 1, :]
    # days * 86400 + seconds -> POSIX timestamp
    ts = [int(dt[0]) * 3600 * 24 + int(dt[1]) for dt in dts]
    # datetime.utcfromtimestamp is deprecated since Python 3.12; this builds
    # the same naive UTC datetimes.
    x = [
        datetime.datetime.fromtimestamp(t, tz=datetime.timezone.utc).replace(tzinfo=None)
        for t in ts
    ]

    # grid() is given True positionally throughout: the keyword was `b` in
    # older Matplotlib and `visible` in newer releases, so neither keyword
    # works everywhere.
    ax = plt.subplot(numsubplot, 1, 1)
    plt.margins(0.0)
    ax.plot(x, prices[:, 1], 'b')
    ax.plot(x, prices[:, 2], 'r')
    ax.grid(True, which='both', axis='both')
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d@%H:%M:%S'))
    plt.xticks(rotation=5)

    plt.subplot(numsubplot, 1, 2)
    im = np.abs(books[:, :, 0])
    # Zeros are set to -1 here but clipped back to 0 just below.
    im[im == 0] = -1
    toshow = np.clip(im.T, 0, 255)
    plt.imshow(toshow, cmap='nipy_spectral', origin="lower")
    plt.grid(True, which='both', axis='both')

    plt.subplot(numsubplot, 1, 3)
    # Channel 0 is scaled down by 10; all three are offset by 0.5 and clipped
    # to [0, 1] so they can be shown as RGB.
    im0 = books[:, :, 0].T / 10
    im1 = books[:, :, 1].T / 1
    im2 = books[:, :, 2].T / 1
    im3 = np.stack([im0, im1, im2], -1) + 0.5
    toshow = np.clip(im3[:, :, :], 0, 1)
    plt.imshow(toshow, origin="lower")
    plt.grid(True, which='both', axis='both')

    plt.subplot(numsubplot, 1, 4)
    im = labels[:, :, 0]
    toshow = np.clip(im.T, -1000, 1000)
    plt.imshow(toshow, cmap='nipy_spectral', origin="lower")
    plt.grid(True, which='both', axis='both')

    plt.subplot(numsubplot, 1, 5)
    im = outputs[:, :, 0]
    toshow = np.clip(im.T, -1000, 1000)
    plt.imshow(toshow, cmap='nipy_spectral', origin="lower")
    plt.grid(True, which='both', axis='both')

    # Last row: PnL curves on the left axis, positions on a twin right axis.
    ax0 = plt.subplot(numsubplot, 1, 6)
    ax1 = ax0.twinx()
    pos, pnl = simul_pnl(prices, outputs, lowhigh_trade)
    _, pnl_optimist = simul_pnl(prices, outputs, lowhigh_trade, optimist=True)
    # Position obtained when the labels themselves are used as the signal.
    pos_best, pnl_best = simul_pnl(prices, labels, lowhigh_trade)
    plt.margins(0.0)
    ax0.plot(pnl, 'r')
    ax0.plot(pnl_optimist, 'r')
    ax1.plot(pos, 'b')
    ax1.plot(pos_best, 'y')
    plt.grid(True, which='both', axis='both')
    return figure
def simul_pnl(prices, outputs, lowhigh_trade, optimist=False):
    """Simulate position and mark-to-market PnL implied by a signal array.

    Channel 0 of ``outputs`` is split in half along axis 1: the mean of the
    upper half minus the mean of the lower half is the target position at
    each step. Position changes are traded at the next step's price: the mid
    of columns 1 and 2 of ``prices`` when ``optimist`` is true, otherwise
    column 2 (ask) when the position increases and column 1 (bid) when it
    does not.

    Returns:
        ``(position, pnl)`` where ``pnl`` is cumulative cash plus the
        position valued at the next mid price.
    """
    def shifted(series):
        # Drop the first element and reflect-pad one at the end, keeping the length.
        return tf.pad(series[1:], [[0, 1]], mode='REFLECT')

    half = outputs.shape[1] // 2
    upper = outputs[:, half:, 0]
    lower = outputs[:, :half, 0]

    bids_next = shifted(prices[:, 1])
    asks_next = shifted(prices[:, 2])
    # Shifted like the quotes, but not used in the returned values.
    low_next = shifted(lowhigh_trade[:, 0])
    high_next = shifted(lowhigh_trade[:, 1])
    price_next = 0.5 * (bids_next + asks_next)

    position = tf.reduce_mean(upper, axis=1) - tf.reduce_mean(lower, axis=1)
    # Previous step's position, with 0 before the first step.
    previous = tf.pad(position, [[1, 0]])[:-1]
    delta = position - previous

    if optimist:
        fill_price = price_next
    else:
        fill_price = tf.where(delta > 0, asks_next, bids_next)
    cash = tf.cumsum(-delta * fill_price, axis=0)

    return position, cash + position * price_next
# Plot the current batch: `c` is passed as prices, `a` as books, and `b` as
# both labels and outputs.
_ = batch_to_plot(c,a,b,b)

In [None]:
# Shape of the third array of the current batch.
c.shape

In [None]:
# y -> 10 / y - 1 is the algebraic inverse of alpha() defined above, applied
# here to the largest value in `b`.
# NOTE(review): assumes `b` holds alpha-transformed values — confirm.
10 / b.max() - 1

In [None]:
# Call the shapper on the first sample of `a` and `c`; 8 and 32 are the same
# values used for side_bips / side_width elsewhere in this notebook.
b2l = shapper.build_time_level_trade(a[0],c[0], sidebips=8, sidesteps=32)

In [None]:
# Value range of the array computed in the previous cell.
b2l.min(), b2l.max()

In [None]:
# Display the full contents of `c` (this can be a large output).
c

In [None]:
# Wrap gen() in a tf.data pipeline. The declared shapes follow the order in
# which gen() yields its three arrays.
ds = tf.data.Dataset.from_generator( 
     gen, 
     (tf.float32, tf.float32, tf.float32), 
     (tf.TensorShape([None, 64, 3]), tf.TensorShape([None, 64, 1]), tf.TensorShape([None, 2, 3])))
ds

In [None]:
# Materialise the first three elements as numpy arrays (output can be large).
list(ds.take(3).as_numpy_iterator()) 

In [None]:
# await next(file_gen)

In [None]:
# Second replay stream for 'BTCUSDT' on the same shapper as `areplay`;
# advance it 100 steps and discard the results.
rep = file_replayer.replayL2_async('BTCUSDT', shapper)

for i in range(100):
    await next(rep)

In [None]:
# Show what Replayer.zipped_dates() reports for the data directory.
print(file_replayer.zipped_dates())

In [None]:
# Advance `areplay` 100 steps; only the last result is kept in `batptr` and
# its 'bids', 'asks', 'price' and 'trades' entries are printed.
for i in range(100):
    batptr = await next(areplay)
print(f"bids:\n{batptr['bids'].head()}")
print(f"asks:\n{batptr['asks'].head()}")
print(f"prices:\n{batptr['price']}")
print(f"trades:\n{batptr['trades']}")

In [None]:
# One replay stream per market, each with its own freshly created
# BookShapper, combined into a single multi-market replay; `d` is its first
# item.
replayers = [file_replayer.replayL2_async(pair, await BookShapper.create()) for pair in MARKETS]
multi_replay = file_replayer.multireplayL2_async(replayers)
d = await next(multi_replay)

In [None]:
async def run(maxpoints=100):
    """Advance the global ``multi_replay`` by ``maxpoints`` steps.

    Each step awaits ``next(multi_replay)`` and discards the result; the
    coroutine returns ``None``.
    """
    # range() is a plain synchronous iterable. `async for` only accepts
    # objects implementing __aiter__ and raises TypeError otherwise, so a
    # regular `for` loop is used.
    for _ in range(maxpoints):
        await next(multi_replay)

# Schedule run() as a background task on the running event loop; it is
# awaited in the next cell.
task = asyncio.create_task(run())

In [None]:
# Block until the background task finishes; re-raises any exception from run().
await task

In [None]:
# Array stream over the multi-market replay; pull one item to check it
# works and discard it.
genarr = shapper.gen_array_async(market_replay=multi_replay, markets=MARKETS)
_ = await next(genarr)

In [None]:
# Accumulating stream built on top of `genarr`; pull one item to check it
# works and discard it.
genacc = shapper.accumulate_array(genarr, markets=MARKETS)
_ = await next(genacc)

In [None]:
every = 10
LENGTH = 128
x = []
async for n,sec in enumerate(genacc):
    allim = []
    for symb, data in sec.items():
        arr = np.stack(data['bs'][-LENGTH:])
        im = arr
        im[:,:,0] /= 10
        im += 0.5
        allim.append(im)
    allim = np.concatenate(allim, axis=1)
    if n % every == 0:
        plt.figure(figsize=(15,11))
        toshow = allim.transpose(1,0,2)
        toshow = np.clip(toshow, 0, 1)
        plt.imshow(toshow, origin="lower");
        plt.show()
#        pr = np.stack(data['ps'])[-LENGTH:, :-1]
#        plt.plot(pr)
#        plt.show()
#        print(symb)
        clear_output(wait=True)
#        plt.show()
#    if n == 100:
#        break
    pass