In [2]:
%load_ext autoreload
%autoreload 2

from tqdm import tqdm
import pandas as pd
import numpy as np
import gzip
import json
import math
from datetime import datetime
import matplotlib.dates as mdates
from mpl_toolkits.mplot3d import Axes3D
from IPython.display import display
%matplotlib inline


import sys
sys.path.append('..')
from helper.rl_framework import *
from helper.orderbook_container import OrderbookContainer
from helper.manage_orderbooks import *
from helper.orderbook_trader import *
from helper.Q_learning import QLearn

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
# Location of the serialized orderbook snapshots.
# NOTE(review): absolute, user-specific path -- it will not resolve on another
# machine; consider deriving it from a configurable data directory.
filename = '/home/axel/data/obs_2016-11_USDT_BTC_range1.2.dict'
# filename = '../../data/3000orderbooks'
currency_pair = 'USDT_BTC'  # not referenced again in the visible cells

# Size of each data set: the first `samples` lines of the file feed the
# training set, the following `samples` lines feed the validation set.
samples = 120
print("# Load Training Set")
orderbooks_train = load_orderbook_snapshot(infile=filename, first_line=0, last_line=samples)
print(orderbooks_train[0])

print("")
print("# Load Validation Set")
orderbooks_val = load_orderbook_snapshot(infile=filename, first_line=samples, last_line=2*samples)
print(orderbooks_val[0])

# Load Training Set


100%|██████████| 120/120 [00:10<00:00, 10.96it/s]


Loaded 120 orderbooks from file '/home/axel/data/obs_2016-11_USDT_BTC_range1.2.dict'.
OrderbookContainer from 2016-11-08T10:00
  663 bids (best: 705.0)
  475 asks (best: 705.450997)
  kind: 'orderbook'

# Load Validation Set


100%|██████████| 120/120 [00:09<00:00, 13.17it/s]

Loaded 120 orderbooks from file '/home/axel/data/obs_2016-11_USDT_BTC_range1.2.dict'.
OrderbookContainer from 2016-11-08T12:01
  679 bids (best: 705.5)
  473 asks (best: 707.823093)
  kind: 'orderbook'





### Settings

In [9]:
T = 4  # Time horizon: number of trading periods per episode (an episode spans T*P = 60 snapshots)
P = 15  # Period length: number of orderbook snapshots per trading period
print("T={}, P={}".format(T, P))

T=4, P=15


#### Split orderbook array into non-overlapping episodes

In [10]:
def create_episodes(orderbooks, episode_length):
    """Cut a sequence of orderbooks into consecutive, non-overlapping episodes.

    Only complete episodes are produced: trailing orderbooks that do not fill
    a whole window of ``episode_length`` entries are dropped.

    Args:
        orderbooks: Sliceable sequence whose items provide a ``copy()`` method.
        episode_length: Number of orderbooks per episode.

    Returns:
        A list of episodes; each episode is a list holding copies (via
        ``copy()``) of ``episode_length`` consecutive orderbooks.
    """
    num_episodes = int(len(orderbooks) / episode_length)
    window_starts = (idx * episode_length for idx in range(0, num_episodes))
    return [
        [book.copy() for book in orderbooks[start:start + episode_length]]
        for start in window_starts
    ]

# One episode covers the full trading horizon: T periods of P snapshots each.
episode_windows_train = create_episodes(orderbooks_train, episode_length=T*P)
episode_windows_val = create_episodes(orderbooks_val, episode_length=T*P)

# Summary lines; indexing [0] assumes each set yielded at least one complete episode.
print("Training Episodes  : {}, episode length: {}, start at: {}".format(len(episode_windows_train), len(episode_windows_train[0]), episode_windows_train[0][0].timestamp))
print("Validation Episodes: {}, episode length: {}, start at: {}".format(len(episode_windows_val), len(episode_windows_val[0]), episode_windows_val[0][0].timestamp))
# plot_episode(episode_windows_val[0], volume=10)
# plot_episode(episode_windows_val[0], volume=100)

Training Episodes  : 2, episode length: 60, start at: 2016-11-08T10:00
Validation Episodes: 2, episode length: 60, start at: 2016-11-08T12:01


In [11]:
# STATE_DIM = 2
# Discrete action grid: NUM_ACTIONS evenly spaced values from -1 to 5 (inclusive).
# optimal_strategy adds each value to an orderbook's center price to form the limit.
NUM_ACTIONS = 11
actions = list(np.linspace(-1, 5, num=NUM_ACTIONS))
print("available actions: {}".format(actions))

available actions: [-1.0, -0.40000000000000002, 0.19999999999999996, 0.79999999999999982, 1.3999999999999999, 2.0, 2.5999999999999996, 3.2000000000000002, 3.7999999999999998, 4.3999999999999995, 5.0]


In [12]:
def round_custombase(val, *, base):
    """Round ``val`` to the nearest multiple of ``base``.

    Args:
        val: Value to round; converted with ``float()`` first.
        base: Step size (keyword-only); the result is a whole multiple of it.

    Returns:
        The rounded value as a ``float``. Ties follow the built-in ``round``.
    """
    whole_steps = round(float(val) / base)
    return float(whole_steps * base)

# Quick sanity check: 12.43 rounded to the nearest multiple of 5, printed with its type.
test = round_custombase(12.43, base=5)
print(type(test), test)

<class 'float'> 10.0


In [18]:
def optimal_strategy(V, T, period_length, vol_intervals, actions, verbose=True, episode_windows=None):
    """Fill a QLearn table by exhaustively simulating every (time, volume, action) combination.

    Time is walked backwards (``tt = T-1 ... 0``). For every episode, every
    discretised remaining-volume fraction and every action, a fresh
    ``OrderbookTradingSimulator`` trades ``vol * V`` starting at snapshot
    ``period_length * tt`` with ``limit = center + action``. ``center`` is
    the ``get_center()`` value of that starting snapshot. The resulting
    transition is then passed to ``ql.learn``.

    Side effects: prints progress and every transition, writes Q-function plots
    to ``../graphs`` after each episode, pickles the table to ``../pickles``
    after each time step, and finally plots the last processed episode.

    Args:
        V: Total volume to trade.
        T: Number of trading periods.
        period_length: Number of orderbook snapshots per trading period.
        vol_intervals: Number of discrete volume levels (fractions of ``V``).
        actions: Iterable of limit offsets that are added to the center price.
        verbose: Forwarded to ``ql.plot_Q``.
        episode_windows: Episodes to learn from. Defaults to the notebook-level
            ``episode_windows_train`` (the original behaviour).

    Returns:
        The populated ``QLearn`` instance.
    """
    if episode_windows is None:
        # Backward-compatible default: fall back to the notebook's training set.
        episode_windows = episode_windows_train

    timestamp = datetime.now()

    print("V: {}, T: {}, period_length: {}, vol_intervals: {}, num_actions: {}".format(V, T, period_length, vol_intervals, len(actions)))
    print("actions: {}".format(actions))
    volumes = np.linspace(0, 1, num=vol_intervals+1)[1:][::-1] # skip volumes=0

    volumes_base = float(V)/vol_intervals
    print("volumes_base: {}".format(volumes_base))
    print("volumes: {}".format(volumes))
    ql = QLearn(actions=actions, vol_intervals=vol_intervals, V=V, T=T, period_length=period_length)
    print("V: ", ql.V)

    last_episode = None  # remembered explicitly instead of relying on a leaked loop variable

    for tt in tqdm(range(T)[::-1]):
        trading_startpoint = period_length*tt
        time_left = T-tt

        for episode in tqdm(episode_windows):
            last_episode = episode
            center = episode[trading_startpoint].get_center()
            # ask = episode[trading_startpoint].get_ask()
            initial_center = episode[0].get_center()

            for vol in volumes:
                if tt == 0 and vol != 1.:
                    # at t=0 we always have 100% of the volume left.
                    print("x", vol)
                    break

                for a in actions:
                    state = ql.state_as_string(time_left=time_left, volume_left=vol)  #, orderbook=episode[trading_startpoint])

                    if vol == 0:
                        # Nothing to trade: no cost and no volume left. The rounded value
                        # must be bound here because new_state below reads it.
                        volume_left_rounded = 0
                        cost = 0
                    else:
                        ots = OrderbookTradingSimulator(orderbooks=episode[trading_startpoint:], volume=vol*V, tradingperiods=T-tt,
                                                        period_length=period_length)
                        limit = center + a

                        ots.trade(limit = limit)  # agression_factor=a)

                        volume_left = ots.volume
                        volume_left_rounded = round_custombase(volume_left, base=volumes_base)

                        volume_traded = ots.history.volume_traded.values[-1]
                        volume_traded_rounded = round_custombase(volume_traded, base=volumes_base)

                        assert volume_left_rounded + volume_traded_rounded - vol*V <= 1.e-8, "{} {} {} {}".format(
                            volume_left_rounded, volume_traded_rounded, vol, V)

                        # Positional access to the latest history row, same as volume_traded above.
                        avg = ots.history.avg.values[-1]

                        # manually compute costs, since we have to think in discrete volume steps (rounding ...)
                        cost = volume_traded_rounded * (avg - initial_center) / initial_center

                    new_state = ql.state_as_string(time_left=time_left-1, volume_left=volume_left_rounded/V)  #, orderbook=ots.masterbook)

                    print(state, a, cost, new_state)

                    ql.learn(state, a, cost, new_state)

            ql.plot_Q(outfile="../graphs/Q_function_{}_action".format(T-tt), outformat='pdf', z_represents='action', verbose=verbose)
            ql.plot_Q(outfile="../graphs/Q_function_{}_Q".format(T-tt), outformat='pdf', z_represents='Q', verbose=verbose)
        ql.save("../pickles/Q_function_{}".format(timestamp))

    if last_episode is not None:
        plot_episode(last_episode, volume=V)
    return ql

In [19]:
# Total volume to trade; passed to optimal_strategy together with the notebook-level T and P.
V = 100
print("V={}, T={}, P={}".format(V, T, P))
ql = optimal_strategy(V=V, T=T, period_length=P, vol_intervals=10, actions=actions)
# Plot the resulting table twice: once with z_represents='action', once with z_represents='Q'.
ql.plot_Q(z_represents='action')
ql.plot_Q(z_represents='Q')

  0%|          | 0/4 [00:00<?, ?it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

V=100, T=4, P=15
V: 100, T: 4, period_length: 15, vol_intervals: 10, num_actions: 11
actions: [-1.0, -0.40000000000000002, 0.19999999999999996, 0.79999999999999982, 1.3999999999999999, 2.0, 2.5999999999999996, 3.2000000000000002, 3.7999999999999998, 4.3999999999999995, 5.0]
volumes_base: 10.0
volumes: [ 1.   0.9  0.8  0.7  0.6  0.5  0.4  0.3  0.2  0.1]
V:  100


[A

['1.00', 1] -1.0 0.938119193652 ['0.00', 0]
['1.00', 1] -0.4 0.938119193652 ['0.00', 0]
['1.00', 1] 0.2 0.938119193652 ['0.00', 0]
['1.00', 1] 0.8 0.938119193652 ['0.00', 0]
['1.00', 1] 1.4 0.938119193652 ['0.00', 0]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.bids.sort_index(inplace=True, ascending=False)


['1.00', 1] 2.0 0.938119193652 ['0.00', 0]
['1.00', 1] 2.6 0.424431000136 ['0.00', 0]
['1.00', 1] 3.2 0.263732040578 ['0.00', 0]
['1.00', 1] 3.8 0.186049083874 ['0.00', 0]
['1.00', 1] 4.4 0.198646473392 ['0.00', 0]
['1.00', 1] 5.0 0.199439127685 ['0.00', 0]
['0.90', 1] -1.0 0.799518194562 ['0.00', 0]
['0.90', 1] -0.4 0.799518194562 ['0.00', 0]
['0.90', 1] 0.2 0.799518194562 ['0.00', 0]





KeyboardInterrupt: 

In [20]:
# Re-plot the Q-function. Requires `ql` to be bound, i.e. the training cell
# (`ql = optimal_strategy(...)`) must have run to completion first.
ql.plot_Q(z_represents='action')
ql.plot_Q(z_represents='Q')

NameError: name 'ql' is not defined

In [21]:
# Rebinds the notebook-level V and T before loading a previously pickled Q-function.
V=100
T=4
# P=15
# NOTE(review): optimal_strategy constructs QLearn with V, T and period_length as well;
# confirm that this shorter constructor call is still supported.
ql = QLearn(actions = actions, vol_intervals=10)
# ql = ql.load("pickles/Q_function_e375_T4_P2_V200")
# NOTE(review): the recorded run of this cell failed with
# "TypeError: load() takes 1 positional argument but 2 were given" -- the path
# presumably has to be passed differently (keyword argument?); verify against QLearn.load.
ql = ql.load("pickles/Q_function_e12_T4_P15_V100_I10")
# NOTE(review): plot_Q is called with positional V, T here but without them
# in optimal_strategy -- confirm which signature is current.
ql.plot_Q(V, T, z_represents='action')
ql.plot_Q(V, T, z_represents='Q')

print(ql)
# ql.plot_Q(V, T, z_represents='both')

# for key in sorted(ql.q)[::-1]:
#     print("")
#     print(key)
#     print(ql.q[key])

TypeError: load() takes 1 positional argument but 2 were given

In [None]:
def _fixed_limit_cost(episode, volume, T, period_length, limit, num_trades):
    """Return the summed cost of placing the same ``limit`` ``num_trades`` times on a fresh simulator."""
    ots = OrderbookTradingSimulator(orderbooks=episode, volume=volume, tradingperiods=T,
                                    period_length=period_length)
    for _ in range(num_trades):
        ots.trade(limit = limit)
    return ots.history.cost.sum()


def run_Q(V, H, T, ql, episode_windows):
    """Compare the learned strategy against fixed-limit and market-order baselines.

    For every episode the total cost (``history.cost.sum()``) is recorded for:

    * ``'learned'``   -- the limit is chosen per period via ``ql.chooseAction``,
    * ``'ask*1.001'``, ``'ask*1.002'``, ``'ask*1'``, ``'ask*1.005'`` -- a fixed
      limit at the given multiple of the first snapshot's ask, submitted ``T`` times,
    * ``'market'``    -- a single ``trade(limit=None)`` call.

    Args:
        V: Total volume to trade.
        H: Episode length in snapshots; the period length is ``int(H/T)``.
        T: Number of trading periods.
        ql: Q-table object providing ``vol_intervals``, ``state_as_string`` and
            ``chooseAction``.
        episode_windows: Iterable of episodes (lists of orderbooks).

    Returns:
        A list with one ``{strategy_name: cost}`` dict per episode.
    """
    costs_list = []
    period_length = int(H/T)
    # Same volume discretisation as in optimal_strategy: multiples of V / vol_intervals.
    # (The raw interval count is only the right base when V == vol_intervals**2.)
    volumes_base = float(V) / ql.vol_intervals
    # (result key, percentage added on top of the first snapshot's ask)
    fixed_limit_baselines = (
        ('ask*1.001', 0.1),
        ('ask*1.002', 0.2),
        ('ask*1', 0.0),
        ('ask*1.005', 0.5),
    )

    for episode in tqdm(episode_windows):
        costs = {}

        ## Learned strategy
        ots = OrderbookTradingSimulator(orderbooks=episode, volume=V, tradingperiods=T,
                                        period_length=period_length)
        for tt in range(T, 0, -1):
            new_vol = round_custombase(ots.volume, base=volumes_base)
            if new_vol > 0:
                # Build the state with the table's own method, as optimal_strategy does.
                state = ql.state_as_string(time_left=tt, volume_left=new_vol/V)  #, orderbook=ots.get_next_masterbook())
                action = ql.chooseAction(state)

                obs = episode[period_length * (T-tt)].copy()
                ask = obs.get_ask()
                # NOTE(review): optimal_strategy derives the limit as `center + action`,
                # whereas here the action is a percentage on top of the ask --
                # confirm which interpretation the loaded Q-table was trained with.
                limit = ask * (1. + (action/100.))
            else:
                # Theoretically done: no learned action applies, so trade without a limit.
                action = None
                limit = None
            ots.trade(limit = limit, extrainfo={'ACTION':action})
        costs['learned'] = ots.history.cost.sum()

        ## Fixed-limit baselines: the same limit is submitted in each of the T periods
        for label, percent in fixed_limit_baselines:
            lim = episode[0].get_ask() * (1. + (percent/100.))
            costs[label] = _fixed_limit_cost(episode, V, T, period_length, limit=lim, num_trades=T)

        ## market order: a single trade call without a limit
        costs['market'] = _fixed_limit_cost(episode, V, T, period_length, limit=None, num_trades=1)

        costs_list.append(costs)

    return costs_list
        
# Evaluate the Q-table bound to `ql` on both data sets; H is the episode length T*P.
# Note that V=100 is hard-coded here rather than taken from the notebook-level V.
print("T={}, P={}".format(T, P))
print(ql.q.keys())
costs_list_val = run_Q(V=100, H=T*P, T=T, ql=ql, episode_windows = episode_windows_val)
costs_list_train = run_Q(V=100, H=T*P, T=T, ql=ql, episode_windows = episode_windows_train)

In [None]:
# Rebinds the notebook-level V, T and P for a separate training run.
V=30
T=5
P=2

# NOTE(review): train_RL is not defined in any visible cell -- presumably it comes
# from one of the `from helper... import *` lines; confirm before running.
train_RL(V=V, T=T, P=P, epochs=300, verbose=False, log=True)

In [None]:
# NOTE(review): neither `visualize_Q` nor `model` is bound in any visible cell --
# presumably provided by the helper star-imports or an earlier session; confirm.
visualize_Q(model, actions, V, T)

In [None]:
def run(V, T, P, epochs=1, overwrite_actions=None):
    """Replay the first episode with actions chosen greedily from ``model``.

    Relies on the notebook-level names ``episode_windows``, ``model`` and
    ``actions``. In every step the action with the smallest predicted value
    (``np.argmin`` over ``model.predict``) is used, unless ``overwrite_actions``
    supplies a replacement for that step index.

    NOTE(review): the simulator is built without ``orderbooks`` and ``trade`` is
    called with ``agression_factor`` here, unlike the other cells -- confirm this
    matches the current OrderbookTradingSimulator API.

    Args:
        V: Total volume to trade.
        T: Maximum number of trading steps.
        P: Number of orderbook snapshots handed to the simulator per step.
        epochs: How many times the (identical) episode is replayed.
        overwrite_actions: Optional sequence; entry ``i`` replaces the model's
            action in step ``i``.

    Returns:
        The simulator history of the last replay, or ``None`` if ``epochs`` is 0.
    """
    history = None
    for i_episode in range(epochs):
        obs = episode_windows[0]  # episode_windows[i]  # Testcase with always the same, identical episode_window

        ots = OrderbookTradingSimulator(volume=V, tradingperiods=T, period_length=P)

        time_left = P*T
        volume = V

        state = np.array([time_left, volume])  # , volume])
        # state = discretize_state(state)

        acc_cost = 0
        step = None  # stays None when T == 0, so the summary line below still formats
        for step in range(T):
            # One row holding the whole state vector; -1 lets numpy infer its
            # length, so no separate STATE_DIM constant is required.
            qval = model.predict(state.reshape(1, -1))

            action = actions[np.argmin(qval)]

            if overwrite_actions and step < len(overwrite_actions):
                action = overwrite_actions[step]

            orderbooks = obs[step*P:(step+1)*P]
            ots.trade(orderbooks, agression_factor=action, verbose=False, extrainfo={'ACTION':action})

            time_left -= 1
            volume = ots.volume
            new_state = np.array([time_left, volume])  # , volume])
            cost = ots.history.cost.values[-1]

            acc_cost += cost

            state = new_state
            if volume == 0:
                break

        info = "{:4d}/{}: acc_cost: {:0.5f}, steps: {} (t={})"\
                .format(i_episode+1, epochs, acc_cost, step, ots.t)
        print(info)
        history = ots.history
    return history
# First replay: actions come from the model only.
hist = run(V=V, T=T, P=P)  #, overwrite_actions=[0,0,0,0,0,0,0,0,0])
display(hist)
# Second replay: the first ten steps are forced to action 0.24 for comparison.
hist = run(V=V, T=T, P=P, overwrite_actions=[0.24]*10)
display(hist)

In [None]:
# NOTE(review): `episode_windows` is not bound in any visible cell (only
# episode_windows_train / episode_windows_val are), and the indices [3] and [90]
# exceed the 2 episodes of 60 snapshots created above -- confirm the intended data.
plot_episode(episode_windows[3], volume=50, figsize=(5,3))
episode_windows[3][90].plot(range_factor=1.015, figsize=(5,3))

In [None]:
def show_plots(x):
    """Widget callback; currently just returns ``x`` because the plot call is commented out."""
    # episode_windows[3][x].plot(range_factor=1.015, figsize=(5,3))
    return x

from ipywidgets import interact
# Attach the callback to an interactive control with initial value x=5.
interact(show_plots, x=5)

In [None]:
# For several order sizes, compare ob.get_current_price(volume=vol) with
# vol * ob.get_ask() across the orderbooks in `obs`.
# NOTE(review): `obs` has no top-level binding in the visible cells (it is only a
# local inside run()), and `plt` is never imported explicitly -- presumably both
# come from an earlier session or the helper star-imports; confirm.
print(len(obs))
for vol in [1, 50, 100]:
    market_order_price = []
    ask_price = []
    for i, ob in enumerate(obs):
        market_order_price.append(ob.get_current_price(volume=vol))
        ask_price.append(vol*ob.get_ask())

    plt.plot(market_order_price, color='blue', marker='o', label='market order price')
    plt.plot(ask_price, color='red', marker='*', label='ask price')



    plt.title("Market order price for {} shares".format(vol))
    plt.ylabel("price")
    plt.xlabel("time")
    plt.legend(loc='best')
    # Fixed x-range: only the first ~20 entries are visible regardless of len(obs).
    plt.xlim((-1,21))
    plt.show()