In [1]:
%load_ext autoreload
%autoreload 2

from IPython.display import display
from tqdm import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import json

from BackwardImplementation_fromPaper import prepare_training_data, backward_implementation_from_paper

import sys
sys.path.append('..')
from helper.orderbook_trader import OrderbookTradingSimulator
from helper.Q_learning import QLearn, state_as_string, round_custombase
from helper.manage_orderbooks_v2 import OrderbookEpisodesGenerator
from datetime import datetime

Using TensorFlow backend.


In [2]:
filename = '../../data/obs_2016-11_USDT_BTC_range1.2.dict'
filename = '../../data/obs_2016-12_USDT_BTC_range1.2.dict'
filename = '../../data/obs_2016-11_BTC_ETH_range1.2.dict'
modelname = '../models/obs_2016-11_BTC_ETH_range1.2_30samples.json'
samples = 30
V = 100
vol_intervals = 10
T = 4
P = 15
actions = list(np.linspace(-0.4, 1.0, num=15))
print("V={}, T={}, P={}".format(V, T, P))
print("Actions: ", ", ".join(["{:1.2f}".format(a) for a in actions]))

V=100, T=4, P=15
Actions:  -0.40, -0.30, -0.20, -0.10, -0.00, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90, 1.00


In [3]:
episodes_train = OrderbookEpisodesGenerator(filename=filename, episode_length=T*P)
print("Length of episodes_train: {}".format(len(episodes_train)))
# episode_windows_train = prepare_training_data(filename, samples=samples, T=T, P=P, first_line=0)

Length of episodes_train: 541


In [4]:
def optimal_strategy(traingdata, V, T, decisionfrequency, vol_intervals, actions, verbose=True, ql=None, modelname=None):
    timestamp = datetime.now()
    
    print("V: {}, T: {}, decisionfrequency: {}, vol_intervals: {}, num_actions: {}".format(V, T, decisionfrequency, vol_intervals, len(actions)))
    print("actions: {}".format(actions))
    volumes = np.linspace(1, 0, num=10+1)[:-1] # skip volumes=0
    
    volumes_base = float(V)/vol_intervals
    print("volumes_base: {}".format(volumes_base))
    print("volumes: {}".format(volumes))
    
    ql = ql or QLearn(actions=actions, vol_intervals=vol_intervals, V=V, T=T, decisionfrequency=decisionfrequency)

    for tt in tqdm(range(T)[::-1]):
        trading_startpoint = decisionfrequency*tt
        time_left = T-tt

        for e, episode in tqdm(enumerate(traingdata)):
            initial_center = episode[0].get_center()
            
            center = episode[trading_startpoint].get_center()
            ask = episode[trading_startpoint].get_ask()
            
            for vol in volumes:
                if tt == 0 and vol != 1.:
                    # at t=0 we always have 100% of the volume left.
                    break
                
                for a in actions:
                    state = state_as_string(time_left=time_left, volume_left=vol)  # , orderbook=episode[trading_startpoint])
                    
                    ots = OrderbookTradingSimulator(orderbooks=episode[trading_startpoint:], volume=vol*V, tradingperiods=T-tt,
                                                    decisionfrequency=decisionfrequency)
                    limit = ask * (1. + (a/100.))

                    ots.trade(limit = limit)  # agression_factor=a)

                    volume_left = ots.volume
                    volume_left_rounded = round_custombase(volume_left, base=volumes_base)

                    volume_traded = ots.history.volume_traded.values[-1]
                    volume_traded_rounded = round_custombase(volume_traded, base=volumes_base)

                    assert volume_left_rounded + volume_traded_rounded - vol*V <= 1.e-8, "{} {} {} {}".format(
                        volume_left_rounded, volume_traded_rounded, vol, V)

                    avg = ots.history.avg[-1]

                    # manually compute costs, since we have to think in discrete volume steps (rounding ...)
                    cost = volume_traded_rounded * (avg - initial_center) / initial_center
                    
                    new_state = state_as_string(time_left=time_left-1, volume_left=volume_left_rounded/V)  #, orderbook=ots.masterbook)
                    
                    ql.learn(state, a, cost, new_state)
               
            if e%5 == 0:
                ql.plot_Q(outfile="../graphs/Q_function_{}_action".format(T-tt), epoch=e, outformat='pdf', z_represents='action', verbose=verbose)
                ql.plot_Q(outfile="../graphs/Q_function_{}_Q".format(T-tt), epoch=e, outformat='pdf', z_represents='Q', verbose=verbose)
                modelname = modelname or "../models/Q_function_{}".format(timestamp)
                ql.save(outfile=modelname)
            
    # plot_episode(episode, volume=V)
    return ql

In [5]:
ql = optimal_strategy(traingdata=episodes_train[:12], V=V, T=T, decisionfrequency=P,
                      vol_intervals=vol_intervals, actions=actions)

  0%|          | 0/4 [00:00<?, ?it/s]
0it [00:00, ?it/s]

V: 100, T: 4, decisionfrequency: 15, vol_intervals: 10, num_actions: 15
actions: [-0.40000000000000002, -0.30000000000000004, -0.20000000000000004, -0.10000000000000003, -5.5511151231257827e-17, 0.099999999999999922, 0.19999999999999996, 0.29999999999999993, 0.39999999999999991, 0.49999999999999989, 0.59999999999999987, 0.69999999999999984, 0.79999999999999993, 0.8999999999999998, 1.0]
volumes_base: 10.0
volumes: [ 1.   0.9  0.8  0.7  0.6  0.5  0.4  0.3  0.2  0.1]


[A
1it [00:27, 27.93s/it]

Successfully saved '../graphs/Q_function_1_action.pdf'
Successfully saved '../graphs/Q_function_1_Q.pdf'
Saved: '../pickles/Q_function_2017-03-09 13:13:24.525452.json'


[A
2it [00:54, 27.39s/it][A
3it [01:28, 29.55s/it][A
4it [02:07, 32.35s/it][A
5it [02:36, 31.28s/it][A
6it [03:07, 31.36s/it]

Successfully saved '../graphs/Q_function_1_action.pdf'
Successfully saved '../graphs/Q_function_1_Q.pdf'
Saved: '../pickles/Q_function_2017-03-09 13:13:24.525452.json'


[A
7it [03:34, 29.91s/it][A
8it [03:59, 28.58s/it][A
9it [04:21, 26.60s/it][A
10it [04:47, 26.45s/it][A
11it [05:12, 25.85s/it]

Successfully saved '../graphs/Q_function_1_action.pdf'
Successfully saved '../graphs/Q_function_1_Q.pdf'
Saved: '../pickles/Q_function_2017-03-09 13:13:24.525452.json'


[A
12it [05:36, 25.47s/it][A
 25%|██▌       | 1/4 [05:36<16:50, 336.98s/it]
0it [00:00, ?it/s][A

Successfully saved '../graphs/Q_function_2_action.pdf'
Successfully saved '../graphs/Q_function_2_Q.pdf'



1it [00:23, 23.79s/it]

Saved: '../pickles/Q_function_2017-03-09 13:13:24.525452.json'


[A
2it [00:50, 24.65s/it][A
3it [01:14, 24.48s/it][A
4it [01:43, 25.91s/it][A
5it [02:06, 24.98s/it][A
6it [02:27, 23.72s/it]

Successfully saved '../graphs/Q_function_2_action.pdf'
Successfully saved '../graphs/Q_function_2_Q.pdf'
Saved: '../pickles/Q_function_2017-03-09 13:13:24.525452.json'


[A
7it [02:45, 22.15s/it][A
8it [03:02, 20.46s/it][A
9it [03:24, 20.82s/it][A
10it [03:44, 20.81s/it][A
11it [04:06, 20.99s/it]

Successfully saved '../graphs/Q_function_2_action.pdf'
Successfully saved '../graphs/Q_function_2_Q.pdf'
Saved: '../pickles/Q_function_2017-03-09 13:13:24.525452.json'


[A
12it [04:30, 22.02s/it][A
 50%|█████     | 2/4 [10:07<10:34, 317.08s/it]
0it [00:00, ?it/s][A

Successfully saved '../graphs/Q_function_3_action.pdf'
Successfully saved '../graphs/Q_function_3_Q.pdf'



1it [00:23, 23.63s/it]

Saved: '../pickles/Q_function_2017-03-09 13:13:24.525452.json'


[A
2it [00:53, 25.38s/it][A
3it [01:17, 25.04s/it][A
4it [01:45, 26.03s/it][A
5it [02:11, 25.94s/it][A

Successfully saved '../graphs/Q_function_3_action.pdf'
Successfully saved '../graphs/Q_function_3_Q.pdf'



6it [02:28, 23.17s/it]

Saved: '../pickles/Q_function_2017-03-09 13:13:24.525452.json'


[A
7it [02:48, 22.36s/it][A
8it [03:09, 21.90s/it][A
9it [03:30, 21.71s/it][A
10it [03:54, 22.40s/it][A

Successfully saved '../graphs/Q_function_3_action.pdf'
Successfully saved '../graphs/Q_function_3_Q.pdf'



11it [04:18, 22.92s/it]

Saved: '../pickles/Q_function_2017-03-09 13:13:24.525452.json'


[A
12it [04:42, 23.30s/it][A
 75%|███████▌  | 3/4 [14:50<05:06, 306.85s/it]
0it [00:00, ?it/s][A

Successfully saved '../graphs/Q_function_4_action.pdf'
Successfully saved '../graphs/Q_function_4_Q.pdf'



1it [00:06,  6.67s/it]

Saved: '../pickles/Q_function_2017-03-09 13:13:24.525452.json'


[A
2it [00:13,  6.73s/it][A
3it [00:22,  7.28s/it][A
4it [00:31,  7.82s/it][A
5it [00:38,  7.71s/it][A

Successfully saved '../graphs/Q_function_4_action.pdf'



6it [00:46,  7.72s/it]

Successfully saved '../graphs/Q_function_4_Q.pdf'
Saved: '../pickles/Q_function_2017-03-09 13:13:24.525452.json'


[A
7it [00:54,  7.80s/it][A
8it [01:02,  7.91s/it][A
9it [01:09,  7.74s/it][A
10it [01:17,  7.81s/it][A

Successfully saved '../graphs/Q_function_4_action.pdf'



11it [01:25,  7.89s/it]

Successfully saved '../graphs/Q_function_4_Q.pdf'
Saved: '../pickles/Q_function_2017-03-09 13:13:24.525452.json'


[A
12it [01:34,  8.00s/it][A
100%|██████████| 4/4 [16:24<00:00, 243.05s/it]
