In [1]:
%load_ext autoreload
%autoreload 2

from joblib import Parallel, delayed
import multiprocessing

from tqdm import tqdm, tqdm_notebook
import pandas as pd
import numpy as np
import gzip
import json
import math
from datetime import datetime
import seaborn as sns
import matplotlib.dates as mdates
from mpl_toolkits.mplot3d import Axes3D
from IPython.display import display
%matplotlib inline
import pickle

import sys
sys.path.append('..')
from helper.rl_framework import *
from helper.orderbook_container import OrderbookContainer
from helper.manage_orderbooks import *
from helper.orderbook_trader import *
from helper.evaluation import evaluate, plot_evaluation_costs
from helper.general_helpers import add_features_to_orderbooks, load_and_preprocess_historyfiles

from agents.BatchTree_Agent import RLAgent_BatchTree
from agents.QTable_Agent import QTable_Agent
from Runs.train_agents import trainer_BatchTree, trainer_QTable
# from Runs.train_fromSamples import train_BatchTree_fromSamples, train_Qtable_fromSamples

### Preprocess Data

In [2]:
# Monthly USDT/BTC history files (Nov 2016 - Feb 2017) used as the market-feature source.
histfiles = [
    "../../../../data/history/history_2016-11_USDT_BTC.csv",
    "../../../../data/history/history_2016-12_USDT_BTC.csv",
    "../../../../data/history/history_2017-01_USDT_BTC.csv",
    "../../../../data/history/history_2017-02_USDT_BTC.csv",
]

hist = load_and_preprocess_historyfiles(histfiles)

# Discretise the future-return columns with one shared set of bin edges.
# labels=False yields integer bin codes 0..4; code 2 is the (-0.001, 0.001] band.
return_bins = [-np.inf, -0.005, -0.001, 0.001, 0.005, np.inf]
for horizon in ('future15', 'future30', 'future45'):
    hist[horizon + '_disc'] = pd.cut(hist[horizon], bins=return_bins, labels=False)

# Spread buckets: (0, 1] -> 0, (1, 2] -> 1, (2, inf) -> 2.
spread_bins = [0, 1, 2, np.inf]
hist['spread_disc'] = pd.cut(hist['spread'], bins=spread_bins, labels=False)
# display(hist.iloc[1021:1025,:])

In [3]:
# cached_episodes = list(episodes_train[:20])
# pickle.dump( cached_episodes, open( "cached_episodes.p", "wb" ) )

# Load the cached November trading windows. The context manager closes the file
# handle again (the bare open() call leaked it).
# NOTE: pickle.load can execute arbitrary code - only load cache files you created yourself.
with open("cached_windows/tradingwindows_1611_USTD_BTC_20.p", "rb") as cache_file:
    data = pickle.load(cache_file)
print(data[0][0].timestamp)
print(len(data))

# Attach the market features from `hist` to every window, one joblib task per window.
num_cores = multiprocessing.cpu_count()
data = Parallel(n_jobs=num_cores, verbose=10)(
    delayed(add_features_to_orderbooks)(orderbooks=window, hist=hist)
    for window in data[:20]
)
print(data[0][0])
print(data[17][0])
print(data[18][0])

2016-11-08T10:00
20


[Parallel(n_jobs=24)]: Done   3 out of  20 | elapsed:    0.9s remaining:    4.9s
[Parallel(n_jobs=24)]: Done   6 out of  20 | elapsed:    1.5s remaining:    3.6s
[Parallel(n_jobs=24)]: Done   9 out of  20 | elapsed:    2.1s remaining:    2.5s
[Parallel(n_jobs=24)]: Done  12 out of  20 | elapsed:    2.5s remaining:    1.7s
[Parallel(n_jobs=24)]: Done  15 out of  20 | elapsed:    3.1s remaining:    1.0s
[Parallel(n_jobs=24)]: Done  18 out of  20 | elapsed:    3.6s remaining:    0.4s


OrderbookContainer from 2016-11-08T10:00 (factor: 1)
  107 bids (best: 705.0)
  110 asks (best: 705.450997)
price: 705.2254744655137
  kind: 'orderbook'
  -spread: 0.4509970000000294
  -future15: -3.246150348523713e-08
  -future15_disc: 2.0
  -future30: -0.0006393032568402512
  -future30_disc: 2.0
  -future45: -0.0025756666407984463
  -future45_disc: 1.0
  -spread_disc: 0
  -direction: 0.0
  -direction_float: 1.000147010565701
OrderbookContainer from 2016-11-09T03:01 (factor: 1)
  108 bids (best: 709.7402417)
  153 asks (best: 711.4194982)
price: 710.5795392453285
  kind: 'orderbook'
  -spread: 1.679256500000065
  -future15: 0.0023660177939859306
  -future15_disc: 3.0
  -future30: 0.004592889793137767
  -future30_disc: 3.0
  -future45: 0.013398928448407288
  -future45_disc: 4.0
  -spread_disc: 1
  -direction: 2.0
  -direction_float: 1.010794206633247
OrderbookContainer from 2016-11-09T04:01 (factor: 1)
  90 bids (best: 717.1623583)
  111 asks (best: 718.7800273)
price: 717.970889067637

[Parallel(n_jobs=24)]: Done  20 out of  20 | elapsed:    4.0s finished


In [4]:
# Load the cached December order book windows. The context manager closes the file
# handle again (the bare open() call leaked it).
# NOTE(review): absolute, user-specific path - the other cache cells use the relative
# "cached_windows/" directory; consider doing the same here.
# NOTE: pickle.load can execute arbitrary code - only load cache files you created yourself.
with open('/home/axel/notebooks/orderbook_agent/orderbook_agent/cached_windows/obs_2016-12_USDT_BTC_maxVol100.p', "rb") as cache_file:
    data_dec = pickle.load(cache_file)
print(len(data_dec))

# Only the first 200 windows get market features attached (one joblib task per window).
num_cores = multiprocessing.cpu_count()
data_dec = Parallel(n_jobs=num_cores, verbose=10)(
    delayed(add_features_to_orderbooks)(orderbooks=window, hist=hist)
    for window in data_dec[:200]
)
print(len(data_dec))

743


[Parallel(n_jobs=24)]: Done   2 tasks      | elapsed:    0.7s
[Parallel(n_jobs=24)]: Done  13 tasks      | elapsed:    2.8s
[Parallel(n_jobs=24)]: Done  24 tasks      | elapsed:    5.1s
[Parallel(n_jobs=24)]: Done  37 tasks      | elapsed:    7.5s
[Parallel(n_jobs=24)]: Done  50 tasks      | elapsed:    9.9s
[Parallel(n_jobs=24)]: Done  65 tasks      | elapsed:   12.6s
[Parallel(n_jobs=24)]: Done  80 tasks      | elapsed:   15.4s
[Parallel(n_jobs=24)]: Done  97 tasks      | elapsed:   18.7s
[Parallel(n_jobs=24)]: Done 114 tasks      | elapsed:   23.4s
[Parallel(n_jobs=24)]: Done 133 tasks      | elapsed:   26.9s
[Parallel(n_jobs=24)]: Done 152 tasks      | elapsed:   30.5s
[Parallel(n_jobs=24)]: Done 174 out of 200 | elapsed:   34.4s remaining:    5.1s
[Parallel(n_jobs=24)]: Done 195 out of 200 | elapsed:   38.5s remaining:    1.0s
[Parallel(n_jobs=24)]: Done 200 out of 200 | elapsed:   39.5s finished


200


In [None]:
# Load the cached February trading windows. The context manager closes the file
# handle again (the bare open() call leaked it).
# NOTE: pickle.load can execute arbitrary code - only load cache files you created yourself.
with open("cached_windows/tradingwindows_1702_USTD_BTC.p", "rb") as cache_file:
    data_feb = pickle.load(cache_file)
print(data_feb[0][0].timestamp)
print(len(data_feb))

# Attach market features to the first 100 windows. `hist=hist` is passed exactly as in
# the November/December cells; the original call omitted it.
num_cores = multiprocessing.cpu_count()
data_feb = Parallel(n_jobs=num_cores, verbose=10)(
    delayed(add_features_to_orderbooks)(orderbooks=window, hist=hist)
    for window in data_feb[:100]
)
print(len(data_feb))

### Train agents

In [5]:
# Run configuration handed to the trainers: T and V are passed through under the same
# names, P is passed as `period_length`, `consume` as `consume`.
T = 4
P = 15
V = 70000
consume = 'cash'
print("T: {}, P: {}, V: {}, consume: '{}'".format(T, P, V, consume))

# Integer action grid -4..10 (15 actions). A preceding np.linspace(-0.4, 1.0, num=15)
# assignment was overwritten immediately and has been dropped as a dead store.
actions = range(-4, 11)

T: 4, P: 15, V: 70000, consume: 'cash'


In [None]:
# Train a Q-table agent on the first 96 December windows and persist it.
# The action values are rounded to two decimals before they reach the trainer.
rounded_actions = [round(a, 2) for a in actions]

agent = trainer_QTable(
    orderbooks=data_dec[:96],
    V=V,
    T=T,
    consume=consume,
    actions=rounded_actions,
    limit_base='incStepUnits',
    vol_intervals=4,
    period_length=P,
    agent_name='VolTimeDir_Dec0:96_I4',
    state_variables=['volume', 'time', 'direction'],
    mode='backward',
)
agent.save(path="trainedAgents", overwrite=True)


In [None]:
# Show the agent's `samples` attribute as the cell output.
# NOTE(review): this renders the whole object; consider .head()/.describe() if it is large.
agent.samples

In [None]:
# Draw the agent's Q heatmap with the method's default arguments.
agent.heatmap_Q()

In [None]:
# Rich display of the agent's `samples` attribute.
# NOTE(review): an earlier cell already ends with a bare `agent.samples`; one of the two is probably redundant.
display(agent.samples)

In [None]:
# Re-run the agent's learning step on its stored samples.
# NOTE(review): presumably updates the agent in place, so re-running this cell may not be idempotent - confirm.
agent.learn_fromSamples()

In [None]:
### 

In [6]:
# Run configuration handed to trainer_BatchTree (P is passed as `period_length`).
T = 4
P = 15
V = 70000
consume = 'cash'
print("T: {}, P: {}, V: {}, consume: '{}'".format(T, P, V, consume))
print("")

# NOTE(review): `random` is not imported explicitly in the visible import cell; it is
# presumably provided by one of the `helper` star imports - confirm or add `import random`.
random.seed(13)
np.random.seed(13)
# plot_episode(episodes_train[1], volume=100)

# Integer action grid -4..10 (15 actions). The np.linspace grid that used to precede this
# line was overwritten immediately, and a bare `data[0][0].timestamp` expression had no
# effect in the middle of the cell; both were removed.
actions = range(-4, 11)

# Experiment name -> state variables used by the agent. Disabled variants are kept
# as comments for reference.
experiments = {
    # 'VolTime_noGuide': ['volume', 'time'],
    'VolTime': ['volume', 'time'],
    # 'VolTime_45actions': ['volume', 'time'],
    # 'VolTime_400est20depth_incStepUnits_noGuide': ['volume', 'time'],
    # 'VolTimeSpread': ['volume', 'time', 'spread'],
    #'fut15_market_100est': ['volume', 'time', 'future15_market', 'shares', 'spread', 'future15'],
    #'fut15_market_400est20depth': ['volume', 'time', 'future15_market', 'shares', 'spread', 'future15'],
    # 'fut15_ob': ['volume', 'time', 'future15_ob'],
    #'fut15': ['volume', 'time', 'future15'],
    # 'manySamplesVolTime': ['volume', 'time']
}

# Reuse agents trained earlier in this kernel session; start with an empty registry
# on a fresh kernel.
try:
    agent_collection
    print("agent_collection contains: {}".format(list(agent_collection.keys())))
except NameError:
    agent_collection = {}

# Drop registry entries that should be retrained; a missing key is ignored.
for stale_name in ['VolTimeShares_400est20depth_currAsk']:
    agent_collection.pop(stale_name, None)

for agent_name, state_variables in tqdm(experiments.items()):
    if agent_name in agent_collection:
        # skip training agent again
        continue

    # Re-seed before every training run so each agent starts from the same RNG state.
    random.seed(13)
    np.random.seed(13)

    print(agent_name, state_variables)
    # limit_base = 'currAsk',  'agression',   'init_center',   'incStepUnits'
    agent = trainer_BatchTree(orderbooks=data_dec[:1], V=V, T=T, consume=consume, actions=actions,
                              lim_stepsize=0.1, limit_base='incStepUnits',
                              period_length=P, epochs=40, agent_name=agent_name, guiding_agent=None,
                              random_start=False, state_variables=state_variables, mode='forward')

    agent_collection[agent_name] = agent
    print("")

  0%|          | 0/1 [00:00<?, ?it/s]
  0%|          | 0/1 [00:00<?, ?it/s]

T: 4, P: 15, V: 70000, consume: 'cash'

VolTime ['volume', 'time']
RL-Type: <class 'agents.BatchTree_Agent.RLAgent_BatchTree'>
Number of orderbook windows: 1
Splitpoint: 0/1
Start parallel collection of samples in 'forward' mode (num_cores=24)


[A[Parallel(n_jobs=24)]: Done   1 tasks      | elapsed:   37.7s
[Parallel(n_jobs=24)]: Done   1 out of   1 | elapsed:   37.7s finished


  0%|          | 0/8 [00:00<?, ?it/s]

brain.fitted_Q_iteration_tree() - 141 samples


[A[A

 25%|██▌       | 2/8 [00:00<00:00,  9.40it/s][A[A

 38%|███▊      | 3/8 [00:00<00:00,  8.48it/s][A[A

 50%|█████     | 4/8 [00:00<00:00,  8.22it/s][A[A

 62%|██████▎   | 5/8 [00:00<00:00,  8.02it/s][A[A

 75%|███████▌  | 6/8 [00:00<00:00,  7.89it/s][A[A

 88%|████████▊ | 7/8 [00:00<00:00,  7.78it/s][A[A

100%|██████████| 8/8 [00:01<00:00,  7.63it/s][A[A

[A[A
100%|██████████| 1/1 [00:39<00:00, 40.00s/it][A
100%|██████████| 1/1 [00:40<00:00, 40.01s/it]

brain.samples.shape (141, 10)






In [None]:
# Preview of a decaying exploration rate: starts at 1.0 and shrinks geometrically
# towards 1/20 as the epoch index approaches `epochs`.
epochs = 5
schedule = [1.0 / 20 ** (step / epochs) for step in range(epochs)]
for e, exploration in enumerate(schedule):
    print("{}: exploration = {}".format(e, exploration))

In [None]:
# NOTE(review): `agent_QTable_dec` is not assigned in any cell visible in this notebook;
# this cell presumably relies on a variable left over from an earlier kernel session - confirm.
agent_QTable_dec.heatmap_Q()

# agent_QTable_dec.heatmap_Q()

In [None]:
# Register separately trained Q-table agents under their variable names so they can be
# looked up by key later on.
# NOTE(review): none of the `agent_QTable_dec*` variables is assigned in a visible cell;
# on a fresh kernel the first assignment raises NameError - confirm where they come from.
# agent_collection['agent_QTable_nov'] = agent_QTable_nov
agent_collection['agent_QTable_dec'] = agent_QTable_dec
agent_collection['agent_QTable_dec_currAsk'] = agent_QTable_dec_currAsk
agent_collection['agent_QTable_dec_incStepUnits'] = agent_QTable_dec_incStepUnits
agent_collection['agent_QTable_dec_incStepUnits_direction'] = agent_QTable_dec_incStepUnits_direction
agent_collection['agent_QTable_dec_incStepUnits_direction_sub'] = agent_QTable_dec_incStepUnits_direction_sub
agent_collection['agent_QTable_dec_incStepUnits_direction_sub3'] = agent_QTable_dec_incStepUnits_direction_sub3
# Print the shape of each registered agent's sample table.
for agentname in agent_collection.keys():
    print("{}: {}".format(agentname, agent_collection[agentname].samples.shape))

In [None]:
def addMarketFeatures_toSamples(samples_df, hist, state_variables,
                                market_features, period_length=15,
                                price_scaled_features=('high24hr', 'low24hr', 'spread')):
    """Return a copy of ``samples_df`` enriched with market features taken from ``hist``.

    For every feature ``f`` in ``market_features`` two columns are (re)created:

    * ``f``   - value of ``hist[f]`` looked up at each sample's ``timestamp``,
      inserted at column position ``2 + i`` (``i`` = position of ``f`` in ``market_features``);
    * ``f_n`` - value of ``hist[f]`` at ``timestamp + period_length`` minutes,
      appended as the last column.

    Existing ``f`` / ``f_n`` columns are replaced. Features listed in
    ``price_scaled_features`` are divided by the sample's ``initial_center`` in both columns.

    Parameters
    ----------
    samples_df : pandas.DataFrame
        Samples; must provide the columns ``timestamp`` and (for price-scaled
        features) ``initial_center``. The frame itself is not modified.
    hist : pandas.DataFrame
        Market history; ``hist[f]`` is used as a lookup table keyed by its index,
        so the index has to be comparable with the sample timestamps.
    state_variables : list
        Extended IN PLACE with every feature name not yet contained, so the caller's
        list reflects the added features afterwards.
    market_features : sequence of str
        Column names of ``hist`` to add.
    period_length : int, default 15
        Offset in minutes used for the ``*_n`` lookup.
    price_scaled_features : collection of str, optional
        Features normalised by ``initial_center``; defaults to the previously
        hard-coded ``high24hr``, ``low24hr`` and ``spread``.

    Returns
    -------
    pandas.DataFrame
        The enriched copy. Timestamps missing from ``hist`` yield NaN.
    """
    df = samples_df.copy()

    # Wrap the feature list itself (not the enumerate object) so tqdm can read its
    # length and display a total.
    for i, f in enumerate(tqdm(market_features)):
        f_n = "{}_n".format(f)

        # Drop stale versions of the two columns so a re-run replaces them.
        stale_columns = [col for col in (f, f_n) if col in df.columns]
        if stale_columns:
            df = df.drop(stale_columns, axis=1)

        next_timestamp = df.timestamp + pd.Timedelta(minutes=period_length)
        df.insert(loc=2 + i, column=f, value=df.timestamp.map(hist[f]),
                  allow_duplicates=True)
        df.insert(loc=df.shape[1], column=f_n, value=next_timestamp.map(hist[f]),
                  allow_duplicates=True)

        if f not in state_variables:
            state_variables.append(f)

        if f in price_scaled_features:
            df[f] = df[f] / df.initial_center
            df[f_n] = df[f_n] / df.initial_center

    return df

# Enrich the current agent's samples with discretised and raw market features,
# then summarise the resulting frame.
market_features = ['future15', 'future15_disc', 'future30_disc', 'future45_disc', 'spread', 'spread_disc']
df2 = addMarketFeatures_toSamples(
    samples_df=agent.samples,
    hist=hist,
    state_variables=['time', 'volume'],
    market_features=market_features,
)
print(df2.shape)
display(df2.describe())

In [None]:
# List every registered agent together with the state variables it was trained on.
for ag, registered_agent in agent_collection.items():
    print(ag, registered_agent.state_variables)

In [None]:
# Q heatmaps of the two 400-estimator BatchTree variants.
for variant in ('VolTime_400est20depth_incStepUnits_noGuide',
                'VolTime_400est20depth_incStepUnits'):
    agent_collection[variant].heatmap_Q()

In [None]:
# Q heatmaps (including the minima counts) for the three December Q-table agents.
for qtable_name in ('agent_QTable_dec',
                    'agent_QTable_dec_currAsk',
                    'agent_QTable_dec_incStepUnits'):
    agent_collection[qtable_name].heatmap_Q(show_minima_count=True)

In [None]:
# Plot the result of gauss_window over the agent's action list, centred on action index 4.
# NOTE(review): this import belongs in the import cell at the top of the notebook.
# NOTE(review): `plt` and `agent_QTable_dec` are not assigned/imported in any visible cell;
# presumably `plt` arrives via a `helper` star import - confirm.
from helper.general_helpers import gauss_window
gauss = gauss_window(agent_QTable_dec.actions, a_idx=4, std=2)
plt.plot(gauss)
plt.show()

In [None]:
# Inspect the keys of the agent's `q` mapping.
# NOTE(review): the variable is not assigned in any visible cell - confirm its origin.
agent_QTable_dec_incStepUnits_direction.q.keys()

In [None]:
# Evaluate every registered agent on the first 96 December windows and compare
# costs and slippage against the baseline agent.
costs = None
baseline = 'agent_QTable_dec'
costs, slippage = evaluate(
    testdata=data_dec[:96],
    agents=agent_collection,
    baseline=baseline,
    evaluate_actions=[5],
    costs=costs,
    verbose=False,
)

display(costs.describe())
for frame in (costs, slippage):
    plot_evaluation_costs(frame, hline=baseline)

In [None]:
# Evaluate every registered agent on December windows 156..163 and compare
# costs and slippage against the baseline agent.
costs = None
baseline = 'agent_QTable_dec'
costs, slippage = evaluate(
    testdata=data_dec[156:164],
    agents=agent_collection,
    baseline=baseline,
    evaluate_actions=[5],
    costs=costs,
    verbose=False
)

display(costs.describe())
plot_evaluation_costs(costs, hline=baseline)
plot_evaluation_costs(slippage, hline=baseline)

# Per-window cost difference of each agent to the baseline.
# DataFrame.plot() without `ax` draws into a figure of its own, so a preceding
# plt.figure(figsize=...) only produced an empty figure; the size is passed to
# plot() directly instead.
costs_vs_baseline = costs.subtract(costs[baseline].values, axis=0)
costs_vs_baseline.plot(figsize=(12, 8))
costs_vs_baseline.mean()

In [None]:
# Line plot of all agents' costs with the baseline agent's curve shaded down to zero.
# NOTE(review): the shading uses positional x values 0..len(costs)-1 - presumably this
# matches the index of `costs`; confirm.
cost_axes = costs.plot(figsize=(16, 12))
window_positions = range(len(costs))
cost_axes.fill_between(window_positions, 0, costs['agent_QTable_dec'].values, alpha=0.5)

In [None]:
# Print start timestamp and 'direction' feature of the first order book in each of the
# evaluated windows (December windows 156..163).
for e, window in enumerate(data_dec[156:164]):
    first_book = window[0]
    print(e, first_book.timestamp, first_book.features['direction'])

In [None]:
# Compare Q heatmaps at 10 volume intervals: the BatchTree agent, then the Q-table
# agent before and after switching on `interpolate_vol`.
agent_collection['VolTime'].heatmap_Q(vol_intervals=10)

qtable_agent = agent_collection['agent_QTable_dec']
qtable_agent.heatmap_Q(vol_intervals=10)
# The flag stays set on the registered agent after this cell.
qtable_agent.interpolate_vol = True
qtable_agent.heatmap_Q(vol_intervals=10)

In [None]:
# Out-of-sample evaluation on December windows 20..39.
# evaluate() is unpacked as a (costs, slippage) pair in the other evaluation cells;
# binding the whole return value to `costs_oos` would break the .describe() call
# below, so both parts are unpacked here as well.
costs_oos = None
costs_oos, slippage_oos = evaluate(
    testdata=data_dec[20:40],
    agents=agent_collection,
    baseline='agent_QTable_dec',
    evaluate_actions=[5],
    costs=costs_oos,
    verbose=False
)

display(costs_oos.describe())
plot_evaluation_costs(costs_oos, hline='agent_QTable_dec')

In [None]:
# Distribution of costs per agent: box plot (outliers hidden) overlaid with the
# individual observations, plus a zero reference line.
fig, ax = plt.subplots(figsize=(14, 8))
orient = 'v'
bp = sns.boxplot(data=costs, orient=orient, showfliers=False, ax=ax)
ax = sns.swarmplot(data=costs, orient=orient, color=".25", alpha=0.4, ax=ax)
ax.axhline(0)