In [33]:
%load_ext autoreload
%autoreload 2

import joblib
from joblib import Parallel, delayed
import multiprocessing

from tqdm import tqdm, tqdm_notebook

import pandas as pd
import numpy as np
import gzip
import json
import math
import random
from datetime import datetime
import seaborn as sns
import matplotlib.dates as mdates
from mpl_toolkits.mplot3d import Axes3D
from IPython.display import display
%matplotlib inline
import pickle, dill

import sys
sys.path.append('..')
from helper.rl_framework import *
from helper.orderbook_container import OrderbookContainer
from helper.manage_orderbooks import *
from helper.orderbook_trader import *
from helper.evaluation import evaluate, plot_evaluation_costs
from helper.general_helpers import add_features_to_orderbooks, load_and_preprocess_historyfiles

from agents.RL_Agent_Base import RLAgent_Base
from agents.NN_Agent import RLAgent_NN
from agents.QTable_Agent import QTable_Agent
from Runs.train_agents import trainer_BatchTree, trainer_NNAgent, trainer_QTable
# from Runs.train_fromSamples import train_BatchTree_fromSamples, train_Qtable_fromSamples

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Preprocess Data

In [None]:
# Read the precomputed feature table; the first CSV column becomes the index
# and is parsed as dates.
hist = pd.read_csv(
    'ob_features70000_1611_1705.csv',
    index_col=0,
    parse_dates=[0],
)
# Every column except 'center_orig' is kept as a feature name.
features = hist.columns.drop('center_orig')
features

In [34]:
# Load the cached trading windows. The context manager closes the file handle,
# which a bare `pickle.load(open(...))` leaves open.
# NOTE(review): unpickling can execute arbitrary code - only load trusted cache files.
with open("cached_windows/tradingwindows_1611_USTD_BTC_20.p", "rb") as window_file:
    data_mini = pickle.load(window_file)

# Give every orderbook in every window an empty feature dict.
for window in data_mini:
    for ob in window:
        ob.features = {}

# Disabled alternative: compute the features for each window in parallel.
# num_cores = multiprocessing.cpu_count()
# data_mini = joblib.Parallel(n_jobs=num_cores, verbose=10)(joblib.delayed(add_features_to_orderbooks)(
#     orderbooks=window, hist=hist,
#     features=features
# ) for window in data_mini[:])
# print(len(data_mini), len(data_mini[0]))
# data = data_mini

In [None]:
# Disabled cell: would load the 2016-11 windows from the 60-minute cache and add the
# features from `hist` to each window in parallel, then make them the active `data`.
# data_nov = pickle.load( open( "../cached_windows_60mins/obs_2016-11_USDT_BTC_maxVol100.p", "rb" ) )
# num_cores = multiprocessing.cpu_count()
# data_nov = joblib.Parallel(n_jobs=num_cores, verbose=10)(joblib.delayed(add_features_to_orderbooks)(
#     orderbooks=window, hist=hist,
#     features=features
# ) for window in data_nov[:])
# print(len(data_nov), len(data_nov[0]))
# data = data_nov

### Train agents

In [31]:
# Load the 2017-04 trading windows; `with` closes the file handle once loading is done.
# NOTE(review): unpickling can execute arbitrary code - only load trusted cache files.
with open('../cached_windows_60mins_V200/obs_2017-04_USDT_BTC_maxVol200.p', "rb") as window_file:
    data_apr = pickle.load(window_file)

In [35]:
# Load the 2017-05 trading windows; `with` closes the file handle once loading is done.
# NOTE(review): unpickling can execute arbitrary code - only load trusted cache files.
with open('../cached_windows_60mins_V200/obs_2017-05_USDT_BTC_maxVol200.p', "rb") as window_file:
    data_may = pickle.load(window_file)

In [32]:
# Experiment configuration used by the training cells.
# T, V and consume are passed to the trainers under the same names; P is passed as
# `period_length`.
T = 4
P = 15
V = 70000
consume = 'cash'
state_variables = ['volume', 'time', 'level2data']  # + ['_a_{}'.format(a) for a in actions]
print("T: {}, P: {}, V: {}, consume: '{}'".format(T, P, V, consume))
print(state_variables)

# Action set: the integers -4 .. 10 (15 values). An earlier
# `np.linspace(-0.4, 1.0, num=15)` assignment was overwritten by this line before
# ever being read, so it has been removed.
actions = range(-4, 11)
# actions = [0, 3, 5, 7]
print(actions[-1])

# Use the April windows as the active data set.
data = data_apr
print("Trading windows: {} (each one: {} minutes)".format(len(data), len(data[0])))

T: 4, P: 15, V: 70000, consume: 'cash'
['volume', 'time', 'level2data']
10
Trading windows: 719 (each one: 60 minutes)


[Parallel(n_jobs=24)]: Done   3 out of  20 | elapsed:  3.1min remaining: 17.8min


In [None]:
# Run trainer_BatchTree on the last 96 windows of `data`, using the shared
# configuration (V, T, P, consume, actions, state_variables).
agent = trainer_BatchTree(
    orderbooks=data[-96:],
    V=V,
    T=T,
    consume=consume,
    actions=actions,
    lim_stepsize=0.1,
    limit_base='currAsk',
    period_length=P,
    epochs=60,
    agent_name='BT_Agent',
    random_start=False,
    state_variables=state_variables,
    mode='forward',
    retraining=48,
)


In [None]:
# Save the current agent, passing 'BT_Test' as the path argument.
agent.save(path='BT_Test')

In [None]:
# Call the agent's heatmap_Q view with vol_intervals=5.
agent.heatmap_Q(vol_intervals=5)

In [None]:
# Call learn_fromSamples with nb_it = T+1, 400 estimators and max_depth 15.
# NOTE(review): the exact meaning of these arguments is defined by the agent class - confirm there.
agent.learn_fromSamples(nb_it=T+1, verbose=True, n_estimators=400, max_depth=15)

In [None]:
# Load the 2017-05 trading windows; `with` closes the file handle once loading is done.
# NOTE(review): unpickling can execute arbitrary code - only load trusted cache files.
with open('../cached_windows_60mins_V200/obs_2017-05_USDT_BTC_maxVol200.p', "rb") as window_file:
    data_may = pickle.load(window_file)

In [None]:
# Agents to evaluate side by side: the current `agent` under the key 'BT', and an
# agent loaded from disk via RLAgent_Base.load under the key 'VolTime'.
agent_collection = {
    'BT': agent,
    'VolTime': RLAgent_Base.load(
        agent_name='QTable_1611-1704_T4_I8_VolTime',
        path="trainedAgents/longterm_1611_1704_currAsk",
        ignore_samples=True,
    ),
}



In [None]:
# Evaluate every agent in `agent_collection` on the last 24 windows of `data_may`.
data = data_may[-24:]
# baseline = 'test_Dec24_VolTime'
# The first key of the collection serves as the baseline.
baseline = list(agent_collection)[0]
costs, slippage = evaluate(
    testdata=data,
    agents=agent_collection,
    baseline=baseline,
    evaluate_actions=[2, 4, 5, 6],
    verbose=False,
)
# slippage.to_csv('slippage_may_manyVars3Bins_fixedMarketVar.csv')   # slippage_apr_manyVars3Bins_simulatedTrades, fixedMarketVar
# display(costs.describe())
# plot_evaluation_costs(costs, hline=baseline)
display(slippage.describe())
#plot_evaluation_costs(slippage, hline="2", showfliers=False)

In [None]:
# Plot the `slippage` result returned by evaluate().
slippage.plot()

In [None]:
# Plot all rows and columns of `slippage`, with "VolTime" passed as hline,
# fliers hidden and the y-axis limited to (0, 350).
plot_evaluation_costs(slippage.iloc[:,:], hline="VolTime", showfliers=False, ylim=(0,350))

In [None]:
# Replace `data_may` with the contents of the "fixedMarketVar" pickle; `with` closes the
# file handle once loading is done.
# NOTE(review): unpickling can execute arbitrary code - only load trusted cache files.
with open("data_may_fixedMarketVar.p", "rb") as window_file:
    data_may = pickle.load(window_file)

In [None]:
# Evaluate every agent in `agent_collection` on the first 24 windows of `data_may`.
data = data_may[:]
# baseline = 'test_Dec24_VolTime'
# The first key of the collection serves as the baseline.
baseline = list(agent_collection)[0]
costs, slippage = evaluate(
    testdata=data[:24],
    agents=agent_collection,
    baseline=baseline,
    evaluate_actions=[2, 4],
    verbose=False,
)
display(slippage.describe())


##### QTable Agent

In [None]:
### QTable Agent
# NOTE(review): `data_dec` is not assigned in any cell visible in this notebook; it has to
# exist in the kernel already for this cell to run - confirm where it is loaded.
data = data_dec
T = 4
P = 15
# Run trainer_QTable on the first 24 windows, with the actions rounded to two decimals.
agent = trainer_QTable(
    orderbooks=data[:24],
    V=V,
    T=T,
    consume=consume,
    actions=[round(step, 2) for step in actions],
    limit_base='incStepUnits',
    vol_intervals=4,
    period_length=P,
    agent_name='test2_Dec24_VolTime',
    state_variables=['volume', 'time'],
    mode='backward',
)
agent.save(path="trainedAgents", overwrite=True)

In [None]:
# Call the agent's heatmap_Q view with its default arguments.
agent.heatmap_Q()

In [None]:
### QTable Agent
# NOTE(review): the only construction of `agent_VolTime` in this cell is commented out
# below, so the name must already exist in the kernel - confirm before a Restart & Run All.
# agent_VolTime = trainer_QTable(orderbooks=data[:1], V=V, T=T, consume=consume, actions=[round(a, 2) for a in actions],
#                     limit_base='incStepUnits', vol_intervals=4,
#                     period_length=P, agent_name='VolTime_Dec0:96_I4',
#                     state_variables=['volume', 'time'], mode='backward')
# Hand the current agent's samples to agent_VolTime, call learn_fromSamples with
# reset_brain=True, then save it under "trainedAgents".
agent_VolTime.samples = agent.samples
agent_VolTime.learn_fromSamples(reset_brain=True)
agent_VolTime.save(path="trainedAgents", overwrite=True)

In [None]:
### BatchTree Agent
# Seed both Python's and NumPy's global random number generators with 13.
random.seed(13)
np.random.seed(13)
# Run trainer_BatchTree on the first window only, then save the resulting agent.
agent = trainer_BatchTree(
    orderbooks=data[:1],
    V=V,
    T=T,
    consume=consume,
    actions=actions,
    lim_stepsize=0.1,
    limit_base='incStepUnits',
    period_length=P,
    epochs=40,
    agent_name='BT_60min_VolTimeDir_Dec0:96_nogauss',
    guiding_agent=None,
    random_start=False,
    state_variables=['volume', 'time', 'direction'],
    mode='forward',
)
agent.save(path="trainedAgents", overwrite=True)

In [None]:
# Train one agent per entry of `experiments` with trainer_BatchTree and store it in
# `agent_collection` under the experiment name. Names already present in the
# collection are skipped instead of being trained again.
random.seed(13)
np.random.seed(13)

# Action set: the integers -4 .. 10 (15 values). A preceding
# `np.linspace(-0.4, 1.0, num=15)` assignment was overwritten before being read and a
# bare `data[0][0].timestamp` expression had no effect, so both were removed.
actions = range(-4, 11)

# Maps agent name -> list of state variables passed to the trainer for that agent.
experiments = {
    # 'VolTime_noGuide': ['volume', 'time'],
    'VolTime': ['volume', 'time', 'direction'],
    # 'VolTime_45actions': ['volume', 'time'],
    # 'VolTime_400est20depth_incStepUnits_noGuide': ['volume', 'time'],
    # 'VolTimeSpread': ['volume', 'time', 'spread'],
    #'fut15_market_100est': ['volume', 'time', 'future15_market', 'shares', 'spread', 'future15'],
    #'fut15_market_400est20depth': ['volume', 'time', 'future15_market', 'shares', 'spread', 'future15'],
    # 'fut15_ob': ['volume', 'time', 'future15_ob'],
    #'fut15': ['volume', 'time', 'future15'],
    # 'manySamplesVolTime': ['volume', 'time']
}

# Reuse an `agent_collection` that already exists in the kernel, otherwise start empty.
# Only the name lookup sits inside `try`, so nothing else can be mistaken for a
# missing-name error.
try:
    agent_collection
except NameError:
    agent_collection = {}
else:
    print("agent_collection contains: {}".format(list(agent_collection.keys())))

# Remove agents that should be trained again; pop() with a default is a no-op when the
# name is not in the collection.
for delete in ['VolTimeShares_400est20depth_currAsk']:
    agent_collection.pop(delete, None)

for agent_name in tqdm(experiments.keys()):
    if agent_name in agent_collection:
        # skip training agent again
        continue

    # Re-seed before every training run.
    random.seed(13)
    np.random.seed(13)

    state_variables = experiments[agent_name]
    print(agent_name, state_variables)
    # limit_base = 'currAsk',  'agression',   'init_center',   'incStepUnits'
    agent = trainer_BatchTree(orderbooks=data[:1], V=V, T=T, consume=consume, actions=actions,
                    lim_stepsize=0.1, limit_base='incStepUnits',
                    period_length=P, epochs=4, agent_name=agent_name, guiding_agent=None,
                    random_start=False, state_variables=state_variables, mode='forward')

    agent_collection[agent_name] = agent
    print("")

In [None]:
# Display the current agent's `model` attribute.
agent.model

In [None]:
# Set the current agent's name to 'BT_VolTime'.
agent.agent_name = 'BT_VolTime'

In [None]:
# Save the current agent with path "trainedAgents" and overwrite=True.
agent.save(path="trainedAgents", overwrite=True)

In [None]:
# Call load() through the existing `agent` instance; the return value is only displayed
# as the cell output, it is not assigned.
# NOTE(review): elsewhere in this notebook load() is called on the class and its result
# is assigned (`ag = RLAgent_Base.load(...)`). If load() returns a new agent rather than
# updating `agent` in place, this cell leaves `agent` unchanged - confirm.
agent.load(agent_name="VolTime", path="test")

In [None]:
# Load the agent named 'VolTime' from path 'test' into `ag` and display it.
ag = RLAgent_Base.load(agent_name='VolTime', path='test')
ag

In [None]:
# Display the current agent's `actions` attribute.
agent.actions

In [None]:
# Print the exploration value 1 / 20**(e / epochs) for every epoch index e.
# With true division this is 1.0 at e = 0 and shrinks by a constant factor per epoch,
# staying above 1/20 because e never reaches `epochs`.
epochs = 5
for e in range(epochs):
    exploration = 1.0 / 20 ** (e / epochs)
    print("{}: exploration = {}".format(e, exploration))

In [None]:
# Call heatmap_Q on `agent_QTable_dec` with its default arguments.
# NOTE(review): `agent_QTable_dec` is not assigned in any cell visible in this notebook;
# it must already exist in the kernel - confirm where it is created.
agent_QTable_dec.heatmap_Q()