This is a tutorial on how to use the Agent class from the DeepRL_Agent module (imported below as `DeepRL_Agent`).

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns; sns.set()
import DeepRL_Agent

# First we load a simple (synthetic) data set.

In [2]:
# Synthetic data generated using our order book.
DATA_PATH = "/Users/Joseph/Desktop/Data/Bear_SynDta.csv"
agg = pd.read_csv(DATA_PATH)

# The first CSV column holds the timestamps: parse it, then promote it to the index.
timestamp_col = agg.columns[0]
agg[timestamp_col] = pd.to_datetime(agg[timestamp_col])
agg = agg.set_index(timestamp_col)

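Next we define some other features. VIB is the volume imbalance (ask price × ask size minus bid price × bid size). QV is the rolling standard deviation of the price over a 5*60-row window (it behaves like an expanding standard deviation until the window fills). QVAR is the quadratic variation of the price process, i.e. the cumulative sum of squared price increments. Order imbalance (OI) is the rolling mean of the signed order flow over the same window. Pchange is the change in price since the beginning of the day (computed as the day's first price minus the current price). Each feature is computed separately within each day, so the rolling windows and cumulative sums restart every day. Since we are selling, the price column is automatically the bid price.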

In [3]:
%%time
agg["VIB"] = agg["Ask Price"]*agg["Ask Size"] - agg["Bid Price"]*agg["Bid Size"]
days = agg.index.strftime("%Y-%m-%d").unique()
vals = np.array((agg.loc[days[0]]["Price"][0]-agg.loc[days[0]]["Price"]).values)
QV = agg.loc[days[0]]["Price"].rolling(window = 5*60, min_periods = 0).std()
QVAR = np.append([0],np.diff(agg.loc[days[0]]["Price"].values)**2)
QVAR = np.cumsum(QVAR)
order_imbalance = agg.loc[days[0]]["Sign"].rolling(window = 5*60, min_periods = 0).mean()
for day in days[1:]:
    append_ = (agg.loc[day]["Price"][0]-agg.loc[day]["Price"]).values
    vals = np.append(vals,append_)
    QV = np.append(QV, agg.loc[day]["Price"].rolling(window = 5*60,min_periods= 0).std())
    QVAR_ = np.append([0],np.diff(agg.loc[day]["Price"].values)**2)
    QVAR_ = np.cumsum(QVAR_)
    QVAR = np.append(QVAR, QVAR_)
    order_imbalance = np.append(order_imbalance, agg.loc[day]["Sign"].rolling(window = 5*60, min_periods = 0).mean())
agg["Pchange"] = vals
agg["QV"] = QV
agg["QVAR"] = QVAR
agg["OI"] = order_imbalance
agg = agg.fillna(0)
keys_lob = {agg.columns[i]: i for i in range(agg.shape[1])}

CPU times: user 10.4 s, sys: 698 ms, total: 11.1 s
Wall time: 14.3 s


In [4]:
def subset_day(df):
    """Split a datetime-indexed frame into one array of row values per calendar day.

    Subsets the data as done in the paper.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index supports ``strftime`` (a ``DatetimeIndex``).

    Returns
    -------
    numpy.ndarray
        One entry per distinct calendar day, in order of first appearance.
        Each entry is the ``.values`` array of that day's rows. If every day
        has the same number of rows the result is a regular 3-D array;
        otherwise it is a 1-D object array holding one 2-D array per day.
    """
    # Group on the formatted day label rather than selecting rows with df[day]:
    # string row selection through DataFrame.__getitem__ is gone in pandas 2.x.
    day_labels = df.index.strftime("%Y-%m-%d").to_numpy()
    episodes = [day_rows.values for _, day_rows in df.groupby(day_labels, sort=False)]

    # np.array() refuses ragged input on recent NumPy, so build the object
    # array by hand when the days do not all have the same shape.
    if len({episode.shape for episode in episodes}) > 1:
        ragged = np.empty(len(episodes), dtype=object)
        for position, episode in enumerate(episodes):
            ragged[position] = episode
        return ragged

    return np.array(episodes)
# Subset every half an hour between 10:00 am and 3:30 pm.
# Could re-aggregate all the data over the half-hour intervals instead.
#
# Steps: drop the rows stamped in the 09:xx hour, group what is left by day,
# then cut each day into consecutive half-hour slices.
ROWS_PER_HALF_HOUR = 60 * 30  # assumes one row per second -- TODO confirm
HALF_HOURS_PER_DAY = 11       # 10:00 am to 3:30 pm

outside_nine_oclock_hour = agg.index.strftime("%H") != "09"
agg = agg[outside_nine_oclock_hour]
daily_episodes = subset_day(agg)
half_hour_eps = [
    ep[ROWS_PER_HALF_HOUR * j:ROWS_PER_HALF_HOUR * (j + 1)]
    for ep in daily_episodes
    for j in range(HALF_HOURS_PER_DAY)
]

In [37]:
# Configuration for the training run.

# --- Episode splits ---
train_eps = half_hour_eps[:75]     # only the first 75 episodes, for speed; can increase
test_eps = half_hour_eps[100:125]  # more of a validation set than a test set
keys = keys_lob                    # column name -> column position lookup

# --- State features ---
features = ["OI", "QV"]  # can add more features too

# --- Values passed positionally to RL_agent.train ---
init_inv = 10
length = 5
num_samps = 250
alpha = 0.01
epsilon = 0.99
gamma = 0.99
tau = 0.75
rho = 15

In [38]:
# Create the agent; the constructor is called with no arguments.
RL_agent = DeepRL_Agent.Agent()

In [39]:
# Initialize our specific model.
# Input width = 3 fixed inputs plus one input per selected feature.
# NOTE(review): the 3 presumably covers the agent's own state variables --
# confirm against DeepRL_Agent before changing `features`.
num_inputs = 3 + len(features)
RL_agent.default_model(num_inputs)

In [40]:
# Train on train_eps; test_eps (the [100:125] slice) is passed alongside it.
# All arguments are positional, so their order must match DeepRL_Agent.Agent.train.
# Long-running: the saved output below ends in a KeyboardInterrupt, i.e. this
# run was stopped by hand before it finished.
RL_agent.train(alpha, epsilon, tau, num_samps, gamma, init_inv, rho, length, features, 
    train_eps, test_eps, keys)

2 [ 3.          5.          2.         -0.06        0.34688641]
1 3 [2.       3.       1.       0.02     0.139763]
2 4 [ 1.          2.          2.         -0.05333333  0.28172258]
4 0 [ 5.         10.          4.          0.01666667  0.17675163]
4 1 [ 4.          6.          4.         -0.04333333  0.26492918]
2 2 [3.         2.         2.         0.06333333 0.24822541]
0 3 [2.         0.         0.         0.02       0.13221669]
0 4 [1.         0.         0.         0.         0.14943198]
4 0 [ 5.         10.          4.          0.04        0.21800682]
4 1 [ 4.          6.          4.         -0.08        0.31306483]
0 2 [3.         2.         0.         0.02       0.20891887]
1 3 [2.         2.         1.         0.01666667 0.16114344]
1 4 [ 1.          1.          1.         -0.04333333  0.25731051]
2 0 [ 5.         10.          2.          0.01666667  0.12967806]
3 1 [ 4.          8.          3.         -0.01333333  0.14784413]
2 2 [ 3.00000000e+00  5.00000000e+00  2.00000000e+00

KeyboardInterrupt: 

In [None]:
# Re-point test_eps at the episodes from index 125 onwards for the final
# evaluation; the earlier [100:125] slice was the one passed to RL_agent.train.
test_eps = half_hour_eps[125:]

In [None]:
# Evaluate the agent's main model (RL_agent.Main_Model) on test_eps, reusing
# the init_inv, alpha, length, features and keys settings defined for training.
RL_agent.Performance_Evaluation(init_inv, test_eps, alpha, length, features, keys, RL_agent.Main_Model)

Future directions to update:

1. Getting more computing power to do more definitive tests. 
2. One model per ticker or one model for all tickers?
3. Many order book models rely on Bayesian dynamics, potentially try and leverage some algorithms in Bayesian RL.
4. Combine the agent with a prediction model of the short term price dynamics and train both the agent and the predictive model in an online fashion. 