In [2]:
import numpy as np
import importlib
import syntheticChrissAlmgren as sca
import td3_agent
from td3_agent import TD3
import utils
import importlib
import price_models
import rewards as rw
importlib.reload(rw)
importlib.reload(price_models)
importlib.reload(utils)
importlib.reload(sca)
importlib.reload(td3_agent)

from collections import deque
importlib.reload(sca)

# Create simulation environment
# The reward criterion is built from the keyword settings below and handed to the environment.
rf = rw.CjOeCriterion(
    per_step_inventory_aversion=0.01,
    terminal_inventory_aversion=0.0,
    inventory_exponent=2.0,
    terminal_time=1.0
)

env = sca.MarketEnvironment(reward_function=rf)

# Create the TD3 agent, sized from the environment's observation / action dimensions.
agent = TD3(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(), random_seed=0)

# Set the liquidation time
lqt = 60

# Set the number of trades
n_trades = 60

# Set trader's risk aversion
tr = 1e-6

# Set the number of episodes to run the simulation
episodes = 5000

# Per-episode shortfalls are gathered in a plain list and converted to an array once
# after training; growing an ndarray with np.append copies the whole history every episode.
shortfall_hist = []
# Rolling window used for the "last 100 episodes" progress line.
shortfall_deque = deque(maxlen=100)

for episode in range(episodes):
    # Reset the environment; the episode index is used as the seed.
    cur_state = env.reset(seed = episode, liquid_time = lqt, num_trades = n_trades, lamb = tr)

    # set the environment to make transactions
    env.start_transactions()

    # Each episode is capped at n_trades + 1 steps. An episode that has not reported
    # info.done by then contributes nothing to the shortfall statistics.
    for _step in range(n_trades + 1):
        # Pick an action for the current state, with exploration noise enabled.
        action = agent.act(cur_state, add_noise = True)
        # Apply the action; the environment returns the next state, reward, done flag and info.
        new_state, reward, done, info = env.step(action)
        # Hand the transition to the agent.
        agent.step(cur_state, action, reward, new_state, done)

        # roll over new state
        cur_state = new_state

        if info.done:
            episode_shortfall = info.implementation_shortfall
            shortfall_hist.append(episode_shortfall)
            shortfall_deque.append(episode_shortfall)
            break

    if (episode + 1) % 100 == 0: # print average shortfall over last 100 episodes
        print('\rEpisode [{}/{}]\tAverage Shortfall: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque)))

# Convert once, flattening to 1-D so the result has the same shape np.append would have produced.
shortfall_hist = np.asarray(shortfall_hist, dtype=float).ravel()

print('\nAverage Implementation Shortfall: ${:,.2f} \n'.format(np.mean(shortfall_hist)))

  noise = torch.FloatTensor([self.noise.sample() for _ in range(len(actions))]).to(device)


Episode [100/5000]	Average Shortfall: $298,939,184.95
Episode [200/5000]	Average Shortfall: $291,942,207.16
Episode [300/5000]	Average Shortfall: $217,240,898.41
Episode [400/5000]	Average Shortfall: $170,565,051.72
Episode [500/5000]	Average Shortfall: $167,340,987.96
Episode [600/5000]	Average Shortfall: $42,774,037.28
Episode [700/5000]	Average Shortfall: $635,360.88
Episode [800/5000]	Average Shortfall: $687,861.72
Episode [900/5000]	Average Shortfall: $667,931.85
Episode [1000/5000]	Average Shortfall: $579,304.33
Episode [1100/5000]	Average Shortfall: $572,600.37
Episode [1200/5000]	Average Shortfall: $617,773.53
Episode [1300/5000]	Average Shortfall: $604,536.35
Episode [1400/5000]	Average Shortfall: $685,579.13
Episode [1500/5000]	Average Shortfall: $669,446.19
Episode [1600/5000]	Average Shortfall: $599,104.82
Episode [1700/5000]	Average Shortfall: $700,718.55
Episode [1800/5000]	Average Shortfall: $598,708.84
Episode [1900/5000]	Average Shortfall: $663,808.95
Episode [2000/500

In [3]:
def evaluate_agent(env, agent, episodes=100, seed_offset=0, **reset_kwargs):
    """Roll out ``agent`` with exploration noise disabled and collect shortfalls.

    Parameters
    ----------
    env
        Environment exposing ``reset``, ``start_transactions`` and ``step``.
        ``step`` must return a 4-tuple ``(state, reward, done, info)`` where
        ``info`` has an ``implementation_shortfall`` attribute.
    agent
        Object whose ``act(state, add_noise=False)`` returns a one-element
        sequence (it is unpacked into a single action value).
    episodes : int, default 100
        Number of evaluation episodes.
    seed_offset : int, default 0
        Added to the episode index before it is passed as the first positional
        argument of ``env.reset`` (presumably the seed -- the training cell
        calls ``reset(seed=episode, ...)``). With the default, evaluation
        replays indices ``0..episodes-1``; pass an offset to evaluate on
        indices that were not used for training.
    **reset_kwargs
        Extra keyword arguments forwarded unchanged to ``env.reset`` so the
        evaluation can use the same configuration as training.

    Returns
    -------
    numpy.ndarray
        ``info.implementation_shortfall`` from the final step of each episode.

    Raises
    ------
    ValueError
        If ``agent.act`` does not return exactly one value.
    """
    shortfalls = []
    for ep in range(episodes):
        state = env.reset(seed_offset + ep, **reset_kwargs)
        env.start_transactions()
        done = False
        while not done:
            # Strict single-element unpacking: anything other than one value raises.
            (action,) = agent.act(state, add_noise=False)
            state, _, done, info = env.step(action)
        # `info` is the one returned by the step that ended the episode.
        shortfalls.append(info.implementation_shortfall)
    return np.array(shortfalls)

def evaluate_ac(env, episodes=100, seed_offset=0, **reset_kwargs):
    """Replay the environment's own trade list as a baseline and collect shortfalls.

    For every episode the environment is reset, its trade list
    (``env.get_trade_list()``) is divided by ``env.total_shares``, and the
    resulting values are fed to ``env.step`` one per step until the
    environment reports ``done``.

    The schedule is derived *after* each reset so that it always matches the
    configuration the episode actually runs with, rather than whatever state
    the environment was left in by earlier cells.

    Parameters
    ----------
    env
        Environment exposing ``reset``, ``get_trade_list``, ``total_shares``,
        ``start_transactions`` and ``step``. ``step`` must return a 4-tuple
        ``(state, reward, done, info)`` where ``info`` has an
        ``implementation_shortfall`` attribute.
    episodes : int, default 100
        Number of evaluation episodes.
    seed_offset : int, default 0
        Added to the episode index before it is passed as the first positional
        argument of ``env.reset`` (presumably the seed -- TODO confirm).
    **reset_kwargs
        Extra keyword arguments forwarded unchanged to ``env.reset``.

    Returns
    -------
    numpy.ndarray
        ``info.implementation_shortfall`` from the final step of each episode.

    Raises
    ------
    IndexError
        If an episode needs more steps than the trade list has entries.
    """
    shortfalls = []
    for ep in range(episodes):
        env.reset(seed_offset + ep, **reset_kwargs)
        # Trade list expressed as a fraction of the total order size.
        # NOTE(review): this is only the intended baseline if env.step interprets
        # its action as a fraction of TOTAL shares. If it is a fraction of the
        # REMAINING inventory, the schedule must be rescaled -- confirm against
        # the environment's step() implementation.
        frac_schedule = env.get_trade_list() / env.total_shares
        env.start_transactions()
        done = False
        t = 0
        while not done:
            _, _, done, info = env.step(frac_schedule[t])
            t += 1
        shortfalls.append(info.implementation_shortfall)
    return np.array(shortfalls)

In [4]:
# Score the trained agent and the `evaluate_ac` baseline on 1000 episodes each.
agent_sfs = evaluate_agent(env=env, agent=agent, episodes=1000)
ac_sfs = evaluate_ac(env=env, episodes=1000)

# Report the mean and standard deviation of implementation shortfall for both.
print("Agent: mean=${:.2f}, std=${:.2f}".format(np.mean(agent_sfs), np.std(agent_sfs)))
print("AC   : mean=${:.2f}, std=${:.2f}".format(np.mean(ac_sfs), np.std(ac_sfs)))

Agent: mean=$666096.83, std=$410591.73
AC   : mean=$1732920.49, std=$2359937.15
