# Deep Reinforcement Learning for Optimal Execution of Portfolio Transactions     

In [7]:
import utils
# Get the default financial and AC Model parameters.
# utils.get_env_param() returns two objects; they are displayed as tables in the
# following cells (financial_params first, then ac_params).
financial_params, ac_params = utils.get_env_param()

In [5]:
#!pip install statsmodels
!pip uninstall statsmodels
!pip install numpy scipy patsy pandas
!pip install statsmodels

Found existing installation: statsmodels 0.13.5
Uninstalling statsmodels-0.13.5:
  Would remove:
    /home/capeie/Yeetaf/autotrading_preprocessing/moonshot/lib/python3.7/site-packages/statsmodels-0.13.5.dist-info/*
    /home/capeie/Yeetaf/autotrading_preprocessing/moonshot/lib/python3.7/site-packages/statsmodels/*
Proceed (Y/n)? ^C
[31mERROR: Operation cancelled by user[0m[31m


In [8]:
# Display the financial parameters returned by utils.get_env_param().
financial_params

0,1,2,3
Annual Volatility:,12%,Bid-Ask Spread:,0.125
Daily Volatility:,0.8%,Daily Trading Volume:,5000000.0


In [9]:
# Display the AC Model parameters returned by utils.get_env_param().
ac_params

0,1,2,3
Total Number of Shares for Agent1 to Sell:,500000,Fixed Cost of Selling per Share:,$0.062
Total Number of Shares for Agent2 to Sell:,500000,Trader's Risk Aversion for Agent 1:,1e-06
Starting Price per Share:,$50.00,Trader's Risk Aversion for Agent 2:,0.0001
Price Impact for Each 1% of Daily Volume Traded:,$2.5e-06,Permanent Impact Constant:,2.5e-07
Number of Days to Sell All the Shares:,60,Single Step Variance:,0.144
Number of Trades:,60,Time Interval between trades:,1.0


In [17]:
import numpy as np

import syntheticChrissAlmgren as sca
from ddpg_agent import Agent
from gym import spaces
from collections import deque

# Create simulation environment
env = sca.MarketEnvironment()

# Initialize Feed-forward DNNs for Actor and Critic models.
# Both agents use the environment's observation/action dimensions; only the random seed differs.
agent1 = Agent(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(), random_seed=1225)
agent2 = Agent(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(), random_seed=108)

# Set the liquidation time
lqt = 60

# Set the number of trades
n_trades = 60

# Set each trader's risk aversion (passed to env.reset as lamb1 / lamb2)
tr1 = 1e-6
tr2 = 1e-6

# Set the number of episodes to run the simulation
episodes = 1300

# Index removed from the joint environment state to build each agent's own observation.
# NOTE(review): presumably index 7 / index 8 hold agent 1's / agent 2's own remaining
# inventory, so each agent is blinded to the other's holdings -- confirm against
# syntheticChrissAlmgren.
AGENT1_HIDDEN_INDEX = 8
AGENT2_HIDDEN_INDEX = 7


def _agent_views(joint_state):
    """Return the pair (agent1_state, agent2_state) derived from the joint state.

    Each view is a copy of ``joint_state`` with that agent's hidden index deleted.
    """
    return (np.delete(joint_state, AGENT1_HIDDEN_INDEX),
            np.delete(joint_state, AGENT2_HIDDEN_INDEX))


# [agent1_mean, agent2_mean] of the rolling window, appended once every 100 episodes.
shortfall_list = []

# Per-episode implementation shortfalls. Collected in plain lists while training
# (np.append would re-copy the whole history on every episode) and converted to
# arrays once the loop has finished.
shortfall_hist1 = []
shortfall_hist2 = []

# Rolling windows over the last 100 finished episodes, used for progress reporting.
shortfall_deque1 = deque(maxlen=100)
shortfall_deque2 = deque(maxlen=100)

for episode in range(episodes):
    # Reset the environment; the episode number doubles as the seed.
    cur_state = env.reset(seed=episode, liquid_time=lqt, num_trades=n_trades, lamb1=tr1, lamb2=tr2)

    # Set the environment to make transactions
    env.start_transactions()

    for _step in range(n_trades + 1):
        # Predict the best action for the current state, one observation per agent.
        cur_state1, cur_state2 = _agent_views(cur_state)
        action1 = agent1.act(cur_state1, add_noise=True)
        action2 = agent2.act(cur_state2, add_noise=True)

        # Both actions are performed together; new state, rewards and info are received.
        new_state, reward1, reward2, done1, done2, info = env.step(action1, action2)

        # Current state, action, reward and new state are stored in each agent's experience replay.
        new_state1, new_state2 = _agent_views(new_state)
        agent1.step(cur_state1, action1, reward1, new_state1, done1)
        agent2.step(cur_state2, action2, reward2, new_state2, done2)

        # Roll over new state
        cur_state = new_state

        # The episode ends only once both agents are flagged as done.
        if info.done1 and info.done2:
            shortfall_hist1.append(info.implementation_shortfall1)
            shortfall_deque1.append(info.implementation_shortfall1)

            shortfall_hist2.append(info.implementation_shortfall2)
            shortfall_deque2.append(info.implementation_shortfall2)
            break

    if (episode + 1) % 100 == 0:  # print average shortfall over last 100 episodes
        window_mean1 = np.mean(shortfall_deque1)
        window_mean2 = np.mean(shortfall_deque2)
        print('\rEpisode [{}/{}]\tAverage Shortfall for Agent1: ${:,.2f}'.format(episode + 1, episodes, window_mean1))
        print('\rEpisode [{}/{}]\tAverage Shortfall for Agent2: ${:,.2f}'.format(episode + 1, episodes, window_mean2))
        shortfall_list.append([window_mean1, window_mean2])

# Expose the histories as flat float arrays, the form np.append produced previously.
shortfall_hist1 = np.asarray(shortfall_hist1, dtype=float).ravel()
shortfall_hist2 = np.asarray(shortfall_hist2, dtype=float).ravel()

print('\nAverage Implementation Shortfall for Agent1: ${:,.2f} \n'.format(np.mean(shortfall_hist1)))
print('\nAverage Implementation Shortfall for Agent2: ${:,.2f} \n'.format(np.mean(shortfall_hist2)))

Market Actions: Discrete(2)
Reward1:  [-0.00475753] 
bt matrix [0. 0. 0. 0. 0. 0. 1. 1.] 
Selling Matrix: [0. 0.] Buy Matrix:  [0. 0.]
Market Actions: Discrete(2)
Reward1:  [0.80862112] 
bt matrix [0.         0.         0.         0.         0.         0.
 0.98333333 0.366866  ] 
Selling Matrix: [0. 0.] Buy Matrix:  [0. 0.]
Market Actions: Discrete(2)
Reward1:  [0.80154953] 
bt matrix [ 0.          0.          0.          0.          0.         -0.00295437
  0.96666667  0.123802  ] 
Selling Matrix: [0. 0.] Buy Matrix:  [0. 0.]
Market Actions: Discrete(2)
Reward1:  [0.81328884] 
bt matrix [ 0.          0.          0.          0.         -0.00295437 -0.00193157
  0.95        0.03419   ] 
Selling Matrix: [0. 0.] Buy Matrix:  [0. 0.]
Market Actions: Discrete(2)
Reward1:  [0.81968311] 
bt matrix [ 0.          0.          0.         -0.00295437 -0.00193157 -0.01219288
  0.93333333  0.007236  ] 
Selling Matrix: [0. 0.] Buy Matrix:  [0. 0.]
Market Actions: Discrete(2)
Reward1:  [0.86538624] 
b

KeyboardInterrupt: 

In [5]:
# Convert the [agent1_mean, agent2_mean] pairs collected every 100 training episodes
# (shortfall_list) into a NumPy array so it can be saved with np.save.
shortfall = np.array(shortfall_list)

In [6]:
# Persist the rolling-average shortfalls to disk.
# NOTE(review): the file name misspells "cooperation" and "shortfall"; it is left
# unchanged here because other code may load this exact path -- rename both together.
np.save('1e-6_1e-6_cooporation_shorfall_list.npy',shortfall)

In [3]:
# Roll out a single episode with the current agents and record, before every trade,
# the last two entries of the joint state (cur_state[7:]) in `trajectory`.
#
# Relies on names left in the kernel by the training cell: env, agent1, agent2, lqt,
# n_trades, tr1, tr2 and `episode` (its last value is reused as the reset seed).
#
# Unlike the training loop this cell does NOT touch the training statistics
# (shortfall_hist*, shortfall_deque*), so re-running it does not skew the reported
# training averages; the shortfall of this one rollout is kept in rollout_shortfall*.
print(tr1, tr2)
cur_state = env.reset(seed=episode, liquid_time=lqt, num_trades=n_trades, lamb1=tr1, lamb2=tr2)

# Set the environment to make transactions
env.start_transactions()

# One row per step; rows after an early `break` keep their initial zeros.
trajectory = np.zeros([n_trades + 1, 2])

# Implementation shortfall of this rollout; stays None if the episode never finishes.
rollout_shortfall1 = None
rollout_shortfall2 = None

for i in range(n_trades + 1):
    trajectory[i] = cur_state[7:]
    print(cur_state[7:])

    # Build each agent's observation (agent 1 drops index 8, agent 2 drops index 7)
    # and predict the best action for the current state.
    # NOTE(review): exploration noise (add_noise=True) and the learning updates in
    # agent.step below are still active during this rollout, as in the training loop --
    # confirm this is intended for a trajectory that is saved as a result.
    cur_state1 = np.delete(cur_state, 8)
    cur_state2 = np.delete(cur_state, 7)
    action1 = agent1.act(cur_state1, add_noise=True)
    action2 = agent2.act(cur_state2, add_noise=True)

    # Action is performed and new state, reward, info are received.
    new_state, reward1, reward2, done1, done2, info = env.step(action1, action2)

    # Current state, action, reward, new state are stored in the experience replay.
    new_state1 = np.delete(new_state, 8)
    new_state2 = np.delete(new_state, 7)
    agent1.step(cur_state1, action1, reward1, new_state1, done1)
    agent2.step(cur_state2, action2, reward2, new_state2, done2)

    # Roll over new state
    cur_state = new_state

    if info.done1 and info.done2:
        rollout_shortfall1 = info.implementation_shortfall1
        rollout_shortfall2 = info.implementation_shortfall2
        break

# Report this rollout's own shortfall (np.mean reduces a scalar or 1-element array to a number).
if rollout_shortfall1 is not None and rollout_shortfall2 is not None:
    print('Implementation Shortfall for Agent1: ${:,.2f}'.format(np.mean(rollout_shortfall1)))
    print('Implementation Shortfall for Agent2: ${:,.2f}'.format(np.mean(rollout_shortfall2)))


1e-06 1e-06
[1. 1.]
[0.761694 0.656324]
[0.603648 0.454928]
[0.44365  0.334226]
[0.305346 0.25539 ]
[0.20247  0.202642]
[0.13316  0.148788]
[0.09197 0.10399]
[0.064072 0.074902]
[0.044238 0.052522]
[0.03257  0.036602]
[0.02397  0.024466]
[0.018556 0.01732 ]
[0.013314 0.011942]
[0.009696 0.008204]
[0.006774 0.005622]
[0.004728 0.003898]
[0.003236 0.002704]
[0.00228  0.001762]
[0.00165 0.00114]
[0.001234 0.000778]
[0.000932 0.000566]
[0.000674 0.000392]
[0.000506 0.00029 ]
[0.000376 0.000212]
[0.000294 0.00015 ]
[0.000224 0.000108]
[1.66e-04 7.40e-05]
[1.14e-04 5.40e-05]
[8.2e-05 4.2e-05]
[5.8e-05 3.4e-05]
[4.0e-05 2.8e-05]
[2.6e-05 2.2e-05]
[1.8e-05 1.8e-05]
[1.2e-05 1.4e-05]
[8.e-06 1.e-05]
[6.e-06 8.e-06]
[4.e-06 6.e-06]
[2.e-06 4.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-

In [12]:
# Persist the recorded trajectory to disk.
# `trajectory` only exists after the trajectory rollout cell has been run in the same
# kernel session; otherwise this line raises NameError.
# NOTE(review): the file name says "1500" while the training cell sets episodes = 1300 --
# confirm which run this file is meant to describe.
np.save('1e-6_1e-6_competition_trajectory_1500.npy',trajectory)

NameError: name 'trajectory' is not defined

In [13]:
%matplotlib inline

import matplotlib.pyplot as plt

import utils

# We set the default figure size
plt.rcParams['figure.figsize'] = [17.0, 7.0]


# Set the number of days to sell all shares (i.e. the liquidation time)
l_time = 60

# Set the number of trades
# (this reassigns the notebook-wide `n_trades` also set by the training cell; same value, 60)
n_trades = 60

# Set the trader's risk aversion
t_risk = 1e-6

# Plot the trading list and trading trajectory. If show_trl = True, the data frame containing the values of the
# trading list and trading trajectory is printed
# NOTE(review): as saved, this call fails with
#   TypeError: reset() got an unexpected keyword argument 'lamb'
# presumably because utils.plot_trade_list still calls the environment's reset() with a
# single `lamb` keyword while reset() is called with lamb1/lamb2 elsewhere in this notebook --
# verify and fix inside utils.
utils.plot_trade_list(lq_time = l_time, nm_trades = n_trades, tr_risk = t_risk, show_trl = True)

TypeError: reset() got an unexpected keyword argument 'lamb'