In this notebook, I try different action functions with the ABM approach and compare the results.

In [None]:
# Load the two environment parameter tables via the `utils` module; each one is
# displayed in its own cell below.
import utils
financial_params, ac_params = utils.get_env_param()

In [None]:
# Display the financial parameter table returned by utils.get_env_param().
financial_params

0,1,2,3
Annual Volatility:,12%,Bid-Ask Spread:,0.125
Daily Volatility:,0.8%,Daily Trading Volume:,5000000.0


In [None]:
# Display the second parameter table (ac_params) returned by utils.get_env_param().
ac_params

0,1,2,3
Total Number of Shares to Sell:,1000000,Fixed Cost of Selling per Share:,$0.062
Starting Price per Share:,$50.00,Trader's Risk Aversion:,1e-06
Price Impact for Each 1% of Daily Volume Traded:,$2.5e-06,Permanent Impact Constant:,2.5e-07
Number of Days to Sell All the Shares:,60,Single Step Variance:,0.144
Number of Trades:,60,Time Interval between trades:,1.0


In [None]:
import syntheticChrissAlmgren as sca

class MarketEnvironmentWithTradingRate(sca.MarketEnvironment):
    """Market environment whose action is a trading rate over the remaining horizon.

    Each step sells ``shares_remaining * action / max(timeHorizon, 1)`` shares
    (rounded to a whole number), i.e. the action scales an even split of the
    current inventory over the steps that are left.
    """

    def step(self, action):
        """Advance the simulation by one trade.

        Args:
            action: Scalar trading rate, or a NumPy array holding a single
                value. The resulting order is clamped to
                ``[0, shares_remaining]`` so an out-of-range action (for
                example after exploration noise) can neither buy shares nor
                sell more than is held.

        Returns:
            Tuple ``(state, reward, done, info)``. ``state`` is the log-return
            window followed by ``timeHorizon / num_n`` and
            ``shares_remaining / total_shares``; ``reward`` is a one-element
            array; ``done`` is a bool; ``info`` carries the per-step
            diagnostics assigned below.
        """

        class Info(object):
            pass
        info = Info()

        info.done = False

        # Horizon exhausted or inventory below tolerance: stop trading and
        # attach the end-of-episode statistics.
        if self.transacting and (self.timeHorizon == 0 or abs(self.shares_remaining) < self.tolerance):
            self.transacting = False
            info.done = True
            info.implementation_shortfall = self.total_shares * self.startingPrice - self.totalCapture
            info.expected_shortfall = self.get_expected_shortfall(self.total_shares)
            info.expected_variance = self.singleStepVariance * self.tau * self.totalSRSQ
            info.utility = info.expected_shortfall + self.llambda * info.expected_variance

        # The very first step reuses the previous impacted price; later steps
        # add a Gaussian shock with variance singleStepVariance * tau.
        if self.k == 0:
            info.price = self.prevImpactedPrice
        else:
            info.price = self.prevImpactedPrice + np.sqrt(self.singleStepVariance * self.tau) * random.normalvariate(0, 1)

        if self.transacting:

            if isinstance(action, np.ndarray):
                action = action.item()

            # Action scales an even split of the *remaining* shares over the
            # remaining steps (max() avoids dividing by a zero horizon).
            sharesToSellNow = self.shares_remaining * action / max(self.timeHorizon, 1)

            # Never sell a negative amount or more than is currently held.
            sharesToSellNow = min(max(sharesToSellNow, 0.0), self.shares_remaining)

            # On the last tradable step everything left is liquidated.
            if self.timeHorizon < 2:
                sharesToSellNow = self.shares_remaining

            info.share_to_sell_now = np.around(sharesToSellNow)

            info.currentPermanentImpact = self.permanentImpact(info.share_to_sell_now)
            info.currentTemporaryImpact = self.temporaryImpact(info.share_to_sell_now)

            # Temporary impact lowers the price obtained for this order only.
            info.exec_price = info.price - info.currentTemporaryImpact

            self.totalCapture += info.share_to_sell_now * info.exec_price

            # Slide the fixed-length log-return window forward by one entry.
            self.logReturns.append(np.log(info.price/self.prevPrice))
            self.logReturns.popleft()

            self.shares_remaining -= info.share_to_sell_now

            self.totalSSSQ += info.share_to_sell_now ** 2
            self.totalSRSQ += self.shares_remaining ** 2

            self.timeHorizon -= 1
            self.prevPrice = info.price
            # Permanent impact carries over into the next step's base price.
            self.prevImpactedPrice = info.price - info.currentPermanentImpact

            # Reward: relative reduction of |utility| compared with the previous step.
            currentUtility = self.compute_AC_utility(self.shares_remaining)
            prevAbsUtility = abs(self.prevUtility)
            if prevAbsUtility > 0:
                reward = (prevAbsUtility - abs(currentUtility)) / prevAbsUtility
            else:
                # Defensive: a zero previous utility would otherwise divide by zero.
                reward = 0.0
            self.prevUtility = currentUtility

            if self.shares_remaining <= 0:

                info.implementation_shortfall  = self.total_shares * self.startingPrice - self.totalCapture

                info.done = True
        else:
            reward = 0.0

        self.k += 1

        state = np.array(list(self.logReturns) + [self.timeHorizon / self.num_n, self.shares_remaining / self.total_shares])

        return (state, np.array([reward]), info.done, info)


In [None]:
import numpy as np
import random
import syntheticChrissAlmgren as sca
from ddpg_agent import Agent

from collections import deque

# Values forwarded to env.reset() as liquid_time / num_trades / lamb, plus the
# number of training episodes.
lqt, n_trades, tr, episodes = 60, 60, 1e-6, 5000

env = MarketEnvironmentWithTradingRate()
agent = Agent(
    state_size=env.observation_space_dimension(),
    action_size=env.action_space_dimension(),
    random_seed=0,
)

# Implementation shortfall per finished episode: rolling window of the most
# recent 100 episodes plus the complete history.
shortfall_deque = deque(maxlen=100)
shortfall_hist = np.array([])

for episode in range(episodes):
    # The episode index doubles as the environment seed.
    cur_state = env.reset(seed=episode, liquid_time=lqt, num_trades=n_trades, lamb=tr)
    env.start_transactions()

    for i in range(n_trades + 1):
        action = agent.act(cur_state, add_noise=True)
        new_state, reward, done, info = env.step(action)
        agent.step(cur_state, action, reward, new_state, done)
        cur_state = new_state

        if not info.done:
            continue

        # Episode finished: record its shortfall and move on to the next one.
        shortfall_deque.append(info.implementation_shortfall)
        shortfall_hist = np.append(shortfall_hist, info.implementation_shortfall)
        break

    # Progress report every 100 episodes (mean over the rolling window).
    if not (episode + 1) % 100:
        print('\rEpisode [{}/{}]\tAverage Shortfall: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque)))

print('\nAverage Implementation Shortfall: ${:,.2f} \n'.format(np.mean(shortfall_hist)))

Episode [100/5000]	Average Shortfall: $6,102.43
Episode [200/5000]	Average Shortfall: $116,176.03
Episode [300/5000]	Average Shortfall: $79,417.30
Episode [400/5000]	Average Shortfall: $493,787.50
Episode [500/5000]	Average Shortfall: $141,244.34
Episode [600/5000]	Average Shortfall: $169,543.42
Episode [700/5000]	Average Shortfall: $108,679.47
Episode [800/5000]	Average Shortfall: $441,443.37
Episode [900/5000]	Average Shortfall: $260,681.47
Episode [1000/5000]	Average Shortfall: $-203,502.03
Episode [1100/5000]	Average Shortfall: $77,381.66
Episode [1200/5000]	Average Shortfall: $377,723.26
Episode [1300/5000]	Average Shortfall: $184,884.08
Episode [1400/5000]	Average Shortfall: $228,473.41
Episode [1500/5000]	Average Shortfall: $555,222.39
Episode [1600/5000]	Average Shortfall: $180,360.93
Episode [1700/5000]	Average Shortfall: $364,180.38
Episode [1800/5000]	Average Shortfall: $120,910.82
Episode [1900/5000]	Average Shortfall: $344,477.11
Episode [2000/5000]	Average Shortfall: $-10

In [None]:
import syntheticChrissAlmgren as sca

class MarketEnvironmentWithTotalSharesPortion(sca.MarketEnvironment):
    """Market environment whose action is a fraction of the original order size.

    Each step sells ``total_shares * action`` shares (rounded to a whole
    number), limited to what is still held.
    """

    def step(self, action):
        """Advance the simulation by one trade.

        Args:
            action: Scalar fraction of ``total_shares`` to sell, or a NumPy
                array holding a single value. The resulting order is clamped
                to ``[0, shares_remaining]`` so an out-of-range action (for
                example after exploration noise) can neither buy shares nor
                sell more than is held.

        Returns:
            Tuple ``(state, reward, done, info)``. ``state`` is the log-return
            window followed by ``timeHorizon / num_n`` and
            ``shares_remaining / total_shares``; ``reward`` is a one-element
            array; ``done`` is a bool; ``info`` carries the per-step
            diagnostics assigned below.
        """

        class Info(object):
            pass
        info = Info()

        info.done = False

        # Horizon exhausted or inventory below tolerance: stop trading and
        # attach the end-of-episode statistics.
        if self.transacting and (self.timeHorizon == 0 or abs(self.shares_remaining) < self.tolerance):
            self.transacting = False
            info.done = True
            info.implementation_shortfall = self.total_shares * self.startingPrice - self.totalCapture
            info.expected_shortfall = self.get_expected_shortfall(self.total_shares)
            info.expected_variance = self.singleStepVariance * self.tau * self.totalSRSQ
            info.utility = info.expected_shortfall + self.llambda * info.expected_variance

        # The very first step reuses the previous impacted price; later steps
        # add a Gaussian shock with variance singleStepVariance * tau.
        if self.k == 0:
            info.price = self.prevImpactedPrice
        else:
            info.price = self.prevImpactedPrice + np.sqrt(self.singleStepVariance * self.tau) * random.normalvariate(0, 1)

        if self.transacting:

            if isinstance(action, np.ndarray):
                action = action.item()

            # Action is a fraction of the original total order.
            sharesToSellNow = self.total_shares * action

            # Never sell a negative amount or more than is currently held.
            sharesToSellNow = min(max(sharesToSellNow, 0.0), self.shares_remaining)

            # On the last tradable step everything left is liquidated.
            if self.timeHorizon < 2:
                sharesToSellNow = self.shares_remaining

            info.share_to_sell_now = np.around(sharesToSellNow)

            info.currentPermanentImpact = self.permanentImpact(info.share_to_sell_now)
            info.currentTemporaryImpact = self.temporaryImpact(info.share_to_sell_now)

            # Temporary impact lowers the price obtained for this order only.
            info.exec_price = info.price - info.currentTemporaryImpact

            self.totalCapture += info.share_to_sell_now * info.exec_price

            # Slide the fixed-length log-return window forward by one entry.
            self.logReturns.append(np.log(info.price/self.prevPrice))
            self.logReturns.popleft()

            self.shares_remaining -= info.share_to_sell_now

            self.totalSSSQ += info.share_to_sell_now ** 2
            self.totalSRSQ += self.shares_remaining ** 2

            self.timeHorizon -= 1
            self.prevPrice = info.price
            # Permanent impact carries over into the next step's base price.
            self.prevImpactedPrice = info.price - info.currentPermanentImpact

            # Reward: relative reduction of |utility| compared with the previous step.
            currentUtility = self.compute_AC_utility(self.shares_remaining)
            prevAbsUtility = abs(self.prevUtility)
            if prevAbsUtility > 0:
                reward = (prevAbsUtility - abs(currentUtility)) / prevAbsUtility
            else:
                # Defensive: a zero previous utility would otherwise divide by zero.
                reward = 0.0
            self.prevUtility = currentUtility

            if self.shares_remaining <= 0:

                info.implementation_shortfall  = self.total_shares * self.startingPrice - self.totalCapture

                info.done = True
        else:
            reward = 0.0

        self.k += 1

        state = np.array(list(self.logReturns) + [self.timeHorizon / self.num_n, self.shares_remaining / self.total_shares])

        return (state, np.array([reward]), info.done, info)


In [None]:
import numpy as np
import random
import syntheticChrissAlmgren as sca
from ddpg_agent import Agent

from collections import deque

# Values forwarded to env.reset() as liquid_time / num_trades / lamb, plus the
# number of training episodes.
lqt, n_trades, tr, episodes = 60, 60, 1e-6, 5000

env = MarketEnvironmentWithTotalSharesPortion()
agent = Agent(
    state_size=env.observation_space_dimension(),
    action_size=env.action_space_dimension(),
    random_seed=0,
)

# Implementation shortfall per finished episode: rolling window of the most
# recent 100 episodes plus the complete history.
shortfall_deque = deque(maxlen=100)
shortfall_hist = np.array([])

for episode in range(episodes):
    # The episode index doubles as the environment seed.
    cur_state = env.reset(seed=episode, liquid_time=lqt, num_trades=n_trades, lamb=tr)
    env.start_transactions()

    for i in range(n_trades + 1):
        action = agent.act(cur_state, add_noise=True)
        new_state, reward, done, info = env.step(action)
        agent.step(cur_state, action, reward, new_state, done)
        cur_state = new_state

        if not info.done:
            continue

        # Episode finished: record its shortfall and move on to the next one.
        shortfall_deque.append(info.implementation_shortfall)
        shortfall_hist = np.append(shortfall_hist, info.implementation_shortfall)
        break

    # Progress report every 100 episodes (mean over the rolling window).
    if not (episode + 1) % 100:
        print('\rEpisode [{}/{}]\tAverage Shortfall: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque)))

print('\nAverage Implementation Shortfall: ${:,.2f} \n'.format(np.mean(shortfall_hist)))

Episode [100/5000]	Average Shortfall: $1,830,407.23
Episode [200/5000]	Average Shortfall: $2,129,784.18
Episode [300/5000]	Average Shortfall: $2,536,508.46
Episode [400/5000]	Average Shortfall: $2,562,500.00
Episode [500/5000]	Average Shortfall: $2,562,500.00
Episode [600/5000]	Average Shortfall: $2,562,500.00
Episode [700/5000]	Average Shortfall: $2,562,500.00
Episode [800/5000]	Average Shortfall: $2,562,500.00
Episode [900/5000]	Average Shortfall: $2,562,500.00
Episode [1000/5000]	Average Shortfall: $2,562,500.00
Episode [1100/5000]	Average Shortfall: $2,562,500.00
Episode [1200/5000]	Average Shortfall: $2,562,500.00
Episode [1300/5000]	Average Shortfall: $2,562,500.00
Episode [1400/5000]	Average Shortfall: $2,559,600.09
Episode [1500/5000]	Average Shortfall: $2,459,824.99
Episode [1600/5000]	Average Shortfall: $1,871,845.58
Episode [1700/5000]	Average Shortfall: $1,514,916.56
Episode [1800/5000]	Average Shortfall: $1,068,776.57
Episode [1900/5000]	Average Shortfall: $928,384.86
Epis

In [None]:
import syntheticChrissAlmgren as sca

class MarketEnvironmentWithVWAP(sca.MarketEnvironment):
    """Market environment whose action is a volume-participation level.

    Each step sells ``dtv * 0.01 * action`` shares (rounded to a whole number),
    limited to what is still held. ``dtv`` is presumably the daily trading
    volume configured on the base environment -- confirm against
    ``sca.MarketEnvironment``.
    """

    def step(self, action):
        """Advance the simulation by one trade.

        Args:
            action: Scalar multiplier applied to 1% of ``dtv``, or a NumPy
                array holding a single value. The resulting order is clamped
                to ``[0, shares_remaining]`` so an out-of-range action (for
                example after exploration noise) can neither buy shares nor
                sell more than is held.

        Returns:
            Tuple ``(state, reward, done, info)``. ``state`` is the log-return
            window followed by ``timeHorizon / num_n`` and
            ``shares_remaining / total_shares``; ``reward`` is a one-element
            array; ``done`` is a bool; ``info`` carries the per-step
            diagnostics assigned below.
        """

        class Info(object):
            pass
        info = Info()

        info.done = False

        # Horizon exhausted or inventory below tolerance: stop trading and
        # attach the end-of-episode statistics.
        if self.transacting and (self.timeHorizon == 0 or abs(self.shares_remaining) < self.tolerance):
            self.transacting = False
            info.done = True
            info.implementation_shortfall = self.total_shares * self.startingPrice - self.totalCapture
            info.expected_shortfall = self.get_expected_shortfall(self.total_shares)
            info.expected_variance = self.singleStepVariance * self.tau * self.totalSRSQ
            info.utility = info.expected_shortfall + self.llambda * info.expected_variance

        # The very first step reuses the previous impacted price; later steps
        # add a Gaussian shock with variance singleStepVariance * tau.
        if self.k == 0:
            info.price = self.prevImpactedPrice
        else:
            info.price = self.prevImpactedPrice + np.sqrt(self.singleStepVariance * self.tau) * random.normalvariate(0, 1)

        if self.transacting:

            if isinstance(action, np.ndarray):
                action = action.item()

            # Action scales a participation cap of 1% of dtv per step.
            max_daily_participation = 0.01
            sharesToSellNow = self.dtv * max_daily_participation * action

            # Never sell a negative amount or more than is currently held.
            sharesToSellNow = min(max(sharesToSellNow, 0.0), self.shares_remaining)

            # On the last tradable step everything left is liquidated.
            if self.timeHorizon < 2:
                sharesToSellNow = self.shares_remaining

            info.share_to_sell_now = np.around(sharesToSellNow)

            info.currentPermanentImpact = self.permanentImpact(info.share_to_sell_now)
            info.currentTemporaryImpact = self.temporaryImpact(info.share_to_sell_now)

            # Temporary impact lowers the price obtained for this order only.
            info.exec_price = info.price - info.currentTemporaryImpact

            self.totalCapture += info.share_to_sell_now * info.exec_price

            # Slide the fixed-length log-return window forward by one entry.
            self.logReturns.append(np.log(info.price/self.prevPrice))
            self.logReturns.popleft()

            self.shares_remaining -= info.share_to_sell_now

            self.totalSSSQ += info.share_to_sell_now ** 2
            self.totalSRSQ += self.shares_remaining ** 2

            self.timeHorizon -= 1
            self.prevPrice = info.price
            # Permanent impact carries over into the next step's base price.
            self.prevImpactedPrice = info.price - info.currentPermanentImpact

            # Reward: relative reduction of |utility| compared with the previous step.
            currentUtility = self.compute_AC_utility(self.shares_remaining)
            prevAbsUtility = abs(self.prevUtility)
            if prevAbsUtility > 0:
                reward = (prevAbsUtility - abs(currentUtility)) / prevAbsUtility
            else:
                # Defensive: a zero previous utility would otherwise divide by zero.
                reward = 0.0
            self.prevUtility = currentUtility

            if self.shares_remaining <= 0:

                info.implementation_shortfall  = self.total_shares * self.startingPrice - self.totalCapture

                info.done = True
        else:
            reward = 0.0

        self.k += 1

        state = np.array(list(self.logReturns) + [self.timeHorizon / self.num_n, self.shares_remaining / self.total_shares])

        return (state, np.array([reward]), info.done, info)


In [None]:
import numpy as np
import random
import syntheticChrissAlmgren as sca
from ddpg_agent import Agent

from collections import deque

# Values forwarded to env.reset() as liquid_time / num_trades / lamb, plus the
# number of training episodes.
lqt, n_trades, tr, episodes = 60, 60, 1e-6, 5000

env = MarketEnvironmentWithVWAP()
agent = Agent(
    state_size=env.observation_space_dimension(),
    action_size=env.action_space_dimension(),
    random_seed=0,
)

# Implementation shortfall per finished episode: rolling window of the most
# recent 100 episodes plus the complete history.
shortfall_deque = deque(maxlen=100)
shortfall_hist = np.array([])

for episode in range(episodes):
    # The episode index doubles as the environment seed.
    cur_state = env.reset(seed=episode, liquid_time=lqt, num_trades=n_trades, lamb=tr)
    env.start_transactions()

    for i in range(n_trades + 1):
        action = agent.act(cur_state, add_noise=True)
        new_state, reward, done, info = env.step(action)
        agent.step(cur_state, action, reward, new_state, done)
        cur_state = new_state

        if not info.done:
            continue

        # Episode finished: record its shortfall and move on to the next one.
        shortfall_deque.append(info.implementation_shortfall)
        shortfall_hist = np.append(shortfall_hist, info.implementation_shortfall)
        break

    # Progress report every 100 episodes (mean over the rolling window).
    if not (episode + 1) % 100:
        print('\rEpisode [{}/{}]\tAverage Shortfall: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque)))

print('\nAverage Implementation Shortfall: ${:,.2f} \n'.format(np.mean(shortfall_hist)))

Episode [100/5000]	Average Shortfall: $253,439.13
Episode [200/5000]	Average Shortfall: $558,111.99
Episode [300/5000]	Average Shortfall: $310,511.88
Episode [400/5000]	Average Shortfall: $347,387.60
Episode [500/5000]	Average Shortfall: $444,285.04
Episode [600/5000]	Average Shortfall: $245,131.66
Episode [700/5000]	Average Shortfall: $232,443.81
Episode [800/5000]	Average Shortfall: $381,483.32
Episode [900/5000]	Average Shortfall: $394,096.27
Episode [1000/5000]	Average Shortfall: $184,805.61
Episode [1100/5000]	Average Shortfall: $239,851.33
Episode [1200/5000]	Average Shortfall: $297,301.29
Episode [1300/5000]	Average Shortfall: $251,118.26
Episode [1400/5000]	Average Shortfall: $263,581.92
Episode [1500/5000]	Average Shortfall: $384,343.13
Episode [1600/5000]	Average Shortfall: $294,317.07
Episode [1700/5000]	Average Shortfall: $420,773.06
Episode [1800/5000]	Average Shortfall: $322,486.62
Episode [1900/5000]	Average Shortfall: $296,942.28
Episode [2000/5000]	Average Shortfall: $

In [None]:
import syntheticChrissAlmgren as sca

class MarketEnvironmentWithTimeWeighted(sca.MarketEnvironment):
    """Market environment whose action is damped by the fraction of time left.

    Each step sells ``shares_remaining * action * (timeHorizon / num_n)``
    shares (rounded to a whole number), so the same action value sells less as
    the horizon runs down.
    """

    def step(self, action):
        """Advance the simulation by one trade.

        Args:
            action: Scalar fraction of the remaining shares (before time
                scaling), or a NumPy array holding a single value. The
                resulting order is clamped to ``[0, shares_remaining]`` so an
                out-of-range action (for example after exploration noise) can
                neither buy shares nor sell more than is held.

        Returns:
            Tuple ``(state, reward, done, info)``. ``state`` is the log-return
            window followed by ``timeHorizon / num_n`` and
            ``shares_remaining / total_shares``; ``reward`` is a one-element
            array; ``done`` is a bool; ``info`` carries the per-step
            diagnostics assigned below.
        """

        class Info(object):
            pass
        info = Info()

        info.done = False

        # Horizon exhausted or inventory below tolerance: stop trading and
        # attach the end-of-episode statistics.
        if self.transacting and (self.timeHorizon == 0 or abs(self.shares_remaining) < self.tolerance):
            self.transacting = False
            info.done = True
            info.implementation_shortfall = self.total_shares * self.startingPrice - self.totalCapture
            info.expected_shortfall = self.get_expected_shortfall(self.total_shares)
            info.expected_variance = self.singleStepVariance * self.tau * self.totalSRSQ
            info.utility = info.expected_shortfall + self.llambda * info.expected_variance

        # The very first step reuses the previous impacted price; later steps
        # add a Gaussian shock with variance singleStepVariance * tau.
        if self.k == 0:
            info.price = self.prevImpactedPrice
        else:
            info.price = self.prevImpactedPrice + np.sqrt(self.singleStepVariance * self.tau) * random.normalvariate(0, 1)

        if self.transacting:

            if isinstance(action, np.ndarray):
                action = action.item()

            # Action scaled by the fraction of the trading horizon still left.
            time_factor = self.timeHorizon / self.num_n
            sharesToSellNow = self.shares_remaining * action * time_factor

            # Never sell a negative amount or more than is currently held.
            sharesToSellNow = min(max(sharesToSellNow, 0.0), self.shares_remaining)

            # On the last tradable step everything left is liquidated.
            if self.timeHorizon < 2:
                sharesToSellNow = self.shares_remaining

            info.share_to_sell_now = np.around(sharesToSellNow)

            info.currentPermanentImpact = self.permanentImpact(info.share_to_sell_now)
            info.currentTemporaryImpact = self.temporaryImpact(info.share_to_sell_now)

            # Temporary impact lowers the price obtained for this order only.
            info.exec_price = info.price - info.currentTemporaryImpact

            self.totalCapture += info.share_to_sell_now * info.exec_price

            # Slide the fixed-length log-return window forward by one entry.
            self.logReturns.append(np.log(info.price/self.prevPrice))
            self.logReturns.popleft()

            self.shares_remaining -= info.share_to_sell_now

            self.totalSSSQ += info.share_to_sell_now ** 2
            self.totalSRSQ += self.shares_remaining ** 2

            self.timeHorizon -= 1
            self.prevPrice = info.price
            # Permanent impact carries over into the next step's base price.
            self.prevImpactedPrice = info.price - info.currentPermanentImpact

            # Reward: relative reduction of |utility| compared with the previous step.
            currentUtility = self.compute_AC_utility(self.shares_remaining)
            prevAbsUtility = abs(self.prevUtility)
            if prevAbsUtility > 0:
                reward = (prevAbsUtility - abs(currentUtility)) / prevAbsUtility
            else:
                # Defensive: a zero previous utility would otherwise divide by zero.
                reward = 0.0
            self.prevUtility = currentUtility

            if self.shares_remaining <= 0:

                info.implementation_shortfall  = self.total_shares * self.startingPrice - self.totalCapture

                info.done = True
        else:
            reward = 0.0

        self.k += 1

        state = np.array(list(self.logReturns) + [self.timeHorizon / self.num_n, self.shares_remaining / self.total_shares])

        return (state, np.array([reward]), info.done, info)


In [None]:
import numpy as np
import random
import syntheticChrissAlmgren as sca
from ddpg_agent import Agent

from collections import deque

# Values forwarded to env.reset() as liquid_time / num_trades / lamb, plus the
# number of training episodes.
lqt, n_trades, tr, episodes = 60, 60, 1e-6, 5000

env = MarketEnvironmentWithTimeWeighted()
agent = Agent(
    state_size=env.observation_space_dimension(),
    action_size=env.action_space_dimension(),
    random_seed=0,
)

# Implementation shortfall per finished episode: rolling window of the most
# recent 100 episodes plus the complete history.
shortfall_deque = deque(maxlen=100)
shortfall_hist = np.array([])

for episode in range(episodes):
    # The episode index doubles as the environment seed.
    cur_state = env.reset(seed=episode, liquid_time=lqt, num_trades=n_trades, lamb=tr)
    env.start_transactions()

    for i in range(n_trades + 1):
        action = agent.act(cur_state, add_noise=True)
        new_state, reward, done, info = env.step(action)
        agent.step(cur_state, action, reward, new_state, done)
        cur_state = new_state

        if not info.done:
            continue

        # Episode finished: record its shortfall and move on to the next one.
        shortfall_deque.append(info.implementation_shortfall)
        shortfall_hist = np.append(shortfall_hist, info.implementation_shortfall)
        break

    # Progress report every 100 episodes (mean over the rolling window).
    if not (episode + 1) % 100:
        print('\rEpisode [{}/{}]\tAverage Shortfall: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque)))

print('\nAverage Implementation Shortfall: ${:,.2f} \n'.format(np.mean(shortfall_hist)))

Episode [100/5000]	Average Shortfall: $2,304,080.60
Episode [200/5000]	Average Shortfall: $2,562,500.00
Episode [300/5000]	Average Shortfall: $2,562,500.00
Episode [400/5000]	Average Shortfall: $2,562,500.00
Episode [500/5000]	Average Shortfall: $2,562,500.00
Episode [600/5000]	Average Shortfall: $2,562,500.00
Episode [700/5000]	Average Shortfall: $2,562,500.00
Episode [800/5000]	Average Shortfall: $2,562,500.00
Episode [900/5000]	Average Shortfall: $2,562,500.00
Episode [1000/5000]	Average Shortfall: $2,562,500.00
Episode [1100/5000]	Average Shortfall: $2,562,500.00
Episode [1200/5000]	Average Shortfall: $2,562,500.00
Episode [1300/5000]	Average Shortfall: $2,562,500.00
Episode [1400/5000]	Average Shortfall: $2,562,500.00
Episode [1500/5000]	Average Shortfall: $2,562,500.00
Episode [1600/5000]	Average Shortfall: $2,562,500.00
Episode [1700/5000]	Average Shortfall: $2,562,500.00
Episode [1800/5000]	Average Shortfall: $2,562,500.00
Episode [1900/5000]	Average Shortfall: $2,562,500.00
Ep

In [None]:
import syntheticChrissAlmgren as sca

class MarketEnvironmentWithVolatilityAdjusted(sca.MarketEnvironment):
    """Market environment whose action is scaled by recent realised volatility.

    Each step sells ``shares_remaining * action * (std(logReturns) / dpv)``
    shares (rounded to a whole number). ``dpv`` is presumably the configured
    daily volatility of the base environment -- confirm against
    ``sca.MarketEnvironment``.

    Note: while every entry of the log-return window is identical its standard
    deviation is 0, so no shares are sold until the forced liquidation on the
    last tradable step.
    """

    def step(self, action):
        """Advance the simulation by one trade.

        Args:
            action: Scalar fraction of the remaining shares (before the
                volatility scaling), or a NumPy array holding a single value.
                Because the volatility ratio is unbounded, the resulting order
                is clamped to ``[0, shares_remaining]`` so the environment can
                neither buy shares nor sell more than is held.

        Returns:
            Tuple ``(state, reward, done, info)``. ``state`` is the log-return
            window followed by ``timeHorizon / num_n`` and
            ``shares_remaining / total_shares``; ``reward`` is a one-element
            array; ``done`` is a bool; ``info`` carries the per-step
            diagnostics assigned below.
        """

        class Info(object):
            pass
        info = Info()

        info.done = False

        # Horizon exhausted or inventory below tolerance: stop trading and
        # attach the end-of-episode statistics.
        if self.transacting and (self.timeHorizon == 0 or abs(self.shares_remaining) < self.tolerance):
            self.transacting = False
            info.done = True
            info.implementation_shortfall = self.total_shares * self.startingPrice - self.totalCapture
            info.expected_shortfall = self.get_expected_shortfall(self.total_shares)
            info.expected_variance = self.singleStepVariance * self.tau * self.totalSRSQ
            info.utility = info.expected_shortfall + self.llambda * info.expected_variance

        # The very first step reuses the previous impacted price; later steps
        # add a Gaussian shock with variance singleStepVariance * tau.
        if self.k == 0:
            info.price = self.prevImpactedPrice
        else:
            info.price = self.prevImpactedPrice + np.sqrt(self.singleStepVariance * self.tau) * random.normalvariate(0, 1)

        if self.transacting:

            if isinstance(action, np.ndarray):
                action = action.item()

            # Action scaled by realised volatility of the log-return window
            # relative to dpv.
            recent_volatility = np.std(list(self.logReturns))
            vol_adjustment = recent_volatility / self.dpv
            sharesToSellNow = self.shares_remaining * action * vol_adjustment

            # vol_adjustment can exceed 1: never sell a negative amount or
            # more than is currently held.
            sharesToSellNow = min(max(sharesToSellNow, 0.0), self.shares_remaining)

            # On the last tradable step everything left is liquidated.
            if self.timeHorizon < 2:
                sharesToSellNow = self.shares_remaining

            info.share_to_sell_now = np.around(sharesToSellNow)

            info.currentPermanentImpact = self.permanentImpact(info.share_to_sell_now)
            info.currentTemporaryImpact = self.temporaryImpact(info.share_to_sell_now)

            # Temporary impact lowers the price obtained for this order only.
            info.exec_price = info.price - info.currentTemporaryImpact

            self.totalCapture += info.share_to_sell_now * info.exec_price

            # Slide the fixed-length log-return window forward by one entry.
            self.logReturns.append(np.log(info.price/self.prevPrice))
            self.logReturns.popleft()

            self.shares_remaining -= info.share_to_sell_now

            self.totalSSSQ += info.share_to_sell_now ** 2
            self.totalSRSQ += self.shares_remaining ** 2

            self.timeHorizon -= 1
            self.prevPrice = info.price
            # Permanent impact carries over into the next step's base price.
            self.prevImpactedPrice = info.price - info.currentPermanentImpact

            # Reward: relative reduction of |utility| compared with the previous step.
            currentUtility = self.compute_AC_utility(self.shares_remaining)
            prevAbsUtility = abs(self.prevUtility)
            if prevAbsUtility > 0:
                reward = (prevAbsUtility - abs(currentUtility)) / prevAbsUtility
            else:
                # Defensive: a zero previous utility would otherwise divide by zero.
                reward = 0.0
            self.prevUtility = currentUtility

            if self.shares_remaining <= 0:

                info.implementation_shortfall  = self.total_shares * self.startingPrice - self.totalCapture

                info.done = True
        else:
            reward = 0.0

        self.k += 1

        state = np.array(list(self.logReturns) + [self.timeHorizon / self.num_n, self.shares_remaining / self.total_shares])

        return (state, np.array([reward]), info.done, info)


In [None]:
import numpy as np
import random
import syntheticChrissAlmgren as sca
from ddpg_agent import Agent

from collections import deque

# Values forwarded to env.reset() as liquid_time / num_trades / lamb, plus the
# number of training episodes.
lqt, n_trades, tr, episodes = 60, 60, 1e-6, 5000

env = MarketEnvironmentWithVolatilityAdjusted()
agent = Agent(
    state_size=env.observation_space_dimension(),
    action_size=env.action_space_dimension(),
    random_seed=0,
)

# Implementation shortfall per finished episode: rolling window of the most
# recent 100 episodes plus the complete history.
shortfall_deque = deque(maxlen=100)
shortfall_hist = np.array([])

for episode in range(episodes):
    # The episode index doubles as the environment seed.
    cur_state = env.reset(seed=episode, liquid_time=lqt, num_trades=n_trades, lamb=tr)
    env.start_transactions()

    for i in range(n_trades + 1):
        action = agent.act(cur_state, add_noise=True)
        new_state, reward, done, info = env.step(action)
        agent.step(cur_state, action, reward, new_state, done)
        cur_state = new_state

        if not info.done:
            continue

        # Episode finished: record its shortfall and move on to the next one.
        shortfall_deque.append(info.implementation_shortfall)
        shortfall_hist = np.append(shortfall_hist, info.implementation_shortfall)
        break

    # Progress report every 100 episodes (mean over the rolling window).
    if not (episode + 1) % 100:
        print('\rEpisode [{}/{}]\tAverage Shortfall: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque)))

print('\nAverage Implementation Shortfall: ${:,.2f} \n'.format(np.mean(shortfall_hist)))

Episode [100/5000]	Average Shortfall: $825,072.85
Episode [200/5000]	Average Shortfall: $611,186.97
Episode [300/5000]	Average Shortfall: $686,566.16
Episode [400/5000]	Average Shortfall: $870,225.03
Episode [500/5000]	Average Shortfall: $810,280.56
Episode [600/5000]	Average Shortfall: $833,474.32
Episode [700/5000]	Average Shortfall: $639,577.59
Episode [800/5000]	Average Shortfall: $1,038,493.91
Episode [900/5000]	Average Shortfall: $655,694.85
Episode [1000/5000]	Average Shortfall: $426,853.50
Episode [1100/5000]	Average Shortfall: $449,407.12
Episode [1200/5000]	Average Shortfall: $451,880.65
Episode [1300/5000]	Average Shortfall: $420,497.62
Episode [1400/5000]	Average Shortfall: $401,983.88
Episode [1500/5000]	Average Shortfall: $530,264.79
Episode [1600/5000]	Average Shortfall: $498,652.40
Episode [1700/5000]	Average Shortfall: $587,349.40
Episode [1800/5000]	Average Shortfall: $545,474.83
Episode [1900/5000]	Average Shortfall: $500,896.83
Episode [2000/5000]	Average Shortfall:

In [None]:
import syntheticChrissAlmgren as sca

class MarketEnvironmentWithMarketConditions(sca.MarketEnvironment):
    """Market environment whose action is nudged by spread and last price move.

    Each step sells ``shares_remaining * action * adjustment`` shares (rounded
    to a whole number), where
    ``adjustment = 1 + (basp / prevPrice) * logReturns[-1]``. ``basp`` is
    presumably the bid-ask spread configured on the base environment --
    confirm against ``sca.MarketEnvironment``.
    """

    def step(self, action):
        """Advance the simulation by one trade.

        Args:
            action: Scalar fraction of the remaining shares (before the
                adjustment), or a NumPy array holding a single value. Because
                the adjustment can exceed 1, the resulting order is clamped to
                ``[0, shares_remaining]`` so the environment can neither buy
                shares nor sell more than is held.

        Returns:
            Tuple ``(state, reward, done, info)``. ``state`` is the log-return
            window followed by ``timeHorizon / num_n`` and
            ``shares_remaining / total_shares``; ``reward`` is a one-element
            array; ``done`` is a bool; ``info`` carries the per-step
            diagnostics assigned below.
        """

        class Info(object):
            pass
        info = Info()

        info.done = False

        # Horizon exhausted or inventory below tolerance: stop trading and
        # attach the end-of-episode statistics.
        if self.transacting and (self.timeHorizon == 0 or abs(self.shares_remaining) < self.tolerance):
            self.transacting = False
            info.done = True
            info.implementation_shortfall = self.total_shares * self.startingPrice - self.totalCapture
            info.expected_shortfall = self.get_expected_shortfall(self.total_shares)
            info.expected_variance = self.singleStepVariance * self.tau * self.totalSRSQ
            info.utility = info.expected_shortfall + self.llambda * info.expected_variance

        # The very first step reuses the previous impacted price; later steps
        # add a Gaussian shock with variance singleStepVariance * tau.
        if self.k == 0:
            info.price = self.prevImpactedPrice
        else:
            info.price = self.prevImpactedPrice + np.sqrt(self.singleStepVariance * self.tau) * random.normalvariate(0, 1)

        if self.transacting:

            if isinstance(action, np.ndarray):
                action = action.item()

            # Action adjusted by the relative spread times the most recent
            # log return (both taken before this step's updates).
            spread_factor = self.basp / self.prevPrice
            price_momentum = self.logReturns[-1]
            adjustment = 1 + (spread_factor * price_momentum)
            sharesToSellNow = self.shares_remaining * action * adjustment

            # adjustment can exceed 1: never sell a negative amount or more
            # than is currently held.
            sharesToSellNow = min(max(sharesToSellNow, 0.0), self.shares_remaining)

            # On the last tradable step everything left is liquidated.
            if self.timeHorizon < 2:
                sharesToSellNow = self.shares_remaining

            info.share_to_sell_now = np.around(sharesToSellNow)

            info.currentPermanentImpact = self.permanentImpact(info.share_to_sell_now)
            info.currentTemporaryImpact = self.temporaryImpact(info.share_to_sell_now)

            # Temporary impact lowers the price obtained for this order only.
            info.exec_price = info.price - info.currentTemporaryImpact

            self.totalCapture += info.share_to_sell_now * info.exec_price

            # Slide the fixed-length log-return window forward by one entry.
            self.logReturns.append(np.log(info.price/self.prevPrice))
            self.logReturns.popleft()

            self.shares_remaining -= info.share_to_sell_now

            self.totalSSSQ += info.share_to_sell_now ** 2
            self.totalSRSQ += self.shares_remaining ** 2

            self.timeHorizon -= 1
            self.prevPrice = info.price
            # Permanent impact carries over into the next step's base price.
            self.prevImpactedPrice = info.price - info.currentPermanentImpact

            # Reward: relative reduction of |utility| compared with the previous step.
            currentUtility = self.compute_AC_utility(self.shares_remaining)
            prevAbsUtility = abs(self.prevUtility)
            if prevAbsUtility > 0:
                reward = (prevAbsUtility - abs(currentUtility)) / prevAbsUtility
            else:
                # Defensive: a zero previous utility would otherwise divide by zero.
                reward = 0.0
            self.prevUtility = currentUtility

            if self.shares_remaining <= 0:

                info.implementation_shortfall  = self.total_shares * self.startingPrice - self.totalCapture

                info.done = True
        else:
            reward = 0.0

        self.k += 1

        state = np.array(list(self.logReturns) + [self.timeHorizon / self.num_n, self.shares_remaining / self.total_shares])

        return (state, np.array([reward]), info.done, info)


In [None]:
import numpy as np
import random
import syntheticChrissAlmgren as sca
from ddpg_agent import Agent

from collections import deque

# Values forwarded to env.reset() as liquid_time / num_trades / lamb, plus the
# number of training episodes.
lqt, n_trades, tr, episodes = 60, 60, 1e-6, 5000

env = MarketEnvironmentWithMarketConditions()
agent = Agent(
    state_size=env.observation_space_dimension(),
    action_size=env.action_space_dimension(),
    random_seed=0,
)

# Implementation shortfall per finished episode: rolling window of the most
# recent 100 episodes plus the complete history.
shortfall_deque = deque(maxlen=100)
shortfall_hist = np.array([])

for episode in range(episodes):
    # The episode index doubles as the environment seed.
    cur_state = env.reset(seed=episode, liquid_time=lqt, num_trades=n_trades, lamb=tr)
    env.start_transactions()

    for i in range(n_trades + 1):
        action = agent.act(cur_state, add_noise=True)
        new_state, reward, done, info = env.step(action)
        agent.step(cur_state, action, reward, new_state, done)
        cur_state = new_state

        if not info.done:
            continue

        # Episode finished: record its shortfall and move on to the next one.
        shortfall_deque.append(info.implementation_shortfall)
        shortfall_hist = np.append(shortfall_hist, info.implementation_shortfall)
        break

    # Progress report every 100 episodes (mean over the rolling window).
    if not (episode + 1) % 100:
        print('\rEpisode [{}/{}]\tAverage Shortfall: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque)))

print('\nAverage Implementation Shortfall: ${:,.2f} \n'.format(np.mean(shortfall_hist)))

Episode [100/5000]	Average Shortfall: $2,276,855.75
Episode [200/5000]	Average Shortfall: $2,562,256.53
Episode [300/5000]	Average Shortfall: $2,562,500.00
Episode [400/5000]	Average Shortfall: $2,562,500.00
Episode [500/5000]	Average Shortfall: $2,562,500.00
Episode [600/5000]	Average Shortfall: $2,562,500.00
Episode [700/5000]	Average Shortfall: $2,562,500.00
Episode [800/5000]	Average Shortfall: $2,562,500.00
Episode [900/5000]	Average Shortfall: $2,562,500.00
Episode [1000/5000]	Average Shortfall: $2,562,500.00
Episode [1100/5000]	Average Shortfall: $2,562,500.00
Episode [1200/5000]	Average Shortfall: $2,562,500.00
Episode [1300/5000]	Average Shortfall: $2,562,500.00
Episode [1400/5000]	Average Shortfall: $2,562,500.00
Episode [1500/5000]	Average Shortfall: $2,562,500.00
Episode [1600/5000]	Average Shortfall: $2,562,500.00
Episode [1700/5000]	Average Shortfall: $2,562,500.00
Episode [1800/5000]	Average Shortfall: $2,562,500.00
Episode [1900/5000]	Average Shortfall: $2,562,500.00
Ep

In [None]:
from ddpg_agent import Agent,OUNoise
from model import Actor, Critic
import torch
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
class MultiDimensionalAgent(Agent):
    """
    DDPG Agent override for multi-dimensional actions (3D in this case)
    """
    def __init__(self, state_size, action_size=3, random_seed=0):
        super(MultiDimensionalAgent, self).__init__(state_size, action_size, random_seed)

        self.noise = OUNoise(size=action_size, seed=random_seed)

        self.actor_local = Actor(state_size, action_size, random_seed).to(device)
        self.actor_target = Actor(state_size, action_size, random_seed).to(device)

        self.actor_optimizer = torch.optim.Adam(self.actor_local.parameters(), lr=1e-4)

        self.soft_update(self.actor_local, self.actor_target, 1.0)

    def act(self, state, add_noise=True):
        """Returns actions for given state as per current policy."""
        state = torch.from_numpy(state).float().to(device)
        self.actor_local.eval()
        with torch.no_grad():
            action = self.actor_local(state).cpu().data.numpy()
        self.actor_local.train()
        if add_noise:
            action += self.noise.sample() 
        action = (action + 1.0) / 2.0 
        return np.clip(action, 0, 1)
    
    

In [None]:
import syntheticChrissAlmgren as sca

class MarketEnvironmentWithMultiDimensional(sca.MarketEnvironment):
    """Market environment whose `step` takes a three-component action.

    Only `step` is overridden. Every other attribute and helper used below
    (`permanentImpact`, `temporaryImpact`, `compute_AC_utility`,
    `get_expected_shortfall`, ...) is not defined in this class and is
    presumably inherited from `sca.MarketEnvironment`.

    NOTE(review): `step` uses the module-level names `np` and `random`, which
    this cell does not import; they must already exist in the kernel by the
    time `step` is called.
    """
    def step(self, action):
        """Advance the simulation by one trade.

        Args:
            action: iterable that unpacks into exactly three numbers,
                `(fraction_to_sell, urgency, risk_factor)`. While transacting,
                the number of shares sold is `shares_remaining *
                fraction_to_sell * urgency * risk_factor`, rounded with
                `np.around`.
                NOTE(review): the components are not range-checked here;
                callers are presumably expected to pass values in [0, 1] so the
                sale never exceeds the shares remaining -- confirm.

        Returns:
            Tuple `(state, reward, done, info)`: `state` is the log-return
            window followed by the remaining-time and remaining-shares
            fractions, `reward` is a one-element array, `done` is `info.done`,
            and `info` carries the per-step values set below.
        """

        # Ad-hoc attribute bag for the per-step values returned to the caller.
        class Info(object):
            pass
        info = Info()

        info.done = False

        # Stop transacting once the time horizon is exhausted or fewer than
        # `tolerance` shares remain, and report the final shortfall, expected
        # shortfall, expected variance and utility.
        if self.transacting and (self.timeHorizon == 0 or abs(self.shares_remaining) < self.tolerance):
            self.transacting = False
            info.done = True
            info.implementation_shortfall = self.total_shares * self.startingPrice - self.totalCapture
            info.expected_shortfall = self.get_expected_shortfall(self.total_shares)
            info.expected_variance = self.singleStepVariance * self.tau * self.totalSRSQ
            info.utility = info.expected_shortfall + self.llambda * info.expected_variance

        # The first step (k == 0) uses the previous impacted price as-is; later
        # steps add a standard-normal draw scaled by sqrt(singleStepVariance * tau).
        if self.k == 0:
            info.price = self.prevImpactedPrice
        else:
            info.price = self.prevImpactedPrice + np.sqrt(self.singleStepVariance * self.tau) * random.normalvariate(0, 1)


        if self.transacting:

            # Action vector: [fraction_to_sell, urgency_factor, risk_tolerance]
            # The three components are multiplied together, so each one scales
            # down the share of the remaining position that is sold.
            fraction_to_sell, urgency, risk_factor = action
            base_sell = self.shares_remaining * fraction_to_sell
            sharesToSellNow = base_sell * urgency * risk_factor

            # With fewer than 2 steps left, liquidate everything that remains.
            if self.timeHorizon < 2:
                sharesToSellNow = self.shares_remaining

            # Round to a whole number of shares.
            info.share_to_sell_now = np.around(sharesToSellNow)

            info.currentPermanentImpact = self.permanentImpact(info.share_to_sell_now)
            info.currentTemporaryImpact = self.temporaryImpact(info.share_to_sell_now)

            # The temporary impact only lowers this trade's execution price; the
            # permanent impact is carried into prevImpactedPrice further down.
            info.exec_price = info.price - info.currentTemporaryImpact

            self.totalCapture += info.share_to_sell_now * info.exec_price

            # Slide the log-return window: push the newest return, drop the oldest.
            # prevPrice still holds the previous step's price at this point.
            self.logReturns.append(np.log(info.price/self.prevPrice))
            self.logReturns.popleft()

            self.shares_remaining -= info.share_to_sell_now

            # Running sums of squared shares sold and squared shares remaining
            # (totalSRSQ feeds expected_variance in the termination branch above).
            self.totalSSSQ += info.share_to_sell_now ** 2
            self.totalSRSQ += self.shares_remaining ** 2

            # Bookkeeping for the next step.
            self.timeHorizon -= 1
            self.prevPrice = info.price
            self.prevImpactedPrice = info.price - info.currentPermanentImpact

            # Reward is the relative drop in |utility| since the previous step;
            # compute_AC_utility is defined outside this class.
            currentUtility = self.compute_AC_utility(self.shares_remaining)
            reward = (abs(self.prevUtility) - abs(currentUtility)) / abs(self.prevUtility)
            self.prevUtility = currentUtility

            # Nothing left to sell: report the realised shortfall and flag done.
            if self.shares_remaining <= 0:

                info.implementation_shortfall  = self.total_shares * self.startingPrice - self.totalCapture

                info.done = True
        else:
            # Not transacting: no reward for this step.
            reward = 0.0

        # Step counter; it also decides whether price noise is applied (k == 0 above).
        self.k += 1

        # Next observation: the log-return window plus the fraction of time and
        # the fraction of shares remaining.
        state = np.array(list(self.logReturns) + [self.timeHorizon / self.num_n, self.shares_remaining / self.total_shares])

        return (state, np.array([reward]), info.done, info)


In [None]:
import numpy as np
import random
import syntheticChrissAlmgren as sca
from ddpg_agent import Agent

from collections import deque

# Environment and agent under test: three-component actions.
env = MarketEnvironmentWithMultiDimensional()
agent = MultiDimensionalAgent(
    state_size=env.observation_space_dimension(),
    action_size=3,
    random_seed=0,
)

# Values handed to env.reset for every episode: liquidation time, number of
# trades and risk aversion.
lqt, n_trades, tr = 60, 60, 1e-6

# Number of training episodes.
episodes = 5000

# Full shortfall history plus a window holding only the latest 100 episodes.
shortfall_hist = np.array([])
shortfall_deque = deque(maxlen=100)

for episode in range(episodes):
    # The episode index doubles as the reset seed.
    cur_state = env.reset(seed=episode, liquid_time=lqt, num_trades=n_trades, lamb=tr)
    env.start_transactions()

    for i in range(n_trades + 1):
        action = agent.act(cur_state, add_noise=True)
        new_state, reward, done, info = env.step(action)
        agent.step(cur_state, action, reward, new_state, done)
        cur_state = new_state

        # Keep stepping until the environment reports the episode as finished.
        if not info.done:
            continue

        shortfall_hist = np.append(shortfall_hist, info.implementation_shortfall)
        shortfall_deque.append(info.implementation_shortfall)
        break

    # Progress line after every 100th episode, averaged over the recent window.
    if not (episode + 1) % 100:
        print('\rEpisode [{}/{}]\tAverage Shortfall: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque)))

# Average over every recorded episode.
print('\nAverage Implementation Shortfall: ${:,.2f} \n'.format(np.mean(shortfall_hist)))

Episode [100/5000]	Average Shortfall: $2,213,899.39
Episode [200/5000]	Average Shortfall: $2,562,499.75
Episode [300/5000]	Average Shortfall: $2,562,499.75
Episode [400/5000]	Average Shortfall: $2,562,499.75
Episode [500/5000]	Average Shortfall: $2,562,499.75
Episode [600/5000]	Average Shortfall: $2,562,499.75
Episode [700/5000]	Average Shortfall: $2,562,499.75
Episode [800/5000]	Average Shortfall: $2,562,499.75
Episode [900/5000]	Average Shortfall: $2,562,499.75
Episode [1000/5000]	Average Shortfall: $2,562,499.75
Episode [1100/5000]	Average Shortfall: $2,562,499.75
Episode [1200/5000]	Average Shortfall: $2,562,499.75
Episode [1300/5000]	Average Shortfall: $2,562,499.75
Episode [1400/5000]	Average Shortfall: $2,562,499.75
Episode [1500/5000]	Average Shortfall: $2,562,499.75
Episode [1600/5000]	Average Shortfall: $2,562,499.75
Episode [1700/5000]	Average Shortfall: $2,562,499.75
Episode [1800/5000]	Average Shortfall: $2,562,499.75
Episode [1900/5000]	Average Shortfall: $2,562,499.75
Ep