In [3]:
from tracemalloc import start
from unicodedata import name
import gym 
from gym import spaces 
from gym.utils import seeding 
import numpy as np 
from Simulator.simulations import OptionSimulation

In [15]:
class TradingEnv(gym.Env):
    trading_days = 252 #Number of trading days in one year 
    num_of_shares = 100 #Vanilla options contract size
    """
    Trading Enviroment class with all the modules related 
    to performing trading under a controlled simulation environment.

    Parameters:
        num_simulations: The number of GBM and BS simulations to run for the agent to train on
        num_contracts: The number of contracts the agent will hold.
        multiplier: Float value required for the intensity of the bid-offer spread 
        tick_size: Used for computing the cost relative to the midpoint of the bid-offer spread
        kappa: The risk factor of the portfolio
    """

    def __init__(self,num_simulations=int,num_contracts=int,
    multiplier=float,tick_size=float,kappa=float):

        self.num_simulations = 100
        self.num_contracts = num_contracts 
        self.multiplier = multiplier 
        self.tick_size = tick_size
        self.kappa = kappa  

        os = OptionSimulation(100,self.num_simulations) 
        
        #can add a maturity term
        self.sim_prices = os.GBM(50,0.5,time_increment=1)
        self.days_to_expiry = os.ttm #Creates an array of days left to expiry 
        self.option_price_path, self.option_delta_path = os.BS_call(self.days_to_expiry,self.sim_prices,100,0.01,0,0)
        
        #Action space (Discrete)
        self.num_actions = self.num_contracts*self.num_of_shares #Number of actions 
        self.action_space = spaces.Discrete(1001,start=-self.num_actions) #Discrete action space 

        if self.num_contracts > 10:
            raise ValueError("The maximum number of contracts in the simulation cannot be more than 10.")

    def _cost_of_trade(self,n):
        #n: Number of shares 
        cost = self.multiplier * self.tick_size * (np.abs(n) * 0.01*n*n)
        return cost 

    def _wealth_of_trade(self,pt,n):
        #W_{t} = q_{t} - c_{t} (pt: Price of the stock at time 't')
        ct = self._cost_of_trade(n)
        wt = pt - ct 
        return wt 

    def reward(self, pt, n):
        '''
        Computes the reward given to the agent

        Parameters:
            pt: Price at time 't'
            n: Number of shares at time 't'
        '''
        wt = self._wealth_of_trade(pt, n)
        rwd = wt - (self.kappa*0.5)*(wt**2) 
        return rwd 
    
    def reset(self, episode):
        # repeatedly go through available simulated paths (if needed)
        self.t = 0
        self.path = episode
        ttm = self.days_to_expiry[0]-1

        n = 500 #no of shares
        #print(self.sim_prices[self.path])
        #print(self.sim_prices[self.path,0])
        #print(self.option_delta_path[self.path,self.t])
        
        #K : strike price (to be given)

        price =  round(self.sim_prices[self.path,self.t])
        self.nt = n
        price_ttm = round(self.sim_prices[self.path,ttm])
        
        self.state = [price , ttm, self.nt, price_ttm]

        return self.state
    
    def delta(self, ttm):
        delta = self.option_delta_path[self.path, ttm]
        return delta

    def step(self,action):
        
        self.t = self.t + 1 
        price =  round(self.sim_prices[self.path,self.t])
        self.nt = self.nt + action
        ttm = self.days_to_expiry[self.t] 
        price_ttm = round(self.sim_prices[self.path,ttm])
        
        R = round(self.reward(price, self.nt)) 
        
        self.state = [price , ttm, self.nt, price_ttm]
        
        if ttm == 1 & self.path == (self.num_simulations-1):
            done = True
        elif ttm == 1:
            episode = self.path + 1 
            self.reset(episode)
            done = False
        else:
            done = False
        return self.state, R, done


In [16]:
if __name__ == "__main__":
    env = TradingEnv(num_simulations=100,num_contracts=5,multiplier=1.0,
    tick_size=0.1,kappa=0.1)

In [17]:
env = TradingEnv(num_simulations=100,num_contracts=5,multiplier=1.0,
    tick_size=0.1,kappa=0.1)

In [18]:
env.reset(0)

[100, 49, 500, 102]

In [19]:
# baseline policy

state = env.reset(0)
while True:
    pt, ttm, nt, pt_ttm = state
    delta = (pt - pt_ttm)
    #print(ttm)
    #print(nt)
    delta = env.delta(ttm)
    action = -100*round(delta)-nt
    #print(action)
    pervious_state = state
    next_state, reward, done = env.step(action)
    state = next_state
    if done:
        break
    print(next_state, reward, done)

[97, 49, -100, 102] -41673 False
[100, 48, -100, 114] -41400 False
[103, 47, -100, 114] -41127 False
[111, 46, -100, 120] -40405 False
[114, 45, -100, 120] -40136 False
[111, 44, -100, 125] -40405 False
[113, 43, -100, 120] -40225 False
[115, 42, -100, 121] -40046 False
[122, 41, -100, 120] -39422 False
[113, 40, -100, 125] -40225 False
[117, 39, -100, 128] -39867 False
[121, 38, -100, 135] -39511 False
[123, 37, -100, 128] -39333 False
[117, 36, -100, 121] -39867 False
[117, 35, -100, 116] -39867 False
[115, 34, -100, 108] -40046 False
[112, 33, -100, 115] -40315 False
[113, 32, -100, 126] -40225 False
[118, 31, -100, 132] -39778 False
[120, 30, -100, 125] -39600 False
[121, 29, -100, 120] -39511 False
[126, 28, -100, 122] -39068 False
[129, 27, -100, 121] -38803 False
[119, 26, -100, 114] -39689 False
[127, 25, -100, 127] -38979 False
[114, 24, -100, 119] -40136 False
[121, 23, -100, 129] -39511 False
[122, 22, -100, 126] -39422 False
[120, 21, -100, 121] -39600 False
[125, 20, -100,

[103, 23, -100, 137] -41127 False
[114, 22, -100, 137] -40136 False
[117, 21, -100, 135] -39867 False
[115, 20, -100, 140] -40046 False
[120, 19, -100, 148] -39600 False
[124, 18, -100, 136] -39245 False
[124, 17, -100, 136] -39245 False
[116, 16, -100, 129] -39957 False
[108, 15, -100, 123] -40675 False
[108, 14, -100, 120] -40675 False
[104, 13, -100, 112] -41037 False
[100, 12, -100, 107] -41400 False
[100, 11, -100, 108] -41400 False
[100, 10, -100, 107] -41400 False
[97, 9, -100, 102] -41673 False
[97, 8, -100, 103] -41673 False
[98, 7, -100, 108] -41582 False
[95, 6, -100, 114] -41856 False
[102, 5, -100, 109] -41218 False
[100, 4, -100, 112] -41400 False
[95, 3, -100, 99] -41856 False
[94, 2, 0, 102] -348 False
[100, 49, 500, 83] -42223 False
[93, 49, 0, 83] -339 False
[94, 48, 0, 80] -348 False
[99, 47, 0, 71] -391 False
[90, 46, 0, 75] -315 False
[91, 45, 0, 76] -323 False
[82, 44, 0, 73] -254 False
[80, 43, 0, 69] -240 False
[77, 42, 0, 67] -219 False
[69, 41, 0, 68] -169 Fal

[149, 13, 0, 87] -961 False
[157, 12, 0, 84] -1075 False
[151, 11, 0, 85] -989 False
[141, 10, 0, 81] -853 False
[144, 9, 0, 85] -893 False
[136, 8, 0, 91] -789 False
[156, 7, 0, 92] -1061 False
[148, 6, 0, 95] -947 False
[149, 5, 0, 95] -961 False
[136, 4, 0, 98] -789 False
[139, 3, 0, 99] -827 False
[140, 2, 0, 103] -840 False
[100, 49, 500, 96] -37579 False
[101, 49, 0, 96] -409 False
[103, 48, 0, 99] -427 False
[106, 47, 0, 102] -456 False
[110, 46, -100, 106] -40495 False
[124, 45, -100, 105] -39245 False
[133, 44, -100, 104] -38451 False
[135, 43, -100, 103] -38276 False
[131, 42, -100, 120] -38627 False
[126, 41, -100, 119] -39068 False
[138, 40, -100, 127] -38014 False
[141, 39, -100, 124] -37753 False
[153, 38, -100, 134] -36717 False
[140, 37, -100, 128] -37840 False
[148, 36, -100, 134] -37147 False
[135, 35, -100, 129] -38276 False
[142, 34, -100, 128] -37666 False
[130, 33, -100, 136] -38715 False
[127, 32, -100, 132] -38979 False
[131, 31, -100, 145] -38627 False
[134, 30

[73, 31, 0, 69] -193 False
[73, 30, 0, 64] -193 False
[71, 29, 0, 71] -181 False
[71, 28, 0, 66] -181 False
[71, 27, 0, 64] -181 False
[69, 26, 0, 68] -169 False
[71, 25, 0, 71] -181 False
[68, 24, 0, 69] -163 False
[64, 23, 0, 71] -141 False
[66, 22, 0, 71] -152 False
[71, 21, 0, 71] -181 False
[64, 20, 0, 73] -141 False
[69, 19, 0, 73] -169 False
[72, 18, 0, 76] -187 False
[71, 17, 0, 87] -181 False
[78, 16, 0, 85] -226 False
[83, 15, 0, 87] -261 False
[92, 14, 0, 86] -331 False
[92, 13, 0, 86] -331 False
[88, 12, 0, 86] -299 False
[84, 11, 0, 86] -269 False
[86, 10, 0, 91] -284 False
[82, 9, 0, 93] -254 False
[86, 8, 0, 91] -284 False
[87, 7, 0, 86] -291 False
[88, 6, 0, 89] -299 False
[85, 5, 0, 90] -276 False
[87, 4, 0, 95] -291 False
[91, 3, 0, 97] -323 False
[87, 2, 0, 97] -291 False
[100, 49, 500, 107] -339 False
[93, 49, -100, 107] -42039 False
[94, 48, -100, 108] -41948 False
[94, 47, -100, 104] -41948 False
[94, 46, -100, 109] -41948 False
[98, 45, -100, 110] -41582 False
[1

[75, 45, 0, 48] -206 False
[82, 44, 0, 50] -254 False
[76, 43, 0, 51] -213 False
[73, 42, 0, 52] -193 False
[73, 41, 0, 48] -193 False
[76, 40, 0, 48] -213 False
[72, 39, 0, 51] -187 False
[69, 38, 0, 52] -169 False
[62, 37, 0, 52] -130 False
[63, 36, 0, 53] -135 False
[62, 35, 0, 53] -130 False
[64, 34, 0, 57] -141 False
[66, 33, 0, 63] -152 False
[65, 32, 0, 62] -146 False
[65, 31, 0, 63] -146 False
[67, 30, 0, 64] -157 False
[61, 29, 0, 65] -125 False
[61, 28, 0, 67] -125 False
[60, 27, 0, 65] -120 False
[57, 26, 0, 62] -105 False
[58, 25, 0, 58] -110 False
[62, 24, 0, 57] -130 False
[65, 23, 0, 60] -146 False
[67, 22, 0, 61] -157 False
[65, 21, 0, 61] -146 False
[64, 20, 0, 67] -141 False
[63, 19, 0, 65] -135 False
[62, 18, 0, 65] -130 False
[63, 17, 0, 66] -135 False
[57, 16, 0, 64] -105 False
[53, 15, 0, 62] -87 False
[53, 14, 0, 63] -87 False
[52, 13, 0, 62] -83 False
[52, 12, 0, 69] -83 False
[51, 11, 0, 72] -79 False
[48, 10, 0, 76] -67 False
[48, 9, 0, 73] -67 False
[52, 8, 0

[135, 6, 0, 89] -776 False
[134, 5, 0, 89] -764 False
[125, 4, 0, 92] -656 False
[117, 3, 0, 94] -567 False
[112, 2, 0, 101] -515 False
[100, 49, 500, 88] -40675 False
[97, 49, 0, 88] -373 False
[98, 48, 0, 84] -382 False
[104, 47, 0, 81] -437 False
[96, 46, 0, 73] -365 False
[93, 45, 0, 69] -339 False
[93, 44, 0, 66] -339 False
[91, 43, 0, 71] -323 False
[98, 42, 0, 71] -382 False
[97, 41, 0, 72] -373 False
[100, 40, 0, 72] -400 False
[107, 39, 0, 79] -465 False
[110, 38, 0, 85] -495 False
[107, 37, 0, 87] -465 False
[105, 36, 0, 83] -446 False
[95, 35, 0, 80] -356 False
[94, 34, 0, 85] -348 False
[90, 33, 0, 80] -315 False
[85, 32, 0, 82] -276 False
[77, 31, 0, 82] -219 False
[80, 30, 0, 80] -240 False
[77, 29, 0, 79] -219 False
[77, 28, 0, 76] -219 False
[72, 27, 0, 73] -187 False
[69, 26, 0, 72] -169 False
[72, 25, 0, 72] -187 False
[72, 24, 0, 69] -187 False
[73, 23, 0, 72] -193 False
[76, 22, 0, 77] -213 False
[79, 21, 0, 77] -233 False
[80, 20, 0, 80] -240 False
[82, 19, 0, 77] 

[131, 14, -100, 109] -38627 False
[132, 13, -100, 112] -38539 False
[143, 12, -100, 119] -37579 False
[145, 11, -100, 104] -37406 False
[129, 10, -100, 106] -38803 False
[133, 9, -100, 103] -38451 False
[136, 8, -100, 106] -38189 False
[127, 7, -100, 99] -38979 False
[139, 6, 0, 101] -827 False
[132, 5, -100, 102] -38539 False
[124, 4, -100, 105] -39245 False
[128, 3, -100, 99] -38891 False
[127, 2, 0, 101] -679 False
[100, 49, 500, 94] -39333 False
[99, 49, 0, 94] -391 False
[99, 48, 0, 94] -391 False
[96, 47, 0, 86] -365 False
[97, 46, 0, 94] -373 False
[99, 45, 0, 96] -391 False
[103, 44, 0, 104] -427 False
[112, 43, -100, 106] -40315 False
[120, 42, -100, 108] -39600 False
[120, 41, -100, 109] -39600 False
[122, 40, -100, 107] -39422 False
[108, 39, -100, 101] -40675 False
[102, 38, -100, 86] -41218 False
[106, 37, 0, 85] -456 False
[97, 36, 0, 86] -373 False
[104, 35, 0, 88] -437 False
[104, 34, 0, 89] -437 False
[105, 33, 0, 92] -446 False
[107, 32, 0, 98] -465 False
[111, 31, 0,

[74, 6, -100, 107] -43800 False
[78, 5, -100, 111] -43426 False
[81, 4, -100, 114] -43147 False
[78, 3, -100, 107] -43426 False
[80, 2, -100, 101] -43240 False
[100, 49, 500, 114] -43147 False
[95, 49, -100, 114] -41856 False
[94, 48, -100, 115] -41948 False
[95, 47, -100, 115] -41856 False
[91, 46, -100, 120] -42223 False
[87, 45, -100, 131] -42591 False
[91, 44, -100, 143] -42223 False
[89, 43, -100, 150] -42407 False
[87, 42, -100, 147] -42591 False
[87, 41, -100, 139] -42591 False
[91, 40, -100, 134] -42223 False
[92, 39, -100, 119] -42131 False
[96, 38, -100, 128] -41765 False
[96, 37, -100, 125] -41765 False
[96, 36, -100, 137] -41765 False
[97, 35, -100, 150] -41673 False
[96, 34, -100, 148] -41765 False
[100, 33, -100, 129] -41400 False
[102, 32, -100, 127] -41218 False
[103, 31, -100, 142] -41127 False
[108, 30, -100, 143] -40675 False
[119, 29, -100, 149] -39689 False
[118, 28, -100, 138] -39778 False
[122, 27, -100, 126] -39422 False
[126, 26, -100, 122] -39068 False
[124, 2

[66, 26, 0, 69] -152 False
[70, 25, 0, 70] -175 False
[69, 24, 0, 66] -169 False
[67, 23, 0, 70] -157 False
[69, 22, 0, 75] -169 False
[69, 21, 0, 70] -169 False
[64, 20, 0, 70] -141 False
[67, 19, 0, 74] -157 False
[65, 18, 0, 72] -146 False
[63, 17, 0, 67] -135 False
[67, 16, 0, 72] -157 False
[66, 15, 0, 75] -152 False
[67, 14, 0, 77] -157 False
[64, 13, 0, 79] -141 False
[64, 12, 0, 80] -141 False
[64, 11, 0, 77] -141 False
[64, 10, 0, 83] -141 False
[64, 9, 0, 89] -141 False
[64, 8, 0, 91] -141 False
[64, 7, 0, 99] -141 False
[62, 6, 0, 99] -130 False
[58, 5, 0, 103] -110 False
[60, 4, -100, 102] -45120 False
[62, 3, -100, 100] -44930 False
[63, 2, 0, 97] -135 False
[100, 49, 500, 110] -163 False
[105, 49, -100, 110] -40946 False
[100, 48, -100, 103] -41400 False
[103, 47, -100, 104] -41127 False
[103, 46, -100, 109] -41127 False
[103, 45, -100, 115] -41127 False
[98, 44, -100, 106] -41582 False
[103, 43, -100, 98] -41127 False
[106, 42, 0, 96] -456 False
[111, 41, 0, 97] -505 Fal

[111, 9, -100, 102] -40405 False
[119, 8, -100, 108] -39689 False
[119, 7, -100, 106] -39689 False
[109, 6, -100, 106] -40585 False
[115, 5, -100, 115] -40046 False
[123, 4, -100, 112] -39333 False
[116, 3, -100, 102] -39957 False
[112, 2, -100, 98] -40315 False
[100, 49, 500, 125] -475 False
[99, 49, -100, 125] -41491 False
[95, 48, -100, 110] -41856 False
[97, 47, -100, 105] -41673 False
[95, 46, -100, 101] -41856 False
[97, 45, -100, 100] -41673 False
[93, 44, -100, 95] -42039 False
[91, 43, 0, 94] -323 False
[85, 42, 0, 98] -276 False
[87, 41, 0, 96] -291 False
[83, 40, 0, 102] -261 False
[86, 39, -100, 103] -42684 False
[83, 38, -100, 103] -42961 False
[83, 37, -100, 107] -42961 False
[100, 36, -100, 105] -41400 False
[97, 35, -100, 106] -41673 False
[105, 34, -100, 104] -40946 False
[105, 33, -100, 99] -40946 False
[109, 32, 0, 98] -485 False
[106, 31, 0, 99] -456 False
[103, 30, 0, 103] -427 False
[98, 29, -100, 106] -41582 False
[96, 28, -100, 107] -41765 False
[107, 27, -100, 

[90, 26, 0, 81] -315 False
[86, 25, 0, 86] -284 False
[81, 24, 0, 90] -247 False
[79, 23, 0, 80] -233 False
[78, 22, 0, 88] -226 False
[73, 21, 0, 92] -193 False
[76, 20, 0, 96] -213 False
[70, 19, 0, 95] -175 False
[70, 18, 0, 98] -175 False
[74, 17, 0, 104] -200 False
[74, 16, -100, 107] -43800 False
[76, 15, -100, 108] -43613 False
[74, 14, -100, 111] -43800 False
[77, 13, -100, 104] -43519 False
[73, 12, -100, 111] -43893 False
[77, 11, -100, 103] -43519 False
[71, 10, -100, 97] -44081 False
[72, 9, 0, 102] -187 False
[71, 8, -100, 104] -44081 False
[72, 7, -100, 96] -43987 False
[68, 6, 0, 92] -163 False
[75, 5, 0, 93] -206 False
[71, 4, 0, 96] -181 False
[64, 3, 0, 98] -141 False
[66, 2, 0, 92] -152 False
[100, 49, 500, 143] -146 False
[98, 49, -100, 143] -41582 False
[98, 48, -100, 143] -41582 False
[104, 47, -100, 138] -41037 False
[107, 46, -100, 133] -40765 False
[112, 45, -100, 140] -40315 False
[115, 44, -100, 142] -40046 False
[112, 43, -100, 141] -40315 False
[103, 42, -1

[121, 17, -100, 107] -39511 False
[110, 16, -100, 103] -40495 False
[111, 15, -100, 102] -40405 False
[104, 14, -100, 107] -41037 False
[107, 13, -100, 106] -40765 False
[105, 12, -100, 100] -40946 False
[114, 11, -100, 97] -40136 False
[115, 10, 0, 91] -546 False
[120, 9, 0, 89] -600 False
[118, 8, 0, 92] -578 False
[122, 7, 0, 96] -622 False
[136, 6, 0, 103] -789 False
[139, 5, -100, 104] -37927 False
[139, 4, -100, 102] -37927 False
[131, 3, -100, 93] -38627 False
[145, 2, 0, 96] -906 False
[100, 49, 500, 62] -827 False
[100, 49, 0, 62] -400 False
[88, 48, 0, 60] -299 False
[91, 47, 0, 60] -323 False
[90, 46, 0, 62] -315 False
[92, 45, 0, 68] -331 False
[89, 44, 0, 63] -307 False
[84, 43, 0, 56] -269 False
[84, 42, 0, 59] -269 False
[79, 41, 0, 55] -233 False
[82, 40, 0, 57] -254 False
[84, 39, 0, 58] -269 False
[84, 38, 0, 61] -269 False
[83, 37, 0, 66] -261 False
[83, 36, 0, 64] -261 False
[79, 35, 0, 66] -233 False
[82, 34, 0, 63] -254 False
[78, 33, 0, 62] -226 False
[78, 32, 0,

IndexError: index 100 is out of bounds for axis 0 with size 100

In [15]:
env.state

[110, 1, 0, 96]

In [16]:
env.action_space

Discrete(1001, start=-500)