In [2]:
from tracemalloc import start
from unicodedata import name
import gym 
from gym import spaces 
from gym.utils import seeding 
import numpy as np 
from simulations import OptionSimulation

ModuleNotFoundError: No module named 'Simulator'

In [3]:
class TradingEnv(gym.Env):
    trading_days = 252 #Number of trading days in one year 
    num_of_shares = 100 #Vanilla options contract size
    
    def __init__(self,num_simulations=int,num_contracts=int,
    multiplier=float,tick_size=float,kappa=float):
        
        """
        Trading Enviroment class with all the modules related 
        to performing trading under a controlled simulation environment.

        Parameters:
            num_simulations: The number of GBM and BS simulations to run for the agent to train on
            num_contracts: The number of contracts the agent will hold.
            multiplier: Float value required for the intensity of the bid-offer spread 
            tick_size: Used for computing the cost relative to the midpoint of the bid-offer spread
            kappa: The risk factor of the portfolio
        """

        self.num_simulations = num_simulations
        self.num_contracts = num_contracts 
        self.multiplier = multiplier 
        self.tick_size = tick_size
        self.kappa = kappa  

        os = OptionSimulation(100,self.num_simulations) 
        
        #can add a maturity term
        self.sim_prices = os.GBM(50,0.1,time_increment=1)
        self.days_to_expiry_normalized = os.ttm/self.trading_days #Only to be used for the calculation of BS call price
        self.days_to_expiry = os.ttm #Creates an array of days left to expiry 
        self.option_price_path, self.option_delta_path = os.BS_call(self.days_to_expiry_normalized,self.sim_prices,100,0.01,0,0)
        
        #Action space (Discrete)
        self.num_actions = self.num_contracts*self.num_of_shares #Number of actions 
        self.action_space = spaces.Discrete(1001,start=-self.num_actions) #Discrete action space 

        if self.num_contracts > 10:
            raise ValueError("The maximum number of contracts in the simulation cannot be more than 10.")

    @classmethod
    def change_base_params(cls,shares=None,days=None):
        cls.num_of_shares = shares 
        cls.trading_days = days 
        print(f'Number of shares per contract changed to {cls.num_of_shares} shares\n')
        print(f'Number of trading days changed to {cls.trading_days} shares\n')
            
            
    def _cost_of_trade(self,n):
        #n: Number of shares 
        cost = self.multiplier * self.tick_size * (np.abs(n) * 0.01*n*n)
        return cost 

    def _wealth_of_trade(self,pt,n):
        #W_{t} = q_{t} - c_{t} (pt: Price of the stock at time 't')
        ct = self._cost_of_trade(n)
        wt = pt - ct 
        return wt 

    def reward(self, pt, n):
        '''
        Computes the reward given to the agent

        Parameters:
            pt: Price at time 't'
            n: Number of shares at time 't'

        Returns: 
            rwd: The reward value from the trade
        '''
        wt = self._wealth_of_trade(pt, n)
        rwd = wt - (self.kappa*0.5)*(wt**2) 
        return rwd

    def take_action(self,ttm,nt):
        '''
        Takes the next action according to the policy

        Parameters: 
            ttm: Time remaining to option's maturity 
            nt: Number of shares held at time 't'
        '''
        return -100 * round(self.delta(ttm)) - nt
    
    def reset(self, path):
        '''
        Resets the environment in order to start a new episode for the simulation

        Parameters:
            path: Path index on which the environment needs to be reset

        Returns: 
            self.state: The state vector of the agent
        '''
        # repeatedly go through available simulated paths (if needed)
        self.t = 0
        self.path = path
        ttm = self.days_to_expiry[0]-1

        n = self.num_of_shares #no of shares
        
        #K : strike price (to be given)

        #sim_prices -> (path,t)
        price =  round(self.sim_prices[self.path,self.t],2)
        self.nt = n #Number of shares at time 't'
        price_ttm = round(self.sim_prices[self.path,ttm],2)
        
        self.state = [price, ttm, self.nt, price_ttm]

        return self.state
    
    def delta(self, ttm):
        #Returns the option delta 
        delta = self.option_delta_path[self.path, ttm]
        return delta

    def step(self,action):
        '''
        Step function to allow the agent to transition into the next state of the episode 

        Parameters: 
            action: The action the agent takes

        Returns: 
            self.state: The state vector of the agent 
            R: The reward value 
            done: Boolean value of whether the episode is over or not
        '''
        self.t = self.t + 1 
        price =  round(self.sim_prices[self.path,self.t])
        self.nt = self.nt + action
        ttm = self.days_to_expiry[self.t] 
        price_ttm = round(self.sim_prices[self.path,ttm])
        
        reward = round(self.reward(price, self.nt)) 
        self.state = [price , ttm, self.nt, price_ttm]
        
        if ttm == 0:
            done = True
        else:
            done = False
        
        return self.state, reward, done

In [4]:
env = TradingEnv(num_simulations=100,num_contracts=5,multiplier=1.0,
    tick_size=0.1,kappa=0.1)

NameError: name 'OptionSimulation' is not defined

In [30]:
env.reset(path = 0)

[100.0, 49, 100, 99.73]

In [27]:
# baseline policy

state = env.reset(0)
for _ in range(50):
    pt, ttm, nt, pt_ttm = state
    # delta = (pt - pt_ttm)
    #print(ttm)
    #print(nt)
    action = env.take_action(ttm, nt)
    #print(action)
    pervious_state = state
    next_state, reward, done = env.step(action)
    state = next_state
    if done:
        break
    print(next_state, reward, done)

[98, 49, -100, 105] -41582 False
[98, 48, -100, 106] -41582 False
[97, 47, -100, 106] -41673 False
[98, 46, -100, 106] -41582 False
[98, 45, -100, 104] -41582 False
[98, 44, -100, 102] -41582 False
[98, 43, -100, 103] -41582 False
[97, 42, -100, 102] -41673 False
[97, 41, -100, 103] -41673 False
[98, 40, -100, 102] -41582 False
[98, 39, -100, 103] -41582 False
[99, 38, -100, 104] -41491 False
[99, 37, -100, 104] -41491 False
[99, 36, -100, 103] -41491 False
[99, 35, -100, 105] -41491 False
[100, 34, -100, 103] -41400 False
[100, 33, -100, 102] -41400 False
[101, 32, -100, 103] -41309 False
[99, 31, -100, 103] -41491 False
[98, 30, -100, 101] -41582 False
[98, 29, -100, 99] -41582 False
[97, 28, 0, 99] -373 False
[98, 27, 0, 99] -382 False
[95, 26, 0, 96] -356 False
[95, 25, 0, 95] -356 False
[96, 24, 0, 95] -365 False
[99, 23, 0, 98] -391 False
[99, 22, 0, 97] -391 False
[99, 21, 0, 98] -391 False
[101, 20, 0, 98] -409 False
[103, 19, 0, 99] -427 False
[103, 18, 0, 101] -427 False
[102

IndexError: index 50 is out of bounds for axis 1 with size 50