In [1]:
from tracemalloc import start
from unicodedata import name
import gym 
from gym import spaces 
from gym.utils import seeding 
import numpy as np 
from Simulator.simulations import OptionSimulation

In [211]:
class TradingEnv(gym.Env):
    trading_days = 252 #Number of trading days in one year 
    num_of_shares = 100 #Vanilla options contract size
    """
    Trading Enviroment class with all the modules related 
    to performing trading under a controlled simulation environment.

    Parameters:
        num_simulations: The number of GBM and BS simulations to run for the agent to train on
        num_contracts: The number of contracts the agent will hold.
        multiplier: Float value required for the intensity of the bid-offer spread 
        tick_size: Used for computing the cost relative to the midpoint of the bid-offer spread
        kappa: The risk factor of the portfolio
    """

    def __init__(self,num_simulations=int,num_contracts=int,
    multiplier=float,tick_size=float,kappa=float):

        self.num_simulations = num_simulations
        self.num_contracts = num_contracts 
        self.multiplier = multiplier 
        self.tick_size = tick_size
        self.kappa = kappa  

        os = OptionSimulation(100,self.num_simulations) 

        self.sim_prices = os.GBM(50,5,0.5,time_increment=1)
        self.days_to_expiry = os.ttm #Creates an array of days left to expiry 
        self.option_price_path, self.option_delta_path = os.BS_call(self.days_to_expiry,self.sim_prices,100,0.05,0,0)

        #Action space (Discrete)
        self.num_actions = self.num_contracts*self.num_of_shares #Number of actions 
        self.action_space = spaces.Discrete(1001,start=-self.num_actions) #Discrete action space 

        if self.num_contracts > 10:
            raise ValueError("The maximum number of contracts in the simulation cannot be more than 10.")

    def _cost_of_trade(self,n):
        #n: Number of shares 
        cost = self.multiplier * self.tick_size * (np.abs(n) * 0.01*n*n)
        return cost 

    def _wealth_of_trade(self,pt,n):
        #W_{t} = q_{t} - c_{t} (pt: Price of the stock at time 't')
        ct = self._cost_of_trade(n)
        wt = pt - ct 
        return wt 

    def reward(self, pt, n):
        '''
        Computes the reward given to the agent

        Parameters:
            pt: Price at time 't'
            n: Number of shares at time 't'
        '''
        wt = self._wealth_of_trade(pt, n)
        rwd = wt - (self.kappa*0.5)*(wt**2) 
        return rwd 
    
    def reset(self, path):
        # repeatedly go through available simulated paths (if needed)
        self.t = 0
        self.path = path
        ttm = self.days_to_expiry[0]-1

        nt = 100 #no of shares
        #print(self.sim_prices[self.path])
        #print(self.sim_prices[self.path,0])
        #print(self.option_price_path[self.path,self.t])

        price =  round(self.sim_prices[self.path,self.t])
        action = - round(100*(self.option_delta_path[self.path, self.t])) - nt
        nt = action
        
        self.state = [price , action, ttm, nt]

        return self.state

    def step(self,action):
        
        self.t = self.t + 1 
        price =  round(self.sim_prices[self.path,self.t])
        nt = action
        ttm = self.days_to_expiry[self.t] 
        action = - 100*round(self.option_delta_path[self.path, self.t]) - nt
        R = self.reward(price, nt)
        nt = action 
        
        self.state = [price , action, ttm, nt]
        if ttm == self.t+1:
            done = True
        else:
            done = False
        
        return self.state, R, done


In [212]:
if __name__ == "__main__":
    env = TradingEnv(num_simulations=100,num_contracts=5,multiplier=1.0,
    tick_size=0.1,kappa=0.1)

In [213]:
env = TradingEnv(num_simulations=100,num_contracts=5,multiplier=1.0,
    tick_size=0.1,kappa=0.1)

In [214]:
env.reset(path = 1)

[100, -165, 249, -165]

In [215]:
state = env.reset(1)
for i in range(10):
    action = state[1]
    state, reward, done = env.step(action)
    print(state, reward, done)

[98, 65, 249, 65] -969810.85078125 False
[97, -165, 248, -165] -1755.15703125 False
[99, 65, 247, 65] -969370.48828125 False
[110, -165, 246, -165] -1519.6945312500002 False
[114, 65, 245, 65] -962777.05078125 False
[116, -165, 244, -165] -1416.71953125 False
[107, 65, 243, 65] -965851.1882812501 False
[99, -165, 242, -165] -1717.83203125 False
[97, 65, 241, 65] -970251.3132812501 False
[104, -165, 240, -165] -1626.26953125 False
