In [9]:
from tracemalloc import start
from unicodedata import name
import gym 
from gym import spaces 
from gym.utils import seeding 
import numpy as np 
from simulations import OptionSimulation




In [98]:
def hello():
    if 5 < 7:
        pass
    print('ok')

In [10]:
action = spaces.Discrete(1001,start=-100) #Discrete action space 

In [93]:
action.n

1001

In [11]:
from tracemalloc import start
from unicodedata import name
import gym 
from gym import spaces 
from gym.utils import seeding 
import numpy as np 
from simulations import OptionSimulation


class TradingEnv(gym.Env):
    trading_days = 252 #Number of trading days in one year 
    num_of_shares = 100 #Vanilla options contract size
    
    def __init__(self,total_episodes=int,num_contracts=int,
    multiplier=float,tick_size=float,kappa=float):
        
        """
        Trading Enviroment class with all the modules related 
        to performing trading under a controlled simulation environment.

        Parameters:
            num_simulations: The number of GBM and BS simulations to run for the agent to train on
            num_contracts: The number of contracts the agent will hold.
            multiplier: Float value required for the intensity of the bid-offer spread 
            tick_size: Used for computing the cost relative to the midpoint of the bid-offer spread
            kappa: The risk factor of the portfolio
        """

        self.total_episodes = total_episodes
        self.num_contracts = num_contracts 
        self.multiplier = multiplier 
        self.tick_size = tick_size
        self.kappa = kappa  

        os = OptionSimulation(100,self.total_episodes) 
        
        #can add a maturity term
        self.sim_prices = os.GBM(50,0.1,time_increment=1)
        self.days_to_expiry_normalized = os.ttm/self.trading_days #Only to be used for the calculation of BS call price
        self.days_to_expiry = os.ttm #Creates an array of days left to expiry 
        self.option_price_path, self.option_delta_path = os.BS_call(self.days_to_expiry_normalized,self.sim_prices,100,0.01,0,0)
        
        #Action space (Discrete)
        self.num_actions = self.num_contracts*self.num_of_shares #Number of actions 
        self.action_range = (self.num_actions * 2)+1 
        self.action_space = spaces.Discrete(self.action_range,start=-self.num_actions) #Discrete action space 

        if self.num_contracts > 10:
            raise ValueError("The maximum number of contracts in the simulation cannot be more than 10.")

    @classmethod
    def change_base_params(cls,shares=None,days=None):
        cls.num_of_shares = shares 
        cls.trading_days = days 
        print(f'Number of shares per contract changed to {cls.num_of_shares} shares\n')
        print(f'Number of trading days changed to {cls.trading_days} shares\n')
            

    def _cost_of_trade(self,n):
        #n: Number of shares 
        cost = self.multiplier * self.tick_size * (np.abs(n) * 0.01*n*n)
        return cost 

    def _wealth_of_trade(self,pt,n):
        #W_{t} = q_{t} - c_{t} (pt: Price of the stock at time 't')
        ct = self._cost_of_trade(n)
        wt = pt - ct 
        return wt 

    def reward(self, pt, n):
        '''
        Computes the reward given to the agent

        Parameters:
            pt: Price at time 't'
            n: Number of shares at time 't'

        Returns: 
            rwd: The reward value from the trade
        '''
        wt = self._wealth_of_trade(pt, n)
        rwd = wt - (self.kappa*0.5)*(wt**2) 
        return rwd 
        print('GG')

    def take_action(self,ttm,nt):
        '''
        Takes the next action according to the policy

        Parameters: 
            ttm: Time remaining to option's maturity 
            nt: Number of shares held at time 't'
        '''
        return -100 * round(self.delta(ttm)) - nt    
    
    def reset(self, episode):
        '''
        Resets the environment in order to start a new episode for the simulation

        Parameters:
            path: The path the agent is following 

        Returns: 
            self.state: The state vector of the agent
        '''
        # repeatedly go through available simulated paths (if needed)
        self.t = 0 #Time point within the time series 
        self.path = episode #The time series of reference 
        # _ttm_index = self.days_to_expiry[0]-1
        ttm = self.days_to_expiry[0]

        price =  round(self.sim_prices[self.path,self.t])
        self.nt = self.num_of_shares #Number of shares at time 't'
        # price_ttm = round(self.sim_prices[self.path,ttm])
        
        self.state = [price, ttm, self.nt]

        return self.state
    
    def delta(self, ttm):
        #Returns the option delta 
        delta = self.option_delta_path[self.path, ttm-1] #-1 due to Python indexing 
        return delta

    def step(self,action):
        '''
        Step function to allow the agent to transition into the next state of the episode 

        Parameters: 
            action: The action the agent takes

        Returns: 
            self.state: The state vector of the agent 
            R: The reward value 
            done: Boolean value of whether the episode is over or not
        '''
        self.t = self.t + 1 
        price =  round(self.sim_prices[self.path,self.t],2)
        self.nt = self.nt + action
        ttm = self.days_to_expiry[self.t]
        # price_ttm = round(self.sim_prices[self.path,ttm],2)
        
        reward = round(self.reward(price, self.nt)) 
        self.state = [price, ttm, self.nt]

        if ttm == 0:
            done = 1
            return self.state, reward, done 
        else:
            done = 0
    
        return self.state, reward, done 
        
        # if ttm == 0 & self.path == (self.num_simulations):
        #     done = 1 #1 = True 
        # elif ttm == 0:
        #     episode = self.path
        #     self.reset(episode)
        #     done = 0 #0 = False 
        # else:
        #     done = 0
        return self.state, reward, done

In [12]:
env = TradingEnv(total_episodes=100,num_contracts=5,multiplier=1.0,
    tick_size=0.1,kappa=0.1)

In [13]:
env.days_to_expiry

array([50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34,
       33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
       16, 15, 14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  3,  2,  1,  0])

In [14]:
env.sim_prices.shape

(100, 51)

In [15]:
np.arange(50,-1,-1)

array([50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34,
       33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
       16, 15, 14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  3,  2,  1,  0])

In [6]:
env.reset(0)

[100, 50, 100]

In [7]:
# env.take_action(50,100)
import numpy as np
nums = [1,2,3]
state = np.reshape(nums, [1,3])

In [8]:
state

array([[1, 2, 3]])

In [19]:
# baseline policy

state = env.reset(0)
for _ in range(50):
    pt, ttm, nt = state
    # delta = (pt - pt_ttm)
    #print(ttm)
    #print(nt)
    action = env.take_action(ttm, nt)
    #print(action)
    pervious_state = state
    next_state, reward, done = env.step(action)
    state = next_state 
    print(next_state, reward, done)

[101.21, 49, -100] -41290 0
[101.04, 48, -100] -41305 0
[101.2, 47, -100] -41291 0
[101.54, 46, -100] -41260 0
[102.08, 45, -100] -41211 0
[102.13, 44, -100] -41206 0
[102.13, 43, -100] -41206 0
[104.31, 42, -100] -41009 0
[106.28, 41, -100] -40830 0
[105.45, 40, -100] -40906 0
[108.23, 39, -100] -40654 0
[108.71, 38, -100] -40611 0
[108.68, 37, -100] -40614 0
[109.76, 36, -100] -40517 0
[110.1, 35, -100] -40486 0
[111.24, 34, -100] -40383 0
[111.01, 33, -100] -40404 0
[110.96, 32, -100] -40409 0
[111.35, 31, -100] -40374 0
[110.09, 30, -100] -40487 0
[108.89, 29, -100] -40595 0
[109.35, 28, -100] -40554 0
[108.1, 27, -100] -40666 0
[107.43, 26, -100] -40727 0
[107.9, 25, -100] -40684 0
[106.7, 24, -100] -40793 0
[106.76, 23, -100] -40787 0
[107.17, 22, -100] -40750 0
[108.4, 21, -100] -40639 0
[109.86, 20, -100] -40508 0
[109.92, 19, -100] -40502 0
[109.44, 18, -100] -40545 0
[109.69, 17, -100] -40523 0
[110.43, 16, -100] -40456 0
[110.84, 15, -100] -40419 0
[110.31, 14, -100] -40467 

In [2]:
import numpy as np

np.zeros((64,3))

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0