# Environment Setup

Import libraries

In [6]:
import gym
from gym import spaces
import numpy as np
import pandas as pd

Global variables

In [7]:
# Default starting cash balance used when an episode is reset without an
# explicit balance.
MAX_Money = 1_000_000

Environment Class
The action is the number of shares to buy or sell (positive to buy, negative to sell).
The state space contains the open, high, low, close and volume values, each divided by the highest price observed so far.
Info contains the balance, the highest price, the current price (the price at which the previous action was taken), the time stamp, the shares held, the maximum net worth so far, the minimum brokerage and the brokerage rate.

In [10]:
class StockEnv(gym.Env):
    """Single-stock trading environment driven by an OHLCV data frame.

    An action is a signed number of shares: positive buys, negative sells,
    zero holds.  Each observation is the current row's Open, High, Low, Close
    and Volume divided by the highest trade price seen so far in the episode.

    The data frame is addressed with ``df.loc[step, column]`` for integer
    steps starting at 0 and must provide the columns ``Open``, ``High``,
    ``Low``, ``Close``, ``Volume`` and ``time_stamp``.

    Optional keyword arguments:
        Balance: default starting cash for ``reset`` (defaults to MAX_Money).
        Max_Shares: largest trade size used to size the action space.
        Broke_limit: minimum brokerage charged per trade.
        Broke_rate: brokerage charged as a fraction of the traded amount.

    NOTE(review): ``action_space`` is ``Discrete(2 * MAX_shares + 1)``, whose
    samples lie in ``[0, 2 * MAX_shares]``, while ``step`` interprets its
    argument as a signed share count.  Callers sampling from the space
    presumably need to subtract ``MAX_shares`` first - confirm intended use.
    """

    metadata = {'render.modes': ['human']}

    def __init__(self, df, **kwargs):
        """Store the price data and trading parameters and build the spaces."""
        super().__init__()

        self.MAX_shares = kwargs.get("Max_Shares", 1000)
        self.Min_Brokerage = kwargs.get("Broke_limit", 30)
        self.Brokerage_rate = kwargs.get("Broke_rate", 0.001)
        # Starting cash used by reset() when no explicit balance is passed.
        self.initial_balance = kwargs.get("Balance", MAX_Money)

        self.df = df
        self.action_space = spaces.Discrete(2 * self.MAX_shares + 1)
        # NOTE(review): Volume divided by a price may well exceed the upper
        # bound of 1 declared here - confirm the intended normalisation.
        self.observation_space = spaces.Box(
            low=np.zeros(5, dtype=np.float16),
            high=np.ones(5, dtype=np.float16),
            dtype=np.float16,
        )

    def _get_price(self):
        """Draw a trade price uniformly between the current Open and Close."""
        row_open = self.df.loc[self.current_step, "Open"]
        row_close = self.df.loc[self.current_step, "Close"]
        return np.random.uniform(row_open, row_close)

    def _observe(self):
        """Return ``(frame, info)`` for the current step.

        ``frame`` holds Open, High, Low, Close and Volume divided by the
        highest trade price so far; ``info`` is a dict of account state.
        """
        step = self.current_step
        columns = ('Open', 'High', 'Low', 'Close', 'Volume')
        frame = np.array([self.df.loc[step, column] for column in columns])
        frame = frame / self.highest_price
        info = {
            'balance': self.balance,
            'highest_price': self.highest_price,
            'current_price': self.current_price,
            'time': self.df.loc[step, 'time_stamp'],
            'shares_held': self.shares_held,
            'max_worth': self.max_net_worth,
            'broke_limit': self.Min_Brokerage,
            'broke_rate': self.Brokerage_rate,
        }
        return frame, info

    def reset(self, balance=None, initial_shares=0):
        """Start a new episode at a random row and return ``(frame, info)``.

        Args:
            balance: starting cash; ``None`` uses the ``Balance`` keyword
                given at construction, or MAX_Money if none was given.
            initial_shares: number of shares held at the start.
        """
        if balance is None:
            balance = self.initial_balance

        # The upper bound is exclusive, so the last row is never the starting
        # row; max(..., 1) keeps a one-row frame from raising ValueError.
        self.current_step = np.random.randint(0, max(len(self.df) - 1, 1))
        self.balance = balance
        self.shares_held = initial_shares
        self.current_price = self._get_price()
        self.net_worth = self.balance + initial_shares * self.current_price
        self.initial_worth = self.net_worth
        self.max_net_worth = self.net_worth
        self.highest_price = self.current_price
        return self._observe()

    def _broke(self, amount):
        """Return the brokerage for trading ``amount``: rate-based, floored at the minimum."""
        return max(amount * self.Brokerage_rate, self.Min_Brokerage)

    def _buy_cost(self, quantity, price):
        """Return the total cash needed to buy ``quantity`` shares, brokerage included."""
        gross = quantity * price
        return gross + self._broke(gross)

    def _sell_proceeds(self, quantity, price):
        """Return the cash received for selling ``quantity`` shares, net of brokerage."""
        gross = quantity * price
        return gross - self._broke(gross)

    def _sell(self, quantity, price):
        """Sell up to ``quantity`` shares at ``price`` without overdrawing the balance."""
        quantity = min(quantity, self.shares_held)
        if quantity <= 0:
            # Nothing to sell: a zero-share trade must not be charged brokerage.
            return

        proceeds = self._sell_proceeds(quantity, price)
        if self.balance + proceeds < 0:
            # The minimum brokerage exceeds sale value plus cash.  Raise the
            # quantity to the smallest lot whose value covers the shortfall;
            # if holdings do not allow that, skip the trade entirely.
            quantity = int(np.ceil((self.Min_Brokerage - self.balance) / price))
            if quantity > self.shares_held:
                return
            proceeds = self._sell_proceeds(quantity, price)
            if self.balance + proceeds < 0:
                return

        self.balance += proceeds
        self.shares_held -= quantity

    def _buy(self, quantity, price):
        """Buy up to ``quantity`` shares at ``price``, limited by the available balance."""
        if self._buy_cost(quantity, price) > self.balance:
            # Cost is max(q*p*(1+rate), q*p + minimum), so BOTH bounds must
            # hold: the affordable quantity is the smaller of the two.
            by_rate = np.floor(self.balance / ((1 + self.Brokerage_rate) * price))
            by_minimum = np.floor((self.balance - self.Min_Brokerage) / price)
            quantity = int(max(min(by_rate, by_minimum), 0))
            # Guard against floating-point rounding at the boundary.
            while quantity > 0 and self._buy_cost(quantity, price) > self.balance:
                quantity -= 1

        if quantity <= 0:
            # Cannot afford a single share: no trade and no brokerage.
            return

        self.balance -= self._buy_cost(quantity, price)
        self.shares_held += quantity

    def _take_action(self, action):
        """Execute a signed share order and return the change in net worth."""
        self.current_price = self._get_price()
        self.highest_price = max(self.highest_price, self.current_price)

        shares = int(action)
        if shares < 0:
            self._sell(-shares, self.current_price)
        elif shares > 0:
            self._buy(shares, self.current_price)

        new_worth = self.balance + self.shares_held * self.current_price
        reward = new_worth - self.net_worth
        self.net_worth = new_worth
        if self.net_worth > self.max_net_worth:
            self.max_net_worth = self.net_worth
        return reward

    def step(self, action):
        """Apply ``action``, advance one row and return ``(obs, reward, done, info)``.

        The step index wraps to 0 after the last row; the episode is done
        once net worth is no longer positive.
        """
        reward = self._take_action(action)
        self.current_step += 1
        if self.current_step > len(self.df) - 1:
            self.current_step = 0

        done = bool(self.net_worth <= 0)
        obs, info = self._observe()

        return obs, reward, done, info

    def render(self, mode='human', close=False):
        """Print the current step, balance, holdings, net worth and profit."""
        profit = self.net_worth - self.initial_worth
        print(f'Step: {self.current_step}')
        print(f'Balance: {self.balance}')
        print(f'Shares hold: {self.shares_held}')
        print(f'Net Worth:{self.net_worth}')
        print(f'Profit: {profit}')