# In [2]:
class Inventory:
    """Mutable record of a trading account's cash, position and realised profit.

    Fields are plain attributes. They can also be read and written with the
    display-style keys that ``TradeEnv`` uses (``inventory["Cash Balance"]``,
    ``inventory["Holdings"]``, ...), so an instance can be handed to the
    environment wherever a dict-shaped inventory is expected.

    Args:
        starting_cash: Cash balance the account starts with.
    """

    # Display-style key -> attribute name, for subscript access.
    _KEY_TO_ATTR = {
        "Profit": "profit",
        "Last Buy Price": "last_buy_price",
        "Last Sell Price": "last_sell_price",
        "Cash Balance": "cash",
        "Holdings": "holdings",
    }

    def __init__(self, starting_cash) -> None:
        self.profit = 0
        self.last_buy_price = None
        self.last_sell_price = None
        self.cash = starting_cash
        self.strategy_vol = 0
        # Quantity of the asset currently held.
        # NOTE(review): `strategy_vol` may have been meant for this purpose;
        # it is left untouched because nothing visible reads or writes it.
        self.holdings = 0

    def _attr_for(self, key) -> str:
        """Return the attribute name behind *key*, raising ``KeyError`` if unknown."""
        try:
            return self._KEY_TO_ATTR[key]
        except (KeyError, TypeError):
            raise KeyError(key) from None

    def __getitem__(self, key):
        return getattr(self, self._attr_for(key))

    def __setitem__(self, key, value) -> None:
        setattr(self, self._attr_for(key), value)

    def __contains__(self, key) -> bool:
        try:
            return key in self._KEY_TO_ATTR
        except TypeError:  # unhashable key
            return False

    def __iter__(self):
        # Without this, iteration would fall back to integer indexing through
        # __getitem__ and fail with a confusing KeyError(0).
        return iter(self._KEY_TO_ATTR)

    def __repr__(self) -> str:
        fields = ", ".join(
            "{}={!r}".format(name, value) for name, value in vars(self).items()
        )
        return "{}({})".format(type(self).__name__, fields)

# In [47]:
import copy

import gymnasium as gym
from gymnasium import spaces
from gymnasium.envs.registration import register

import numpy as np
import pandas as pd

class TradeEnv(gym.Env):
    """Single-asset trading environment with discrete buy / sell / hold actions.

    Each observation is one row of the supplied market data; the first
    element of a row is used as the current price. A buy spends the whole
    cash balance, a sell liquidates the whole position.

    Args:
        input_dim: Number of features per row (length of one observation).
        window_size: Stored on the instance but currently unused; the
            observation is a single row, not a window of rows.
        max_steps: Maximum number of steps per episode.
        df: Market data, one row per time step. Anything ``np.asarray`` can
            turn into a 2-D float array (ndarray, nested list, all-numeric
            ``pandas.DataFrame``). Captured at construction time.
        inventory: Mapping-like account state with the keys
            ``"Cash Balance"``, ``"Holdings"``, ``"Last Buy Price"``,
            ``"Last Sell Price"`` and ``"Profit"``. Defaults to a fresh dict
            with a cash balance of 1000. A pristine copy is kept and restored
            on every ``reset``.
        verbose: When true, print the inventory after every reward
            computation.

    Raises:
        ValueError: If ``df`` is given but is not a non-empty 2-D table with
            ``input_dim`` columns.
    """

    # Action codes understood by ``step`` / ``calculate_pnl``.
    ACTION_BUY = 0
    ACTION_SELL = 1
    ACTION_HOLD = 2

    def __init__(self, input_dim=6, window_size=5, max_steps=1000, df=None, inventory=None, *, verbose=False):
        super().__init__()
        self.input_dim = input_dim
        self.window_size = window_size
        self.max_steps = max_steps  # episode terminates when day ends
        self.verbose = verbose

        self.action_space = spaces.Discrete(3)
        # One unbounded float per feature of a single data row.
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(input_dim,), dtype=np.float64
        )

        self.df = df
        # Row-indexable float view of `df`; indexing a DataFrame with an
        # integer would select a column, not a row.
        self._data = self._as_rows(df, input_dim)

        # A default built here (not in the signature) is never shared
        # between environment instances.
        if inventory is None:
            inventory = self._default_inventory()
        self._initial_inventory = copy.deepcopy(inventory)
        self.inventory = inventory

        self._reset_episode_state()

    @staticmethod
    def _default_inventory(starting_cash=1000):
        """Return a fresh dict-shaped inventory holding only cash."""
        return {
            "Cash Balance": starting_cash,
            "Holdings": 0,
            "Last Buy Price": None,
            "Last Sell Price": None,
            "Profit": 0,
        }

    @staticmethod
    def _as_rows(df, input_dim):
        """Convert *df* to a validated 2-D float array, or ``None`` if absent."""
        if df is None:
            return None
        data = np.asarray(df, dtype=np.float64)
        if data.ndim != 2 or data.shape[0] == 0:
            raise ValueError(
                f"df must be a non-empty 2-D table of rows, got shape {data.shape}"
            )
        if data.shape[1] != input_dim:
            raise ValueError(
                f"df has {data.shape[1]} columns but input_dim is {input_dim}"
            )
        return data

    def _reset_episode_state(self):
        """Put every per-episode counter and cursor back to its start value."""
        self.position = 0
        self.current_step = 0
        self.df_counter = 0
        self.state = None
        self.hold_counter = 0
        self.impossible_sell_counter = 0
        self.impossible_buy_counter = 0
        self.current_price = 0

    def step(self, action):
        """Apply *action* at the current price, then advance one data row.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.
            ``terminated`` is true once ``max_steps`` steps have been taken
            or the last data row has been reached; ``truncated`` is always
            ``False``. ``info["Profit"]`` is the realised profit so far.

        Raises:
            RuntimeError: If called before ``reset``.
            ValueError: If *action* is not 0, 1 or 2.
        """
        if self.state is None:
            raise RuntimeError("reset() must be called before step()")

        # Reward is computed against the row the agent observed, before the
        # cursor moves on.
        reward = self.calculate_pnl(action)

        self.current_step += 1
        last_index = len(self._data) - 1
        # Clamp so that running out of data ends the episode instead of
        # raising IndexError.
        self.df_counter = min(self.df_counter + 1, last_index)
        self.state = self.update_state(self._data[self.df_counter])

        done = self.current_step >= self.max_steps or self.df_counter >= last_index
        info = {"Profit": self.inventory["Profit"]}

        return self.state, reward, done, False, info

    def update_state(self, next_state):
        """Store *next_state* as the current observation and return it."""
        self.state = next_state
        return self.state

    def reset(self, seed=None, options=None):
        """Start a new episode at the first data row with a pristine inventory.

        Args:
            seed: Forwarded to ``gym.Env.reset`` to seed the env's RNG.
            options: Accepted for Gymnasium API compatibility; unused.

        Returns:
            ``(observation, info)`` where ``info`` is an empty dict.

        Raises:
            ValueError: If the environment was constructed without ``df``.
        """
        super().reset(seed=seed)
        if self._data is None:
            raise ValueError("TradeEnv requires `df` market data before reset()")

        self._reset_episode_state()
        # Work on a copy so the stored starting inventory is never mutated.
        self.inventory = copy.deepcopy(self._initial_inventory)
        self.state = self._data[self.df_counter]

        info = {}

        return self.state, info

    def calculate_pnl(self, action):
        """Execute *action* against the inventory and return its reward.

        Reward scheme:
            * buy  -- ``-0.1`` when filled; ``-1`` if already holding or the
              price is not positive.
            * sell -- ``1 + max(profit, 0)`` when filled; ``-1`` if nothing
              is held.
            * hold -- ``-0.01`` per hold taken so far this episode, capped
              at ``-1``.

        Raises:
            ValueError: If *action* is not 0, 1 or 2.
        """
        self.current_price = self.state[0]  # Assuming first element is the current price
        price = self.current_price
        inventory = self.inventory

        # Parameters for reward calculation
        buy_penalty = -0.1   # Small penalty for buying
        sell_reward_base = 1  # Base reward for selling
        hold_penalty_base = -0.01  # Base penalty for holding
        impossible_action_penalty = -1  # Buy while holding / sell while flat

        if action == self.ACTION_BUY:
            # A non-positive price cannot be bought at (division by zero).
            if inventory["Holdings"] == 0 and price > 0:
                inventory["Holdings"] = inventory["Cash Balance"] / price
                inventory["Cash Balance"] = 0
                inventory["Last Buy Price"] = price
                reward = buy_penalty
            else:
                self.impossible_buy_counter += 1
                reward = impossible_action_penalty

        elif action == self.ACTION_SELL:
            holdings = inventory["Holdings"]
            if holdings > 0:
                proceeds = holdings * price
                # Profit is measured against the cost of this position only,
                # independent of whatever cash was already in the account.
                profit = proceeds - holdings * inventory["Last Buy Price"]
                inventory["Cash Balance"] += proceeds
                inventory["Holdings"] = 0
                inventory["Last Sell Price"] = price
                inventory["Profit"] += profit
                # Reward based on profit, with a base reward for selling
                reward = sell_reward_base + max(profit, 0)
            else:
                self.impossible_sell_counter += 1
                reward = impossible_action_penalty

        elif action == self.ACTION_HOLD:
            # Increasing penalty for holding, capped at a maximum value.
            # NOTE(review): the counter is only cleared by reset(), so the
            # penalty grows over the whole episode, not per holding streak.
            self.hold_counter += 1
            reward = max(hold_penalty_base * self.hold_counter, -1)

        else:
            raise ValueError(
                f"invalid action {action!r}; expected 0 (buy), 1 (sell) or 2 (hold)"
            )

        if self.verbose:
            print(self.inventory)

        return reward