In [None]:
import pandas as pd
import numpy as np
import gym
from gym import spaces
from stable_baselines3 import PPO # Library to train AI models in reinforcement learning  


class TradinEnv_v1(gym.Env):
    """
    Custom Trading Environment for Reinforcement Learning.

    The agent can take actions (buy, sell, hold) based on market indicators.
    The goal is to maximize profit while using stop-loss and take-profit levels.
    """
    def __init__(self, df):
        """
        Initialize the trading environment.

        Parameters:
        df (pd.DataFrame): Historical market data with indicators.
        """
        super(TradinEnv_v1, self).__init__()

        # Store the dataset and set initial step 
        self.df = df
        self.current_step = 0 
        self.balance = 1000 # Initial capital
        self.position = None # No active trade   
        self.pending_orders = []  # Store multiple pending orders

        # Define action space: 3 possible actions (0 = Hold, 1 = Buy, 2 = Sell)
        self.action_space = spaces.Discrete(3)

        # Define the observation space: all columns of the dataset
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(len(df.columns),), dtype=np.float32
        )

    def reset(self): 
        """
        Reset the environment at the start of a new episode.

        Returns:
        np.array: First observation (market indicators at step 0)
        """
        self.current_step = 0
        #self.balance = 1000
        self.position = None
        self.pending_orders = []
        return self._next_observation()

    def _next_observation(self):
        """
        Retrieve the current market state.

        Returns:
        np.array: Market indicators at the current step.
        """
        return self.df.iloc[self.current_step].values

    def step(self, action):
        """
        Execute an action (buy, sell, hold) and update the environment.

        Parameters:
        action (int): 0 (Hold), 1 (Buy), 2 (Sell)

        Returns:
        tuple: (new observation, reward, done flag, info)
        """
        current_price = self.df.iloc[self.current_step]['close']
        high = self.df.iloc[self.current_step]['high']
        low = self.df.iloc[self.current_step]['low']

        reward = 0
        done = False

        # Handle new actions
        if action == 1:  # Buy limit order
            self.position = "long"
            print(f"step ceck: \n Position: {self.position}, action: {action}, current step: {self.current_step}")
            self._place_order("buy", high)
        elif action == 2:  # Sell limit order
            self.position = "short"
            print(f"step ceck: step ceck: Position: {self.position}, action: {action}, current step: {self.current_step}")
            self._place_order("sell", low)
        # Process existing pending orders
        self._execute_pending_orders(current_price, high, low)
        if action == 0: 
            self.position = None


        # Move to the next step
        self.current_step += 1
        if self.current_step >= len(self.df) - 1:
            done = True
        
        return self._next_observation(), reward, done, {}

    def _place_order(self, order_type, entry_price):
        """
        Place a limit order.

        Parameters:
        order_type (str): 'buy' or 'sell'.
        entry_price (float): Price at which the order will be executed.
        """
        order = {
            "type": order_type,
            "entry_price": entry_price,
            "step_placed": self.current_step
        }
        self.pending_orders.append(order)

    def _execute_pending_orders(self, current_price, high, low):
        """
        Execute any pending limit orders if the price condition is met.

        Parameters:
        current_price (float): Current market price.
        high (float): Current high price.
        low (float): Current low price.
        """
        executed_orders = []

        for order in self.pending_orders:
            if order["type"] == "buy" and current_price >= order["entry_price"]: #substitute with HIgh ------------------
                executed_orders.append(order)
                self.balance +=  (self.balance*( (current_price - order["entry_price"])/current_price ) )    
                print(f"Executed orders = {executed_orders}") #TBR 
                print(f"Price exectued - Entry price = {current_price} - {order["entry_price"]}")#TBR
            elif order["type"] == "sell" and current_price <= order["entry_price"]:
                executed_orders.append(order)
                self.balance +=  (self.balance*( -(current_price - order["entry_price"])/current_price ) ) #substitute with low ------------------
                print(f"executed orders = {executed_orders}") #TBR 
                print(f" Price exectued - Entry price = {current_price} - {order["entry_price"]}") #TBR
        # Remove executed orders from the list
        self.pending_orders = [o for o in self.pending_orders if o not in executed_orders]
        print(f"pending orders = {self.pending_orders} \n")#TBR

    def render(self):
        """
        Print the current state of the environment (for debugging).
        """
        print(f"Step: {self.current_step}, Balance: {self.balance}, Position: {self.position}, Pending Orders: {self.pending_orders}")

# This class compared to the older one adds the element of "future", meaning that the order instead of being
# immediately executed, is opened as a pending order, and then executed at a later time. This code introduces
# limit orders by introducing the term self.pending_order["step_placed"] = self.current_step. This code stores the
# current step in a given place.


In [7]:
TradinEnv_v1._place_order(order_type= "buy", entry_price=25, stop_price=20)


TypeError: TradinEnv_v1._place_order() missing 1 required positional argument: 'self'