In [2]:
import gym
from gym import spaces
import numpy as np

class BESSMarketEnv(gym.Env):
    """
    Custom Environment for Battery Energy Storage System (BESS) participating in FCR and aFRR markets.
    The BESS participates in FCR for 4-hour blocks and decides whether to join the aFRR market each hour.
    """
    
    def __init__(self):
        super(BESSMarketEnv, self).__init__()
        
        # Define action space: 0 - Do nothing, 1 - Participate in FCR, 2 - Participate in aFRR
        self.action_space = spaces.Discrete(3)
        
        # Define observation space (State): SoC, Time, Market Prices, Availability for aFRR
        # State of Charge (SoC) [0, 100], Time (Hour) [0, 23], FCR Price, aFRR Price, is in FCR block [0, 1]
        self.observation_space = spaces.Box(low=np.array([0, 0, 0, 0, 0]),
                                            high=np.array([100, 23, 1000, 1000, 1]), dtype=np.float32)
        
        # Initial conditions
        self.state = None
        self.reset()

    def reset(self):
        """
        Reset the environment to an initial state.
        """
        # Initial state: Random initial SoC, starting time, random market prices, and not in an FCR block
        SoC = np.random.uniform(20, 80)
        time = np.random.randint(0, 24)
        FCR_price = np.random.uniform(100, 300)
        aFRR_price = np.random.uniform(50, 200)
        in_FCR_block = 0
        
        self.state = np.array([SoC, time, FCR_price, aFRR_price, in_FCR_block], dtype=np.float32)
        
        return self.state

    def step(self, action):
        """
        Execute one step in the environment based on the selected action.
        """
        SoC, time, FCR_price, aFRR_price, in_FCR_block = self.state
        
        # Update Time
        next_time = (time + 1) % 24
        
        # Set initial reward and define base energy cost (simplification)
        reward = 0
        degradation_cost = 0.1  # Simplified degradation cost per cycle
        
        # Action: 0 - Do nothing, 1 - Participate in FCR, 2 - Participate in aFRR
        if action == 0:
            reward -= degradation_cost  # Do nothing, battery still degrades slowly
        elif action == 1 and in_FCR_block == 0:
            # FCR participation: FCR blocks last 4 hours, earns FCR price * 4
            reward += FCR_price * 4
            in_FCR_block = 4  # Lock into FCR for the next 4 hours
        elif action == 2 and in_FCR_block == 0:
            # aFRR participation: Earn aFRR price per hour
            reward += aFRR_price
        else:
            reward -= 1  # Penalty for invalid action
        
        # Manage SoC (arbitrary increase or decrease, depending on market participation)
        if action == 1 or action == 2:
            SoC = max(0, SoC - 5)  # Example: reduce SoC by 5 units
        
        # Decrement FCR block duration
        if in_FCR_block > 0:
            in_FCR_block -= 1
        
        # Update market prices (random variation)
        FCR_price = max(100, FCR_price + np.random.uniform(-20, 20))
        aFRR_price = max(50, aFRR_price + np.random.uniform(-10, 10))
        
        # New state
        self.state = np.array([SoC, next_time, FCR_price, aFRR_price, in_FCR_block], dtype=np.float32)
        
        # Check if the episode is done (SoC reaches 0 or full, or the day ends)
        done = SoC == 0 or SoC == 100 or next_time == 0
        
        return self.state, reward, done, {}

    def render(self, mode='human'):
        """
        Render the environment's state in a human-readable format.
        """
        SoC, time, FCR_price, aFRR_price, in_FCR_block = self.state
        print(f"Time: {time}, SoC: {SoC}, FCR Price: {FCR_price}, aFRR Price: {aFRR_price}, In FCR Block: {in_FCR_block}")

# Instantiate and run the environment
env = BESSMarketEnv()

# Example usage
state = env.reset()
for _ in range(5):
    action = env.action_space.sample()
    next_state, reward, done, _ = env.step(action)
    env.render()
    if done:
        break


Time: 4.0, SoC: 64.27363586425781, FCR Price: 146.31324768066406, aFRR Price: 168.3808135986328, In FCR Block: 0.0
Time: 5.0, SoC: 59.27363586425781, FCR Price: 157.52667236328125, aFRR Price: 175.0477294921875, In FCR Block: 0.0
Time: 6.0, SoC: 54.27363586425781, FCR Price: 152.75933837890625, aFRR Price: 181.9376678466797, In FCR Block: 3.0
Time: 7.0, SoC: 54.27363586425781, FCR Price: 153.30026245117188, aFRR Price: 173.63999938964844, In FCR Block: 2.0
Time: 8.0, SoC: 49.27363586425781, FCR Price: 135.58494567871094, aFRR Price: 175.4073944091797, In FCR Block: 1.0


  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
