Importing dependencies and creating custom environment

In [None]:
!pip install stable-baselines3 gymnasium

Collecting stable-baselines3
  Downloading stable_baselines3-2.7.0-py3-none-any.whl.metadata (4.8 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (

In [None]:
import time
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3.common.evaluation import evaluate_policy           #helper function to evaluate the agent
from stable_baselines3.common.env_util import make_vec_env

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


In [None]:
import gymnasium as gym

In [None]:
# Redefine the environment with the correct import for spaces
class EnergyFlexEnv(gym.Env):
    """Battery-dispatch environment driven by a time series of grid imports and prices.

    Each step corresponds to one row of ``data_df``. The agent picks one of five
    discrete charge/discharge power levels; the reward is the negative cost of the
    resulting net grid import for that row.

    Observation (``float32``, shape ``(4,)``):
        ``[soc, grid_import_kw, export_price_p_kwh, hour_norm]`` where the last
        three are read directly from the current data row and are expected to be
        pre-normalised to ``[0, 1]`` (the environment does not rescale them).

    Required columns in ``data_df``:
        ``grid_import_kw``, ``export_price_p_kwh``, ``hour_norm`` (observations) and
        ``grid_import_kw_original``, ``export_price_p_kwh_original`` (cost calculation).

    Args:
        data_df: DataFrame holding the columns listed above, one row per timestep.
        max_steps: Maximum episode length; capped at ``len(data_df)``.

    Raises:
        ValueError: If ``data_df`` has no rows.
    """

    def __init__(self, data_df, max_steps=8760):  # 1 year is 8760 hours
        super().__init__()

        self.data = data_df.reset_index(drop=True)
        if len(self.data) == 0:
            raise ValueError("data_df must contain at least one row")
        self.max_steps = min(max_steps, len(self.data))

        # Battery specs
        self.battery_capacity = 300  # kWh
        self.max_charge_rate = 100   # charge/discharge power limit
        # Applied once when charging and once when discharging (see step()),
        # so the effective round trip is efficiency ** 2.
        self.efficiency = 0.9
        self.initial_soc = 0.5       # assuming start with 50% charge

        # Action space with 5 actions:
        # 0: charge aggressively when costs are very low
        # 1: charge conservatively when costs are low
        # 2: stay idle when costs are average
        # 3: discharge conservatively when costs are high
        # 4: discharge aggressively when costs are very high
        self.action_space = spaces.Discrete(5)

        # SoC, grid import, price and hour are all normalised to 0-1.
        # The bounds are created as float32 so they match the declared dtype.
        self.observation_space = spaces.Box(
            low=np.zeros(4, dtype=np.float32),
            high=np.ones(4, dtype=np.float32),
            dtype=np.float32
        )

        # Action mapping: +ve = charge, -ve = discharge
        self.action_map = {
            0: 100,
            1: 50,
            2: 0,
            3: -50,
            4: -100
        }

        # Action descriptions
        self.action_names = {
            0: 'charge_agr(low cost)',
            1: 'charge_cons(low cost)',
            2: 'idle',
            3: 'discharge_cons(high cost)',
            4: 'discharge_agr(high cost)'
        }

        # Initialise episode state
        self.reset()

    def reset(self, seed=None, options=None):
        """Reset the environment to its initial state.

        Args:
            seed: Optional seed forwarded to ``gym.Env.reset``.
            options: Accepted for Gymnasium API compatibility; not used.

        Returns:
            Tuple ``(observation, info)`` where ``info`` is an empty dict.
        """
        super().reset(seed=seed)
        self.current_step = 0
        self.soc = self.initial_soc  # battery state of charge
        self.total_cost = 0
        self.done = False

        return self._get_state(), {}

    def _get_state(self):
        """Build the current observation vector.

        Returns:
            ``np.ndarray`` of dtype ``float32`` with
            ``[soc, grid_import_kw, export_price_p_kwh, hour_norm]``.
        """
        if self.current_step >= len(self.data):
            # Ran out of data: fall back to the last valid row
            row = self.data.iloc[-1]
        else:
            row = self.data.iloc[self.current_step]

        state = np.array([
            self.soc,                           # Battery SoC (0-1)
            row['grid_import_kw'],              # Normalized grid import
            row['export_price_p_kwh'],          # Normalized ToU price
            row['hour_norm']                    # Normalized hour
        ], dtype=np.float32)

        return state

    def step(self, action):
        """Execute one timestep.

        Args:
            action: Index into ``action_map`` (0-4). Numpy integer scalars are
                accepted and converted with ``int()``.

        Returns:
            Tuple ``(observation, reward, terminated, truncated, info)`` as required
            by the Gymnasium API. The episode only ever ends because the step
            horizon (``max_steps``) is reached, which is reported as ``truncated``;
            ``terminated`` is always ``False``.
        """
        if self.done:
            # Stepping past the end of the episode: no-op transition
            return self._get_state(), 0.0, False, True, {}

        action = int(action)

        # Get current data
        row = self.data.iloc[self.current_step]

        # Requested battery power (kW), limited by rate and SoC constraints
        battery_power = self._apply_battery_constraints(self.action_map[action])

        # Update battery SoC. NOTE(review): power (kW) is used directly as energy
        # (kWh), which presumes one row == one hour -- confirm against the data.
        if battery_power > 0:
            # Charging: only a fraction of the drawn energy is stored
            energy_stored = battery_power * self.efficiency
            self.soc += energy_stored / self.battery_capacity
        else:
            # Discharging or idle: more energy leaves the battery than is delivered
            energy_discharged = abs(battery_power) / self.efficiency
            self.soc -= energy_discharged / self.battery_capacity

        # Guard against floating-point drift outside [0, 1]
        self.soc = float(np.clip(self.soc, 0.0, 1.0))

        # Calculate costs from the un-normalised columns
        grid_import_original = float(row['grid_import_kw_original'])  # kW
        # NOTE(review): the column is named "export_price" but is used here as the
        # price paid for imported energy -- confirm this is intended.
        price_original = float(row['export_price_p_kwh_original'])    # p/kWh

        # Net grid import after battery action; can't export in this simple model
        net_grid_import = max(0.0, grid_import_original + battery_power)  # kW

        # Cost calculation
        hourly_cost = (net_grid_import * price_original) / 100  # Convert pence to pounds
        self.total_cost += hourly_cost

        # Reward: negative cost (we want to minimize cost)
        reward = -hourly_cost

        # Move to next step
        self.current_step += 1
        self.done = self.current_step >= self.max_steps

        # Additional info
        info = {
            'soc': self.soc,
            'battery_power': battery_power,
            'action_name': self.action_names[action],
            'net_grid_import': net_grid_import,
            'hourly_cost': hourly_cost,
            'total_cost': self.total_cost,
            'price_p_kwh': price_original
        }

        # Reaching the horizon is a time limit, so it is reported as truncation
        return self._get_state(), reward, False, self.done, info

    def _apply_battery_constraints(self, desired_power):
        """Limit a requested power to what the battery can physically do.

        The limits mirror the SoC update in ``step()``: charging stores
        ``power * efficiency`` and discharging removes ``power / efficiency``,
        so the SoC-based limits are scaled by the efficiency accordingly.

        Args:
            desired_power: Requested power; positive charges, negative discharges.

        Returns:
            The power as a ``float`` after applying the rate limit and the
            SoC-based headroom/availability limit.
        """
        # Power (rate) limits
        constrained_power = float(
            np.clip(desired_power, -self.max_charge_rate, self.max_charge_rate)
        )

        # SoC limits
        if constrained_power > 0:  # Charging
            headroom = (1.0 - self.soc) * self.battery_capacity  # kWh remaining
            # Stored energy is power * efficiency, so this much power fills the headroom
            max_charge_power = headroom / self.efficiency
            constrained_power = min(constrained_power, max_charge_power)
        else:  # Discharging or idle
            available = self.soc * self.battery_capacity  # kWh available
            # Removed energy is power / efficiency, so only this much can be delivered
            max_discharge_power = available * self.efficiency
            constrained_power = max(constrained_power, -max_discharge_power)

        return constrained_power

# Build the training environment from the training dataframe
train_env = EnergyFlexEnv(data_df=train_df)

# Validate the environment against the Gymnasium/SB3 API and report the outcome
try:
    check_env(train_env, warn=True)
except Exception as check_error:
    print(f"Environment check failed: {check_error}")
else:
    print("Environment check passed!")

Environment check failed: The `step()` method must return five values: obs, reward, terminated, truncated, info. Actual: 4 values returned.


  gym.logger.warn(
  gym.logger.warn(
