## Imports

In [1]:
import numpy as np
from stable_baselines3 import SAC
from envs.bess_env import BatteryEnv

## Test price data

In [2]:
# Dummy price series (€/MWh), 7 days with hourly resolution.
# Drawn from a seeded generator so the series is identical after every
# kernel restart and re-run.
PRICE_SEED = 42
price = np.random.default_rng(PRICE_SEED).uniform(20, 120, 24 * 7).astype(np.float32)

## Create environment

In [3]:
# Build the BESS environment on top of the dummy price series.
# The settings are collected in one dict and unpacked into the constructor,
# in the same order as before.
bess_settings = dict(
    dt_hours=1.0,                 # simulation time step in hours
    capacity_kWh=100.0,           # battery energy capacity in kWh
    p_max_kW=50.0,                # max charge/discharge power in kW
    price_unit="EUR_per_MWh",     # defines unit of market prices
    price_sigma_rel=0.10,         # 10% price forecast uncertainty
    use_simple_cycle_count=True,  # use simplified Equivalent Full Cycle calculation
    deg_cost_per_EFC=120.0,       # degradation cost per Equivalent Full Cycle (EUR)
)
env = BatteryEnv(price, **bess_settings)

env  # last expression: display the environment object as the cell output

<envs.bess_env.BatteryEnv at 0x1e514f157d0>

## Reset and inspect initial state

In [4]:
# Start a fresh episode and show both values returned by reset()
obs, info = env.reset()
for label, value in (("Initial observation:", obs), ("Info:", info)):
    print(label, value)

Initial observation: [0.4398338  1.         0.         1.         0.         1.
 0.69456846 0.         0.        ]
Info: {}


## Step the environment manually

In [5]:
# Take one random action to test the transition logic.
# step() returns five values; both end-of-episode flags are printed so that
# neither one goes unnoticed during this manual check.
action = env.action_space.sample()
obs, reward, terminated, truncated, info = env.step(action)

print("Action taken:", action)
print("New observation:", obs)
print("Reward:", reward)
print("Terminated:", terminated)
print("Truncated:", truncated)
print("Info:", info)

Action taken: [-37.2226]
New observation: [ 1.0000000e-01  1.0000000e+00  2.5881904e-01  9.6592581e-01
  7.1725855e-04  9.9999976e-01  7.0717865e-01  0.0000000e+00
 -7.4445200e-01]
Reward: -30.517530910119163
Terminated: False
Info: {'price_true': 80.36727905273438, 'revenue_eur': 2.9914790032751624, 'deg_cost_eur': 23.509009913394326, 'penalty_eur': -10.0, 'efc_cum': 0.16991689671056603}


## Render a few steps

In [6]:
# Render a few random steps to inspect environment behavior.
obs, info = env.reset()
for _ in range(5):
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(action)
    env.render()
    # Stop once the episode has ended: under the Gymnasium API an
    # environment must be reset() before it is stepped again.
    if terminated or truncated:
        break

t=   1  SOC=0.774  SoH=1.000  EFC_cum= 0.089  last_a= 18.72 kW
t=   2  SOC=0.869  SoH=1.000  EFC_cum= 0.136  last_a=  9.99 kW
t=   3  SOC=0.781  SoH=1.000  EFC_cum= 0.181  last_a= -8.41 kW
t=   4  SOC=0.799  SoH=1.000  EFC_cum= 0.190  last_a=  1.90 kW
t=   5  SOC=0.551  SoH=1.000  EFC_cum= 0.314  last_a=-23.58 kW


## One-shot SAC Training Test

In [7]:
# Create a SAC agent for quick testing.
# A fixed seed is passed so repeated runs of this smoke test start from the
# same pseudo-random state and their training logs are comparable.
model = SAC("MlpPolicy", env, verbose=1, seed=42)

# Train for a small number of steps to verify the learning loop runs end to end.
# learn() is left as the last expression, so the cell displays the returned model.
model.learn(total_timesteps=2000)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 168       |
|    ep_rew_mean     | -1.86e+03 |
| time/              |           |
|    episodes        | 4         |
|    fps             | 52        |
|    time_elapsed    | 12        |
|    total_timesteps | 672       |
| train/             |           |
|    actor_loss      | 31.4      |
|    critic_loss     | 17.3      |
|    ent_coef        | 0.888     |
|    ent_coef_loss   | -0.0031   |
|    learning_rate   | 0.0003    |
|    n_updates       | 571       |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 168       |
|    ep_rew_mean     | -1.07e+03 |
| time/              |           |
|    episodes        | 8         |
|    fps             | 45        |
|    time_elapsed    | 29        |
|    total_timesteps | 1344    

<stable_baselines3.sac.sac.SAC at 0x1e516b8ae10>

## Let the trained agent act for a few steps

In [8]:
# Roll the freshly trained agent forward for a few steps from a new episode.
obs, info = env.reset()

for _ in range(5):
    # deterministic=False requests stochastic (sampled) actions from the policy;
    # the second return value of predict() is not needed here.
    action, _ = model.predict(obs, deterministic=False)
    obs, reward, terminated, truncated, info = env.step(action)
    env.render()
    # Stop once the episode has ended: under the Gymnasium API an
    # environment must be reset() before it is stepped again.
    if terminated or truncated:
        break

t=   1  SOC=0.345  SoH=1.000  EFC_cum= 0.028  last_a= -5.37 kW
t=   2  SOC=0.332  SoH=1.000  EFC_cum= 0.035  last_a= -1.25 kW
t=   3  SOC=0.342  SoH=1.000  EFC_cum= 0.040  last_a=  1.02 kW
t=   4  SOC=0.317  SoH=1.000  EFC_cum= 0.052  last_a= -2.34 kW
t=   5  SOC=0.368  SoH=1.000  EFC_cum= 0.078  last_a=  5.40 kW
