## Imports

In [1]:
import numpy as np
from stable_baselines3 import SAC
from envs.bess_env import BatteryEnv

## Test price data

In [2]:
# Dummy price series (€/MWh): 7 days at hourly resolution, i.e. 24 * 7 = 168 samples.
# The values come from a dedicated, seeded generator so that the series is
# identical on every Restart Kernel -> Run All instead of changing per run.
PRICE_SEED = 42
price_rng = np.random.default_rng(PRICE_SEED)
price = price_rng.uniform(20, 120, 24 * 7).astype(np.float32)

## Create environment

In [3]:
# Gather the BESS settings in one mapping, then build the environment from it.
bess_settings = dict(
    dt_hours=1.0,                 # length of one simulation step, in hours
    capacity_kWh=100.0,           # energy capacity of the battery, in kWh
    p_max_kW=50.0,                # charge/discharge power limit, in kW
    price_unit="EUR_per_MWh",     # unit in which the market prices are given
    price_sigma_rel=0.10,         # relative price forecast uncertainty (10%)
    use_simple_cycle_count=True,  # simplified Equivalent Full Cycle (EFC) counting
    deg_cost_per_EFC=120.0,       # degradation cost per EFC, in EUR
)
env = BatteryEnv(price, **bess_settings)

# Bare last expression: shows the environment's repr as the cell output.
env

<envs.bess_env.BatteryEnv at 0x24769b90a90>

## Reset and inspect initial state

In [4]:
# Start a fresh episode and show both values the environment hands back.
obs, info = env.reset()
for label, value in (("Initial observation:", obs), ("Info:", info)):
    print(label, value)

Initial observation: [0.50672317 1.         0.         1.         0.         1.
 0.46249595 0.         0.        ]
Info: {}


## Step the environment manually

In [5]:
# Sample one random action and push it through a single transition.
action = env.action_space.sample()
step_result = env.step(action)
obs, reward, terminated, truncated, info = step_result

# Report the action together with the transition fields of interest.
for label, value in (
    ("Action taken:", action),
    ("New observation:", obs),
    ("Reward:", reward),
    ("Terminated:", terminated),
    ("Info:", info),
):
    print(label, value)

Action taken: [39.909637]
New observation: [8.8586473e-01 9.9810427e-01 2.5881904e-01 9.6592581e-01 7.1725855e-04
 9.9999976e-01 5.1815915e-01 0.0000000e+00 7.9819274e-01]
Reward: -24.99588288311847
Terminated: False
Info: {'price_true': 56.31195068359375, 'revenue_eur': -2.247389535950497, 'deg_cost_eur': 22.748493347167972, 'penalty_eur': 0.0, 'efc_cum': 0.18957077789306642}


## Render a few steps

In [6]:
# Render a few steps to inspect environment behavior.
# The episode is restarted first, then driven with randomly sampled actions.
obs, info = env.reset()
for _ in range(5):
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(action)
    env.render()
    # Stop once the episode is over: stepping a finished episode without a
    # reset is outside the Gymnasium-style step/reset contract.
    if terminated or truncated:
        break

t=   1  SOC=0.900  SoH=0.998  EFC_cum= 0.212  last_a= 47.98 kW
t=   2  SOC=0.861  SoH=0.998  EFC_cum= 0.231  last_a= -3.71 kW
t=   3  SOC=0.479  SoH=0.996  EFC_cum= 0.422  last_a=-36.26 kW
t=   4  SOC=0.853  SoH=0.994  EFC_cum= 0.609  last_a= 39.34 kW
t=   5  SOC=0.900  SoH=0.994  EFC_cum= 0.632  last_a= 10.08 kW


## One-shot SAC Training Test

In [7]:
# Create a SAC agent for quick testing.
# A fixed seed is passed to SB3's pseudo-random generators so that repeated
# runs of this cell start from the same state instead of a fresh random one.
SAC_SEED = 0
model = SAC("MlpPolicy", env, verbose=1, seed=SAC_SEED)

# Train for a small number of steps to verify the learning loop.
# learn() is the last expression, so the cell output is the trained model's repr.
TRAIN_TIMESTEPS = 2000
model.learn(total_timesteps=TRAIN_TIMESTEPS)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 168       |
|    ep_rew_mean     | -1.66e+03 |
| time/              |           |
|    episodes        | 4         |
|    fps             | 48        |
|    time_elapsed    | 13        |
|    total_timesteps | 672       |
| train/             |           |
|    actor_loss      | 26.6      |
|    critic_loss     | 15.9      |
|    ent_coef        | 0.875     |
|    ent_coef_loss   | -0.0735   |
|    learning_rate   | 0.0003    |
|    n_updates       | 571       |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 168       |
|    ep_rew_mean     | -1.34e+03 |
| time/              |           |
|    episodes        | 8         |
|    fps             | 46        |
|    time_elapsed    | 29        |
|    total_timesteps | 1344    

<stable_baselines3.sac.sac.SAC at 0x2476acd1b90>

## Let the trained agent act for a few steps

In [8]:
# Restart the episode and let the trained agent choose the actions.
obs, info = env.reset()

for _ in range(5):
    # deterministic=False asks predict() for non-deterministic actions;
    # the second return value of predict() is not needed here and is discarded.
    action, _ = model.predict(obs, deterministic=False)
    obs, reward, terminated, truncated, info = env.step(action)
    env.render()
    # Stop once the episode is over: stepping a finished episode without a
    # reset is outside the Gymnasium-style step/reset contract.
    if terminated or truncated:
        break

t=   1  SOC=0.539  SoH=1.000  EFC_cum= 0.016  last_a= -3.10 kW
t=   2  SOC=0.564  SoH=1.000  EFC_cum= 0.029  last_a=  2.67 kW
t=   3  SOC=0.599  SoH=1.000  EFC_cum= 0.046  last_a=  3.62 kW
t=   4  SOC=0.678  SoH=0.999  EFC_cum= 0.086  last_a=  8.31 kW
t=   5  SOC=0.659  SoH=0.999  EFC_cum= 0.095  last_a= -1.76 kW
