## Imports

In [11]:
import numpy as np
from stable_baselines3 import SAC

import sys
sys.path.append("../src")
from envs.bess_env import BatteryEnv

## Test price data

In [12]:
# Dummy price series (€/MWh), 7 days with hourly resolution.
# The generator is seeded so the series (and every output derived from it)
# is identical after "Restart Kernel -> Run All".
PRICE_SEED = 42
N_PRICE_STEPS = 24 * 7  # 7 days x 24 hourly values
price = (
    np.random.default_rng(PRICE_SEED)
    .uniform(20, 120, N_PRICE_STEPS)
    .astype(np.float32)
)

## Create environment

In [15]:
# Settings for the BESS environment, gathered in one mapping so they can be
# inspected or tweaked before the environment is built.
env_kwargs = dict(
    dt_hours=1.0,                 # simulation time step in hours
    capacity_kWh=100.0,           # battery energy capacity in kWh
    p_max_kW=50.0,                # max charge/discharge power in kW
    price_unit="EUR_per_MWh",     # defines unit of market prices
    price_sigma_rel=0.10,         # 10% price forecast uncertainty
    use_simple_cycle_count=True,  # use simplified Equivalent Full Cycle calculation
    deg_cost_per_EFC=120.0,       # degradation cost per Equivalent Full Cycle (EUR)
)

# Build the environment from the price series plus the settings above
env = BatteryEnv(price, **env_kwargs)

# Bare expression: shows the environment's repr as the cell output
env

<envs.bess_env.BatteryEnv at 0x1392b761a10>

## Reset and inspect initial state

In [14]:
# Start a fresh episode; reset() hands back the first observation and an info dict
reset_result = env.reset()
obs, info = reset_result

# Show both returned values, one labelled line each
for label, value in (("Initial observation:", obs), ("Info:", info)):
    print(label, value)

Initial observation: [0.5640915  1.         0.         1.         0.34756446 0.        ]
Info: {}


## Step the environment manually

In [16]:
# Draw one random action from the action space and apply it, exercising a
# single environment transition
action = env.action_space.sample()
step_result = env.step(action)
obs, reward, terminated, truncated, info = step_result

# Report the action and what the environment returned for it
for label, value in (
    ("Action taken:", action),
    ("New observation:", obs),
    ("Reward:", reward),
    ("Terminated:", terminated),
    ("Info:", info),
):
    print(label, value)

Action taken: [14.60032]
New observation: [0.6503728  1.         0.03739119 0.9993007  0.3664052  0.        ]
Reward: -8.904609957871997
Terminated: False
Info: {'price_true': 39.89142990112305, 'revenue_eur': -0.5824276363235368, 'deg_cost_eur': 8.32218232154846, 'penalty_eur': 0.0, 'efc_cum': 0.06935151934623718}


## Render a few steps

In [17]:
# Render a few random-action steps to inspect environment behavior
obs, info = env.reset()
for _ in range(5):
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(action)
    env.render()
    # A finished episode must not be stepped again: start a new one as soon
    # as the environment reports termination or truncation.
    if terminated or truncated:
        obs, info = env.reset()

t=  1  SOC=0.719  SoH=1.000  EFC_cum= 0.128
t=  2  SOC=0.333  SoH=1.000  EFC_cum= 0.321
t=  3  SOC=0.568  SoH=1.000  EFC_cum= 0.439
t=  4  SOC=0.256  SoH=1.000  EFC_cum= 0.595
t=  5  SOC=0.100  SoH=1.000  EFC_cum= 0.672


## One-shot SAC Training Test

In [20]:
# Create a SAC agent for quick testing.
# An explicit seed fixes the pseudo-random generators that SB3 controls, so
# this smoke test behaves the same way from one run to the next.
model = SAC("MlpPolicy", env, verbose=1, seed=0)

# Train for a small number of steps to verify that the learning loop runs.
# learn() is the last expression, so its return value is shown as cell output.
model.learn(total_timesteps=2000)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 168       |
|    ep_rew_mean     | -1.93e+03 |
| time/              |           |
|    episodes        | 4         |
|    fps             | 37        |
|    time_elapsed    | 18        |
|    total_timesteps | 672       |
| train/             |           |
|    actor_loss      | 32.2      |
|    critic_loss     | 15.6      |
|    ent_coef        | 0.892     |
|    ent_coef_loss   | -0.00198  |
|    learning_rate   | 0.0003    |
|    n_updates       | 571       |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 168       |
|    ep_rew_mean     | -1.11e+03 |
| time/              |           |
|    episodes        | 8         |
|    fps             | 33        |
|    time_elapsed    | 39        |
|    total_timesteps | 1344    

<stable_baselines3.sac.sac.SAC at 0x1393212a390>

## Let the trained agent act for a few steps

In [21]:
# Test the trained agent on a fresh episode
obs, info = env.reset()

for _ in range(5):
    # deterministic=False: the action is sampled from the policy rather than
    # taken deterministically; the second return value is not used here.
    action, _ = model.predict(obs, deterministic=False)
    obs, reward, terminated, truncated, info = env.step(action)
    env.render()
    # A finished episode must not be stepped again: start a new one as soon
    # as the environment reports termination or truncation.
    if terminated or truncated:
        obs, info = env.reset()

t=  1  SOC=0.449  SoH=1.000  EFC_cum= 0.025
t=  2  SOC=0.493  SoH=1.000  EFC_cum= 0.047
t=  3  SOC=0.425  SoH=1.000  EFC_cum= 0.081
t=  4  SOC=0.419  SoH=1.000  EFC_cum= 0.085
t=  5  SOC=0.427  SoH=1.000  EFC_cum= 0.089
