In [1]:
import numpy as np
import pandas as pd

from utils.rule_based_controller import RuleBasedController

from envs.bess_env import BatteryEnv
from utils.csv_handler import load_price_data, save_records
from utils.eval_handler import evaluate_rollout
from utils.forecast_scenario import ForecastScenarioGenerator

In [2]:
# ---- Experiment configuration ------------------------------------------
# Training budgets to sweep. n_runs / base_seed are not used in this part of
# the notebook; presumably later cells repeat each budget n_runs times with
# seeds derived from base_seed -- TODO confirm.
training_steps_list = [10_000, 20_000, 40_000, 60_000, 80_000, 100_000]
n_runs, base_seed = 5, 10

# Step length and forecast look-ahead, both in hours. 0.25 h corresponds to
# the "15min" resolution requested from load_price_data below.
dt_hours = 0.25
forecast_horizon_hours = 3.0
# Look-ahead expressed in whole steps: 3.0 h / 0.25 h = 12.
H = int(round(forecast_horizon_hours / dt_hours))

# ---- Price data --------------------------------------------------------
# Training series: the November 2024 day-ahead file (per its file name),
# loaded without a time_range argument.
dfp_train, price_train, ts_train = load_price_data(
    resolution="15min",
    csv_path="../../../data/electricity_price/dayahead_2024_11.csv",
)

# Evaluation series: the November 2025 file, restricted through time_range
# to 2025-11-01 .. 2025-11-07.
dfp_eval, price_eval, ts_eval = load_price_data(
    resolution="15min",
    time_range=("2025-11-01", "2025-11-07"),
    csv_path="../../../data/electricity_price/dayahead_2025_11.csv",
)

# ---- Forecast scenario generator (shared, fixed across runs) -----------
# Built once with a fixed base_seed and reused by every environment.
# NOTE(review): sigma0 / sigmaH look like the forecast-noise levels at the
# start and end of the horizon, interpolated by `schedule` -- confirm in
# utils.forecast_scenario.
price_scenario_gen = ForecastScenarioGenerator(
    base_seed=1234,
    horizon_steps=H,
    schedule="sqrt",
    sigma0=0.01,
    sigmaH=0.06,
)

# ---- Result accumulator ------------------------------------------------
# Starts empty here; nothing in the visible cells appends to it.
records = list()

In [3]:
# ---- Rule-based baseline (no training) ---------------------------------
# Environment over the evaluation price series that the rule-based
# controller is rolled out on.
rb_env = BatteryEnv(
    # price data and time grid
    price_series=price_eval,
    timestamps=ts_eval,
    dt_hours=dt_hours,
    # battery sizing
    capacity_kWh=50.0,
    p_max_kW=10.0,
    # the rule-based controller works with continuous actions
    use_discrete_actions=False,
    # price forecast, drawn from the shared scenario generator
    use_price_forecast=True,
    forecast_horizon_hours=forecast_horizon_hours,
    price_scenario_gen=price_scenario_gen,
    scenario_id=0,
    vary_scenario_per_episode=False,
    # episode layout
    episode_days=7.0,
    random_start=False,
    random_seed=45,
)

# The controller is given the *training* prices as its price history.
rb_controller = RuleBasedController(price_history=price_train, env=rb_env)

# One rollout on the evaluation env; the baseline score is the sum of the
# rollout's "reward" entries.
rb_rollout = evaluate_rollout(env=rb_env, model=rb_controller)
rb_reward = float(np.sum(rb_rollout["reward"]))

print(f"\nRB baseline reward (eval week) = {rb_reward:.2f}")

# The baseline involves no training, so the same reward is recorded once per
# training budget (one row per entry of training_steps_list).
# NOTE(review): "seed" is recorded as 0 while the env above was built with
# random_seed=45 -- presumably a placeholder for the untrained baseline; confirm.
rb_records = []
for steps in training_steps_list:
    rb_records.append(
        {"agent": "RB", "training_steps": steps, "run_id": 0, "seed": 0, "total_reward": rb_reward}
    )

Episode finished after 672 steps

RB baseline reward (eval week) = 8.33


In [4]:
# Persist the baseline rows to the shared results CSV, tagged with an
# experiment id.
# NOTE(review): the captured output reports "6 new rows (total=96)", so the
# file already held rows from other runs; whether re-running this cell adds
# duplicates depends on save_records -- confirm in utils.csv_handler.
save_records(
    experiment_id="rb_baseline_v1",
    out_path="results/learning_steps_records.csv",
    records=rb_records,
)

[save_experiment_records] Saved 6 new rows (total=96) to results/learning_steps_records.csv
