In [1]:
%load_ext tensorboard
import tensorflow as tf
import numpy as np
import sys
import matplotlib.pyplot as plt
from IPython.display import clear_output
from stable_baselines3 import SAC
import requests
import datetime
import os
import gymnasium as gym
from gymnasium.spaces import Box
import csv
import pandas as pd
from gymnasium import Env

In [2]:
clear_output(wait=True)

try:
  !rm -rf boptestGym
except:
  pass
!git clone -b master https://github.com/ibpsa/project1-boptest-gym.git boptestGym

In [4]:
url = "http://localhost:80"

log_path = os.path.join('local_files', 'Logs', "SAC_5")

In [5]:
sys.path.insert(0,'boptestGym')
from boptestGymEnv import BoptestGymEnv

## Gettin Setpoint information from CSV

In [7]:
from datetime import datetime
DEFAULT_SETPOINT = 18.0

DAYS = ["MON", "TUE", "WED", "THU", "FRI", "SAT", "SUN"]
DAY_TO_IDX = {d: i for i, d in enumerate(DAYS)}


# Helper functions to roll 15 minutes to next half an hour
def time_to_slot(t):
    """
    Convert time to half-hour slot.
    Rolls 15 minutes forward to next half hour.
    """
    if isinstance(t, str):
        t = datetime.strptime(t, "%H:%M" if len(t) == 5 else "%H:%M:%S")

    #minutes = t.hour * 60 + t.minute

    # roll 15 min forward
    #if minutes % 30 == 15:
        #minutes += 15

    return t.hour #(minutes // 30) % 48 


df = pd.read_csv("bk_setpoint_calendar.csv")

zones = sorted(df["Zone"].unique())
print(zones)
assert len(zones) == 3, "Expected exactly 3 zones"

ZONE_TO_IDX = {z: i for i, z in enumerate(zones)}


calendar = np.full((7, 24, 3), DEFAULT_SETPOINT, dtype=np.float32)


for _, row in df.iterrows():
    day = row["Day"]
    zone = row["Zone"]
    setpoint = row["Setpoint"]

    start_time = row["Start_Time"]
    end_time = row["End_Time"]

    day_idx = DAY_TO_IDX[day]
    zone_idx = ZONE_TO_IDX[zone]

    start_slot = time_to_slot(start_time)
    end_slot = time_to_slot(end_time)

    # Handle overnight wrap
    if end_slot <= start_slot:
        calendar[day_idx, start_slot:, zone_idx] = setpoint
        calendar[day_idx, :end_slot, zone_idx] = setpoint
    else:
        calendar[day_idx, start_slot:end_slot, zone_idx] = setpoint

print("Calendar shape:", calendar.shape)

['Cor', 'Nor', 'Sou']
Calendar shape: (7, 24, 3)


## Custom Reward / Action

In [9]:
kelvin = lambda c: c + 273.15
normalize = lambda x: (2.0 * (x - 288) / (302 - 288) - 1.0)

In [10]:
class BoptestGymEnvCustomReward(BoptestGymEnv):
    def __init__(self, *args, calendar=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.calendar = calendar
    
    def reset(self, *args, **kwargs):
        obs, info = super().reset(**kwargs)
        self.last_obs = obs
        self.calendar = calendar
        return obs, info

    def step(self, action):
        obs, reward, terminated, truncated, info = super().step(action)
        self.last_obs = obs
        return obs, reward, terminated, truncated, info

    def get_expected_setpoint(self, time):
        hour = time / 3600.0
        day = int(hour // 24) % 7
        hour = int(hour % 24)
        return self.calendar[day, hour]

    def get_reward(self):
        # Compute BOPTEST core kpis
        kpis = requests.get('{0}/kpi/{1}'.format(self.url, self.testid)).json()['payload']

        obs = self.last_obs
        sp_cor, sp_nor, sp_sou = self.get_expected_setpoint(obs[0])
        
        tdis_cor = abs(obs[2] - normalize(kelvin(sp_cor)))
        tdis_nor = abs(obs[3] - normalize(kelvin(sp_nor)))
        tdis_sou = abs(obs[4] - normalize(kelvin(sp_sou)))


        tdis_tot = tdis_cor + tdis_nor + tdis_sou
        # todo: search for best reward function
        reward = - (tdis_tot + kpis['ener_tot']) 

        # Record current objective integrand for next evaluation
        self.objective_integrand = reward
        
        #logging
        self.reward_log_path = os.path.join("local_files", "logs", f"sac_5_action_wrapped.csv")
        os.makedirs(os.path.dirname(self.reward_log_path), exist_ok=True)
        if not os.path.exists(self.reward_log_path):
            with open(self.reward_log_path, "w", newline="") as f:
                writer = csv.writer(f)
                writer.writerow([
                    "time","outside_temp", "t_cor", "t_nor", "t_sou", "tdis_tot","ener_tot", "reward"
                ])
        
        with open(self.reward_log_path, "a", newline="") as f:
            csv.writer(f).writerow([
            obs[0], obs[1], obs[2], obs[3], obs[4],
            tdis_tot, kpis['ener_tot'], reward])
        return reward

In [11]:
class DeltaTempActionWrapper(gym.ActionWrapper):
    def __init__(self, 
                 env: Env, 
                 initial_setpoints=[291.15, 291.15, 291.15],  #18°C
                 Tmin=291.15,              # 18°C
                 Tmax=297.15,              # 24°C
        ):
        super().__init__(env)
        self.Tmin = Tmin
        self.Tmax = Tmax
        self.initial_setpoints = initial_setpoints
        
        assert isinstance(env.action_space, gym.spaces.Box), "RescaleAction expects a Box action space"
        assert np.all(np.isfinite(env.action_space.low)) and np.all(np.isfinite(env.action_space.high)), \
            "Action space must have finite bounds"

        self.action_space = gym.spaces.Box(
            low=-2.0, high=2.0, shape=env.action_space.shape, dtype=int
        )

        self.last_setpoints = self.initial_setpoints

    def reset(self, **kwargs):
        self.last_setpoints = self.initial_setpoints
        obs, info = super().reset(**kwargs)
        return obs, info
    
    def action(self, action):
        new_setpoints = [291.15, 291.15, 291.15]
        new_setpoints[0] = self.last_setpoints[0] + action[0]
        new_setpoints[0] = np.clip(new_setpoints[1], self.Tmin, self.Tmax)
        new_setpoints[1] = self.last_setpoints[1] + action[1]
        new_setpoints[1] = np.clip(new_setpoints[0], self.Tmin, self.Tmax)
        new_setpoints[2] = self.last_setpoints[2] + action[2]
        new_setpoints[2] = np.clip(new_setpoints[2], self.Tmin, self.Tmax)

        self.last_setpoints = new_setpoints
        
        #add the delta to the observed temp
        return new_setpoints

In [12]:
class AddLastSetpointWrapper(gym.ObservationWrapper):
    def __init__(self, env):
        super().__init__(env)
        old_space: Box = env.observation_space
        assert isinstance(old_space, Box), f"Expected Box, got {type(old_space)}"

        low  = np.concatenate([old_space.low,  np.array([291.0, 291.0, 291.0], dtype=int)]) #18 C
        high = np.concatenate([old_space.high, np.array([298.0, 298.0, 298.0], dtype=int)]) #24 C

        self.observation_space = Box(
            low=low,
            high=high,
            dtype=np.float32
        )

    def _get_last_setpoints(self):
        env = self.env
        while env:
            if hasattr(env, "last_setpoints"):
                return env.last_setpoints
            env = getattr(env, "env", None)
        raise RuntimeError("DeltaTempActionWrapper not found")
    
    def observation(self, obs: np.ndarray) -> np.ndarray:
        sp = self._get_last_setpoints()
        
        return np.concatenate([obs, sp])

    def _Tmin(self):
        env = self.env
        while env:
            if hasattr(env, "Tmin"):
                return env.Tmin
            env = getattr(env, "env", None)

    def _Tmax(self):
        env = self.env
        while env:
            if hasattr(env, "Tmax"):
                return env.Tmax
            env = getattr(env, "env", None)

## The Model Definition

In [14]:
start_cooling_day = 134 * 24 * 3600  # May 14 
end_cooling_day = 255 * 24 * 3600  # September 12

env = BoptestGymEnvCustomReward(
    url=url,
    testcase="multizone_office_simple_air",
    actions=[
        'hvac_oveZonSupCor_TZonHeaSet_u',
        'hvac_oveZonSupNor_TZonHeaSet_u',
        'hvac_oveZonSupSou_TZonHeaSet_u',
    ],
    observations={
        "time": (0,604800),
        "weaSta_reaWeaTDryBul_y": (258, 303), # (-15)/30 C
        "hvac_reaZonCor_TZon_y": (288, 302), # 15/29 C
        "hvac_reaZonNor_TZon_y": (288, 302), # 15/29 C
        "hvac_reaZonSou_TZon_y": (288, 302) # 15/29 C
    },
    random_start_time=False,
    start_time=5* 24* 3600, #TODO check which day the current FMU hols this is 5/1/2009 a monday
    #excluding_periods=[(start_cooling_day, end_cooling_day)],
    max_episode_length=7 * 24 * 3600,
    warmup_period=24 * 3600,
    step_period=1800,
    predictive_period=0,
    regressive_period=None,
    calendar = calendar
)

  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [15]:
env = DeltaTempActionWrapper(
    env,
    initial_setpoints=[291.15, 291.15, 291.15],
    Tmin=291.15,
    Tmax=297.15,
)

#env = AddLastSetpointWrapper(env)

In [16]:
from boptestGymEnv import NormalizedObservationWrapper
env = NormalizedObservationWrapper(env)

In [17]:
model = SAC(
        policy='MlpPolicy',
        env=env,
        verbose=1,
        learning_rate=1e-4,
        batch_size=1024,
        tau=0.005,
        gamma=0.99,
        tensorboard_log=log_path,
       seed=42,
    )

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [18]:
from stable_baselines3.common.callbacks import EvalCallback

steps_per_chunk = env.max_episode_length // env.step_period

eval_callback = EvalCallback(
        env,
        log_path="./logs/",
        eval_freq=25 * steps_per_chunk,
        deterministic=True,
        render=False
    )

In [19]:
episodes_no = 25 #can be 25 or 50 as the paper 
total_timesteps = 336 * episodes_no #336 = episode_length / step_period (604800 / 1800 = 336)
model.learn(total_timesteps= total_timesteps) 

Logging to local_files\Logs\SAC_5\SAC_8
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 336       |
|    ep_rew_mean     | -2.99e+05 |
| time/              |           |
|    episodes        | 4         |
|    fps             | 2         |
|    time_elapsed    | 671       |
|    total_timesteps | 1344      |
| train/             |           |
|    actor_loss      | 651       |
|    critic_loss     | 3.91e+05  |
|    ent_coef        | 0.939     |
|    ent_coef_loss   | 0.0222    |
|    learning_rate   | 0.0001    |
|    n_updates       | 1243      |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 336       |
|    ep_rew_mean     | -2.99e+05 |
| time/              |           |
|    episodes        | 8         |
|    fps             | 2         |
|    time_elapsed    | 1279      |
|    total_timesteps | 2688      |
| train/             |           |
|    actor_loss

<stable_baselines3.sac.sac.SAC at 0x1d614f635c0>

In [20]:
%tensorboard --logdir ./local_files/Logs --port 6088

Reusing TensorBoard on port 6088 (pid 15252), started 1:36:12 ago. (Use '!kill 15252' to kill it.)

In [21]:
env.stop()

In [22]:
model.save("local_files/Saved Models/sac_5_no_last_set")