In [1]:
%load_ext tensorboard

In [2]:
import tensorflow as tf
import numpy as np
import sys
import matplotlib.pyplot as plt
from IPython.display import clear_output
from stable_baselines3 import SAC
import requests
import datetime
import os
import gymnasium as gym
from gymnasium.spaces import Box
import pandas as pd
from gymnasium import Env

In [3]:
clear_output(wait=True)

try:
  !rm -rf boptestGym
except:
  pass
!git clone -b master https://github.com/ibpsa/project1-boptest-gym.git boptestGym

In [5]:
url = "http://localhost:80"
url_api = 'https://api.boptest.net'
test_case = "multizone_office_simple_air"
run_name = "SAC_" + datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
run_dir = os.path.join("local_files", "Saved Models", run_name)
os.makedirs(run_dir, exist_ok=True)
log_path = os.path.join('local_files', 'Logs', run_name)
model_path = os.path.join(run_dir, "SAC_model.zip")

In [6]:
sys.path.insert(0,'boptestGym')
from boptestGymEnv import BoptestGymEnv

## Gettin Setpoint information from CSV

In [8]:
from datetime import datetime
DEFAULT_SETPOINT = 18.0

DAYS = ["MON", "TUE", "WED", "THU", "FRI", "SAT", "SUN"]
DAY_TO_IDX = {d: i for i, d in enumerate(DAYS)}


# Helper functions to roll 15 minutes to next half an hour
def time_to_slot(t):
    """
    Convert time to half-hour slot.
    Rolls 15 minutes forward to next half hour.
    """
    if isinstance(t, str):
        t = datetime.strptime(t, "%H:%M" if len(t) == 5 else "%H:%M:%S")

    #minutes = t.hour * 60 + t.minute

    # roll 15 min forward
    #if minutes % 30 == 15:
        #minutes += 15

    return t.hour #(minutes // 30) % 48 


df = pd.read_csv("bk_setpoint_calendar.csv")

zones = sorted(df["Zone"].unique())
print(zones)
assert len(zones) == 3, "Expected exactly 3 zones"

ZONE_TO_IDX = {z: i for i, z in enumerate(zones)}


calendar = np.full((7, 24, 3), DEFAULT_SETPOINT, dtype=np.float32)


for _, row in df.iterrows():
    day = row["Day"]
    zone = row["Zone"]
    setpoint = row["Setpoint"]

    start_time = row["Start_Time"]
    end_time = row["End_Time"]

    day_idx = DAY_TO_IDX[day]
    zone_idx = ZONE_TO_IDX[zone]

    start_slot = time_to_slot(start_time)
    end_slot = time_to_slot(end_time)

    # Handle overnight wrap
    if end_slot <= start_slot:
        calendar[day_idx, start_slot:, zone_idx] = setpoint
        calendar[day_idx, :end_slot, zone_idx] = setpoint
    else:
        calendar[day_idx, start_slot:end_slot, zone_idx] = setpoint

print("Calendar shape:", calendar.shape)

['Cor', 'Nor', 'Sou']
Calendar shape: (7, 24, 3)


In [9]:
kelvin = lambda c: c + 273.15

## Custom Reward / Action

In [11]:
class BoptestGymEnvCustomReward(BoptestGymEnv):
    def __init__(self, *args, calendar=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.calendar = calendar
    
    def reset(self, *args, **kwargs):
        obs, info = super().reset(**kwargs)
        self.last_obs = obs
        self.calendar = calendar
        return obs, info

    def step(self, action):
        obs, reward, terminated, truncated, info = super().step(action)
        self.last_obs = obs
        return obs, reward, terminated, truncated, info

    def get_expected_setpoint(self, time):
        hour = time / 3600.0
        day = int(hour // 24) % 7
        hour = int(hour % 24)
        return self.calendar[day, hour]
    
    def get_reward(self):
        # Compute BOPTEST core kpis
        kpis = requests.get('{0}/kpi/{1}'.format(self.url, self.testid)).json()['payload']

        ener_rew = 0
        if 0< kpis['ener_tot'] <1:
            ener_rew = 1 - (kpis['ener_tot']*kpis['ener_tot'])
        else:
            ener_rew = - kpis['ener_tot'] * kpis['ener_tot']

            
        obs = self.last_obs
        sp_cor, sp_nor, sp_sou = self.get_expected_setpoint(obs[0])
        
        tdis_cor = abs(obs[2] - kelvin(sp_cor))
        tdis_nor = abs(obs[3] - kelvin(sp_nor))
        tdis_sou = abs(obs[4] - kelvin(sp_sou))


        tdis_tot = tdis_cor + tdis_nor + tdis_sou
        # todo: search for best reward function
        reward = - (tdis_tot + ener_rew) 

        # Record current objective integrand for next evaluation
        self.objective_integrand = reward
        return reward

In [12]:
class AddExpectedSetpointWrapper(gym.ObservationWrapper):
    def __init__(self, env, calendar):
        super().__init__(env)
        self.calendar = calendar
        old_space: Box = env.observation_space
        assert isinstance(old_space, Box), f"Expected Box, got {type(old_space)}"

        low  = np.concatenate([old_space.low,  np.array([291.0, 291.0, 291.0], dtype=int)]) #18 C
        high = np.concatenate([old_space.high, np.array([297.0, 297.0, 297.0], dtype=int)]) #23 C

        self.observation_space = Box(
            low=low,
            high=high,
            dtype=np.float32
        )

    def observation(self, obs: np.ndarray) -> np.ndarray:
        time = obs[0]

        hour = time / 3600.0
        day = int(hour // 24) % 7
        hour = int(hour % 24)

        setpoint_cor = self.calendar[day][hour][0]
        setpoint_nor = self.calendar[day][hour][1]
        setpoint_sou = self.calendar[day][hour][2]
        
        expected_setpoints = np.array([kelvin(setpoint_cor), kelvin(setpoint_nor), kelvin(setpoint_sou)], dtype = np.float32)

        return np.concatenate([obs, expected_setpoints], axis=-1)

In [13]:
class DeltaTimeWrapper(gym.ActionWrapper):
    def __init__(self, env: Env):
        super().__init__(env)
        assert isinstance(env.action_space, gym.spaces.Box), "RescaleAction expects a Box action space"
        assert np.all(np.isfinite(env.action_space.low)) and np.all(np.isfinite(env.action_space.high)), \
            "Action space must have finite bounds"

        
        self.action_space = gym.spaces.Box(
            low=-2.0, high=2.0, shape=env.action_space.shape, dtype=int
        )
        
    def action(self, action):
        obs = self.env.last_obs
        #add the delta to the observed temp
        return [action[0] / 2 + obs[2], action[1] / 2 + obs[3], action[2] / 2 + obs[4]]

## The Model Definition

In [15]:
start_cooling_day = 134 * 24 * 3600  # May 14 
end_cooling_day = 255 * 24 * 3600  # September 12

env = BoptestGymEnvCustomReward(
    url=url,
    testcase=test_case,
    actions=[
        'hvac_oveZonSupCor_TZonHeaSet_u',
        'hvac_oveZonSupNor_TZonHeaSet_u',
        'hvac_oveZonSupSou_TZonHeaSet_u',
    ],
    observations={
        "time": (0,604800),
        "weaSta_reaWeaTDryBul_y": (258, 303), # (-15)/30 C
        "hvac_reaZonCor_TZon_y": (288, 302), # 15/29 C
        "hvac_reaZonNor_TZon_y": (288, 302), # 15/29 C
        "hvac_reaZonSou_TZon_y": (288, 302) # 15/29 C
    },
    random_start_time=False,
    start_time            = 305*24*3600, #nov 1 like the paper
    max_episode_length    = 30 * 24*3600, #1 month testing period
    warmup_period=24 * 3600,
    step_period=1800,
    predictive_period=0,
    regressive_period=None,
    calendar = calendar
)

  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [16]:
env = AddExpectedSetpointWrapper(env, calendar)

In [17]:
env = DeltaTimeWrapper(env)

In [18]:
model = SAC('MlpPolicy', env, verbose=1, learning_rate=0.0003, gamma=0.99, batch_size=64,
                tensorboard_log=log_path)
SAC.load("local_files/testing/SAC_model.zip", env = env)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


Exception: code() argument 13 must be str, not int


<stable_baselines3.sac.sac.SAC at 0x1be1075baa0>

In [20]:
import requests

done = False
obs, _ = env.reset()
rows = []

from IPython.display import clear_output
while not done:
    # Clear the display output at each step
    clear_output(wait=True)
    # Compute control signal
    action, _ = model.predict(obs, deterministic=True)
    kpis = requests.get('{0}/kpi/{1}'.format(url, env.testid)).json()['payload']
    # if isinstance(action, (tuple, list, np.ndarray)):
    #     action = int(np.array(action).flatten()[0])
    # Print the current operative temperature and decided action
    print('-------------------------------------------------------------------')
    print(obs)
    print(action)
    print('-------------------------------------------------------------------')
    # Implement action
    rows.append({
        "time": obs[0],
        "T_out": obs[1],
        "T_cor": obs[2],
        "T_nor": obs[3],
        "T_sou": obs[4],
        "action_cor": action[0],
        "action_nor": action[1],
        "action_sou": action[2],
        "energy_kWh": kpis["ener_tot"],
        "discomfort": kpis["tdis_tot"]
    })
    obs, reward, terminated, truncated, info = env.step(action)  # send the action to the environment
    done = (terminated or truncated)

-------------------------------------------------------------------
[7200.       274.48     294.69717  293.3178   294.45572  291.15
  291.15     291.15   ]
[-2. -2. -2.]
-------------------------------------------------------------------


KeyboardInterrupt: 

In [22]:
import pandas as pd

df = pd.DataFrame(rows)

df.to_csv("local_files/testing/sac_5.csv", index=False)

In [24]:
env.stop()