In [1]:
%load_ext tensorboard

In [2]:
import tensorflow as tf
import numpy as np
import sys
import matplotlib.pyplot as plt
from IPython.display import clear_output
from stable_baselines3 import SAC
import requests
import datetime
import os
import gymnasium as gym
from gymnasium.spaces import Box
import pandas as pd
from gymnasium import Env

In [3]:
clear_output(wait=True)

try:
  !rm -rf boptestGym
except:
  pass
!git clone -b master https://github.com/ibpsa/project1-boptest-gym.git boptestGym

In [5]:
url = "http://localhost:80"
url_api = 'https://api.boptest.net'
test_case = "multizone_office_simple_air"
run_name = "SAC_" + datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
run_dir = os.path.join("local_files", "Saved Models", run_name)
os.makedirs(run_dir, exist_ok=True)
log_path = os.path.join('local_files', 'Logs', run_name)
model_path = os.path.join(run_dir, "SAC_model.zip")

In [6]:
sys.path.insert(0,'boptestGym')
from boptestGymEnv import BoptestGymEnv

## Gettin Setpoint information from CSV

In [8]:
from datetime import datetime
DEFAULT_SETPOINT = 18.0

DAYS = ["MON", "TUE", "WED", "THU", "FRI", "SAT", "SUN"]
DAY_TO_IDX = {d: i for i, d in enumerate(DAYS)}


# Helper functions to roll 15 minutes to next half an hour
def time_to_slot(t):
    """
    Convert time to half-hour slot.
    Rolls 15 minutes forward to next half hour.
    """
    if isinstance(t, str):
        t = datetime.strptime(t, "%H:%M" if len(t) == 5 else "%H:%M:%S")

    #minutes = t.hour * 60 + t.minute

    # roll 15 min forward
    #if minutes % 30 == 15:
        #minutes += 15

    return t.hour #(minutes // 30) % 48 


df = pd.read_csv("bk_setpoint_calendar.csv")

zones = sorted(df["Zone"].unique())
print(zones)
assert len(zones) == 3, "Expected exactly 3 zones"

ZONE_TO_IDX = {z: i for i, z in enumerate(zones)}


calendar = np.full((7, 24, 3), DEFAULT_SETPOINT, dtype=np.float32)


for _, row in df.iterrows():
    day = row["Day"]
    zone = row["Zone"]
    setpoint = row["Setpoint"]

    start_time = row["Start_Time"]
    end_time = row["End_Time"]

    day_idx = DAY_TO_IDX[day]
    zone_idx = ZONE_TO_IDX[zone]

    start_slot = time_to_slot(start_time)
    end_slot = time_to_slot(end_time)

    # Handle overnight wrap
    if end_slot <= start_slot:
        calendar[day_idx, start_slot:, zone_idx] = setpoint
        calendar[day_idx, :end_slot, zone_idx] = setpoint
    else:
        calendar[day_idx, start_slot:end_slot, zone_idx] = setpoint

print("Calendar shape:", calendar.shape)

['Cor', 'Nor', 'Sou']
Calendar shape: (7, 24, 3)


In [9]:
kelvin = lambda c: c + 273.15
normalize = lambda x: (2.0 * (x - 288) / (302 - 288) - 1.0)

## Custom Reward / Action

In [11]:
class BoptestGymEnvCustomReward(BoptestGymEnv):
    def __init__(self, *args, calendar=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.calendar = calendar
    
    def reset(self, *args, **kwargs):
        obs, info = super().reset(**kwargs)
        self.last_obs = obs
        self.calendar = calendar
        return obs, info

    def step(self, action):
        obs, reward, terminated, truncated, info = super().step(action)
        self.last_obs = obs
        return obs, reward, terminated, truncated, info

    def get_expected_setpoint(self, time):
        hour = time / 3600.0
        day = int(hour // 24) % 7
        hour = int(hour % 24)
        return self.calendar[day, hour]

    def get_reward(self):
        # Compute BOPTEST core kpis
        kpis = requests.get('{0}/kpi/{1}'.format(self.url, self.testid)).json()['payload']

        obs = self.last_obs
        sp_cor, sp_nor, sp_sou = self.get_expected_setpoint(obs[0])
        
        tdis_cor = abs(obs[2] - normalize(kelvin(sp_cor)))
        tdis_nor = abs(obs[3] - normalize(kelvin(sp_nor)))
        tdis_sou = abs(obs[4] - normalize(kelvin(sp_sou)))


        tdis_tot = tdis_cor + tdis_nor + tdis_sou
        # todo: search for best reward function
        reward = - (tdis_tot + kpis['ener_tot']) 

        # Record current objective integrand for next evaluation
        self.objective_integrand = reward
        return reward

In [12]:
class DeltaTempActionWrapper(gym.ActionWrapper):
    def __init__(self, 
                 env: Env, 
                 initial_setpoints=[291.15, 291.15, 291.15],  #18°C
                 Tmin=291.15,              # 18°C
                 Tmax=297.15,              # 24°C
        ):
        super().__init__(env)
        self.Tmin = Tmin
        self.Tmax = Tmax
        self.initial_setpoints = initial_setpoints
        
        assert isinstance(env.action_space, gym.spaces.Box), "RescaleAction expects a Box action space"
        assert np.all(np.isfinite(env.action_space.low)) and np.all(np.isfinite(env.action_space.high)), \
            "Action space must have finite bounds"

        self.action_space = gym.spaces.Box(
            low=-2.0, high=2.0, shape=env.action_space.shape, dtype=int
        )

        self.last_setpoints = self.initial_setpoints

    def reset(self, **kwargs):
        self.last_setpoints = self.initial_setpoints
        obs, info = super().reset(**kwargs)
        return obs, info
    
    def action(self, action):
        new_setpoints = [291.15, 291.15, 291.15]
        new_setpoints[0] = self.last_setpoints[0] + action[0]
        new_setpoints[0] = np.clip(new_setpoints[1], self.Tmin, self.Tmax)
        new_setpoints[1] = self.last_setpoints[1] + action[1]
        new_setpoints[1] = np.clip(new_setpoints[0], self.Tmin, self.Tmax)
        new_setpoints[2] = self.last_setpoints[2] + action[2]
        new_setpoints[2] = np.clip(new_setpoints[2], self.Tmin, self.Tmax)

        self.last_setpoints = new_setpoints
        
        #add the delta to the observed temp
        return new_setpoints

## The Model Definition

In [14]:
start_cooling_day = 134 * 24 * 3600  # May 14 
end_cooling_day = 255 * 24 * 3600  # September 12

env = BoptestGymEnvCustomReward(
    url=url,
    testcase=test_case,
    actions=[
        'hvac_oveZonSupCor_TZonHeaSet_u',
        'hvac_oveZonSupNor_TZonHeaSet_u',
        'hvac_oveZonSupSou_TZonHeaSet_u',
    ],
    observations={
        "time": (0,604800),
        "weaSta_reaWeaTDryBul_y": (258, 303), # (-15)/30 C
        "hvac_reaZonCor_TZon_y": (288, 302), # 15/29 C
        "hvac_reaZonNor_TZon_y": (288, 302), # 15/29 C
        "hvac_reaZonSou_TZon_y": (288, 302) # 15/29 C
    },
    random_start_time=False,
    start_time            = 305*24*3600, #nov 1 like the paper
    max_episode_length    = 30 * 24*3600, #1 month testing period
    warmup_period=24 * 3600,
    step_period=1800,
    predictive_period=0,
    regressive_period=None,
    calendar = calendar
)

  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [15]:
env = DeltaTempActionWrapper(
    env,
    initial_setpoints=[291.15, 291.15, 291.15],
    Tmin=291.15,
    Tmax=297.15,
)

#env = AddLastSetpointWrapper(env)

In [16]:
from boptestGymEnv import NormalizedObservationWrapper
env = NormalizedObservationWrapper(env)

In [17]:
model = SAC.load("local_files/Saved Models/sac_5_no_last_set.zip")

In [18]:
import requests

done = False
obs, _ = env.reset()
rows = []

from IPython.display import clear_output
while not done:
    # Clear the display output at each step
    clear_output(wait=True)
    # Compute control signal
    action, _ = model.predict(obs, deterministic=True)
    kpis = requests.get('{0}/kpi/{1}'.format(url, env.testid)).json()['payload']
    # if isinstance(action, (tuple, list, np.ndarray)):
    #     action = int(np.array(action).flatten()[0])
    # Print the current operative temperature and decided action
    print('-------------------------------------------------------------------')
    print(obs)
    print(action)
    print('-------------------------------------------------------------------')
    # Implement action
    rows.append({
        "time": obs[0],
        "T_out": obs[1],
        "T_cor": obs[2],
        "T_nor": obs[3],
        "T_sou": obs[4],
        "action_cor": action[0],
        "action_nor": action[1],
        "action_sou": action[2],
        "energy_kWh": kpis["ener_tot"],
        "discomfort": kpis["tdis_tot"]
    })
    obs, reward, terminated, truncated, info = env.step(action)  # send the action to the environment
    done = (terminated or truncated)

-------------------------------------------------------------------
[ 0.7083334  -0.0946669  -0.27713013 -0.38388932 -0.41566247]
[ 0.90903974  0.49414253 -1.19150186]
-------------------------------------------------------------------


In [19]:
import pandas as pd

df = pd.DataFrame(rows)

df.to_csv("local_files/testing/sac_5.csv", index=False)

In [20]:
env.stop()