In [1]:
import numpy as np
import sys
import matplotlib.pyplot as plt
from IPython.display import clear_output
from stable_baselines3 import DQN
import requests
import csv
import datetime 
from datetime import datetime
import pandas as pd

In [2]:
clear_output(wait=True)

In [3]:
url_api = 'https://api.boptest.net'
url = "http://localhost:80"

In [4]:
sys.path.insert(0,'boptestGym')
from boptestGymEnv import BoptestGymEnv

In [5]:
from datetime import datetime
DEFAULT_SETPOINT = 18.0

DAYS = ["MON", "TUE", "WED", "THU", "FRI", "SAT", "SUN"]
DAY_TO_IDX = {d: i for i, d in enumerate(DAYS)}


# Helper functions to roll 15 minutes to next half an hour
def time_to_slot(t):
    """
    Convert time to half-hour slot.
    Rolls 15 minutes forward to next half hour.
    """
    if isinstance(t, str):
        t = datetime.strptime(t, "%H:%M" if len(t) == 5 else "%H:%M:%S")

    #minutes = t.hour * 60 + t.minute

    # roll 15 min forward
    #if minutes % 30 == 15:
        #minutes += 15

    return t.hour #(minutes // 30) % 48 


df = pd.read_csv("bk_setpoint_calendar.csv")

zones = sorted(df["Zone"].unique())
print(zones)
assert len(zones) == 3, "Expected exactly 3 zones"

ZONE_TO_IDX = {z: i for i, z in enumerate(zones)}


calendar = np.full((7, 24, 3), DEFAULT_SETPOINT, dtype=np.float32)


for _, row in df.iterrows():
    day = row["Day"]
    zone = row["Zone"]
    setpoint = row["Setpoint"]

    start_time = row["Start_Time"]
    end_time = row["End_Time"]

    day_idx = DAY_TO_IDX[day]
    zone_idx = ZONE_TO_IDX[zone]

    start_slot = time_to_slot(start_time)
    end_slot = time_to_slot(end_time)

    # Handle overnight wrap
    if end_slot <= start_slot:
        calendar[day_idx, start_slot:, zone_idx] = setpoint
        calendar[day_idx, :end_slot, zone_idx] = setpoint
    else:
        calendar[day_idx, start_slot:end_slot, zone_idx] = setpoint

print("Calendar shape:", calendar.shape)

['Cor', 'Nor', 'Sou']
Calendar shape: (7, 24, 3)


In [6]:
kelvin = lambda c: c + 273.15

In [7]:
class BoptestGymEnvCustomReward(BoptestGymEnv):
    def __init__(self, *args, calendar=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.calendar = calendar
    
    def reset(self, *args, **kwargs):
        obs, info = super().reset(**kwargs)
        self.last_obs = obs
        self.calendar = calendar
        return obs, info

    def step(self, action):
        obs, reward, terminated, truncated, info = super().step(action)
        self.last_obs = obs
        return obs, reward, terminated, truncated, info

    def get_expected_setpoint(self, time):
        hour = time / 3600.0
        day = int(hour // 24) % 7
        hour = int(hour % 24)
        return self.calendar[day, hour]
    
    def get_reward(self):
         # Compute BOPTEST core kpis
        kpis = requests.get('{0}/kpi/{1}'.format(self.url, self.testid)).json()['payload']

        ener_rew = 0
        ener_tot = kpis['ener_tot']
        if 0< kpis['ener_tot'] <1:
            ener_rew = 1 - (kpis['ener_tot']*kpis['ener_tot'])
        else:
            ener_rew = - kpis['ener_tot'] * kpis['ener_tot']

            
        obs = self.last_obs
        sp_cor, sp_nor, sp_sou = self.get_expected_setpoint(obs[0])
        
        tdis_cor = abs(obs[2] - kelvin(sp_cor))
        tdis_nor = abs(obs[3] - kelvin(sp_nor))
        tdis_sou = abs(obs[4] - kelvin(sp_sou))


        tdis_tot = tdis_cor + tdis_nor + tdis_sou
        # todo: search for best reward function
        reward = - (tdis_tot + ener_rew) 

        # Record current objective integrand for next evaluation
        self.objective_integrand = reward

        self.reward_log_path = os.path.join("local_files", "logs", f"dqn_5_temp.csv")
        os.makedirs(os.path.dirname(self.reward_log_path), exist_ok=True)
        if not os.path.exists(self.reward_log_path):
            with open(self.reward_log_path, "w", newline="") as f:
                writer = csv.writer(f)
                writer.writerow([
                    "timestamp","tdis_tot","ener_tot", "reward"
                ])
        
        with open(self.reward_log_path, "a", newline="") as f:
            csv.writer(f).writerow([
            datetime.now().isoformat(),
            tdis_tot, ener_tot, reward])
        return reward

In [8]:
import gymnasium as gym
from gymnasium.spaces import Box
import pandas as pd
from gymnasium import Env

In [9]:
class AddExpectedSetpointWrapper(gym.ObservationWrapper):
    def __init__(self, env, calendar):
        super().__init__(env)
        self.calendar = calendar
        old_space: Box = env.observation_space
        assert isinstance(old_space, Box), f"Expected Box, got {type(old_space)}"

        low  = np.concatenate([old_space.low,  np.array([291.0, 291.0, 291.0], dtype=int)]) #18 C
        high = np.concatenate([old_space.high, np.array([297.0, 297.0, 297.0], dtype=int)]) #23 C

        self.observation_space = Box(
            low=low,
            high=high,
            dtype=np.float32
        )

    def observation(self, obs: np.ndarray) -> np.ndarray:
        time = obs[0]

        hour = time / 3600.0
        day = int(hour // 24) % 7
        hour = int(hour % 24)

        setpoint_cor = self.calendar[day][hour][0]
        setpoint_nor = self.calendar[day][hour][1]
        setpoint_sou = self.calendar[day][hour][2]
        
        expected_setpoints = np.array([kelvin(setpoint_cor), kelvin(setpoint_nor), kelvin(setpoint_sou)], dtype = np.float32)

        return np.concatenate([obs, expected_setpoints], axis=-1)

In [10]:
class DeltaTimeWrapper(gym.ActionWrapper):
    def __init__(self, env: Env):
        super().__init__(env)
        assert isinstance(env.action_space, gym.spaces.Box), "RescaleAction expects a Box action space"
        assert np.all(np.isfinite(env.action_space.low)) and np.all(np.isfinite(env.action_space.high)), \
            "Action space must have finite bounds"

        
        self.action_space = gym.spaces.Box(
            low=-2.0, high=2.0, shape=env.action_space.shape, dtype=int
        )
        
    def action(self, action):
        obs = self.env.last_obs
        #add the delta to the observed temp
        return [action[0] / 2 + obs[2], action[1] / 2 + obs[3], action[2] / 2 + obs[4]]

In [11]:
feb15 = 47 * 24*3600                    # Jan 1 → Feb 15
episode_length = 7 * 24*3600            # 1 week episodes
max_start = feb15 - episode_length      # last valid start time

In [12]:
start_cooling_day = 134 * 24 * 3600  # May 14 
end_cooling_day = 255 * 24 * 3600  # September 12

env = BoptestGymEnvCustomReward(
                                url=url,
                                testcase="multizone_office_simple_air",
                                actions=[
                                        'hvac_oveZonSupCor_TZonHeaSet_u',
                                        'hvac_oveZonSupNor_TZonHeaSet_u',
                                        'hvac_oveZonSupSou_TZonHeaSet_u',
                                        ],
                                observations={
                                        "time": (0,604800),
                                        "weaSta_reaWeaTDryBul_y": (258, 303), # (-15)/30 C
                                        "hvac_reaZonCor_TZon_y": (288, 302), # 15/29 C
                                        "hvac_reaZonNor_TZon_y": (288, 302), # 15/29 C
                                        "hvac_reaZonSou_TZon_y": (288, 302) # 15/29 C
                                    },
                                random_start_time=False,
                                start_time=5* 24* 3600, #TODO check which day the current FMU hols this is 5/1/2009 a monday
                                #excluding_periods=[(start_cooling_day, end_cooling_day)],
                                max_episode_length=7 * 24 * 3600,
                                warmup_period=24 * 3600,
                                step_period=1800,
                                predictive_period=0,
                                regressive_period=None,
                                calendar = calendar
)

  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [13]:
env = AddExpectedSetpointWrapper(env, calendar)

In [14]:
env = DeltaTimeWrapper(env)

In [15]:
from boptestGymEnv import NormalizedObservationWrapper
from boptestGymEnv import DiscretizedActionWrapper
#env = NormalizedObservationWrapper(env)
env = DiscretizedActionWrapper(env,n_bins_act=10)

In [16]:
import os
log_path = os.path.join( "local_files", "Logs")

In [17]:
import torch

In [18]:
policy_kwargs = dict(
    net_arch=[64, 8],  
    activation_fn=torch.nn.ReLU
)

model = DQN('MlpPolicy',
            env,
            verbose=1,
            gamma=0.99,
            learning_rate=0.01,
            batch_size=64,
            buffer_size=20000,
            learning_starts=0,
            train_freq=1,
            target_update_interval=1000,
            tau=1.0,
            gradient_steps=1,
            exploration_fraction=0.1,          #ε-greedy?
            exploration_initial_eps=1.0,
            exploration_final_eps=0.05,
            policy_kwargs=policy_kwargs,
            tensorboard_log= log_path)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [19]:
episodes_no = 25 #can be 25 or 50 as the paper 
total_timesteps = 336 * episodes_no #336 = episode_length / step_period (604800 / 1800 = 336)
model.learn(total_timesteps= total_timesteps) 

Logging to local_files\Logs\DQN_25
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 336       |
|    ep_rew_mean      | -1.38e+04 |
|    exploration_rate | 0.05      |
| time/               |           |
|    episodes         | 4         |
|    fps              | 2         |
|    time_elapsed     | 562       |
|    total_timesteps  | 1344      |
| train/              |           |
|    learning_rate    | 0.01      |
|    loss             | 1.77e+03  |
|    n_updates        | 1343      |
-----------------------------------
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 336       |
|    ep_rew_mean      | -1.35e+04 |
|    exploration_rate | 0.05      |
| time/               |           |
|    episodes         | 8         |
|    fps              | 2         |
|    time_elapsed     | 1070      |
|    total_timesteps  | 2688      |
| train/              |           |
|    learning_rate    | 0.01 

<stable_baselines3.dqn.dqn.DQN at 0x1e887b07770>

In [20]:
model.save("dqn_5_temp")

In [21]:
env.stop()

In [22]:
%tensorboard --logdir ./local_files/Logs --port 6007

UsageError: Line magic function `%tensorboard` not found.
