In [1]:
%load_ext tensorboard
import tensorflow as tf

In [2]:
import csv
import datetime
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import requests
from IPython.display import clear_output
from stable_baselines3 import DQN

In [3]:
# Wipe this cell's output area; presumably meant to hide noisy import-time
# messages from the cells above — confirm it is still needed.
clear_output(wait=True)

In [4]:
# Hosted BOPTEST service endpoint (not referenced by the cells visible here).
url_api = "https://api.boptest.net"

# Endpoint handed to the environment below as `url`.
url = 'http://localhost:80'

In [5]:
sys.path.insert(0,'boptestGym')
from boptestGymEnv import BoptestGymEnv

In [6]:
class BoptestGymEnvCustomReward(BoptestGymEnv):
    """BOPTEST gym environment with a reward built from two core KPIs.

    Each call to :meth:`get_reward` fetches the KPIs of the running test case
    over HTTP, combines ``tdis_tot`` and ``ener_tot`` into one scalar and
    appends a row to a CSV log.

    Class attributes (override on a subclass or an instance to tune them):
        temp_coef: weight applied to ``tdis_tot``.
        ener_coef: weight applied to ``ener_tot``.
        reward_log_path: CSV file that receives one row per reward evaluation.
    """

    temp_coef = 1.0
    ener_coef = 1.0
    reward_log_path = os.path.join("local_files", "logs", "dqn_single_switch_6week.csv")

    # Column names written once, when the log file is first created.
    _REWARD_LOG_HEADER = ("timestamp", "tdis_tot", "ener_tot", "reward")

    def get_reward(self):
        """Compute the reward for the current step and log it.

        Returns:
            The scalar ``-(tdis_tot * temp_coef + ener_tot * ener_coef)``.

        Raises:
            requests.HTTPError: if the KPI endpoint answers with an error status.
        """
        # Compute BOPTEST core kpis
        response = requests.get('{0}/kpi/{1}'.format(self.url, self.testid))
        # Fail with the HTTP error itself rather than a confusing KeyError /
        # TypeError further down when the server did not return KPIs.
        response.raise_for_status()
        kpis = response.json()['payload']

        tdis_tot = kpis['tdis_tot']
        ener_tot = kpis['ener_tot']

        # todo: search for best reward function
        # NOTE(review): the '_tot' KPIs look like running totals, so this is the
        # negative cumulative value rather than a per-step increment — confirm
        # that this is the intended reward signal.
        reward = - ((tdis_tot * self.temp_coef) + (ener_tot * self.ener_coef))

        # Record current objective integrand for next evaluation
        self.objective_integrand = reward

        self._log_reward(tdis_tot, ener_tot, reward)
        return reward

    def _log_reward(self, tdis_tot, ener_tot, reward):
        """Append one timestamped row to the reward CSV, adding the header to a new file."""
        log_path = self.reward_log_path
        log_dir = os.path.dirname(log_path)
        if log_dir:  # a bare file name has no directory to create
            os.makedirs(log_dir, exist_ok=True)

        # Decide before opening: append mode creates the file if it is missing.
        needs_header = not os.path.exists(log_path)
        with open(log_path, "a", newline="") as f:
            writer = csv.writer(f)
            if needs_header:
                writer.writerow(self._REWARD_LOG_HEADER)
            writer.writerow([
                datetime.datetime.now().isoformat(),
                tdis_tot, ener_tot, reward])

In [7]:
# All values below are durations / offsets in seconds.
SECONDS_PER_DAY = 24 * 3600

# 47 days. NOTE(review): counted from Jan 1 00:00 this lands on Feb 17 00:00,
# while the original note said "Jan 1 -> Feb 15" — confirm which is intended.
feb15 = 47 * SECONDS_PER_DAY

# One-week episodes.
episode_length = 7 * SECONDS_PER_DAY

# Latest start time that still leaves room for a full episode before `feb15`.
max_start = feb15 - episode_length

In [8]:
# Build the custom-reward BOPTEST environment.
seconds_per_day = 24 * 3600
control_step = 1800

# Each observed point maps to a (lower, upper) pair — presumably the bounds of
# the observation space; confirm against BoptestGymEnv.
obs_bounds = {
    'reaTZon_y': (280., 310.),
    'weaSta_reaWeaTDryBul_y': (265., 303.),
    'weaSta_reaWeaHDirNor_y': (0., 862.),
}

env = BoptestGymEnvCustomReward(
    url=url,
    testcase='bestest_hydronic_heat_pump',
    actions=['oveHeaPumY_u'],
    observations=obs_bounds,
    random_start_time=True,
    start_time=1 * seconds_per_day,
    # Same value as `episode_length` from the previous cell (one week).
    max_episode_length=7 * seconds_per_day,
    # Everything from `max_start` to the end of the year is excluded.
    excluding_periods=[(max_start, 365 * seconds_per_day)],
    warmup_period=seconds_per_day,
    predictive_period=0,
    regressive_period=4 * control_step,
    step_period=control_step,
)

  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [9]:
from boptestGymEnv import NormalizedObservationWrapper
from boptestGymEnv import DiscretizedActionWrapper
# Wrap the environment twice: observations are normalized first, then the
# action space is discretized on top of that.
# NOTE: re-running this cell wraps the already-wrapped `env` again.
env = DiscretizedActionWrapper(
    NormalizedObservationWrapper(env),
    n_bins_act=1,  # presumably yields an on/off style action set — confirm in boptestGymEnv
)

In [10]:
import os
# Directory passed to DQN as `tensorboard_log`; the %tensorboard cell further
# down points at the same location.
log_path = os.path.join( "local_files", "Logs")

In [11]:
import torch

In [12]:
# Q-network settings forwarded to the policy: two hidden layers, ReLU activations.
policy_kwargs = {
    "net_arch": [64, 8],
    "activation_fn": torch.nn.ReLU,
}

# DQN hyperparameters, collected in one place so they are easy to scan and tune.
dqn_settings = {
    "verbose": 1,
    "gamma": 0.99,
    "learning_rate": 0.01,
    "batch_size": 64,
    "buffer_size": 20000,
    "learning_starts": 0,
    "train_freq": 1,
    "target_update_interval": 1000,
    "tau": 1.0,
    "gradient_steps": 1,
    # ε-greedy schedule: presumably ε moves from the initial to the final value
    # over this fraction of training — confirm in the SB3 DQN docs.
    "exploration_fraction": 0.1,
    "exploration_initial_eps": 1.0,
    "exploration_final_eps": 0.05,
    "policy_kwargs": policy_kwargs,
    "tensorboard_log": log_path,
}

model = DQN('MlpPolicy', env, **dqn_settings)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [None]:
# Number of one-week episodes to train for (can be 25 or 50 as in the paper).
episodes_no = 50

# episode_length / step_period = 604800 / 1800 = 336 control steps per episode.
steps_per_episode = 336
total_timesteps = steps_per_episode * episodes_no

model.learn(total_timesteps=total_timesteps)

Logging to local_files\Logs\DQN_31


In [None]:
# Save the trained agent. NOTE(review): the "50" in the file name is hard-coded
# and will not follow a change to `episodes_no` in the training cell.
model.save("dqn_switch_50")

In [None]:
# Presumably shuts down / releases the BOPTEST test case behind this
# environment — confirm against BoptestGymEnv.stop.
env.stop()

In [None]:
# Open TensorBoard on the directory that was given to DQN as `tensorboard_log`.
%tensorboard --logdir ./local_files/Logs --port 6006

In [None]:
import pickle
import pandas as pd

# Persist the replay buffer to disk through stable-baselines3.
buffer_path = os.path.join("local_files", "Buffer")
model.save_replay_buffer(buffer_path)

# Use the in-memory buffer that was just saved instead of un-pickling it again:
# this avoids depending on the exact file name SB3 chose on disk and skips a
# needless pickle.load.
buf = model.replay_buffer

# The buffer arrays are allocated for the full `buffer_size`; only the first
# `buf.size()` entries hold collected transitions. Exporting the whole array
# would append all-zero rows that were never experienced.
n_valid = buf.size()


def _as_rows(arr, n_rows):
    """Return the first ``n_rows`` entries of ``arr`` flattened to shape (n_rows, features)."""
    filled = arr[:n_rows]
    # Explicit width (instead of -1) keeps the reshape valid for an empty buffer.
    width = int(np.prod(filled.shape[1:]))
    return filled.reshape(n_rows, width)


obs = _as_rows(buf.observations, n_valid)
next_obs = _as_rows(buf.next_observations, n_valid)
actions = _as_rows(buf.actions, n_valid)
rewards = _as_rows(buf.rewards, n_valid)
dones = _as_rows(buf.dones, n_valid)

# Build dataframe: one row per stored transition.
# NOTE(review): the single "action"/"reward"/"done" column names assume each of
# those arrays flattens to exactly one column — confirm for multi-dimensional actions.
df = pd.DataFrame(
    np.hstack([obs, actions, rewards, dones, next_obs]),
    columns=[
        *[f"obs_{i}" for i in range(obs.shape[1])],
        "action",
        "reward",
        "done",
        *[f"next_obs_{i}" for i in range(next_obs.shape[1])]
    ]
)

# Save CSV
csv_path = "local_files/DQN_single_S_25.csv"
df.to_csv(csv_path, index=False)