In [1]:
import numpy as np
import os
import sys
from stable_baselines3 import SAC
from stable_baselines3 import DQN
import torch

In [2]:
sys.path.insert(0,'boptestGym')
from boptestGymEnv import BoptestGymEnv

In [3]:
url_api = 'https://api.boptest.net'
url = "http://localhost:80"

In [4]:
import os
log_path = os.path.join( "local_files", "Logs")

# tested period: nov 1 to december 31 paper settings

In [6]:
env = BoptestGymEnv(url                   = url,
                    testcase              = 'bestest_hydronic_heat_pump',
                    actions               = ['oveHeaPumY_u'],
                    observations          = {'reaTZon_y':(280.,310.),
                                             'weaSta_reaWeaTDryBul_y':(265.,303.),
                                             'weaSta_reaWeaHDirNor_y':(0.,862.)
                                            },
                    random_start_time     = False,
                    start_time            = 305*24*3600, #nov 1 like the paper
                    max_episode_length    = 61 * 24*3600, #2 month testing period
                    warmup_period         = 24*3600,
                    predictive_period     = 0,
                    regressive_period     = 4*1800,
                    step_period           = 1800)

  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [7]:
from boptestGymEnv import NormalizedObservationWrapper
from boptestGymEnv import DiscretizedActionWrapper
env = NormalizedObservationWrapper(env)
env = DiscretizedActionWrapper(env,n_bins_act=1)

In [8]:
policy_kwargs = dict(
    net_arch=[64, 8],  
    activation_fn=torch.nn.ReLU
)


model = DQN('MlpPolicy', env, verbose=1, gamma=0.99, learning_rate=0.01, batch_size=64, buffer_size=20000,
            learning_starts=0, train_freq=1, target_update_interval=1000, tau=1.0, gradient_steps=1,
            exploration_fraction=0.1, exploration_final_eps=0.05, policy_kwargs=policy_kwargs,
            tensorboard_log= log_path)
DQN.load("local_files/testing/dqn_baseline.zip", env = env)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


<stable_baselines3.dqn.dqn.DQN at 0x2b2c42c1a30>

In [9]:
import requests

done = False
obs, _ = env.reset()
rows = []

from IPython.display import clear_output
while not done:
    # Clear the display output at each step
    clear_output(wait=True)
    # Compute control signal
    action, _ = model.predict(obs, deterministic=True)
    kpis = requests.get('{0}/kpi/{1}'.format(url, env.testid)).json()['payload']
    if isinstance(action, (tuple, list, np.ndarray)):
        action = int(np.array(action).flatten()[0])
    # Print the current operative temperature and decided action
    print('-------------------------------------------------------------------')
    print(obs)
    print(action)
    print('-------------------------------------------------------------------')
    # Implement action
    rows.append({
        "T_zone": obs[0],
        "t_out": obs[1],
        "Psol_Wm2": obs[2],
        "action": action,
        "energy_kWh": kpis["ener_tot"],
        "discomfort": kpis["tdis_tot"]
    })
    obs, reward, terminated, truncated, info = env.step(action)  # send the action to the environment
    done = (terminated or truncated)

-------------------------------------------------------------------
[-1.0936483  -0.58333385 -1.         -1.0925679  -1.091217   -1.089618
 -1.0877767  -0.5973687  -0.60789406 -0.6184211  -0.628948   -1.
 -1.         -1.         -1.        ]
1
-------------------------------------------------------------------


In [10]:
import pandas as pd

df = pd.DataFrame(rows)

df.to_csv("local_files/testing/dqn_single_switch_25_2.csv", index=False)

In [11]:
env.stop()