In [1]:
import os
import sys

import numpy as np
import requests
import torch
from stable_baselines3 import DQN
from stable_baselines3 import SAC

In [2]:
# Put the local `boptestGym` folder first on the import path. This has to run
# before the import below (assumes `boptestGymEnv.py` lives in ./boptestGym).
sys.path.insert(0,'boptestGym')
from boptestGymEnv import BoptestGymEnv

In [3]:
# Remote BOPTEST endpoint (presumably the public service); not referenced by
# the cells visible in this notebook.
url_api = "https://api.boptest.net"

# BOPTEST endpoint the environment and the KPI queries below talk to.
url = "http://localhost:80"

In [4]:
import os

# Both log folders sit under "local_files"; build the two paths in one pass.
log_path, log_boptest_path = (
    os.path.join("local_files", leaf) for leaf in ("Logs", "Logs_Test")
)

# Tested period: November 1 to December 31 (same settings as the paper).

In [6]:
class BoptestGymEnvCustomReward(BoptestGymEnv):
    """BOPTEST Gym environment with a custom KPI-based reward.

    The reward is the negated weighted sum of the ``tdis_tot`` and
    ``ener_tot`` KPIs that the BOPTEST server reports for the running test.
    """

    def get_reward(self):
        """Query the current KPIs and turn them into a scalar reward.

        Returns
        -------
        float
            ``-(temp_coef * tdis_tot + ener_coef * ener_tot)``.

        Raises
        ------
        requests.HTTPError
            If the KPI request comes back with an error status.
        """
        # Compute BOPTEST core kpis
        response = requests.get('{0}/kpi/{1}'.format(self.url, self.testid))
        # Fail loudly on an HTTP error instead of tripping over a missing
        # 'payload' key further down.
        response.raise_for_status()
        kpis = response.json()['payload']

        ener_coef = 1.0
        temp_coef = 1.0

        # todo: search for best reward function
        reward = - ((kpis['tdis_tot']*temp_coef) + (kpis['ener_tot']*ener_coef))

        # Record current objective integrand for next evaluation
        self.objective_integrand = reward
        return reward

In [7]:
# Evaluation environment with a fixed (non-random) start time.
env = BoptestGymEnvCustomReward(
    url=url,
    testcase='bestest_hydronic_heat_pump',
    log_dir=log_boptest_path,
    # Single control input.
    actions=['oveHeaPumY_u'],
    # Observed signals, each with a pair of bounds (presumably lower, upper).
    observations={
        'reaTZon_y': (273.0, 324.0),
        'weaSta_reaWeaTDryBul_y': (263.0, 302.0),
        'weaSta_reaWeaHDirNor_y': (0.0, 862.0),
    },
    # Test window.
    random_start_time=False,
    start_time=305 * 24 * 3600,          # nov 1 like the paper
    max_episode_length=61 * 24 * 3600,   # 2 month testing period
    warmup_period=24 * 3600,
    # Step size and history/forecast horizons.
    step_period=1800,
    regressive_period=4 * 1800,
    predictive_period=0,
)

  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [8]:
from boptestGymEnv import DiscretizedActionWrapper, NormalizedObservationWrapper

# Wrap inside-out: the observation wrapper is applied first, then the action
# wrapper (with n_bins_act=1) goes on top of it.
env = DiscretizedActionWrapper(NormalizedObservationWrapper(env), n_bins_act=1)

In [9]:
# Load the saved DQN agent that the evaluation loop below queries for actions.
model= DQN.load("local_files/Saved Models/dqn27_switch_50.zip")

In [10]:
import requests
from IPython.display import clear_output

# Length of the test window in control steps: 61 days of 1800 s steps.
n_steps = 2928

done = False
obs, _ = env.reset()
rows = []

i = 0
while i <= n_steps:
    # Clear the display output at each step
    clear_output(wait=True)

    # Compute control signal
    action, _ = model.predict(obs, deterministic=True)
    if isinstance(action, (tuple, list, np.ndarray)):
        action = int(np.array(action).flatten()[0])

    # KPIs reported by the BOPTEST server for the current state. `unwrapped`
    # reaches the base env directly rather than relying on the wrappers
    # forwarding the `testid` attribute.
    kpi_response = requests.get('{0}/kpi/{1}'.format(url, env.unwrapped.testid))
    kpi_response.raise_for_status()
    kpis = kpi_response.json()['payload']

    # Print the current observation and decided action
    print('-------------------------------------------------------------------')
    print("obs: %s"%obs)
    print("act: %s"%action)
    print("%s /%s" % (i, n_steps))
    print('-------------------------------------------------------------------')

    # Log the state, the action chosen in it, and the KPIs reached so far.
    # NOTE: obs comes from the wrapped env, so these are the wrapper's
    # (normalized) values; the first three entries are assumed to follow the
    # order of the `observations` dict passed to the env.
    rows.append({
        "T_zone": obs[0],
        "t_out": obs[1],
        "Psol_Wm2": obs[2],
        "action": action,
        "energy_kWh": kpis["ener_tot"],
        "discomfort": kpis["tdis_tot"]
    })
    i += 1

    # The final state has just been logged; do not step a finished episode.
    if done:
        break

    # Implement action
    obs, reward, terminated, truncated, info = env.step(action)  # send the action to the environment
    done = (terminated or truncated)

-------------------------------------------------------------------
obs: [ 0.3529855  -0.47948748 -1.          0.3530525   0.35334682  0.35389745
  0.3546753  -0.49145353 -0.5051285  -0.51538396 -0.525641   -1.
 -1.         -1.         -1.        ]
act: 1
2928 /2928
-------------------------------------------------------------------


In [11]:
import pandas as pd

results_path = "local_files/testing/dqn_single_switch_27_50.csv"

# Create the target folder first so the results of a long simulation run are
# not lost to a missing directory.
os.makedirs(os.path.dirname(results_path), exist_ok=True)

df = pd.DataFrame(rows)
df.to_csv(results_path, index=False)

In [12]:
# Stop the test case (presumably releasing it on the BOPTEST server). Going
# through `unwrapped` calls the base env's method directly instead of relying
# on the wrappers forwarding the attribute.
env.unwrapped.stop()