In [1]:
!pip install stable-baselines3==2.0.0 
!pip install numpy
!pip install tensorflow
%load_ext tensorboard
import tensorflow as tf



In [2]:
import numpy as np
import sys
import matplotlib.pyplot as plt
from IPython.display import clear_output
from stable_baselines3 import SAC
import requests
import csv
import torch
import datetime

In [3]:
clear_output(wait=True)

In [4]:
url_api = 'https://api.boptest.net'
url = "http://localhost:80"

In [5]:
sys.path.insert(0,'boptestGym')
from boptestGymEnv import BoptestGymEnv

In [18]:
class BoptestGymEnvCustomReward(BoptestGymEnv):
    def get_reward(self):
        # Compute BOPTEST core kpis
        kpis = requests.get('{0}/kpi/{1}'.format(self.url, self.testid)).json()['payload']

        ener_coef = 1.0
        temp_coef = 1.0
        
        # todo: search for best reward function
        reward = - ((kpis['tdis_tot']*temp_coef) + (kpis['ener_tot']*ener_coef))

        # Record current objective integrand for next evaluation
        self.objective_integrand = reward

        self.reward_log_path = os.path.join("local_files", "logs", f"sac_single_temp.csv")
        os.makedirs(os.path.dirname(self.reward_log_path), exist_ok=True)
        if not os.path.exists(self.reward_log_path):
            with open(self.reward_log_path, "w", newline="") as f:
                writer = csv.writer(f)
                writer.writerow([
                    "timestamp","tdis_tot","ener_tot", "reward"
                ])
        
        with open(self.reward_log_path, "a", newline="") as f:
            csv.writer(f).writerow([
            datetime.datetime.now().isoformat(),
            kpis['tdis_tot'], kpis['ener_tot'], reward])
        return reward

In [20]:
feb15 = 47 * 24*3600                    # Jan 1 â†’ Feb 15
episode_length = 7 * 24*3600            # 1 week episodes
max_start = feb15 - episode_length      # last valid start time

In [22]:
# Instantiate environment
env = BoptestGymEnvCustomReward(url       = url,
                    testcase              = 'bestest_hydronic_heat_pump',
                    actions               = ['oveTSet_u'],
                    observations          = {'reaTZon_y':(280.,310.),
                                             'weaSta_reaWeaTDryBul_y':(265.,303.),
                                             'weaSta_reaWeaHDirNor_y':(0.,862.)
                                            },
                    random_start_time     = True,
                    start_time            = 1*24*3600,
                    max_episode_length    = 7 * 24*3600 ,
                    excluding_periods     = [(max_start, 365*24*3600)],
                    warmup_period         = 24*3600,
                    predictive_period     = 0,
                    regressive_period     = 4*1800,
                    step_period           = 1800)

  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [24]:
import os
log_path = os.path.join( "local_files", "Logs")

In [26]:
model = SAC(
        policy='MlpPolicy',
        env=env,
        verbose=1,
        learning_rate=1e-4,
        batch_size=1024,
        tau=0.005,
        gamma=0.99,
        tensorboard_log=log_path,
       seed=42,
    )

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [28]:
episodes_no = 25 #can be 25 or 50 as the paper 
total_timesteps = 336 * episodes_no #336 = episode_length / step_period (604800 / 1800 = 336)
model.learn(total_timesteps= total_timesteps) 

Logging to local_files\Logs\SAC_3
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 336       |
|    ep_rew_mean     | -9.45e+04 |
| time/              |           |
|    episodes        | 4         |
|    fps             | 12        |
|    time_elapsed    | 110       |
|    total_timesteps | 1344      |
| train/             |           |
|    actor_loss      | 1.38e+03  |
|    critic_loss     | 7.11e+04  |
|    ent_coef        | 1.13      |
|    ent_coef_loss   | -3.96     |
|    learning_rate   | 0.0001    |
|    n_updates       | 1243      |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 336       |
|    ep_rew_mean     | -1.22e+05 |
| time/              |           |
|    episodes        | 8         |
|    fps             | 12        |
|    time_elapsed    | 220       |
|    total_timesteps | 2688      |
| train/             |           |
|    actor_loss      

<stable_baselines3.sac.sac.SAC at 0x1fb454f9a60>

In [32]:
env.stop()

In [None]:
%tensorboard --logdir ./local_files/Logs --port 6007

In [30]:
model.save("sac_single_temp")