In [114]:
import sys
from datetime import datetime

import gymnasium as gym
import numpy as np
import wandb
from stable_baselines3 import *
from stable_baselines3.common.callbacks import CallbackList
from stable_baselines3.common.logger import HumanOutputFormat
from stable_baselines3.common.logger import Logger as SB3Logger
from stable_baselines3.common.monitor import Monitor

import sinergym
import sinergym.utils.gcloud as gcloud
from sinergym.utils.callbacks import *
from sinergym.utils.constants import *
from sinergym.utils.logger import CSVLogger, WandBOutputFormat
from sinergym.utils.rewards import *
from sinergym.utils.wrappers import *

In [115]:
# Environment ID
environment = "Eplus-small_office-cool-discrete-stochastic-v1"
# Training episodes
episodes = 10
#Name of the experiment
experiment_date = datetime.today().strftime('%Y-%m-%d_%H:%M')
experiment_name = 'SB3_PPO-' + environment + \
    '-episodes-' + str(episodes) + "_7"
experiment_name += '_' + experiment_date

In [116]:
# Create wandb.config object in order to log all experiment params
experiment_params = {
    'sinergym-version': sinergym.__version__,
    'python-version': sys.version
}
experiment_params.update({'environment':environment,
                          'episodes':episodes,
                          'algorithm':'SB3-PPO'})

# Get wandb init params (you have to specify your own project and entity)
wandb_params = {"project": 'sinergym',
                "entity": 'Huron-Yin'}

print('Experiment name:', experiment_params)



Experiment name: {'sinergym-version': '3.2.7', 'python-version': '3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0]', 'environment': 'Eplus-small_office-cool-discrete-stochastic-v1', 'episodes': 10, 'algorithm': 'SB3-PPO'}


In [117]:
# Init wandb entry
run = wandb.init(
    name=experiment_name + '_' + wandb.util.generate_id(),
    config=experiment_params,
    ** wandb_params
)

print(wandb.config)

{'sinergym-version': '3.2.7', 'python-version': '3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0]', 'environment': 'Eplus-small_office-cool-discrete-stochastic-v1', 'episodes': 10, 'algorithm': 'SB3-PPO'}


In [118]:
env = gym.make(environment, env_name=experiment_name)
eval_env = gym.make(environment, env_name=experiment_name+'_EVALUATION')

[38;20m[ENVIRONMENT] (INFO) : Creating Gymnasium environment... [SB3_PPO-Eplus-small_office-cool-discrete-stochastic-v1-episodes-10_7_2024-04-16_05:34][0m
[38;20m[MODELING] (INFO) : Experiment working directory created [/workspaces/sinergym/Eplus-env-SB3_PPO-Eplus-small_office-cool-discrete-stochastic-v1-episodes-10_7_2024-04-16_05:34-res35041][0m
[38;20m[MODELING] (INFO) : Model Config is correct.[0m
[38;20m[MODELING] (INFO) : Updated building model with whole Output:Variable available names[0m
[38;20m[MODELING] (INFO) : Updated building model with whole Output:Meter available names[0m
[38;20m[MODELING] (INFO) : runperiod established: {'start_day': 1, 'start_month': 1, 'start_year': 1991, 'end_day': 31, 'end_month': 12, 'end_year': 1991, 'start_weekday': 0, 'n_steps_per_hour': 4}[0m
[38;20m[MODELING] (INFO) : Episode length (seconds): 31536000.0[0m
[38;20m[MODELING] (INFO) : timestep size (seconds): 900.0[0m
[38;20m[MODELING] (INFO) : timesteps per episode: 35040[0m


In [119]:
env = LoggerWrapper(env)
eval_env = LoggerWrapper(eval_env)

[38;20m[WRAPPER LoggerWrapper] (INFO) : Wrapper initialized.[0m
[38;20m[WRAPPER LoggerWrapper] (INFO) : Wrapper initialized.[0m


In [120]:
#model = DQN('MlpPolicy', env, verbose=1)
# model = PPO('MlpPolicy', env, verbose=1)

# model = PPO('MlpPolicy', env, learning_rate=0.0003, n_steps=2048, batch_size=64, n_epochs=10, gamma=0.99,
#             gae_lambda=0.95, clip_range=0.2, ent_coef=0, vf_coef=0.5, max_grad_norm=0.5, verbose=1, seed=3,
#             tensorboard_log=None)

# model = PPO('MlpPolicy', env, learning_rate=0.001, n_steps=4096, batch_size=128, n_epochs=15, gamma=0.9,
#             gae_lambda=0.9, clip_range=0.2, ent_coef=0, vf_coef=0.5, max_grad_norm=0.5, verbose=1, seed=3,
#             tensorboard_log=None)   #1
model = PPO('MlpPolicy', env, learning_rate=0.0003, n_steps=2048, batch_size=64, n_epochs=10, gamma=0.99,
            gae_lambda=0.95, clip_range=0.2, ent_coef=0, vf_coef=0.5, max_grad_norm=0.5, verbose=1, seed=3,
            tensorboard_log=None)   #2
# model = PPO('MlpPolicy', env, learning_rate=0.0003, n_steps=2048, batch_size=128, n_epochs=10, gamma=0.99,
#             gae_lambda=0.95, clip_range=0.2, ent_coef=0, vf_coef=0.5, max_grad_norm=0.5, verbose=1, seed=3,
#             tensorboard_log=None)   #3
# model = PPO('MlpPolicy', env, learning_rate=0.0003, n_steps=4096, batch_size=128, n_epochs=10, gamma=0.99,
#             gae_lambda=0.95, clip_range=0.2, ent_coef=0, vf_coef=0.5, max_grad_norm=0.5, verbose=1, seed=3,
#             tensorboard_log=None)   #4
# model = PPO('MlpPolicy', env, learning_rate=0.001, n_steps=2048, batch_size=64, n_epochs=10, gamma=0.99,
#             gae_lambda=0.95, clip_range=0.2, ent_coef=0, vf_coef=0.5, max_grad_norm=0.5, verbose=1, seed=3,
#             tensorboard_log=None)   #5
# model = PPO('MlpPolicy', env, learning_rate=0.01, n_steps=2048, batch_size=64, n_epochs=10, gamma=0.99,
#             gae_lambda=0.95, clip_range=0.2, ent_coef=0, vf_coef=0.5, max_grad_norm=0.5, verbose=1, seed=3,
#             tensorboard_log=None)   #6
model = PPO('MlpPolicy', env, learning_rate=0.0003, n_steps=4096, batch_size=64, n_epochs=10, gamma=0.99,
            gae_lambda=0.95, clip_range=0.2, ent_coef=0, vf_coef=0.5, max_grad_norm=0.5, verbose=1, seed=3,
            tensorboard_log=None)   #7


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [121]:
callbacks = []

# Set up Evaluation and saving best model
eval_callback = LoggerEvalCallback(
    eval_env,
    best_model_save_path=eval_env.get_wrapper_attr('workspace_path') +
    '/best_model/',
    log_path=eval_env.get_wrapper_attr('workspace_path') +
    '/best_model/',
    eval_freq=(eval_env.get_wrapper_attr('timestep_per_episode') - 1) * 2 - 1,
    deterministic=True,
    render=False,
    n_eval_episodes=1)
callbacks.append(eval_callback)


In [122]:
# wandb logger and setting in SB3
logger = SB3Logger(
    folder=None,
    output_formats=[
        HumanOutputFormat(
            sys.stdout,
            max_length=120),
        WandBOutputFormat()])
model.set_logger(logger)
# Append callback
log_callback = LoggerCallback()
callbacks.append(log_callback)


callback = CallbackList(callbacks)

In [123]:
timesteps = episodes * (env.get_wrapper_attr('timestep_per_episode') - 1)
print(timesteps)

350390


In [124]:
model.learn(
    total_timesteps=timesteps,
    callback=callback,
    log_interval=1)

#----------------------------------------------------------------------------------------------#
[38;20m[ENVIRONMENT] (INFO) : Starting a new episode... [SB3_PPO-Eplus-small_office-cool-discrete-stochastic-v1-episodes-10_7_2024-04-16_05:34] [Episode 1][0m
#----------------------------------------------------------------------------------------------#
[38;20m[MODELING] (INFO) : Episode directory created [/workspaces/sinergym/Eplus-env-SB3_PPO-Eplus-small_office-cool-discrete-stochastic-v1-episodes-10_7_2024-04-16_05:34-res35041/Eplus-env-sub_run1][0m
[38;20m[MODELING] (INFO) : Weather file CAN_YT_Whitehorse.719640_CWEC.epw used.[0m
[38;20m[MODELING] (INFO) : Adapting weather to building model. [CAN_YT_Whitehorse.719640_CWEC.epw][0m
[38;20m[ENVIRONMENT] (INFO) : Saving episode output path... [/workspaces/sinergym/Eplus-env-SB3_PPO-Eplus-small_office-cool-discrete-stochastic-v1-episodes-10_7_2024-04-16_05:34-res35041/Eplus-env-sub_run1/output][0m


  epw_content = self._headers_to_epw(use_datetimes=use_datetimes) + df.to_csv(


[38;20m[SIMULATOR] (INFO) : Running EnergyPlus with args: ['-w', '/workspaces/sinergym/Eplus-env-SB3_PPO-Eplus-small_office-cool-discrete-stochastic-v1-episodes-10_7_2024-04-16_05:34-res35041/Eplus-env-sub_run1/CAN_YT_Whitehorse.719640_CWEC_Random_1.0_0.0_0.001.epw', '-d', '/workspaces/sinergym/Eplus-env-SB3_PPO-Eplus-small_office-cool-discrete-stochastic-v1-episodes-10_7_2024-04-16_05:34-res35041/Eplus-env-sub_run1/output', '/workspaces/sinergym/Eplus-env-SB3_PPO-Eplus-small_office-cool-discrete-stochastic-v1-episodes-10_7_2024-04-16_05:34-res35041/Eplus-env-sub_run1/ASHRAE901_OfficeSmall_STD2019_Denver.epJSON'][0m
[38;20m[ENVIRONMENT] (INFO) : Episode 1 started.[0m
[38;20m[SIMULATOR] (INFO) : handlers initialized.[0m
[38;20m[SIMULATOR] (INFO) : handlers are ready.[0m
[38;20m[SIMULATOR] (INFO) : System is ready.[0m
[38;20m[WRAPPER LoggerWrapper] (INFO) : Creating monitor.csv for current episode (episode 1) if logger is active[0m
--------------------------------------------

  gym.logger.warn("Casting input x to numpy array.")
  logger.warn(
  logger.warn(
  logger.warn(


--------------------------------------------------------------------------------------------------------------| 1%
| action_network/                   |           |
|    index                          | 9         |
| action_simulation/                |           |
|    Cooling_Setpoint_RL            | 22.5      |
|    Heating_Setpoint_RL            | 21        |
| observation/                      |           |
|    HVAC_electricity_demand_rate   | 1925.3821 |
|    clg_setpoint                   | 22.5      |
|    core_zn_air_humidity           | 22.577223 |
|    core_zn_air_temperature        | 20.945555 |
|    core_zn_people_occupant        | 0.0       |
|    day_of_month                   | 3.0       |
|    diffuse_solar_radiation        | 0.0       |
|    direct_solar_radiation         | 0.0       |
|    hour                           | 1.0       |
|    htg_setpoint                   | 21.0      |
|    month                          | 1.0       |
|    outdoor_humidity              

  epw_content = self._headers_to_epw(use_datetimes=use_datetimes) + df.to_csv(


[38;20m[SIMULATOR] (INFO) : handlers are ready.[0m
[38;20m[SIMULATOR] (INFO) : System is ready.[0m
[38;20m[WRAPPER LoggerWrapper] (INFO) : Creating monitor.csv for current episode (episode 2) if logger is active[0m
----------------------------------------------------
| action_network/                     |            |
|    index                            | 4          |
| action_simulation/                  |            |
|    Cooling_Setpoint_RL              | 26         |
|    Heating_Setpoint_RL              | 19         |
| episode/                            |            |
|    comfort_violation_time(%)        | 54.9       |
|    cumulative_abs_comfort_penalty   | -7.48e+04  |
|    cumulative_abs_energy_penalty    | -6.74e+07  |
|    cumulative_power_demand          | 6.74e+07   |
|    cumulative_reward                | -40771.734 |
|    cumulative_reward_comfort_term   | -3.74e+04  |
|    cumulative_reward_energy_term    | -3.37e+03  |
|    cumulative_temperature_violation

  logger.warn(
  logger.warn(
  logger.warn(
  gym.logger.warn("Casting input x to numpy array.")


--------------------------------------------------
| action_network/                   |            |
|    index                          | 7          |
| action_simulation/                |            |
|    Cooling_Setpoint_RL            | 23         |
|    Heating_Setpoint_RL            | 22         |
| observation/                      |            |
|    HVAC_electricity_demand_rate   | 1925.3821  |
|    clg_setpoint                   | 24.0       |
|    core_zn_air_humidity           | 23.44765   |
|    core_zn_air_temperature        | 21.09916   |
|    core_zn_people_occupant        | 9.021986   |
|    day_of_month                   | 2.0        |
|    diffuse_solar_radiation        | 0.0        |
|    direct_solar_radiation         | 0.0        |
|    hour                           | 15.0       |
|    htg_setpoint                   | 21.0       |
|    month                          | 1.0        |
|    outdoor_humidity               | 59.0       |
|    outdoor_temperature       

  epw_content = self._headers_to_epw(use_datetimes=use_datetimes) + df.to_csv(


[38;20m[SIMULATOR] (INFO) : handlers initialized.[0m
[38;20m[SIMULATOR] (INFO) : handlers are ready.[0m
[38;20m[SIMULATOR] (INFO) : System is ready.[0m
[38;20m[WRAPPER LoggerWrapper] (INFO) : Creating monitor.csv for current episode (episode 1) if logger is active[0m
Progress: |*--------------------------------------------------------------------------------------------------| 1%

  gym.logger.warn("Casting input x to numpy array.")


Progress: |***************************************************************************************************| 99%
[38;20m[WRAPPER LoggerWrapper] (INFO) : End of episode, recording summary (progress.csv) if logger is active[0m
[38;20m[ENVIRONMENT] (INFO) : Environment closed. [SB3_PPO-Eplus-small_office-cool-discrete-stochastic-v1-episodes-10_7_2024-04-16_05:34_EVALUATION][0m
#----------------------------------------------------------------------------------------------#
[38;20m[ENVIRONMENT] (INFO) : Starting a new episode... [SB3_PPO-Eplus-small_office-cool-discrete-stochastic-v1-episodes-10_7_2024-04-16_05:34] [Episode 3][0m
#----------------------------------------------------------------------------------------------#
[38;20m[MODELING] (INFO) : Episode directory created [/workspaces/sinergym/Eplus-env-SB3_PPO-Eplus-small_office-cool-discrete-stochastic-v1-episodes-10_7_2024-04-16_05:34-res35041/Eplus-env-sub_run3][0m
[38;20m[MODELING] (INFO) : Weather file CAN_YT_Whitehor

  epw_content = self._headers_to_epw(use_datetimes=use_datetimes) + df.to_csv(


[38;20m[WRAPPER LoggerWrapper] (INFO) : Creating monitor.csv for current episode (episode 3) if logger is active[0m
Eval num_timesteps=70077, episode_reward=-17604.43 +/- 0.00
Episode length: 35040.00 +/- 0.00
-------------------------------------------------------
| action_network/                        |            |
|    index                               | 7          |
| action_simulation/                     |            |
|    Cooling_Setpoint_RL                 | 23         |
|    Heating_Setpoint_RL                 | 22         |
| eval/                                  |            |
|    comfort_violation(%)                | 29.7       |
|    cumulative_absolute_comfort_penalty | -2.75e+04  |
|    cumulative_absolute_energy_penalty  | -7.72e+07  |
|    cumulative_power_demand             | 7.72e+07   |
|    cumulative_reward                   | -1.76e+04  |
|    cumulative_reward_comfort_term      | -1.37e+04  |
|    cumulative_reward_energy_term       | -3.86e+03  |
|   

  logger.warn(
  logger.warn(
  logger.warn(
  gym.logger.warn("Casting input x to numpy array.")


[38;20m[SIMULATOR] (INFO) : handlers are ready.[0m
[38;20m[SIMULATOR] (INFO) : System is ready.[0m
--------------------------------------------------
| action_network/                   |            |
|    index                          | 2          |
| action_simulation/                |            |
|    Cooling_Setpoint_RL            | 28         |
|    Heating_Setpoint_RL            | 17         |
| observation/                      |            |
|    HVAC_electricity_demand_rate   | 821.0      |
|    clg_setpoint                   | 22.5       |
|    core_zn_air_humidity           | 6.21254    |
|    core_zn_air_temperature        | 18.290571  |
|    core_zn_people_occupant        | 0.0        |
|    day_of_month                   | 1.0        |
|    diffuse_solar_radiation        | 0.0        |
|    direct_solar_radiation         | 0.0        |
|    hour                           | 5.0        |
|    htg_setpoint                   | 22.0       |
|    month                    

  epw_content = self._headers_to_epw(use_datetimes=use_datetimes) + df.to_csv(


[38;20m[SIMULATOR] (INFO) : handlers are ready.[0m
[38;20m[SIMULATOR] (INFO) : System is ready.[0m
[38;20m[WRAPPER LoggerWrapper] (INFO) : Creating monitor.csv for current episode (episode 4) if logger is active[0m
-----------------------------------------------------
| action_network/                     |             |
|    index                            | 7           |
| action_simulation/                  |             |
|    Cooling_Setpoint_RL              | 23          |
|    Heating_Setpoint_RL              | 22          |
| episode/                            |             |
|    comfort_violation_time(%)        | 41.6        |
|    cumulative_abs_comfort_penalty   | -8.36e+04   |
|    cumulative_abs_energy_penalty    | -1.46e+08   |
|    cumulative_power_demand          | 1.46e+08    |
|    cumulative_reward                | -49130.457  |
|    cumulative_reward_comfort_term   | -4.18e+04   |
|    cumulative_reward_energy_term    | -7.31e+03   |
|    cumulative_tempera

  logger.warn(
  logger.warn(
  logger.warn(
  gym.logger.warn("Casting input x to numpy array.")


-------------------------------------------------
| action_network/                   |           |
|    index                          | 7         |
| action_simulation/                |           |
|    Cooling_Setpoint_RL            | 23        |
|    Heating_Setpoint_RL            | 22        |
| observation/                      |           |
|    HVAC_electricity_demand_rate   | 1925.3821 |
|    clg_setpoint                   | 23.0      |
|    core_zn_air_humidity           | 20.93019  |
|    core_zn_air_temperature        | 21.989073 |
|    core_zn_people_occupant        | 0.9924185 |
|    day_of_month                   | 2.0       |
|    diffuse_solar_radiation        | 0.0       |
|    direct_solar_radiation         | 0.0       |
|    hour                           | 21.0      |
|    htg_setpoint                   | 22.0      |
|    month                          | 1.0       |
|    outdoor_humidity               | 58.0      |
|    outdoor_temperature            | -19.89289 |


  epw_content = self._headers_to_epw(use_datetimes=use_datetimes) + df.to_csv(


[38;20m[SIMULATOR] (INFO) : handlers are ready.[0m
[38;20m[SIMULATOR] (INFO) : System is ready.[0m
[38;20m[WRAPPER LoggerWrapper] (INFO) : Creating monitor.csv for current episode (episode 2) if logger is active[0m
Progress: |*--------------------------------------------------------------------------------------------------| 1%

  gym.logger.warn("Casting input x to numpy array.")


Progress: |***************************************************************************************************| 99%
[38;20m[WRAPPER LoggerWrapper] (INFO) : End of episode, recording summary (progress.csv) if logger is active[0m
[38;20m[ENVIRONMENT] (INFO) : Environment closed. [SB3_PPO-Eplus-small_office-cool-discrete-stochastic-v1-episodes-10_7_2024-04-16_05:34_EVALUATION][0m
#----------------------------------------------------------------------------------------------#
[38;20m[ENVIRONMENT] (INFO) : Starting a new episode... [SB3_PPO-Eplus-small_office-cool-discrete-stochastic-v1-episodes-10_7_2024-04-16_05:34] [Episode 5][0m
#----------------------------------------------------------------------------------------------#
[38;20m[MODELING] (INFO) : Episode directory created [/workspaces/sinergym/Eplus-env-SB3_PPO-Eplus-small_office-cool-discrete-stochastic-v1-episodes-10_7_2024-04-16_05:34-res35041/Eplus-env-sub_run5][0m
[38;20m[MODELING] (INFO) : Weather file CAN_YT_Whitehor

  epw_content = self._headers_to_epw(use_datetimes=use_datetimes) + df.to_csv(


[38;20m[WRAPPER LoggerWrapper] (INFO) : Creating monitor.csv for current episode (episode 5) if logger is active[0m
Eval num_timesteps=140154, episode_reward=-17076.44 +/- 0.00
Episode length: 35040.00 +/- 0.00
-------------------------------------------------------
| action_network/                        |            |
|    index                               | 7          |
| action_simulation/                     |            |
|    Cooling_Setpoint_RL                 | 23         |
|    Heating_Setpoint_RL                 | 22         |
| eval/                                  |            |
|    comfort_violation(%)                | 29.8       |
|    cumulative_absolute_comfort_penalty | -2.64e+04  |
|    cumulative_absolute_energy_penalty  | -7.74e+07  |
|    cumulative_power_demand             | 7.74e+07   |
|    cumulative_reward                   | -1.71e+04  |
|    cumulative_reward_comfort_term      | -1.32e+04  |
|    cumulative_reward_energy_term       | -3.87e+03  |
|  

  logger.warn(
  logger.warn(
  logger.warn(
  gym.logger.warn("Casting input x to numpy array.")


[38;20m[SIMULATOR] (INFO) : handlers are ready.[0m
[38;20m[SIMULATOR] (INFO) : System is ready.[0m
--------------------------------------------------
| action_network/                   |            |
|    index                          | 9          |
| action_simulation/                |            |
|    Cooling_Setpoint_RL            | 22.5       |
|    Heating_Setpoint_RL            | 21         |
| observation/                      |            |
|    HVAC_electricity_demand_rate   | 5572.701   |
|    clg_setpoint                   | 23.0       |
|    core_zn_air_humidity           | 5.0693984  |
|    core_zn_air_temperature        | 21.209747  |
|    core_zn_people_occupant        | 0.0        |
|    day_of_month                   | 1.0        |
|    diffuse_solar_radiation        | 20.25      |
|    direct_solar_radiation         | 0.0        |
|    hour                           | 11.0       |
|    htg_setpoint                   | 22.0       |
|    month                    

  epw_content = self._headers_to_epw(use_datetimes=use_datetimes) + df.to_csv(


[38;20m[SIMULATOR] (INFO) : handlers are ready.[0m
[38;20m[SIMULATOR] (INFO) : System is ready.[0m
[38;20m[WRAPPER LoggerWrapper] (INFO) : Creating monitor.csv for current episode (episode 6) if logger is active[0m
----------------------------------------------------
| action_network/                     |            |
|    index                            | 9          |
| action_simulation/                  |            |
|    Cooling_Setpoint_RL              | 22.5       |
|    Heating_Setpoint_RL              | 21         |
| episode/                            |            |
|    comfort_violation_time(%)        | 35.7       |
|    cumulative_abs_comfort_penalty   | -6.43e+04  |
|    cumulative_abs_energy_penalty    | -1.54e+08  |
|    cumulative_power_demand          | 1.54e+08   |
|    cumulative_reward                | -39841.57  |
|    cumulative_reward_comfort_term   | -3.22e+04  |
|    cumulative_reward_energy_term    | -7.69e+03  |
|    cumulative_temperature_violation

  logger.warn(
  logger.warn(
  logger.warn(
  gym.logger.warn("Casting input x to numpy array.")


--------------------------------------------------------------------------------------------------------------| 1%
| action_network/                   |           |
|    index                          | 7         |
| action_simulation/                |           |
|    Cooling_Setpoint_RL            | 23        |
|    Heating_Setpoint_RL            | 22        |
| observation/                      |           |
|    HVAC_electricity_demand_rate   | 1925.3821 |
|    clg_setpoint                   | 23.0      |
|    core_zn_air_humidity           | 22.655035 |
|    core_zn_air_temperature        | 21.910234 |
|    core_zn_people_occupant        | 0.0       |
|    day_of_month                   | 3.0       |
|    diffuse_solar_radiation        | 0.0       |
|    direct_solar_radiation         | 0.0       |
|    hour                           | 3.0       |
|    htg_setpoint                   | 22.0      |
|    month                          | 1.0       |
|    outdoor_humidity              

  epw_content = self._headers_to_epw(use_datetimes=use_datetimes) + df.to_csv(


[38;20m[SIMULATOR] (INFO) : handlers are ready.[0m
[38;20m[SIMULATOR] (INFO) : System is ready.[0m
[38;20m[WRAPPER LoggerWrapper] (INFO) : Creating monitor.csv for current episode (episode 3) if logger is active[0m
Progress: |*--------------------------------------------------------------------------------------------------| 1%

  gym.logger.warn("Casting input x to numpy array.")


Progress: |***************************************************************************************************| 99%
[38;20m[WRAPPER LoggerWrapper] (INFO) : End of episode, recording summary (progress.csv) if logger is active[0m
[38;20m[ENVIRONMENT] (INFO) : Environment closed. [SB3_PPO-Eplus-small_office-cool-discrete-stochastic-v1-episodes-10_7_2024-04-16_05:34_EVALUATION][0m
#----------------------------------------------------------------------------------------------#
[38;20m[ENVIRONMENT] (INFO) : Starting a new episode... [SB3_PPO-Eplus-small_office-cool-discrete-stochastic-v1-episodes-10_7_2024-04-16_05:34] [Episode 7][0m
#----------------------------------------------------------------------------------------------#
[38;20m[MODELING] (INFO) : Episode directory created [/workspaces/sinergym/Eplus-env-SB3_PPO-Eplus-small_office-cool-discrete-stochastic-v1-episodes-10_7_2024-04-16_05:34-res35041/Eplus-env-sub_run7][0m
[38;20m[MODELING] (INFO) : Weather file CAN_YT_Whitehor

  epw_content = self._headers_to_epw(use_datetimes=use_datetimes) + df.to_csv(


[38;20m[WRAPPER LoggerWrapper] (INFO) : Creating monitor.csv for current episode (episode 7) if logger is active[0m
Eval num_timesteps=210231, episode_reward=-17123.22 +/- 0.00
Episode length: 35040.00 +/- 0.00
-------------------------------------------------------
| action_network/                        |            |
|    index                               | 7          |
| action_simulation/                     |            |
|    Cooling_Setpoint_RL                 | 23         |
|    Heating_Setpoint_RL                 | 22         |
| eval/                                  |            |
|    comfort_violation(%)                | 29.3       |
|    cumulative_absolute_comfort_penalty | -2.65e+04  |
|    cumulative_absolute_energy_penalty  | -7.76e+07  |
|    cumulative_power_demand             | 7.76e+07   |
|    cumulative_reward                   | -1.71e+04  |
|    cumulative_reward_comfort_term      | -1.32e+04  |
|    cumulative_reward_energy_term       | -3.88e+03  |
|  

  logger.warn(
  logger.warn(
  logger.warn(
  gym.logger.warn("Casting input x to numpy array.")


[38;20m[SIMULATOR] (INFO) : handlers are ready.[0m
[38;20m[SIMULATOR] (INFO) : System is ready.[0m
--------------------------------------------------
| action_network/                   |            |
|    index                          | 8          |
| action_simulation/                |            |
|    Cooling_Setpoint_RL            | 22.5       |
|    Heating_Setpoint_RL            | 22         |
| observation/                      |            |
|    HVAC_electricity_demand_rate   | 1676.5897  |
|    clg_setpoint                   | 25.0       |
|    core_zn_air_humidity           | 5.42703    |
|    core_zn_air_temperature        | 19.09517   |
|    core_zn_people_occupant        | 0.0        |
|    day_of_month                   | 1.0        |
|    diffuse_solar_radiation        | 0.0        |
|    direct_solar_radiation         | 0.0        |
|    hour                           | 16.0       |
|    htg_setpoint                   | 20.0       |
|    month                    

  epw_content = self._headers_to_epw(use_datetimes=use_datetimes) + df.to_csv(


[38;20m[SIMULATOR] (INFO) : handlers are ready.[0m
[38;20m[SIMULATOR] (INFO) : System is ready.[0m
[38;20m[WRAPPER LoggerWrapper] (INFO) : Creating monitor.csv for current episode (episode 8) if logger is active[0m
----------------------------------------------------
| action_network/                     |            |
|    index                            | 7          |
| action_simulation/                  |            |
|    Cooling_Setpoint_RL              | 23         |
|    Heating_Setpoint_RL              | 22         |
| episode/                            |            |
|    comfort_violation_time(%)        | 34.7       |
|    cumulative_abs_comfort_penalty   | -6.15e+04  |
|    cumulative_abs_energy_penalty    | -1.56e+08  |
|    cumulative_power_demand          | 1.56e+08   |
|    cumulative_reward                | -38547.047 |
|    cumulative_reward_comfort_term   | -3.08e+04  |
|    cumulative_reward_energy_term    | -7.79e+03  |
|    cumulative_temperature_violation

  logger.warn(
  logger.warn(
  logger.warn(
  gym.logger.warn("Casting input x to numpy array.")


--------------------------------------------------
| action_network/                   |            |
|    index                          | 7          |
| action_simulation/                |            |
|    Cooling_Setpoint_RL            | 23         |
|    Heating_Setpoint_RL            | 22         |
| observation/                      |            |
|    HVAC_electricity_demand_rate   | 1925.3821  |
|    clg_setpoint                   | 23.0       |
|    core_zn_air_humidity           | 6.7929196  |
|    core_zn_air_temperature        | 22.00022   |
|    core_zn_people_occupant        | 1.8946172  |
|    day_of_month                   | 2.0        |
|    diffuse_solar_radiation        | 0.0        |
|    direct_solar_radiation         | 0.0        |
|    hour                           | 7.0        |
|    htg_setpoint                   | 22.0       |
|    month                          | 1.0        |
|    outdoor_humidity               | 73.0       |
|    outdoor_temperature       

  epw_content = self._headers_to_epw(use_datetimes=use_datetimes) + df.to_csv(


[38;20m[SIMULATOR] (INFO) : handlers are ready.[0m
[38;20m[SIMULATOR] (INFO) : System is ready.[0m
[38;20m[WRAPPER LoggerWrapper] (INFO) : Creating monitor.csv for current episode (episode 4) if logger is active[0m
Progress: |*--------------------------------------------------------------------------------------------------| 1%

  gym.logger.warn("Casting input x to numpy array.")


Progress: |***************************************************************************************************| 99%
[38;20m[WRAPPER LoggerWrapper] (INFO) : End of episode, recording summary (progress.csv) if logger is active[0m
[38;20m[ENVIRONMENT] (INFO) : Environment closed. [SB3_PPO-Eplus-small_office-cool-discrete-stochastic-v1-episodes-10_7_2024-04-16_05:34_EVALUATION][0m
#----------------------------------------------------------------------------------------------#
[38;20m[ENVIRONMENT] (INFO) : Starting a new episode... [SB3_PPO-Eplus-small_office-cool-discrete-stochastic-v1-episodes-10_7_2024-04-16_05:34] [Episode 9][0m
#----------------------------------------------------------------------------------------------#
[38;20m[MODELING] (INFO) : Episode directory created [/workspaces/sinergym/Eplus-env-SB3_PPO-Eplus-small_office-cool-discrete-stochastic-v1-episodes-10_7_2024-04-16_05:34-res35041/Eplus-env-sub_run9][0m
[38;20m[MODELING] (INFO) : Weather file CAN_YT_Whitehor

  epw_content = self._headers_to_epw(use_datetimes=use_datetimes) + df.to_csv(


[38;20m[WRAPPER LoggerWrapper] (INFO) : Creating monitor.csv for current episode (episode 9) if logger is active[0m
Eval num_timesteps=280308, episode_reward=-17232.21 +/- 0.00
Episode length: 35040.00 +/- 0.00
-------------------------------------------------------
| action_network/                        |            |
|    index                               | 7          |
| action_simulation/                     |            |
|    Cooling_Setpoint_RL                 | 23         |
|    Heating_Setpoint_RL                 | 22         |
| eval/                                  |            |
|    comfort_violation(%)                | 29.6       |
|    cumulative_absolute_comfort_penalty | -2.67e+04  |
|    cumulative_absolute_energy_penalty  | -7.78e+07  |
|    cumulative_power_demand             | 7.78e+07   |
|    cumulative_reward                   | -1.72e+04  |
|    cumulative_reward_comfort_term      | -1.33e+04  |
|    cumulative_reward_energy_term       | -3.89e+03  |
|  

  logger.warn(
  logger.warn(
  logger.warn(
  gym.logger.warn("Casting input x to numpy array.")


[38;20m[SIMULATOR] (INFO) : handlers are ready.[0m
[38;20m[SIMULATOR] (INFO) : System is ready.[0m
--------------------------------------------------
| action_network/                   |            |
|    index                          | 8          |
| action_simulation/                |            |
|    Cooling_Setpoint_RL            | 22.5       |
|    Heating_Setpoint_RL            | 22         |
| observation/                      |            |
|    HVAC_electricity_demand_rate   | 821.0      |
|    clg_setpoint                   | 23.0       |
|    core_zn_air_humidity           | 4.791964   |
|    core_zn_air_temperature        | 20.54156   |
|    core_zn_people_occupant        | 0.0        |
|    day_of_month                   | 1.0        |
|    diffuse_solar_radiation        | 0.0        |
|    direct_solar_radiation         | 0.0        |
|    hour                           | 22.0       |
|    htg_setpoint                   | 22.0       |
|    month                    

  epw_content = self._headers_to_epw(use_datetimes=use_datetimes) + df.to_csv(


[38;20m[ENVIRONMENT] (INFO) : Saving episode output path... [/workspaces/sinergym/Eplus-env-SB3_PPO-Eplus-small_office-cool-discrete-stochastic-v1-episodes-10_7_2024-04-16_05:34-res35041/Eplus-env-sub_run10/output][0m
[38;20m[SIMULATOR] (INFO) : Running EnergyPlus with args: ['-w', '/workspaces/sinergym/Eplus-env-SB3_PPO-Eplus-small_office-cool-discrete-stochastic-v1-episodes-10_7_2024-04-16_05:34-res35041/Eplus-env-sub_run10/CAN_YT_Whitehorse.719640_CWEC_Random_1.0_0.0_0.001.epw', '-d', '/workspaces/sinergym/Eplus-env-SB3_PPO-Eplus-small_office-cool-discrete-stochastic-v1-episodes-10_7_2024-04-16_05:34-res35041/Eplus-env-sub_run10/output', '/workspaces/sinergym/Eplus-env-SB3_PPO-Eplus-small_office-cool-discrete-stochastic-v1-episodes-10_7_2024-04-16_05:34-res35041/Eplus-env-sub_run10/ASHRAE901_OfficeSmall_STD2019_Denver.epJSON'][0m
[38;20m[ENVIRONMENT] (INFO) : Episode 10 started.[0m
[38;20m[SIMULATOR] (INFO) : handlers are ready.[0m
[38;20m[SIMULATOR] (INFO) : System is read

  logger.warn(
  logger.warn(
  logger.warn(
  gym.logger.warn("Casting input x to numpy array.")


----------------------------------------------------
| action_network/                   |              |
|    index                          | 7            |
| action_simulation/                |              |
|    Cooling_Setpoint_RL            | 23           |
|    Heating_Setpoint_RL            | 22           |
| observation/                      |              |
|    HVAC_electricity_demand_rate   | 5544.475     |
|    clg_setpoint                   | 23.0         |
|    core_zn_air_humidity           | 5.1941533    |
|    core_zn_air_temperature        | 20.56965     |
|    core_zn_people_occupant        | 0.0          |
|    day_of_month                   | 1.0          |
|    diffuse_solar_radiation        | 33.0         |
|    direct_solar_radiation         | 345.0        |
|    hour                           | 12.0         |
|    htg_setpoint                   | 22.0         |
|    month                          | 1.0          |
|    outdoor_humidity               | 43.0    

  epw_content = self._headers_to_epw(use_datetimes=use_datetimes) + df.to_csv(


[38;20m[SIMULATOR] (INFO) : handlers are ready.[0m
[38;20m[SIMULATOR] (INFO) : System is ready.[0m
[38;20m[WRAPPER LoggerWrapper] (INFO) : Creating monitor.csv for current episode (episode 5) if logger is active[0m
Progress: |*--------------------------------------------------------------------------------------------------| 1%

  gym.logger.warn("Casting input x to numpy array.")


Progress: |***************************************************************************************************| 99%
[38;20m[WRAPPER LoggerWrapper] (INFO) : End of episode, recording summary (progress.csv) if logger is active[0m
[38;20m[ENVIRONMENT] (INFO) : Environment closed. [SB3_PPO-Eplus-small_office-cool-discrete-stochastic-v1-episodes-10_7_2024-04-16_05:34_EVALUATION][0m
#----------------------------------------------------------------------------------------------#
[38;20m[ENVIRONMENT] (INFO) : Starting a new episode... [SB3_PPO-Eplus-small_office-cool-discrete-stochastic-v1-episodes-10_7_2024-04-16_05:34] [Episode 11][0m
#----------------------------------------------------------------------------------------------#
[38;20m[MODELING] (INFO) : Episode directory created [/workspaces/sinergym/Eplus-env-SB3_PPO-Eplus-small_office-cool-discrete-stochastic-v1-episodes-10_7_2024-04-16_05:34-res35041/Eplus-env-sub_run11][0m
[38;20m[MODELING] (INFO) : Weather file CAN_YT_Whiteh

  epw_content = self._headers_to_epw(use_datetimes=use_datetimes) + df.to_csv(


[38;20m[WRAPPER LoggerWrapper] (INFO) : Creating monitor.csv for current episode (episode 11) if logger is active[0m
Eval num_timesteps=350385, episode_reward=-17007.15 +/- 0.00
Episode length: 35040.00 +/- 0.00
-------------------------------------------------------
| action_network/                        |            |
|    index                               | 7          |
| action_simulation/                     |            |
|    Cooling_Setpoint_RL                 | 23         |
|    Heating_Setpoint_RL                 | 22         |
| eval/                                  |            |
|    comfort_violation(%)                | 29.3       |
|    cumulative_absolute_comfort_penalty | -2.63e+04  |
|    cumulative_absolute_energy_penalty  | -7.73e+07  |
|    cumulative_power_demand             | 7.73e+07   |
|    cumulative_reward                   | -1.7e+04   |
|    cumulative_reward_comfort_term      | -1.31e+04  |
|    cumulative_reward_energy_term       | -3.86e+03  |
| 

  logger.warn(
  logger.warn(
  logger.warn(
  gym.logger.warn("Casting input x to numpy array.")


[38;20m[SIMULATOR] (INFO) : handlers are ready.[0m
[38;20m[SIMULATOR] (INFO) : System is ready.[0m
-------------------------------------------------
| action_network/                   |           |
|    index                          | 7         |
| action_simulation/                |           |
|    Cooling_Setpoint_RL            | 23        |
|    Heating_Setpoint_RL            | 22        |
| observation/                      |           |
|    HVAC_electricity_demand_rate   | 5720.277  |
|    clg_setpoint                   | 23.0      |
|    core_zn_air_humidity           | 5.315053  |
|    core_zn_air_temperature        | 18.768671 |
|    core_zn_people_occupant        | 0.0       |
|    day_of_month                   | 1.0       |
|    diffuse_solar_radiation        | 0.0       |
|    direct_solar_radiation         | 0.0       |
|    hour                           | 3.0       |
|    htg_setpoint                   | 22.0      |
|    month                          | 1.0      

<stable_baselines3.ppo.ppo.PPO at 0x7f926ee71000>

In [125]:
model.save(str(env.get_wrapper_attr('timestep_per_episode'))+ '/' + experiment_name)

In [126]:
env.close()

[38;20m[WRAPPER LoggerWrapper] (INFO) : End of episode, recording summary (progress.csv) if logger is active[0m
[38;20m[ENVIRONMENT] (INFO) : Environment closed. [SB3_PPO-Eplus-small_office-cool-discrete-stochastic-v1-episodes-10_7_2024-04-16_05:34][0m


Progress: |***************************************************************************************************| 99%


In [127]:
artifact = wandb.Artifact(
        name="experiment1",
        type="training")
artifact.add_dir(
        env.get_wrapper_attr('workspace_path'),
        name='training_output/')
artifact.add_dir(
    eval_env.get_wrapper_attr('workspace_path'),
    name='evaluation_output/')
run.log_artifact(artifact)

# wandb has finished
run.finish()

[34m[1mwandb[0m: Adding directory to artifact (/workspaces/sinergym/Eplus-env-SB3_PPO-Eplus-small_office-cool-discrete-stochastic-v1-episodes-10_7_2024-04-16_05:34-res35041)... Done. 1.8s
[34m[1mwandb[0m: Adding directory to artifact (/workspaces/sinergym/Eplus-env-SB3_PPO-Eplus-small_office-cool-discrete-stochastic-v1-episodes-10_7_2024-04-16_05:34_EVALUATION-res35041)... Done. 1.2s


0,1
action_network/index,▄▅▇▆▇▇▇▆▆▆▇▇▇▇▃▇▇▇██▇▇▇█▇▅█▇▇█▁▇▇▇▁▇▇▇▁▇
action_simulation/Cooling_Setpoint_RL,▅▄▁▂▁▁▁▂▂▂▁▁▁▁▆▁▁▁▁▁▁▁▁▁▁▃▁▁▁▁█▁▁▁█▁▁▁█▁
action_simulation/Heating_Setpoint_RL,▄▅█▇███▇▇▇████▃██████████▆████▁███▁███▁█
episode/comfort_violation_time(%),█▄▂▁▁
episode/cumulative_abs_comfort_penalty,▃▁▆▇█
episode/cumulative_abs_energy_penalty,█▂▁▁▁
episode/cumulative_power_demand,▁▇███
episode/cumulative_reward,▆▁▆▇█
episode/cumulative_reward_comfort_term,▃▁▆▇█
episode/cumulative_reward_energy_term,█▂▁▁▁

0,1
action_network/index,7.0
action_simulation/Cooling_Setpoint_RL,23.0
action_simulation/Heating_Setpoint_RL,22.0
episode/comfort_violation_time(%),33.87417
episode/cumulative_abs_comfort_penalty,-58250.29593
episode/cumulative_abs_energy_penalty,-155365969.75542
episode/cumulative_power_demand,155365969.75542
episode/cumulative_reward,-36893.44531
episode/cumulative_reward_comfort_term,-29125.14796
episode/cumulative_reward_energy_term,-7768.29849
