In [8]:
import numpy as np
from agent.DDPG_Agent import DDPG_Agent
from environment.ContinuousEnvironment import ContinuousSimpleEnvironment

import pandas as pd

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Set up the environment

In [9]:
# Simulation environment shared by the training and evaluation cells below.
# NOTE(review): the exact meaning and units of these arguments are defined by
# ContinuousSimpleEnvironment — confirm there before changing any value.
env = ContinuousSimpleEnvironment(
    # Names passed as the environment's parameter list.
    param_list=['Tair', 'RH', 'Tmrt', 'Tout', 'Qheat', 'Occ'],
    # Weights also copied into the logged results further down
    # (presumably reward weights — TODO confirm).
    alpha=0.25,
    beta=1,
    # Temperature bounds (presumably the allowed set-point range — TODO confirm).
    min_temp=16,
    max_temp=21,
    # Simulation model and the directory it is looked up in.
    modelname='CELLS_v1.fmu',
    simulation_path=r'C:\Users\hbenoit\Desktop\DIET_Controller\EnergyPlus_simulations\simple_simulation',
    # Time-related settings (presumably horizon and step resolution — TODO confirm).
    days=151,
    hours=24,
    minutes=60,
    seconds=60,
    ep_timestep=6,
)

## Hyperparameter tuning and training

In [None]:
from Performance import all_combinations_list, search_similar

# Search grid: every key maps to the list of candidate values for that agent
# argument. all_combinations_list() turns it into the individual configurations
# iterated over below.
agent_arguments = {
    "discount": [0.99],
    "memory_size": [1000],
    "batch_size": [128],
    "tau": [0.05],
    "policy_noise": [0.2],
    "noise_clip": [0.5],
    "lr": [1e-3],
    "inside_dim": [32, 64, 128, 256],
    "num_hidden_layers": [1, 2, 3, 4],
    "num_random_episodes": [1],
    "num_training_iterations": [100],
    "seed": [770],
}

# Where new runs are logged, and where existing runs are looked up.
logging_path = r"C:\Users\hbenoit\Desktop\DIET_Controller\logs\simple_simulation"
searching_directory = r"C:\Users\hbenoit\Desktop\DIET_Controller\logs\simple_simulation\DDPG_Agent\results"

for curr_agent_arguments in all_combinations_list(agent_arguments):
    num_episodes = 10

    # Build the agent for this configuration and assemble the parameter
    # dictionary that is compared against the existing logs.
    agent = DDPG_Agent(env).from_dict(dict_arguments=curr_agent_arguments)
    log_dict = {
        **agent.log_dict(),
        **env.log_dict(),
        "num_episodes": num_episodes,
    }

    # Skip any configuration for which a matching run is already logged.
    if search_similar(searching_directory, log_dict):
        continue

    print("WE TRAIN")
    results_path, summary_df = agent.train(
        logging_path=logging_path,
        num_iterations=None,
        num_episodes=num_episodes,
        log=True,
    )
  

## Finding the best-performing agent in the logs

In [10]:
# Usage example for Performance.search_paths: query the logged DDPG runs that
# satisfy `conditions` and keep the top-ranked one according to the
# cumulative-reward utility.
import Performance

searching_directory = r"C:\Users\hbenoit\Desktop\DIET_Controller\logs\simple_simulation\DDPG_Agent"

# Each entry maps a logged parameter name to an [operator, value] pair.
conditions = {
    "seed": ["=", 770],
    "alpha": ["=", 0.25],
    "beta": ["=", 1],
    "num_episodes": ["=", 10],
}

best_path_list = Performance.search_paths(
    searching_directory,
    conditions=conditions,
    top_k=1,
    utility_function=Performance.cumulative_reward,
    normalized=True,
)

In [11]:
# Display the result path(s) returned by Performance.search_paths; the
# evaluation cells below use the first entry.
best_path_list

['C:\\Users\\hbenoit\\Desktop\\DIET_Controller\\logs\\simple_simulation\\DDPG_Agent\\results\\2022_6_19\\results_2022_6_19_6_41']

## Testing in-training performance pipeline

Here, we evaluate the in-training performance of the best agent (as ranked by cumulative reward) across several random seeds.

In [12]:
from logger.SimpleLogger import SimpleLogger

# --- Experiment inputs ---------------------------------------------------
logging_path = r"C:\Users\hbenoit\Desktop\DIET_Controller\logs\simple_simulation"
agent_name = "DDPG_Agent"
best_agent_path = best_path_list[0]

# The parameter varied across runs, with the values it takes.
parameter = ("seed", [775,776,777,778])
utility_function = Performance.cumulative_reward

agent = DDPG_Agent(env=env)
num_episodes = 5
num_iterations = env.numsteps

# --- Run the pipeline ------------------------------------------------------
# NOTE(review): `alpha` here is an argument of across_runs and is distinct from
# the environment's `alpha` stored in the results below — confirm its meaning
# in Performance.across_runs.
results_dict = Performance.across_runs(
    agent=agent,
    agent_config_path=best_agent_path,
    parameter=parameter,
    num_episodes=num_episodes,
    num_iterations=num_iterations,
    utility_function=utility_function,
    alpha=0.05,
    window=6,
    column_names=["Tset","Reward"],
)

# --- Log the results -------------------------------------------------------
logger = SimpleLogger(
    logging_path=logging_path,
    agent_name=agent_name,
    num_episodes=num_episodes,
    num_iterations=num_iterations,
)

# Record the environment's alpha/beta alongside the pipeline results.
results_dict["alpha"] = agent.env.alpha
results_dict["beta"] = agent.env.beta

logger.log_performance_pipeline(results_dict, fixed_policy=False)

Iteration0
Iteration1000
Iteration2000
Iteration3000
Iteration4000
Iteration5000
Iteration6000
Iteration7000
Iteration8000
Iteration9000
Iteration10000
Iteration11000
Iteration12000
Iteration13000
Iteration14000
Iteration15000
Iteration16000
Iteration17000
Iteration18000
Iteration19000
Iteration20000
Iteration21000
Training iterations 0
Iteration0
ACTION SELECTED 16.000009536743164
Iteration1000
ACTION SELECTED 16.000019073486328
Iteration2000
ACTION SELECTED 16.000001907348633
Iteration3000
ACTION SELECTED 16.000003814697266
Iteration4000
ACTION SELECTED 16.000028610229492
Iteration5000
ACTION SELECTED 16.00001335144043
Iteration6000
ACTION SELECTED 16.007890701293945
Iteration7000
ACTION SELECTED 16.000295639038086
Iteration8000
ACTION SELECTED 16.000001907348633
Iteration9000
ACTION SELECTED 16.000043869018555
Iteration10000
ACTION SELECTED 16.0
Iteration11000
ACTION SELECTED 16.000015258789062
Iteration12000
ACTION SELECTED 16.000024795532227
Iteration13000
ACTION SELECTED 16.0
Ite

## Testing fixed policy performance

Here, we evaluate the best agent (as ranked by cumulative reward) with its policy held fixed, over `num_testing` test runs.

In [13]:
from logger.SimpleLogger import SimpleLogger

# --- Experiment inputs ---------------------------------------------------
logging_path = r"C:\Users\hbenoit\Desktop\DIET_Controller\logs\simple_simulation"
agent_name = "DDPG_Agent"
best_agent_path = best_path_list[0]
utility_function = Performance.cumulative_reward

agent = DDPG_Agent(env=env)
num_testing = 3
num_episodes = 5
num_iterations = env.numsteps

# --- Run the pipeline ------------------------------------------------------
# NOTE(review): `alpha` here is an argument of across_fixed_policy and is
# distinct from the environment's `alpha` stored in the results below — confirm
# its meaning in Performance.across_fixed_policy.
results_dict = Performance.across_fixed_policy(
    agent=agent,
    agent_config_path=best_agent_path,
    num_testing=num_testing,
    num_episodes=num_episodes,
    num_iterations=num_iterations,
    utility_function=utility_function,
    alpha=0.05,
    window=6,
    column_names=["Tset","Reward"],
)

# --- Log the results -------------------------------------------------------
logger = SimpleLogger(
    logging_path=logging_path,
    agent_name=agent_name,
    num_episodes=num_episodes,
    num_iterations=num_iterations,
)

# Record the environment's alpha/beta alongside the pipeline results.
results_dict["alpha"] = agent.env.alpha
results_dict["beta"] = agent.env.beta

logger.log_performance_pipeline(results_dict, fixed_policy=True)

Iteration0
ACTION SELECTED 21.0
Iteration1000
ACTION SELECTED 20.999996185302734
Iteration2000
ACTION SELECTED 21.0
Iteration3000
ACTION SELECTED 21.0
Iteration4000
ACTION SELECTED 20.999996185302734
Iteration5000
ACTION SELECTED 21.0
Iteration6000
ACTION SELECTED 20.99999237060547
Iteration7000
ACTION SELECTED 21.0
Iteration8000
ACTION SELECTED 21.0
Iteration9000
ACTION SELECTED 21.0
Iteration10000
ACTION SELECTED 21.0
Iteration11000
ACTION SELECTED 21.0
Iteration12000
ACTION SELECTED 21.0
Iteration13000
ACTION SELECTED 21.0
Iteration14000
ACTION SELECTED 20.999996185302734
Iteration15000
ACTION SELECTED 21.0
Iteration16000
ACTION SELECTED 21.0
Iteration17000
ACTION SELECTED 21.0
Iteration18000
ACTION SELECTED 21.0
Iteration19000
ACTION SELECTED 21.0
Iteration20000
ACTION SELECTED 21.0
Iteration21000
ACTION SELECTED 21.0
Iteration0
ACTION SELECTED 21.0
Iteration1000
ACTION SELECTED 20.999996185302734
Iteration2000
ACTION SELECTED 21.0
Iteration3000
ACTION SELECTED 21.0
Iteration4000
A