In [1]:
from typing import Dict, List, Tuple
import torch
import numpy as np
import gym
from plotly.subplots import make_subplots
from agent.DDPG_Agent import DDPG_Agent
from environment.ContinuousEnvironment import ContinuousSimpleEnvironment

import pandas as pd

%load_ext autoreload
%autoreload 2

## Set environment 

In [2]:
# Keyword arguments handed to ContinuousSimpleEnvironment, collected in one
# mapping so each setting sits on its own clearly labelled line.
env_settings = dict(
    param_list=['Tair', 'RH', 'Tmrt', 'Tout', 'Qheat', 'Occ'],
    alpha=1,
    beta=1,
    min_temp=16,
    max_temp=21,
    modelname='CELLS_v1.fmu',
    simulation_path=r'C:\Users\hbenoit\Desktop\DIET_Controller\EnergyPlus_simulations\simple_simulation',
    days=151,
    hours=24,
    minutes=60,
    seconds=60,
    ep_timestep=6,
)

# Environment instance reused by the training and evaluation cells below.
env = ContinuousSimpleEnvironment(**env_settings)


## Hyperparameter tuning and training

In [None]:
from Performance import all_combinations_list, search_similar

# Candidate values per hyperparameter; the loop below iterates over what
# all_combinations_list yields for this grid.
agent_arguments = {
    "discount": [0.99],
    "memory_size": [1000],
    "batch_size": [128],
    "tau": [0.05],
    "policy_noise": [0.2],
    "noise_clip": [0.5],
    "lr": [1e-3],
    "inside_dim": [32, 64, 128],
    "num_hidden_layers": [1, 2, 3, 4],
    "num_random_episodes": [1],
    "num_training_iterations": [100],
    "seed": [779],
}

# Settings shared by every configuration in the sweep.
logging_path = r"C:\Users\hbenoit\Desktop\DIET_Controller\logs\simple_simulation"
searching_directory = r"C:\Users\hbenoit\Desktop\DIET_Controller\logs\simple_simulation\DDPG_Agent\results"
num_episodes = 5

for candidate_arguments in all_combinations_list(agent_arguments):
    agent = DDPG_Agent(env).from_dict(dict_arguments=candidate_arguments)

    # Combine agent and environment settings (environment keys win on clashes)
    # into the record that is compared against the existing logs.
    agent_log = agent.log_dict()
    env_log = env.log_dict()
    log_dict = {**agent_log, **env_log}
    log_dict["num_episodes"] = num_episodes

    # Skip configurations that have already been trained.
    if search_similar(searching_directory, log_dict):
        continue

    print("WE TRAIN")
    results_path, summary_df = agent.train(
        logging_path=logging_path,
        num_episodes=num_episodes,
        log=True,
    )
  

# Finding the best-performing agent in the logs

In [9]:
# Example usage of Performance.search_paths on the DDPG_Agent logs.
import Performance

searching_directory = r"C:\Users\hbenoit\Desktop\DIET_Controller\logs\simple_simulation\DDPG_Agent"

# Filters passed to the search: key -> [comparison operator, value].
conditions = {
    "seed": [">", 777],
    "alpha": ["=", 1],
    "beta": ["=", 1],
    "num_episodes": ["=", 5],
}

best_path_list = Performance.search_paths(
    searching_directory,
    conditions=conditions,
    top_k=5,
    utility_function=Performance.negative_cumulative_heating,
    normalized=True,
)

# Display the selected result paths as the cell output.
best_path_list

['C:\\Users\\hbenoit\\Desktop\\DIET_Controller\\logs\\simple_simulation\\DDPG_Agent\\results\\2022_6_17\\results_2022_6_17_10_29',
 'C:\\Users\\hbenoit\\Desktop\\DIET_Controller\\logs\\simple_simulation\\DDPG_Agent\\results\\2022_6_17\\results_2022_6_17_11_49',
 'C:\\Users\\hbenoit\\Desktop\\DIET_Controller\\logs\\simple_simulation\\DDPG_Agent\\results\\2022_6_17\\results_2022_6_17_11_55',
 'C:\\Users\\hbenoit\\Desktop\\DIET_Controller\\logs\\simple_simulation\\DDPG_Agent\\results\\2022_6_17\\results_2022_6_17_11_57',
 'C:\\Users\\hbenoit\\Desktop\\DIET_Controller\\logs\\simple_simulation\\DDPG_Agent\\results\\2022_6_17\\results_2022_6_17_11_36']

# Testing in-training performance pipeline

In [3]:
import Performance
from logger.SimpleLogger import SimpleLogger

# Hand-picked run judged to have the most sensible policy; best_path_list[0]
# from the search cell is the alternative starting point.
best_agent_path = 'C:/Users/hbenoit/Desktop/DIET_Controller/logs/simple_simulation/DDPG_Agent/results/2022_6_17/results_2022_6_17_11_52'
logging_path = r"C:\Users\hbenoit\Desktop\DIET_Controller\logs\simple_simulation"

# (name, values) pair handed to across_runs -- presumably one run per seed value.
parameter = ("seed", [775, 776, 777, 778])

agent_name = "DDPG_Agent"
utility_function = Performance.cumulative_reward
agent = DDPG_Agent(env=env)
num_episodes = 5
num_iterations = env.numsteps

results_dict = Performance.across_runs(
    agent=agent,
    agent_config_path=best_agent_path,
    parameter=parameter,
    num_episodes=num_episodes,
    num_iterations=num_iterations,
    utility_function=utility_function,
    alpha=0.05,
    column_names=["Tset", "Reward"],
)

# Write the collected results to the log directory.
logger = SimpleLogger(
    logging_path=logging_path,
    agent_name=agent_name,
    num_episodes=num_episodes,
    num_iterations=num_iterations,
)
logger.log_performance_pipeline(results_dict, fixed_policy=False)

Iteration0
Iteration1000
Iteration2000
Iteration3000
Iteration4000
Iteration5000
Iteration6000
Iteration7000
Iteration8000
Iteration9000
Iteration10000
Iteration11000
Iteration12000
Iteration13000
Iteration14000
Iteration15000
Iteration16000
Iteration17000
Iteration18000
Iteration19000
Iteration20000
Iteration21000
Training iterations 0
Iteration0
ACTION SELECTED 16.000038146972656
Iteration1000
ACTION SELECTED 16.000030517578125
Iteration2000
ACTION SELECTED 16.000001907348633
Iteration3000
ACTION SELECTED 16.0000057220459
Iteration4000
ACTION SELECTED 16.000024795532227
Iteration5000
ACTION SELECTED 16.000003814697266
Iteration6000
ACTION SELECTED 16.000030517578125
Iteration7000
ACTION SELECTED 16.00000762939453
Iteration8000
ACTION SELECTED 16.000001907348633
Iteration9000
ACTION SELECTED 16.0000057220459
Iteration10000
ACTION SELECTED 16.0
Iteration11000
ACTION SELECTED 16.000001907348633
Iteration12000
ACTION SELECTED 16.00000762939453
Iteration13000
ACTION SELECTED 16.0
Iteratio

# Testing fixed policy performance

In [None]:
import Performance
from logger.SimpleLogger import SimpleLogger

# Evaluate the top-ranked run from the search cell; that cell must have been
# executed first so that best_path_list exists.
best_agent_path = best_path_list[0]

# best_path_list is built from the DDPG_Agent logs and DDPG_Agent is the only
# agent class imported in this notebook. The DQNAgent name used here before
# was never imported, so the cell failed with a NameError.
agent_name = "DDPG_Agent"

agent = Performance.load_trained_agent(DDPG_Agent(env), results_path=best_agent_path)

utility_function = Performance.cumulative_reward
# NOTE(review): this rebinds `agent` to a fresh instance, discarding the one
# loaded above. across_fixed_policy also receives agent_config_path, so it
# presumably restores the trained state itself -- confirm, then drop whichever
# of the two assignments is redundant.
agent = DDPG_Agent(env=env)
num_testing = 3
num_episodes = 2
num_iterations = env.numsteps

results_dict = Performance.across_fixed_policy(
    agent=agent,
    agent_config_path=best_agent_path,
    num_testing=num_testing,
    num_episodes=num_episodes,
    num_iterations=num_iterations,
    utility_function=utility_function,
    alpha=0.05,
    column_names=["Tset"],
)

logging_path = r"C:\Users\hbenoit\Desktop\DIET_Controller\logs\simple_simulation"

# Log under the same agent name as the evaluated agent so results land next
# to the other DDPG_Agent logs.
logger = SimpleLogger(
    logging_path=logging_path,
    agent_name=agent_name,
    num_episodes=num_episodes,
    num_iterations=num_iterations,
)

logger.log_performance_pipeline(results_dict, fixed_policy=True)