In [3]:
from glob import glob
from humemai.utils import read_yaml
import pandas as pd
import numpy as np

# Hyper-parameters copied verbatim from each run's train.yaml.
train_keys = [
    "num_episodes",
    "num_rollouts",
    "epoch_per_rollout",
    "gamma",
    "batch_size",
    "epsilon",
    "entropy_weight",
    "tau",
]
# Column layout is fixed up front so the frame still has a "test_score" column
# when the glob matches nothing; otherwise sort_values() raises a KeyError on
# the column-less empty frame.
columns = train_keys + ["test_score", "val_score", "path"]

results_all = []
for results_path in glob("./training-results/PPO/*/results.yaml"):
    # train.yaml is looked up at the same path with only the file name swapped.
    train_path = results_path.replace("results.yaml", "train.yaml")
    train = read_yaml(train_path)
    results = read_yaml(results_path)

    row = {key: train[key] for key in train_keys}
    row["test_score"] = results["test_score"]["mean"]
    # Highest "mean" among the entries stored under "validation_score".
    row["val_score"] = max(entry["mean"] for entry in results["validation_score"])
    # Name of the directory that contains results.yaml.
    row["path"] = results_path.split("/")[-2]
    results_all.append(row)

df = pd.DataFrame(results_all, columns=columns)
# Best test score first.
df_sorted = df.sort_values(by="test_score", ascending=False)
print(f"number of training results: {len(df_sorted)}")

top_k = 5
df_sorted[:top_k]

number of training results: 379


Unnamed: 0,num_episodes,num_rollouts,epoch_per_rollout,gamma,batch_size,epsilon,entropy_weight,tau,test_score,val_score,path
6,8,16,32,0.63324,64,0.263211,0.029818,0.970911,84.2,88.4,2024-03-01 22:35:16.501752
136,8,32,64,0.721412,32,0.212036,0.067332,0.9094,74.6,76.8,2024-03-03 03:57:33.979330
152,8,32,16,0.685885,32,0.273645,0.091987,0.939045,72.6,78.2,2024-03-01 17:23:51.061081
320,16,16,32,0.974509,64,0.2,0.001736,0.93,71.2,75.8,2024-03-03 14:44:32.408514
182,8,32,64,0.791657,32,0.197891,0.088347,0.919734,71.0,79.4,2024-03-01 22:03:45.485153


In [2]:
# Column-wise median of the numeric columns over the first top_k rows of df_sorted.
df_sorted.head(top_k).select_dtypes(include=[np.number]).median()

num_episodes          8.000000
num_rollouts         16.000000
epoch_per_rollout    32.000000
gamma                 0.687965
batch_size           32.000000
epsilon               0.251928
entropy_weight        0.057523
tau                   0.921153
test_score           70.200000
dtype: float64

In [3]:
# Column-wise mean of the numeric columns over the first top_k rows of df_sorted.
df_sorted.head(top_k).select_dtypes(include=[np.number]).mean()

num_episodes          8.000000
num_rollouts         19.200000
epoch_per_rollout    40.000000
gamma                 0.694632
batch_size           48.000000
epsilon               0.243799
entropy_weight        0.055020
tau                   0.932301
test_score           71.520000
dtype: float64

In [None]:
from agent import DQNAgent
from tqdm.auto import tqdm
import random
from copy import deepcopy
import logging

# Flag the root logger as disabled before the sweep starts.
logger = logging.getLogger()
logger.disabled = True


def make_train_config(
    pretrain_semantic, gamma, test_seed, batch_size, ddqn, dueling_dqn
):
    """Assemble the keyword arguments for a single ``DQNAgent`` run.

    Only the six arguments vary across the sweep; every other setting is a
    fixed value. The training seed is derived as ``test_seed + 5``.

    Returns:
        A newly built dict (nested dicts included), so two calls never share
        mutable state.
    """
    total_steps = 128 * 20

    env_config = {
        "des_size": "l",
        "question_prob": 1.0,
        "allow_random_human": False,
        "allow_random_question": False,
        "check_resources": True,
    }
    capacity = {
        "episodic": 16,
        "semantic": 16,
        "short": 1,
    }
    nn_params = {
        "hidden_size": 64,
        "num_layers": 2,
        "embedding_dim": 64,
        "v1_params": {
            "include_human": "sum",
            "human_embedding_on_object_location": False,
        },
        "v2_params": None,
        "fuse_information": "sum",
        "include_positional_encoding": True,
        "max_timesteps": 128,
        "max_strength": 128,
    }

    # Key order is kept stable because the dict is splatted into DQNAgent(**...).
    return {
        "env_str": "room_env:RoomEnv-v1",
        "env_config": env_config,
        "num_iterations": total_steps,
        "replay_buffer_size": total_steps,
        "epsilon_decay_until": total_steps,
        "warm_start": 128 * 10,
        "batch_size": batch_size,
        "target_update_interval": 10,
        "max_epsilon": 1.0,
        "min_epsilon": 0.1,
        "gamma": gamma,
        "capacity": capacity,
        "pretrain_semantic": pretrain_semantic,
        "nn_params": nn_params,
        "run_test": True,
        "num_samples_for_results": 10,
        "plotting_interval": 10,
        "train_seed": test_seed + 5,
        "test_seed": test_seed,
        "device": "cpu",
        "ddqn": ddqn,
        "dueling_dqn": dueling_dqn,
        "default_root_dir": "./training-results/",
    }


# Full grid: 1 x 2 x 5 x 3 x 2 x 2 = 120 configurations.
train_configs = []
for pretrain_semantic in (False,):
    for gamma in (0.5, 0.75):
        for test_seed in range(5):
            for batch_size in (256, 512, 1024):
                for ddqn in (True, False):
                    for dueling_dqn in (True, False):
                        params = make_train_config(
                            pretrain_semantic,
                            gamma,
                            test_seed,
                            batch_size,
                            ddqn,
                            dueling_dqn,
                        )
                        # Store an independent copy of the config.
                        train_configs.append(deepcopy(params))

# Train one agent per configuration, visiting them in (unseeded) random order.
random.shuffle(train_configs)
for params in tqdm(train_configs):
    agent = DQNAgent(**params)
    agent.train()

In [20]:
from glob import glob
from humemai.utils import read_yaml
import pandas as pd

# Settings copied verbatim from each run's train.yaml.
train_keys = ["gamma", "batch_size", "ddqn", "dueling_dqn"]
# Column layout is fixed up front so the frame still has a "test_score" column
# when the glob matches nothing; otherwise sort_values() raises a KeyError on
# the column-less empty frame.
columns = train_keys + ["test_score", "path"]

results_all = []
for results_path in glob("./training-results/DQN/*/results.yaml"):
    # train.yaml is looked up at the same path with only the file name swapped.
    train_path = results_path.replace("results.yaml", "train.yaml")
    train = read_yaml(train_path)
    results = read_yaml(results_path)

    row = {key: train[key] for key in train_keys}
    row["test_score"] = results["test_score"]["mean"]
    # Name of the directory that contains results.yaml.
    row["path"] = results_path.split("/")[-2]
    results_all.append(row)

df = pd.DataFrame(results_all, columns=columns)
# Best test score first.
df_sorted = df.sort_values(by="test_score", ascending=False)

print(len(df_sorted))

top_k = 10
df_sorted[:top_k]

388


Unnamed: 0,gamma,batch_size,ddqn,dueling_dqn,test_score,path
108,0.9,32,False,False,84.2,2024-02-23 07:15:37.385522
201,0.5,256,False,False,83.6,2024-02-21 14:44:10.721757
65,0.5,256,False,False,80.2,2024-02-20 19:26:57.937234
154,0.5,256,True,True,77.8,2024-02-21 04:04:46.595617
188,0.5,32,False,False,76.8,2024-02-21 02:58:53.944084
364,0.9,32,True,True,76.8,2024-02-25 01:18:50.172876
10,0.9,128,True,True,76.8,2024-02-25 12:08:19.884828
35,0.75,64,False,False,76.4,2024-02-23 00:36:32.949645
229,0.9,128,True,False,75.6,2024-02-23 15:16:02.490512
238,0.75,1024,False,False,75.0,2024-02-21 16:57:25.989463
