In [1]:
from glob import glob
from explicit_memory.utils import read_yaml
import pandas as pd
import numpy as np

results_all = []
for results_path in glob("./training_results/PPO/*/results.yaml"):
    train_path = results_path.replace("results.yaml", "train.yaml")
    train = read_yaml(train_path)
    results = read_yaml(results_path)
    results_all.append(
        {
            "gamma": train["gamma"],
            "batch_size": train["batch_size"],
            "rollout_multiples": train["rollout_multiples"],
            "epoch": train["epoch"],
            "epsilon": train["epsilon"],
            "entropy_weight": train["entropy_weight"],
            "tau": train["tau"],
            "num_episodes": train["num_episodes"],
            "test_score": results["test_score"]["mean"],
            "path": results_path.split("/")[-2],
        }
    )

df = pd.DataFrame(results_all)
df_sorted = df.sort_values(by="test_score", ascending=False)
df_sorted[:10]

Unnamed: 0,gamma,batch_size,rollout_multiples,epoch,epsilon,entropy_weight,tau,num_episodes,test_score,path
55,0.666202,16,1,64,0.177128,0.049861,0.946423,16,79.4,2024-02-26 01:02:33.051209
9,0.77889,16,2,64,0.295614,0.025511,0.933607,16,79.2,2024-02-26 09:58:44.331760
27,0.677886,64,1,16,0.179656,0.079027,0.921926,16,76.6,2024-02-26 03:21:34.735486
28,0.575126,128,8,8,0.17913,0.093056,0.90949,16,76.6,2024-02-26 07:25:40.867056
69,0.655819,64,2,64,0.151351,0.046495,0.947462,16,76.0,2024-02-26 04:11:42.968327
61,0.97817,32,1,32,0.296022,0.006953,0.96126,16,75.6,2024-02-26 07:02:49.934022
17,0.594336,128,8,8,0.246431,0.086101,0.948052,16,73.4,2024-02-26 06:40:28.695328
31,0.773587,32,8,16,0.165273,0.071453,0.929209,16,71.8,2024-02-26 05:48:11.306158
12,0.7,64,2,32,0.2,0.055,0.93,50,71.8,2024-02-26 09:20:33.862611
57,0.683572,32,4,8,0.240478,0.063361,0.930643,16,71.6,2024-02-26 03:27:19.037119


In [24]:
df_sorted[:5].select_dtypes(include=[np.number]).median()

gamma                 0.666202
batch_size           64.000000
rollout_multiples     1.000000
epoch                32.000000
epsilon               0.179130
entropy_weight        0.049861
tau                   0.946423
test_score           76.600000
dtype: float64

In [25]:
df_sorted[:5].select_dtypes(include=[np.number]).mean()

gamma                 0.710640
batch_size           60.800000
rollout_multiples     2.600000
epoch                36.800000
epsilon               0.196658
entropy_weight        0.055078
tau                   0.937312
test_score           76.840000
dtype: float64

In [None]:
from agent import DQNAgent
from tqdm.auto import tqdm
import random
from copy import deepcopy
import logging

logger = logging.getLogger()
logger.disabled = True


train_configs = []
for pretrain_semantic in [False]:
    for gamma in [0.5, 0.75]:
        for test_seed in [0, 1, 2, 3, 4]:
            for batch_size in [256, 512, 1024]:
                for ddqn in [True, False]:
                    for dueling_dqn in [True, False]:
                        params = {
                            "env_str": "room_env:RoomEnv-v1",
                            "env_config": {
                                "des_size": "l",
                                "question_prob": 1.0,
                                "allow_random_human": False,
                                "allow_random_question": False,
                                "check_resources": True,
                            },
                            "num_iterations": 128 * 20,
                            "replay_buffer_size": 128 * 20,
                            "epsilon_decay_until": 128 * 20,
                            "warm_start": 128 * 10,
                            "batch_size": batch_size,
                            "target_update_interval": 10,
                            "max_epsilon": 1.0,
                            "min_epsilon": 0.1,
                            "gamma": gamma,
                            "capacity": {
                                "episodic": 16,
                                "semantic": 16,
                                "short": 1,
                            },
                            "pretrain_semantic": pretrain_semantic,
                            "nn_params": {
                                "hidden_size": 64,
                                "num_layers": 2,
                                "embedding_dim": 64,
                                "v1_params": {
                                    "include_human": "sum",
                                    "human_embedding_on_object_location": False,
                                },
                                "v2_params": None,
                                "fuse_information": "sum",
                                "include_positional_encoding": True,
                                "max_timesteps": 128,
                                "max_strength": 128,
                            },
                            "run_test": True,
                            "num_samples_for_results": 10,
                            "plotting_interval": 10,
                            "train_seed": test_seed + 5,
                            "test_seed": test_seed,
                            "device": "cpu",
                            "ddqn": ddqn,
                            "dueling_dqn": dueling_dqn,
                            "default_root_dir": f"./training_results/",
                        }
                        train_configs.append(deepcopy(params))

random.shuffle(train_configs)
for params in tqdm(train_configs):
    agent = DQNAgent(**params)
    agent.train()

In [3]:
from glob import glob
from explicit_memory.utils import read_yaml
import pandas as pd

results_all = []
for results_path in glob("./training_results/DQN/*/results.yaml"):
    train_path = results_path.replace("results.yaml", "train.yaml")
    train = read_yaml(train_path)
    results = read_yaml(results_path)
    results_all.append(
        {
            "gamma": train["gamma"],
            "batch_size": train["batch_size"],
            "ddqn": train["ddqn"],
            "dueling_dqn": train["dueling_dqn"],
            "test_score": results["test_score"]["mean"],
            "path": results_path.split("/")[-2],
        }
    )

df = pd.DataFrame(results_all)
df_sorted = df.sort_values(by="test_score", ascending=False)
df_sorted[:10]

Unnamed: 0,gamma,batch_size,ddqn,dueling_dqn,test_score,path
108,0.9,32,False,False,84.2,2024-02-23 07:15:37.385522
201,0.5,256,False,False,83.6,2024-02-21 14:44:10.721757
65,0.5,256,False,False,80.2,2024-02-20 19:26:57.937234
154,0.5,256,True,True,77.8,2024-02-21 04:04:46.595617
188,0.5,32,False,False,76.8,2024-02-21 02:58:53.944084
364,0.9,32,True,True,76.8,2024-02-25 01:18:50.172876
10,0.9,128,True,True,76.8,2024-02-25 12:08:19.884828
35,0.75,64,False,False,76.4,2024-02-23 00:36:32.949645
229,0.9,128,True,False,75.6,2024-02-23 15:16:02.490512
238,0.75,1024,False,False,75.0,2024-02-21 16:57:25.989463


In [6]:
results_path.split("/")[-2]

'2024-02-23 00:42:55.690184'