In [2]:
from glob import glob
from explicit_memory.utils import read_yaml
import pandas as pd
import numpy as np

results_all = []
for results_path in glob("./training_results/DQN/mm/LSTM/s/*/results.yaml"):
    train_path = results_path.replace("results.yaml", "train.yaml")
    train = read_yaml(train_path)
    results = read_yaml(results_path)
    results_all.append(
        {
            "num_iterations": train["num_iterations"],
            "replay_buffer_size": train["replay_buffer_size"],
            "warm_start": train["warm_start"],
            "gamma": train["gamma"],
            "batch_size": train["batch_size"],
            "target_update_interval": train["target_update_interval"],
            "epsilon_decay_until": train["epsilon_decay_until"],
            "ddqn": train["ddqn"],
            "dueling_dqn": train["dueling_dqn"],
            "split_reward_training": train["split_reward_training"],
            "test_score": results["test_score"]["mean"],
            "val_score": max([foo["mean"] for foo in results["validation_score"]]),
            "path": results_path.split("/")[-2],
        }
    )

df = pd.DataFrame(results_all)
df_sorted = df.sort_values(by="test_score", ascending=False)
print(f"number of training results: {len(df_sorted)}")

top_k = 10
df_sorted[:top_k]

number of training results: 295


Unnamed: 0,num_iterations,replay_buffer_size,warm_start,gamma,batch_size,target_update_interval,epsilon_decay_until,ddqn,dueling_dqn,split_reward_training,test_score,val_score,path
202,100,100,50,0.616647,16,20,100,True,False,False,738.7,501.2,2024-03-04 12:10:19.363577
178,100,100,50,0.504607,8,20,100,False,False,True,731.0,632.6,2024-03-04 11:09:47.335313
175,100,100,100,0.899188,16,50,100,True,True,False,714.0,572.8,2024-03-04 12:25:19.028678
192,100,50,10,0.827169,8,50,100,False,False,False,704.2,522.3,2024-03-04 12:29:29.609160
246,100,10,10,0.688927,8,10,50,False,False,False,698.6,445.9,2024-03-04 10:14:58.037869
6,100,100,50,0.833667,16,20,10,False,True,False,683.0,481.1,2024-03-04 12:16:33.008181
128,100,50,10,0.574458,8,20,10,False,False,False,679.7,601.6,2024-03-04 10:35:18.670043
157,100,100,50,0.524456,4,5,100,False,False,True,679.7,576.4,2024-03-04 10:53:58.339860
116,100,100,10,0.543233,8,50,50,False,True,True,679.7,550.9,2024-03-04 10:48:07.267002
286,100,50,10,0.609316,8,20,10,False,False,False,679.4,636.6,2024-03-04 12:22:06.630158


In [4]:
df_sorted[:top_k].select_dtypes(include=[np.number]).median()

num_iterations            100.000000
replay_buffer_size        100.000000
warm_start                 30.000000
gamma                       0.612982
batch_size                  8.000000
target_update_interval     20.000000
epsilon_decay_until        75.000000
test_score                690.800000
val_score                 561.850000
dtype: float64

In [5]:
df_sorted[:top_k].select_dtypes(include=[np.number]).mean()

num_iterations            100.000000
replay_buffer_size         76.000000
warm_start                 35.000000
gamma                       0.662167
batch_size                 10.000000
target_update_interval     26.500000
epsilon_decay_until        63.000000
test_score                698.800000
val_score                 552.140000
dtype: float64

In [14]:
top_k = 10
df_sorted[-top_k:]

Unnamed: 0,num_iterations,replay_buffer_size,warm_start,gamma,batch_size,target_update_interval,epsilon_decay_until,ddqn,dueling_dqn,split_reward_training,test_score,val_score,path
165,100,100,100,0.602287,32,50,50,True,False,True,190.2,456.2,2024-03-04 10:09:34.249153
161,100,50,10,0.696905,8,5,10,False,True,False,190.2,493.0,2024-03-04 11:00:40.787688
160,100,50,50,0.934442,16,5,100,True,True,True,190.2,503.9,2024-03-04 11:39:21.825215
106,100,50,50,0.948023,32,50,10,False,False,False,190.2,436.2,2024-03-04 10:46:36.525355
47,100,50,50,0.792558,32,50,10,True,True,True,189.6,433.2,2024-03-04 11:23:00.961343
230,100,100,50,0.558575,16,10,50,True,False,False,189.6,443.7,2024-03-04 11:02:17.158267
103,100,50,50,0.580734,8,20,100,True,False,True,189.6,550.3,2024-03-04 10:50:15.071202
216,100,100,100,0.930686,64,50,10,False,True,False,184.1,317.2,2024-03-04 12:26:41.378510
117,100,100,10,0.606811,8,20,10,True,True,True,184.1,339.0,2024-03-04 11:45:01.262337
8,100,50,50,0.944341,4,5,10,False,True,False,184.0,341.9,2024-03-04 10:41:15.543503


In [15]:
df_sorted[-top_k:].select_dtypes(include=[np.number]).median()

num_iterations            100.000000
replay_buffer_size         50.000000
warm_start                 50.000000
gamma                       0.744732
batch_size                 16.000000
target_update_interval     20.000000
epsilon_decay_until        10.000000
test_score                189.600000
val_score                 439.950000
dtype: float64

In [16]:
df_sorted[-top_k:].select_dtypes(include=[np.number]).mean()

num_iterations            100.000000
replay_buffer_size         70.000000
warm_start                 52.000000
gamma                       0.759536
batch_size                 22.000000
target_update_interval     26.500000
epsilon_decay_until        36.000000
test_score                188.180000
val_score                 431.460000
dtype: float64