## Explore

In [23]:
from glob import glob
from explicit_memory.utils import read_yaml
import pandas as pd
import numpy as np

results_all = []
for results_path in glob("./training_results/DQN/explore/LSTM/s/*/results.yaml"):
    train_path = results_path.replace("results.yaml", "train.yaml")
    train = read_yaml(train_path)
    results = read_yaml(results_path)
    results_all.append(
        {
            "#_iterations": train["num_iterations"],
            "replay_size": train["replay_buffer_size"],
            "warm": train["warm_start"],
            "gamma": train["gamma"],
            "bs": train["batch_size"],
            "target_update": train["target_update_interval"],
            "epsilon_decay": train["epsilon_decay_until"],
            # "ddqn": train["ddqn"],
            # "dueling": train["dueling_dqn"],
            "test": results["test_score"]["mean"],
            "val": max([foo["mean"] for foo in results["validation_score"]]),
            "path": results_path.split("/")[-2],
        }
    )

df = pd.DataFrame(results_all)
df_sorted = df.sort_values(by="test", ascending=False)
print(f"number of training results: {len(df_sorted)}")

top_k = 10
df_sorted[:top_k]

number of training results: 71


Unnamed: 0,#_iterations,replay_size,warm,gamma,bs,target_update,epsilon_decay,test,val,path
42,100,100,50,0.953065,32,40,100,768.8,695.1,2024-03-09 12:36:28.293188
70,100,50,50,0.962835,32,20,50,763.5,817.1,2024-03-09 12:31:52.548481
1,100,50,50,0.507285,8,10,100,763.5,817.1,2024-03-09 12:30:01.434379
0,100,100,10,0.710372,8,20,50,763.5,816.3,2024-03-09 12:46:34.509115
68,100,50,50,0.837729,8,40,50,763.5,817.5,2024-03-09 12:34:14.135312
58,100,100,50,0.685497,32,10,50,763.5,817.5,2024-03-09 12:44:43.503821
59,100,50,50,0.806979,8,10,50,763.5,823.9,2024-03-09 12:36:11.093804
60,100,50,50,0.685051,8,40,50,763.5,817.5,2024-03-09 12:36:51.012851
14,100,50,50,0.604145,32,20,50,763.5,823.9,2024-03-09 12:35:49.329689
18,100,100,10,0.856822,8,20,100,763.5,746.1,2024-03-09 12:33:38.513756


In [24]:
df_sorted[:top_k].select_dtypes(include=[np.number]).mean()

#_iterations     100.000000
replay_size       70.000000
warm              42.000000
gamma              0.760978
bs                17.600000
target_update     23.000000
epsilon_decay     65.000000
test             764.030000
val              799.200000
dtype: float64

## MM

In [5]:
from glob import glob
from explicit_memory.utils import read_yaml
import pandas as pd
import numpy as np

results_all = []
for results_path in glob("./training_results/DQN/mm/LSTM/s/*/results.yaml"):
    train_path = results_path.replace("results.yaml", "train.yaml")
    train = read_yaml(train_path)
    results = read_yaml(results_path)
    results_all.append(
        {
            "#_iterations": train["num_iterations"],
            "replay_size": train["replay_buffer_size"],
            "warm": train["warm_start"],
            "gamma": train["gamma"],
            "bs": train["batch_size"],
            "target_update": train["target_update_interval"],
            "epsilon_decay": train["epsilon_decay_until"],
            "ddqn": train["ddqn"],
            "dueling": train["dueling_dqn"],
            "split_reward": train["split_reward_training"],
            "test": results["test_score"]["mean"],
            "val": max([foo["mean"] for foo in results["validation_score"]]),
            "path": results_path.split("/")[-2],
        }
    )

df = pd.DataFrame(results_all)
df_sorted = df.sort_values(by="test", ascending=False)
print(f"number of training results: {len(df_sorted)}")

top_k = 10
df_sorted[:top_k]

number of training results: 431


Unnamed: 0,#_iterations,replay_size,warm,gamma,bs,target_update,epsilon_decay,ddqn,dueling,split_reward,test,val,path
230,100,100,50,0.945229,32,5,50,False,False,True,796.2,634.5,2024-03-09 11:11:20.006225
267,100,10,10,0.523097,8,50,10,False,False,True,795.8,622.3,2024-03-09 11:35:26.276438
101,100,50,50,0.625542,32,50,10,False,True,False,777.3,739.6,2024-03-09 09:34:32.354639
198,100,100,10,0.752595,4,50,100,False,False,False,755.7,703.7,2024-03-09 11:41:22.018899
222,100,100,10,0.549624,4,20,10,False,False,False,755.3,719.9,2024-03-09 10:18:32.096328
362,100,100,50,0.76776,8,20,50,True,False,True,749.5,799.4,2024-03-09 11:30:47.017775
370,100,100,10,0.671053,8,10,50,True,False,False,746.1,549.0,2024-03-09 11:35:09.756538
310,100,50,50,0.692025,16,10,100,False,True,False,745.4,549.9,2024-03-09 11:30:31.397133
396,100,50,50,0.578963,4,50,10,False,False,False,743.6,649.3,2024-03-09 11:21:27.623702
145,100,100,50,0.704801,4,20,50,True,False,False,741.0,629.4,2024-03-09 10:27:41.314769


In [43]:
df_sorted[:top_k].select_dtypes(include=[np.number, bool]).mean()

#_iterations     100.000000
replay_size       76.000000
warm              34.000000
gamma              0.681069
bs                12.000000
target_update     28.500000
epsilon_decay     44.000000
ddqn               0.300000
dueling            0.200000
split_reward       0.300000
test             760.590000
val              659.700000
dtype: float64