## Explore

In [26]:
from glob import glob
from explicit_memory.utils import read_yaml
import pandas as pd
import numpy as np

results_all = []
for results_path in glob("./training_results/PPO/explore/LSTM/s/*/results.yaml"):
    train_path = results_path.replace("results.yaml", "train.yaml")
    train = read_yaml(train_path)
    results = read_yaml(results_path)
    results_all.append(
        {
            "#_episodes": train["num_episodes"],
            "#_rollouts": train["num_rollouts"],
            "epoch": train["epoch_per_rollout"],
            "gamma": train["gamma"],
            "bs": train["batch_size"],
            # "epsilon": train["epsilon"],
            "entropy": train["entropy_weight"],
            "tau": train["tau"],
            "test": results["test_score"]["mean"],
            "val": max([foo["mean"] for foo in results["validation_score"]]),
            "path": results_path.split("/")[-2],
        }
    )

df = pd.DataFrame(results_all)
df_sorted = df.sort_values(by="test", ascending=False)
print(f"number of training results: {len(df_sorted)}")

top_k = 10
df_sorted[:top_k]

number of training results: 39


Unnamed: 0,#_episodes,#_rollouts,epoch,gamma,bs,entropy,tau,test,val,path
1,10,5,16,0.64805,4,0.071747,0.984606,763.8,821.9,2024-03-09 12:44:11.519694
20,10,10,32,0.51194,8,0.081285,0.666714,763.8,756.0,2024-03-09 12:46:08.474561
11,10,5,16,0.519763,4,0.056135,0.616993,763.8,821.9,2024-03-09 12:44:32.072675
33,10,10,16,0.783936,4,0.083958,0.741123,763.8,756.0,2024-03-09 12:44:52.226486
32,10,20,64,0.886834,4,0.068915,0.726059,763.8,842.3,2024-03-09 12:42:27.609265
6,10,5,16,0.980565,8,0.053708,0.901929,733.9,775.6,2024-03-09 12:53:16.095764
9,10,5,16,0.732786,8,0.082337,0.598391,733.9,669.9,2024-03-09 12:52:01.462254
28,10,10,64,0.655638,2,0.005215,0.519201,650.8,765.5,2024-03-09 12:38:17.138874
30,10,10,32,0.798617,8,0.095404,0.638633,650.8,727.3,2024-03-09 12:40:23.661909
15,10,10,16,0.744433,2,0.020012,0.725171,650.8,682.4,2024-03-09 12:39:05.502684


In [27]:
df_sorted[:top_k].select_dtypes(include=[np.number]).mean()

#_episodes     10.000000
#_rollouts      9.000000
epoch          28.800000
gamma           0.726256
bs              5.200000
entropy         0.061872
tau             0.711882
test          723.920000
val           761.880000
dtype: float64

## MM

In [80]:
from glob import glob
from explicit_memory.utils import read_yaml
import pandas as pd
import numpy as np

results_all = []
for results_path in glob("./training_results/PPO/mm/LSTM/l/toy/*/results.yaml"):
    train_path = results_path.replace("results.yaml", "train.yaml")
    train = read_yaml(train_path)
    results = read_yaml(results_path)
    results_all.append(
        {
            "#_episodes": train["num_episodes"],
            "#_rollouts": train["num_rollouts"],
            "epoch": train["epoch_per_rollout"],
            "bs": train["batch_size"],
            # "epsilon": train["epsilon"],
            "entropy": train["entropy_weight"],
            "gamma": train["gamma"],
            "tau": train["tau"],
            "split_reward": train["split_reward_training"],
            "test": results["test_score"]["mean"],
            "val": max([foo["mean"] for foo in results["validation_score"]]),
            "path": results_path.split("/")[-2],
        }
    )

df = pd.DataFrame(results_all)
df_sorted = df.sort_values(by="test", ascending=False)
print(f"number of training results: {len(df_sorted)}")

top_k = 10
df_sorted[:top_k]

number of training results: 53


Unnamed: 0,#_episodes,#_rollouts,epoch,bs,entropy,gamma,tau,split_reward,test,val,path
41,10,40,16,32,0.005161,0.607764,0.666438,True,192.0,199.8,2024-03-09 15:35:16.966959
31,10,20,16,32,0.030761,0.946429,0.860016,False,190.0,176.2,2024-03-09 16:38:48.716825
40,10,40,32,16,0.039121,0.711576,0.850811,True,188.2,156.1,2024-03-09 17:30:15.771379
16,10,20,16,8,0.097283,0.604613,0.88478,False,176.1,152.9,2024-03-09 15:25:54.244140
17,10,40,16,8,0.042864,0.591149,0.536925,True,173.6,178.5,2024-03-09 15:13:17.668374
47,10,5,16,32,0.025269,0.687513,0.707579,False,165.4,174.8,2024-03-09 16:06:44.489367
36,10,5,32,8,0.088654,0.893089,0.535304,False,160.4,143.3,2024-03-09 16:28:05.577303
5,10,10,16,64,0.091371,0.879833,0.754847,False,149.2,154.9,2024-03-09 17:43:55.607907
22,10,20,16,32,0.075305,0.695818,0.964776,True,144.9,163.2,2024-03-09 17:11:10.885717
8,10,20,16,8,0.054678,0.580908,0.612817,False,139.3,156.7,2024-03-09 16:22:00.251726


In [84]:
filtered_df = df_sorted[df_sorted['split_reward'] == False]

In [85]:
filtered_df[:5].select_dtypes(include=[np.number, bool]).mean()

#_episodes       10.000000
#_rollouts       12.000000
epoch            19.200000
bs               28.800000
entropy           0.066668
gamma             0.802295
tau               0.748505
split_reward      0.000000
test            168.220000
val             160.420000
dtype: float64

In [83]:
df_sorted[:top_k].select_dtypes(include=[np.number, bool]).mean()

#_episodes       10.000000
#_rollouts       22.000000
epoch            19.200000
bs               24.000000
entropy           0.055047
gamma             0.719869
tau               0.737429
split_reward      0.400000
test            167.910000
val             165.640000
dtype: float64

In [50]:
df_sorted[-top_k:].select_dtypes(include=[np.number, bool]).mean()

#_episodes      10.000000
#_rollouts      19.000000
epoch           27.200000
bs              30.400000
entropy          0.044273
gamma            0.704869
tau              0.756687
split_reward     0.700000
test            65.050000
val             87.010000
dtype: float64