In [19]:
import pandas as pd
import tensorboard as tb
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
import os


In [20]:
from utils import *
from modified_env_utils import *

In [21]:
LOGS_DIR = "logs"

# ['rollout/ep_len_mean', 'rollout/ep_rew_mean', 'time/fps', 'train/entropy_loss', 'train/explained_variance', 'train/learning_rate', 'train/policy_loss', 'train/std', 'train/value_loss']
REWARD_KEY = "rollout/ep_rew_mean"
LEN_KEY = "rollout/ep_len_mean"
FPS_KEY = "time/fps"
ENTROPY_KEY = "train/entropy_loss"
EXPLAINED_VARIANCE_KEY = "train/explained_variance"
LEARNING_RATE_KEY = "train/learning_rate"
POLICY_LOSS_KEY = "train/policy_loss"
STD_KEY = "train/std"
VALUE_LOSS_KEY = "train/value_loss"


In [22]:
logs_folders = os.listdir(LOGS_DIR)
print(len(logs_folders))

49


In [23]:
def get_event_accumulator(folder):
    return EventAccumulator(f"{LOGS_DIR}/{folder}")


In [24]:
event_accumulators = {}
for folder in logs_folders:
    event_accumulator = get_event_accumulator(folder)
    event_accumulator.Reload()
    event_accumulators[folder] = event_accumulator

In [25]:
accumulator = next(iter(event_accumulators.values()))
print(accumulator.Tags())
scalars = accumulator.scalars.Keys()
print(scalars)

{'images': [], 'audio': [], 'histograms': [], 'scalars': ['rollout/ep_len_mean', 'rollout/ep_rew_mean', 'time/fps', 'train/entropy_loss', 'train/explained_variance', 'train/learning_rate', 'train/policy_loss', 'train/std', 'train/value_loss'], 'distributions': [], 'tensors': [], 'graph': False, 'meta_graph': False, 'run_metadata': []}
['rollout/ep_len_mean', 'rollout/ep_rew_mean', 'time/fps', 'train/entropy_loss', 'train/explained_variance', 'train/learning_rate', 'train/policy_loss', 'train/std', 'train/value_loss']


In [26]:
accumulators_by_env = {}
accumulators_by_model = {}

for folder, event_accumulator in event_accumulators.items():
    folder = "_".join(folder.split("_")[:-1])
    env_name = get_env(folder)
    model = get_model_type_from_filename(folder)

    # add to accumulators by env
    if env_name not in accumulators_by_env:
        accumulators_by_env[env_name] = {}
    accumulators_by_env[env_name][folder] = event_accumulator

    # add to accumulators by model
    if model not in accumulators_by_model:
        accumulators_by_model[model] = {}
    accumulators_by_model[model][folder] = event_accumulator

In [39]:
columns = ["folder", "model", "env", "length"]
used_scalars = [REWARD_KEY, LEN_KEY]
used_scalars_names = ["reward", "len"]
scalar_columns = ["max", "min", "last"]
columns.extend([f"{scalar}_{column}" for scalar in used_scalars_names for column in scalar_columns])
results_rows = []
for env_name, accumulators in accumulators_by_env.items():
    print(f"{env_name}: {len(accumulators)}")
    for folder, event_accumulator in accumulators.items():
        model = get_model_type_from_filename(folder)
        env = get_env(folder)

        scalars = event_accumulator.scalars.Keys()
        accumulator_row = [folder, model, env]
        got_length = False
        for scalar, scalar_name in zip(used_scalars, used_scalars_names):
            if scalar not in scalars:
                # print(scalars)
                # print(f"{folder} does not have {scalar}")
                max = None
                min = None
                last = None
            else:
                values = event_accumulator.Scalars(scalar)
                values_columns_names = list(values[0].__dict__.keys())
                values_columns = [event.__dict__.values() for event in values]
                df = pd.DataFrame(values_columns, columns=values_columns_names)

                max = df["value"].max()
                min = df["value"].min()
                df.sort_values(by="step", ascending=True, inplace=True)
                if not got_length:
                    accumulator_row.insert(columns.index("length"), df["step"].iloc[-1])
                    got_length = True
                last = df["value"].iloc[-1]
            accumulator_row.extend([max, min, last])
        results_rows.append(accumulator_row)

    results_df = pd.DataFrame(results_rows, columns=columns)
    # results_df.sort_values(by=f"reward_max", ascending=False, inplace=True)


LunarLander-v2: 13
LunarLander-v2---max-thrust--0-75: 4
LunarLander-v2---wind-power--5-0: 4
LunarLander-v2---wind-power--10-0: 3
LunarLander-v2---wind-power--15-0: 3
LunarLander-v2---failure-rate--0-25: 3
LunarLander-v2---failure-rate--0-5: 3
LunarLander-v2---byzantine-rate--0-25: 3
LunarLander-v2---byzantine-rate--0-5: 4
LunarLander-v2---failure-rate--0-05: 5
LunarLander-v2---failure-rate--0-1: 4


In [40]:
results_df_by_model = results_df.groupby("model")
for model, df in results_df_by_model:
    print(model)
    df.sort_values(by="env", inplace=True)
    display(df.loc[:, ~df.columns.isin(["folder", "model"])])


A2C


Unnamed: 0,env,length,reward_max,reward_min,reward_last,len_max,len_min,len_last
0,LunarLander-v2,170000,-236.452652,-306.650452,-306.650452,570.5,233.830002,570.5
1,LunarLander-v2,150000,-400.023376,-754.365417,-582.866028,1449.329956,97.800003,1449.329956
5,LunarLander-v2,150000,-202.001465,-410.694122,-293.969849,612.530029,109.5,450.079987
6,LunarLander-v2,150000,-253.250946,-636.84967,-253.250946,622.51001,106.864868,518.02002
33,LunarLander-v2---byzantine-rate--0-25,150000,-292.394989,-484.809967,-351.236298,1125.810059,98.5,1125.810059
36,LunarLander-v2---byzantine-rate--0-5,77000,-234.007477,-310.266235,-305.794556,159.72728,114.75,159.72728
37,LunarLander-v2---byzantine-rate--0-5,150000,-227.212433,-436.405945,-283.610931,255.330002,93.800003,255.330002
40,LunarLander-v2---failure-rate--0-05,150000,-173.486984,-397.2612,-378.094635,1158.97998,124.333336,1158.97998
45,LunarLander-v2---failure-rate--0-1,150000,-228.266678,-425.79718,-422.983215,2429.694336,113.0,1922.657837
27,LunarLander-v2---failure-rate--0-25,150000,-275.416351,-626.930847,-276.24292,903.77002,89.080002,903.77002


DDPG


Unnamed: 0,env,length,reward_max,reward_min,reward_last,len_max,len_min,len_last
2,LunarLander-v2,155104,-139.497116,-480.652466,-141.257126,623.659973,82.5,398.799988
7,LunarLander-v2,155329,-16.730101,-833.337769,-21.79413,608.609985,88.0,477.0
8,LunarLander-v2,153566,-88.562225,-853.943726,-88.562225,652.830017,78.0,647.059998
41,LunarLander-v2---failure-rate--0-05,1792,-272.985596,-450.430786,-371.169525,115.833336,75.25,112.0
42,LunarLander-v2---failure-rate--0-05,157548,-173.591629,-951.069641,-220.480194,758.400024,102.75,370.459991
15,LunarLander-v2---max-thrust--0-75,79358,-151.214157,-344.6073,-172.113037,342.880005,79.625,338.839996


PPO


Unnamed: 0,env,length,reward_max,reward_min,reward_last,len_max,len_min,len_last
4,LunarLander-v2,153600,44.948971,-220.45047,44.948971,677.580017,107.052635,464.160004
9,LunarLander-v2,153600,98.261688,-250.998184,98.261688,881.880005,105.526314,802.080017
10,LunarLander-v2,153600,72.078133,-240.32402,72.078133,884.369995,105.111115,884.369995
34,LunarLander-v2---byzantine-rate--0-25,153600,-51.564663,-248.864975,-74.847038,499.429993,99.5,499.429993
38,LunarLander-v2---byzantine-rate--0-5,153600,-119.62159,-244.223297,-119.62159,183.449997,108.555557,183.449997
43,LunarLander-v2---failure-rate--0-05,153600,-63.5303,-262.285828,-136.515945,612.950012,108.244682,327.890015
46,LunarLander-v2---failure-rate--0-1,153600,-0.940206,-281.396973,-0.940206,659.73999,103.92308,502.200012
29,LunarLander-v2---failure-rate--0-25,153600,-33.346172,-244.939301,-33.346172,758.98999,99.016129,438.200012
32,LunarLander-v2---failure-rate--0-5,153600,-31.614159,-171.878326,-31.614159,149.880005,87.742859,140.419998
16,LunarLander-v2---max-thrust--0-75,153600,-57.385582,-237.672867,-57.385582,682.820007,114.333336,325.070007


SAC


Unnamed: 0,env,length,reward_max,reward_min,reward_last,len_max,len_min,len_last
3,LunarLander-v2,149642,185.473572,-271.13623,154.138931,702.640015,94.0,349.549988
11,LunarLander-v2,149317,210.785843,-167.376083,199.461029,621.570007,161.75,319.290009
12,LunarLander-v2,149103,40.995003,-246.109467,40.995003,791.419983,86.5,689.359985
35,LunarLander-v2---byzantine-rate--0-25,146432,-94.873909,-307.104462,-94.873909,1558.366699,133.0,1413.920044
39,LunarLander-v2---byzantine-rate--0-5,94964,-76.314713,-185.968613,-129.672638,388.869995,104.5,388.869995
44,LunarLander-v2---failure-rate--0-05,148631,132.274673,-213.371399,132.274673,593.929993,121.5,505.429993
47,LunarLander-v2---failure-rate--0-1,147203,-198.012543,-385.652435,-198.012543,1682.474976,115.0,1176.147705
48,LunarLander-v2---failure-rate--0-1,148073,104.778748,-279.042908,104.778748,738.700012,101.5,508.899994
28,LunarLander-v2---failure-rate--0-25,148693,200.833145,-214.601364,171.485413,714.59375,90.0,386.649994
31,LunarLander-v2---failure-rate--0-5,149744,-38.51025,-228.60849,-44.803474,128.559998,83.25,126.0


In [41]:
results_df_by_env = results_df.groupby("env")
for env, df in results_df_by_env:  
    print(env)
    df.sort_values(by="env",inplace=True)
    display(df.loc[:, ~df.columns.isin(["env","folder"])])


LunarLander-v2


Unnamed: 0,model,length,reward_max,reward_min,reward_last,len_max,len_min,len_last
0,A2C,170000,-236.452652,-306.650452,-306.650452,570.5,233.830002,570.5
1,A2C,150000,-400.023376,-754.365417,-582.866028,1449.329956,97.800003,1449.329956
2,DDPG,155104,-139.497116,-480.652466,-141.257126,623.659973,82.5,398.799988
3,SAC,149642,185.473572,-271.13623,154.138931,702.640015,94.0,349.549988
4,PPO,153600,44.948971,-220.45047,44.948971,677.580017,107.052635,464.160004
5,A2C,150000,-202.001465,-410.694122,-293.969849,612.530029,109.5,450.079987
6,A2C,150000,-253.250946,-636.84967,-253.250946,622.51001,106.864868,518.02002
7,DDPG,155329,-16.730101,-833.337769,-21.79413,608.609985,88.0,477.0
8,DDPG,153566,-88.562225,-853.943726,-88.562225,652.830017,78.0,647.059998
9,PPO,153600,98.261688,-250.998184,98.261688,881.880005,105.526314,802.080017


LunarLander-v2---byzantine-rate--0-25


Unnamed: 0,model,length,reward_max,reward_min,reward_last,len_max,len_min,len_last
33,A2C,150000,-292.394989,-484.809967,-351.236298,1125.810059,98.5,1125.810059
34,PPO,153600,-51.564663,-248.864975,-74.847038,499.429993,99.5,499.429993
35,SAC,146432,-94.873909,-307.104462,-94.873909,1558.366699,133.0,1413.920044


LunarLander-v2---byzantine-rate--0-5


Unnamed: 0,model,length,reward_max,reward_min,reward_last,len_max,len_min,len_last
36,A2C,77000,-234.007477,-310.266235,-305.794556,159.72728,114.75,159.72728
37,A2C,150000,-227.212433,-436.405945,-283.610931,255.330002,93.800003,255.330002
38,PPO,153600,-119.62159,-244.223297,-119.62159,183.449997,108.555557,183.449997
39,SAC,94964,-76.314713,-185.968613,-129.672638,388.869995,104.5,388.869995


LunarLander-v2---failure-rate--0-05


Unnamed: 0,model,length,reward_max,reward_min,reward_last,len_max,len_min,len_last
40,A2C,150000,-173.486984,-397.2612,-378.094635,1158.97998,124.333336,1158.97998
41,DDPG,1792,-272.985596,-450.430786,-371.169525,115.833336,75.25,112.0
42,DDPG,157548,-173.591629,-951.069641,-220.480194,758.400024,102.75,370.459991
43,PPO,153600,-63.5303,-262.285828,-136.515945,612.950012,108.244682,327.890015
44,SAC,148631,132.274673,-213.371399,132.274673,593.929993,121.5,505.429993


LunarLander-v2---failure-rate--0-1


Unnamed: 0,model,length,reward_max,reward_min,reward_last,len_max,len_min,len_last
45,A2C,150000,-228.266678,-425.79718,-422.983215,2429.694336,113.0,1922.657837
46,PPO,153600,-0.940206,-281.396973,-0.940206,659.73999,103.92308,502.200012
47,SAC,147203,-198.012543,-385.652435,-198.012543,1682.474976,115.0,1176.147705
48,SAC,148073,104.778748,-279.042908,104.778748,738.700012,101.5,508.899994


LunarLander-v2---failure-rate--0-25


Unnamed: 0,model,length,reward_max,reward_min,reward_last,len_max,len_min,len_last
27,A2C,150000,-275.416351,-626.930847,-276.24292,903.77002,89.080002,903.77002
28,SAC,148693,200.833145,-214.601364,171.485413,714.59375,90.0,386.649994
29,PPO,153600,-33.346172,-244.939301,-33.346172,758.98999,99.016129,438.200012


LunarLander-v2---failure-rate--0-5


Unnamed: 0,model,length,reward_max,reward_min,reward_last,len_max,len_min,len_last
30,A2C,150000,-79.922859,-407.965912,-115.64241,239.330002,84.727272,187.190002
31,SAC,149744,-38.51025,-228.60849,-44.803474,128.559998,83.25,126.0
32,PPO,153600,-31.614159,-171.878326,-31.614159,149.880005,87.742859,140.419998


LunarLander-v2---max-thrust--0-75


Unnamed: 0,model,length,reward_max,reward_min,reward_last,len_max,len_min,len_last
13,A2C,150000,-172.319672,-551.877502,-220.667114,498.779999,108.25,498.779999
14,SAC,149146,-192.448166,-418.174591,-418.174591,1477.22998,92.75,1477.22998
15,DDPG,79358,-151.214157,-344.6073,-172.113037,342.880005,79.625,338.839996
16,PPO,153600,-57.385582,-237.672867,-57.385582,682.820007,114.333336,325.070007


LunarLander-v2---wind-power--10-0


Unnamed: 0,model,length,reward_max,reward_min,reward_last,len_max,len_min,len_last
21,A2C,150000,-143.933395,-572.376404,-205.673065,547.859985,93.599998,417.170013
22,SAC,149057,93.922272,-419.526001,80.7239,734.01001,106.5,394.929993
23,PPO,153600,-10.691111,-237.685669,-10.691111,919.140015,102.947365,373.309998


LunarLander-v2---wind-power--15-0


Unnamed: 0,model,length,reward_max,reward_min,reward_last,len_max,len_min,len_last
24,A2C,150000,19.821482,-668.412476,-136.707581,601.859985,101.367348,601.859985
25,SAC,149556,202.200546,-306.832642,185.989029,994.109985,143.25,347.970001
26,PPO,153600,-64.839897,-251.497894,-64.839897,1089.727295,112.694443,183.860001


LunarLander-v2---wind-power--5-0


Unnamed: 0,model,length,reward_max,reward_min,reward_last,len_max,len_min,len_last
17,A2C,150000,-308.635834,-563.784119,-409.120087,804.0,105.878784,804.0
18,SAC,3917,-204.409363,-289.090729,-204.409363,195.850006,105.0,195.850006
19,SAC,148452,154.536194,-293.235901,137.542816,769.119995,122.5,565.590027
20,PPO,153600,-67.407654,-234.716034,-72.116722,741.359985,103.789474,727.190002
