In [2]:
import os
import pandas as pd
import numpy as np

from tbparse import SummaryReader

import math
import plotly.express as px
import plotly.graph_objects as go

from tensorboard.backend.event_processing import event_accumulator

2023-08-08 10:52:52.935100: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# Directory prefix prepended to every run name / event-file path read below.
# The trailing slash matters: paths are built by plain string concatenation.
BASE_DIR = "../code/runs/"
# When True, the FourRooms cell re-reads the raw event logs and rewrites
# "exp_3point.csv"; when False, that cell is skipped.
FOUROOMS_TF = False

In [4]:
def load_tf(dirname):
    """Load the per-episode scalars of one run into a DataFrame.

    Parameters
    ----------
    dirname : str
        Run directory name, resolved relative to ``BASE_DIR``.

    Returns
    -------
    pandas.DataFrame
        One column per tag (``"episodic_rewards"``, ``"episode_lengths"``),
        one row per logged event, in the order the accumulator returns them.
        If the tags have different numbers of events, the shorter columns
        are padded with NaN instead of raising on assignment.
    """
    # NOTE(review): size_guidance=0 presumably asks TensorBoard to keep every
    # scalar event instead of down-sampling -- confirm against its docs.
    ea = event_accumulator.EventAccumulator(
        os.path.join(BASE_DIR, dirname),
        size_guidance={event_accumulator.SCALARS: 0},
    )
    ea.Reload()

    mnames = ["episodic_rewards", "episode_lengths"]

    # Building from Series lets pandas align on the positional index, so a
    # tag with fewer events does not break the frame construction.
    columns = {
        n: pd.Series([x.value for x in ea.Scalars(n)], dtype=float)
        for n in mnames
    }
    return pd.DataFrame(columns)

def smooth(scalars, weight: float):
    """
    Debiased exponential moving average, following TensorBoard's line chart:
    https://github.com/tensorflow/tensorboard/blob/34877f15153e1a2087316b9952c931807a122aa7/tensorboard/components/vz_line_chart2/line-chart.ts#L699

    Parameters
    ----------
    scalars : iterable of float
        Values to smooth, in order.
    weight : float
        Smoothing factor; 0 returns the input unchanged, values close to 1
        smooth heavily.

    Returns
    -------
    list
        One smoothed value per input value. Non-finite inputs (NaN, +/-inf)
        are copied through unchanged and do not update the running average,
        so a gap in the data no longer turns every later value into NaN.
    """
    last = 0
    smoothed = []
    num_acc = 0
    for next_val in scalars:
        if not math.isfinite(next_val):
            # Pass the gap through without touching the accumulator.
            smoothed.append(next_val)
            continue

        last = last * weight + (1 - weight) * next_val
        num_acc += 1
        # De-bias: the raw average starts at 0 and is pulled towards it.
        debias_weight = 1
        if weight != 1:
            debias_weight = 1 - math.pow(weight, num_acc)
        smoothed.append(last / debias_weight)

    return smoothed

def cum_mean(array, intervale = 200):
    """Trailing moving average over the previous ``intervale`` values.

    The window for position ``i`` covers positions ``i - intervale`` to
    ``i - 1``; the value at ``i`` itself is not included. While fewer than
    ``intervale`` values are available, the missing ones count as zeros, so
    the curve ramps up from 0.

    Parameters
    ----------
    array : array-like of float
        Input values (list, NumPy array or pandas Series).
    intervale : int, default 200
        Window length.

    Returns
    -------
    numpy.ndarray
        Array of the same length as ``array``.
    """
    values = np.asarray(array, dtype=float)
    new_array = np.zeros(len(values))
    for i in range(len(values)):
        if i > intervale:
            new_array[i] = values[i - intervale:i].mean()
        else:
            # Zero padding adds nothing to the sum, so divide by the full
            # window length instead of materialising the padded list.
            new_array[i] = values[:i].sum() / intervale

    return new_array

# FourRooms

## Read Data

In [5]:
if FOUROOMS_TF:
    # Ordered list, not a set: a set of strings iterates in arbitrary order,
    # and the first run processed used to fix the row index of `data`.
    experiments = [
        "OC_fourrooms_12_RouteSpec_3steps_1opt",
        "OC_fourrooms_12_RouteSpec_3steps_4opt",
        "OC_fourrooms_12_RouteSpec_3steps_8opt",
    ]

    logs = {e: load_tf(e) for e in experiments}

    columns = {}
    for k, log in logs.items():
        # Character before "opt" in the run name, e.g. "..._4opt" -> "4".
        o = k[-4]
        lengths = log["episode_lengths"]
        columns[f"reward_{o}"] = log["episodic_rewards"]
        columns[f"lengths_{o}"] = lengths
        # Mean of the (up to) 100 episodes before episode i; NaN for i == 0.
        columns[f"mean_lengths_{o}"] = pd.Series(
            [lengths[max(i - 100, 0):i].mean() for i in range(len(lengths))],
            index=lengths.index,
        )

    # Columns are aligned on the episode index and padded with NaN, so runs
    # of different lengths can be combined regardless of processing order.
    # The frame keeps at least the 3000 rows the original padding targeted.
    n_rows = max([3000] + [len(log) for log in logs.values()])
    data = pd.DataFrame(columns).reindex(range(n_rows))

    data[["lengths_1", "lengths_4", "lengths_8"]].plot(figsize=(20, 10))
    data[["mean_lengths_1", "mean_lengths_4", "mean_lengths_8"]].plot(figsize=(20, 10))
    data.to_csv("exp_3point.csv")

## Graphs
### First Version

In [41]:
# Reload the cached FourRooms results written by the "Read Data" cell.
# The file is read with default options, so any index column saved in the CSV
# comes back as an ordinary column.
data = pd.read_csv("exp_3point.csv")

def cum_mean(array, intervale = 200):
    """Trailing moving average over the previous ``intervale`` values.

    The window for position ``i`` covers positions ``i - intervale`` to
    ``i - 1`` (the value at ``i`` is excluded). While fewer than
    ``intervale`` values are available, the mean is taken over what exists.
    Position 0 has no previous values and is NaN.

    Parameters
    ----------
    array : array-like of float
        Input values (list, NumPy array or pandas Series).
    intervale : int, default 200
        Window length.

    Returns
    -------
    numpy.ndarray
        Array of the same length as ``array``.
    """
    values = np.asarray(array, dtype=float)
    # Start from NaN so position 0 never calls mean() on an empty slice,
    # which used to emit "Mean of empty slice" RuntimeWarnings.
    new_array = np.full(len(values), np.nan)
    for i in range(1, len(values)):
        start = i - intervale if i > intervale else 0
        new_array[i] = values[start:i].mean()

    return new_array

# Smooth each block of 1000 episodes on its own, so the running averages
# restart at every block boundary.
section_len = 1000
for o in [1, 4, 8]:
    lengths_col = f"lengths_{o}"
    for l1 in range(0, 3 * section_len, section_len):
        # .loc slices are label based and include the end label.
        rows = slice(l1, l1 + section_len)
        data.loc[rows, f"smooth_{o}"] = smooth(data.loc[rows, lengths_col], .99)
        data.loc[rows, f"mean_{o}"] = cum_mean(data.loc[rows, lengths_col], 300)

# Blank the single-option smoothed curve from row label 2001 onwards.
data.loc[2001:, 'smooth_1'] = None

options = [1, 4, 8]
names = ["Actor-Critic", "Option-Critic 4 Options", "Option-Critic 8 Options"]
colors = px.colors.qualitative.T10

fig = go.Figure()

for o, label, color in zip(options, names, colors):
    # Moving-average curve (shown in the legend); the first four rows are skipped.
    fig.add_trace(go.Scatter(
        x=data.index[4:],
        y=data[f'mean_{o}'][4:],
        mode='lines',
        name=label,
        line=dict(color=color),
    ))
    # Raw episode lengths, drawn faintly in the same colour.
    fig.add_trace(go.Scatter(
        x=data.index,
        y=data[f'lengths_{o}'],
        mode='lines',
        opacity=0.1,
        showlegend=False,
        line=dict(color=color),
    ))

# Horizontal legend anchored above the top-right corner of the plot area.
legend_above_plot = dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=1,
)
layout = dict(
    xaxis_title="Episodes",
    yaxis_title="Mean Steps",
    template="simple_white",
    width=1000,
    height=500,
    yaxis_range=[0, 3000],
    legend=legend_above_plot,
    font=dict(size=18),
)
fig.update_layout(**layout)

# Display the plot
fig.show()

# Area under the episode-length curve for each block of 1000 episodes
# (label-based .loc slices, end label included), one column per agent.
section_bounds = [(s * 1000, (s + 1) * 1000) for s in range(3)]

auc_by_agent = {}
for o, label in zip(options, names):
    auc_by_agent[label] = [
        np.trapz(data.loc[l1:l2, f"lengths_{o}"])
        for l1, l2 in section_bounds
    ]

table = pd.DataFrame(auc_by_agent)
table["Number of States"] = range(1, 4)
table[["Number of States"]+names]


Mean of empty slice.


invalid value encountered in double_scalars


Mean of empty slice.


invalid value encountered in double_scalars


Mean of empty slice.


invalid value encountered in double_scalars


Mean of empty slice.


invalid value encountered in double_scalars



Unnamed: 0,Number of States,Actor-Critic,Option-Critic 4 Options,Option-Critic 8 Options
0,1,53209.0,57408.0,67712.0
1,2,330841.5,104272.5,92924.0
2,3,,163037.5,137297.5


In [24]:
# Exported runs for the 4-option and 8-option agents. Later cells read the
# "episode" and "env.name: fourrooms - steps" columns from these frames.
opt4 = pd.read_csv("4opt_2023-08-08T15_11_15.611+10_00.csv")
opt8 = pd.read_csv("8opt_2023-08-08T15_13_18.951+10_00.csv")

In [42]:
def cum_mean(array, intervale = 200):
    """Trailing moving average over the previous ``intervale`` values.

    The window for position ``i`` covers positions ``i - intervale`` to
    ``i - 1`` (the value at ``i`` is excluded). While fewer than
    ``intervale`` values are available, the mean is taken over what exists.
    Position 0 has no previous values and is NaN.

    Parameters
    ----------
    array : array-like of float
        Input values (list, NumPy array or pandas Series).
    intervale : int, default 200
        Window length.

    Returns
    -------
    numpy.ndarray
        Array of the same length as ``array``.
    """
    values = np.asarray(array, dtype=float)
    # Start from NaN so position 0 never calls mean() on an empty slice,
    # which used to emit "Mean of empty slice" RuntimeWarnings.
    new_array = np.full(len(values), np.nan)
    for i in range(1, len(values)):
        start = i - intervale if i > intervale else 0
        new_array[i] = values[start:i].mean()

    return new_array

datasets = [opt4, opt8]
colors = px.colors.qualitative.T10
names = ["Option-Critic 4 Options", "Option-Critic 8 Options"]

# Each block of `span` episodes is smoothed on its own, so the running
# averages restart at every block boundary.
span = 1500
steps_col = "env.name: fourrooms - steps"
for frame in datasets:
    for l1 in range(0, 3 * span, span):
        # .loc slices are label based and include the end label.
        rows = slice(l1, l1 + span)
        frame.loc[rows, "smooth"] = smooth(frame.loc[rows, steps_col], .99)
        frame.loc[rows, "mean"] = cum_mean(frame.loc[rows, steps_col], 10)

fig = go.Figure()

for d, label, color in zip(datasets, names, colors):
    episodes = d["episode"][:4500]
    # Moving-average curve (shown in the legend).
    fig.add_trace(go.Scatter(
        x=episodes,
        y=d["mean"][:4500],
        mode='lines',
        name=label,
        line=dict(color=color),
    ))
    # Raw step counts, drawn faintly in the same colour.
    fig.add_trace(go.Scatter(
        x=episodes,
        y=d["env.name: fourrooms - steps"][:4500],
        mode='lines',
        opacity=0.1,
        showlegend=False,
        line=dict(color=color),
    ))

# Horizontal legend anchored above the top-right corner of the plot area.
legend_above_plot = dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=1,
)
layout = dict(
    xaxis_title="Episodes",
    yaxis_title="Mean Steps",
    template="simple_white",
    width=1000,
    height=500,
    yaxis_range=[0, 1500],
    legend=legend_above_plot,
    font=dict(size=18),
)
fig.update_layout(**layout)

# Display the plot
fig.show()

# table = pd.DataFrame(columns=names)

# for j, o in enumerate(options):   
    
#     tmp = []
#     for i in range(3):
#         l1 = i*1000
#         l2 = (i+1)*1000
#         section = data.loc[l1:l2, f"lengths_{o}"]
#         auc = np.trapz(section)
#         tmp.append(auc)
        
#     table[names[j]] = tmp

# table["Number of States"] = range(1, 4)
# table[["Number of States"]+names]


Mean of empty slice.


invalid value encountered in double_scalars



# BreakOut
## Half Display

In [14]:
# Read every scalar of one Breakout run with tbparse; the event file is
# addressed directly rather than through its run directory.

path = BASE_DIR + "OC_breakout_5_LTLHalfLTL5k_8opt/events.out.tfevents.1690416557.crarojasca-Blade-14-RZ09-0370.110635.0"
# `reader.scalars` is a long-format table with columns step / tag / value
# (see the printed output below).
reader = SummaryReader(path)
df = reader.scalars
print(df)

            step           tag       value
0            129    actor_loss    0.392025
1            130    actor_loss   -0.004729
2            131    actor_loss   -0.020647
3            132    actor_loss   -0.392173
4            133    actor_loss    2.655761
...          ...           ...         ...
7141431  1658841  step_rewards  100.000000
7141432  1658842  step_rewards  100.000000
7141433  1658843  step_rewards  100.000000
7141434  1658844  step_rewards  100.000000
7141435  1658845  step_rewards  100.000000

[7141436 rows x 3 columns]


In [17]:
# List the distinct scalar tags present in the run.
df.tag.unique()

array(['actor_loss', 'critic_loss', 'episode_lengths',
       'episodic_mean_rewards', 'episodic_rewards', 'epsilon',
       'option_0_active', 'option_0_avg_length', 'option_1_active',
       'option_1_avg_length', 'option_2_active', 'option_2_avg_length',
       'option_3_active', 'option_3_avg_length', 'option_4_active',
       'option_4_avg_length', 'option_5_active', 'option_5_avg_length',
       'option_6_active', 'option_6_avg_length', 'option_7_active',
       'option_7_avg_length', 'policy_entropy', 'step_rewards'],
      dtype=object)

In [20]:
# Keep only the per-episode reward rows, ordered by step. The result keeps the
# row labels of `df`, so its index does not start at 0.
logs = df[df.tag == "episodic_rewards"].sort_values("step")

In [24]:
# Inspect the filtered reward rows.
# NOTE(review): the saved output already shows a "smooth_rewards" column, which
# is only added by the next cell -- this cell was last run out of order.
logs

Unnamed: 0,step,tag,value,smooth_rewards
2083029,1,episodic_rewards,50.0,50.000000
2083030,2,episodic_rewards,0.0,24.874372
2083031,3,episodic_rewards,80.0,43.434564
2083032,4,episodic_rewards,30.0,40.025122
2083033,5,episodic_rewards,40.0,40.019996
...,...,...,...,...
2087840,4812,episodic_rewards,10.0,60.584738
2087841,4813,episodic_rewards,140.0,61.378890
2087842,4814,episodic_rewards,140.0,62.165101
2087843,4815,episodic_rewards,0.0,61.543450


In [23]:
# Debiased EMA of the per-episode rewards.
logs.loc[:, "smooth_rewards"] = smooth(logs.loc[:, "value"], .99)

colors = px.colors.qualitative.T10

fig = go.Figure()

# The first `start` rows are left out of the plot.
start = 200
# `logs` keeps the row labels of the full scalar table (2083029, ...), so its
# index is not an episode number. The "step" column is what the
# "Episodes" axis should show.
episodes = logs["step"][start:]

# Smoothed curve.
fig.add_trace(go.Scatter(
    x=episodes, y=logs['smooth_rewards'][start:],
    mode='lines',
    line=dict(
        color=colors[0],
    ),
    showlegend=False,
))
# Raw rewards, drawn faintly in the same colour.
fig.add_trace(go.Scatter(
    x=episodes, y=logs['value'][start:],
    mode='lines',
    opacity=0.1, showlegend=False,
    line=dict(color=colors[0])
))

# update_layout returns the figure, so as the last expression of the cell it
# also renders the plot.
fig.update_layout(
    xaxis_title="Episodes",
    yaxis_title="Instant Reward",
    template="simple_white",
    width=1000, height=500,
    yaxis_range=[0, 100],
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1
    ),
    font=dict(
        size=18,
    )
)

## Order Spec

In [178]:
# NOTE(review): the first run name repeats the "OC_breakout_" prefix -- confirm
# that this matches the directory on disk.
exps = [
    BASE_DIR + "OC_breakout_OC_breakout_5_LTLOrderNoFireNoPretrain5k_1opt/",
    BASE_DIR + "OC_breakout_5_LTLOrderNoFireNoPretrain5k_4opt/",
    BASE_DIR + "OC_breakout_5_LTLOrderNoFireNoPretrain5k_8opt/",
#     "../RLgames/runs/OC_12-NRA-Aut2"
]
names = [
    "actor-critic",
    "OC 4 Options",
    "OC 8 Options"
]

# Collect one reward Series per run and concatenate once at the end, rather
# than growing a frame from an empty seed inside the loop.
reward_series = []
for i, dir_ in enumerate(exps):
    print(f"Processing {i}.")
    scalars = SummaryReader(dir_).scalars
    rewards = (
        scalars.loc[scalars.tag == "episodic_rewards"]
        .sort_values("step")["value"]
        .rename(names[i])
        # Positional index = episode order, so runs align row by row.
        .reset_index(drop=True)
    )
    reward_series.append(rewards)

# Runs of different lengths are padded with NaN by the column-wise concat.
logs = pd.concat(reward_series, axis=1)

logs.to_csv("breakout_logs.csv")

Processing 0.
Processing 1.
Processing 2.


In [190]:
def cum_mean(array, interval = 200):
    """Trailing moving average over the previous ``interval`` values.

    The window for position ``i`` covers positions ``i - interval`` to
    ``i - 1``; the value at ``i`` itself is not included. While fewer than
    ``interval`` values are available, the missing ones count as zeros, so
    the curve ramps up from 0.

    Parameters
    ----------
    array : array-like of float
        Input values (list, NumPy array or pandas Series).
    interval : int, default 200
        Window length.

    Returns
    -------
    numpy.ndarray
        Array of the same length as ``array``.
    """
    values = np.asarray(array, dtype=float)
    new_array = np.zeros(len(values))
    for i in range(len(values)):
        if i > interval:
            new_array[i] = values[i - interval:i].mean()
        else:
            # Zero padding adds nothing to the sum, so divide by the full
            # window length instead of materialising the padded list.
            new_array[i] = values[:i].sum() / interval

    return new_array


tmp_logs = pd.read_csv("breakout_logs.csv")

# Only these runs are plotted.
names = [
    "actor-critic",
    "OC 8 Options",
]

fig = go.Figure()

# Slice of episodes that enters the averages.
min_value = 0
max_value = 4500
step_value = 1
for name in names:
    sub_col = f"sub_{name}"
    # Index alignment leaves NaN in every row outside the selected slice.
    tmp_logs[sub_col] = tmp_logs[name][min_value:max_value:step_value]
    tmp_logs[f"mean_{name}"] = cum_mean(tmp_logs[sub_col], 300)
    tmp_logs[f"smooth_{name}"] = smooth(tmp_logs[sub_col], .99)

colors = px.colors.qualitative.T10

# Only the moving-average curves are drawn; raw per-episode traces are left out.
for name, color in zip(names, colors):
    fig.add_trace(go.Scatter(
        x=tmp_logs.index,
        y=tmp_logs[f"mean_{name}"][:max_value],
        mode='lines',
        name=name,
        line=dict(color=color),
    ))

# NOTE(review): the y axis is labelled "Steps", but breakout_logs.csv is written
# from the "episodic_rewards" tag -- confirm the label.
legend_above_plot = dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=1,
)
layout = dict(
    xaxis_title="Episodes",
    yaxis_title="Steps",
    template="simple_white",
    width=1000,
    height=500,
    yaxis_range=[0, 50],
    legend=legend_above_plot,
    font=dict(size=18),
)
# update_layout returns the figure, so as the last expression it renders the plot.
fig.update_layout(**layout)

# Sapiento

In [159]:
exps = [
    BASE_DIR + "OC_sapiento_2_Sapiento_MULTIfeaturefreeze200_1opt/",
    BASE_DIR + "OC_sapiento_2_Sapiento_MULTIfeaturefreeze200_4opt/",
    BASE_DIR + "OC_sapiento_2_Sapiento_MULTIfeaturefreeze200_8opt/",
]
names = [
    "actor-critic",
    "OC 4 Options",
    "OC 8 Options"
]

# Collect one reward Series per run and concatenate once at the end, rather
# than growing a frame from an empty seed inside the loop.
reward_series = []
for i, dir_ in enumerate(exps):
    print(f"Processing {i}.")
    scalars = SummaryReader(dir_).scalars
    rewards = (
        scalars.loc[scalars.tag == "episodic_rewards"]
        .sort_values("step")["value"]
        .rename(names[i])
        # Positional index = episode order, so runs align row by row.
        .reset_index(drop=True)
    )
    reward_series.append(rewards)

# Runs of different lengths are padded with NaN by the column-wise concat.
logs = pd.concat(reward_series, axis=1)

Processing 0.
Processing 1.
Processing 2.


In [161]:
# logs.to_csv("sapiento_logs.csv")

In [173]:
logs = pd.read_csv("sapiento_logs.csv")

tmp_logs = pd.DataFrame()

# Every `step_value`-th episode between `min_value` and `max_value` is used.
min_value = 0
max_value = 6000
step_value = 5
for name in names:
    sub = logs[name][min_value:max_value:step_value]
    tmp_logs[f"sub_{name}"] = sub

    # Runs shorter than the longest one contain NaN. The averages are computed
    # on the valid values only and written back as Series, so they align on
    # the index; a plain array would raise on a length mismatch as soon as
    # dropna() removed a row.
    valid = sub.dropna()
    # NOTE(review): `cum_mean` is defined several times in this notebook with
    # different handling of the first `interval` values; which one runs here
    # depends on cell execution order.
    tmp_logs[f"mean_{name}"] = pd.Series(
        cum_mean(valid, 300), index=valid.index, dtype=float
    )
    tmp_logs[f"smooth_{name}"] = pd.Series(
        smooth(valid, .99), index=valid.index, dtype=float
    )


Mean of empty slice.


invalid value encountered in double_scalars


Mean of empty slice.


invalid value encountered in double_scalars


Mean of empty slice.


invalid value encountered in double_scalars



In [175]:
fig = go.Figure()
colors = px.colors.qualitative.T10

# Only these runs are plotted.
names = [
    "actor-critic",
    "OC 8 Options",
]

# Only the moving-average curves are drawn; raw per-episode traces are left out.
for name, color in zip(names, colors):
    fig.add_trace(go.Scatter(
        x=tmp_logs.index,
        y=tmp_logs[f"mean_{name}"][:max_value],
        mode='lines',
        name=name,
        line=dict(color=color),
    ))

# NOTE(review): the y axis is labelled "Steps", but the Sapiento logs are built
# from the "episodic_rewards" tag -- confirm the label.
legend_above_plot = dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=1,
)
layout = dict(
    xaxis_title="Episodes",
    yaxis_title="Steps",
    template="simple_white",
    width=1000,
    height=500,
    yaxis_range=[0, 200],
    legend=legend_above_plot,
    font=dict(size=18),
)
# update_layout returns the figure, so as the last expression it renders the plot.
fig.update_layout(**layout)