# Data Sanitization

In [4]:
import json
import wandb

api = wandb.Api()

runs = api.runs("no-organization-for-signup/mini-test-waterworld")

# Normalise `num_agents` to a real int on every run so later cells can compare
# it and do arithmetic with it without casting.
for run in runs:
    raw_num_agents = run.config.get('num_agents')
    if raw_num_agents is None:
        # One run without the key should not abort the whole sanitization pass.
        print(f"Skipping run {run.id}: no 'num_agents' in config")
        continue
    if type(raw_num_agents) is int:
        # Already clean: skip the upload that run.update() would perform.
        # (An exact type check is used so a float such as 4.0 is still rewritten.)
        continue
    run.config['num_agents'] = int(raw_num_agents)
    #run.config['steps_pretrained'] = json.dumps(int(0))
    #run.config['steps_pretrained'] = int(json.dumps(int(0)))
    run.update()

# Localize/Ingest Data

Export the run data to local CSV files so that later cells do not have to repeat the numerous, slow W&B API calls:

In [174]:
import wandb

api = wandb.Api()
runs = api.runs("no-organization-for-signup/ww_test")

# Bind the run explicitly so this cell does not depend on a `run` variable
# left behind by a loop in some other cell (which may belong to another project).
run = runs[0]

# Metric history of that run, tagged with the run id
a = run.history()
a["id"] = run.id
#a.head()

# Merge summary metrics with the public config keys (keys starting with '_' are dropped)
summary = run.summary._json_dict
config = {k: v for k, v in run.config.items() if not k.startswith('_')}
run_data = {**summary, **config}

# Displayed as the cell output
run_data['steps_pretrained']

0

In [30]:
import wandb
import pandas as pd
from concurrent.futures import ThreadPoolExecutor

ENTITY = "no-organization-for-signup"
PROJECT = "ww_test"

# Export a single run: one flat metadata record plus its metric history
def export_run(run):
    """Collect one W&B run into a flat record and a history table.

    Args:
        run: object exposing `summary._json_dict`, `config`, `name`, `id`,
            `created_at`, `state` and a `history()` method returning a DataFrame.

    Returns:
        `(record, history)` on success, where `record` is a dict of summary
        metrics, public config keys and identifying metadata, and `history` is
        the run's history with `id` and `steps_pretrained` columns added.
        On any exception, an error message string is returned instead.
    """
    try:
        # Summary metrics first; public config keys are layered on top and
        # win on a name clash. Keys starting with '_' are skipped.
        record = dict(run.summary._json_dict)
        for key, value in run.config.items():
            if key.startswith('_'):
                continue
            record[key] = value

        # Identifying metadata
        record['name'] = run.name
        record['id'] = run.id
        record['created_at'] = run.created_at
        record['state'] = run.state

        # Tag every history row so rows from different runs can be told apart
        history = run.history()
        history['id'] = run.id
        history['steps_pretrained'] = record['steps_pretrained']
    except Exception as e:
        return f"Error exporting run {run.id}: {e}"

    return record, history

# Initialize W&B API
api = wandb.Api()
runs = api.runs(f'{ENTITY}/{PROJECT}')

# Initialize lists to hold run data and history
runs_data = []
histories = []

# Export the runs in parallel (5 worker threads)
with ThreadPoolExecutor(max_workers=5) as executor:
    for result in executor.map(export_run, runs):
        # export_run reports a failure as a message string instead of raising;
        # unpacking that string as (run_data, history) would crash the loop.
        if isinstance(result, str):
            print(result)
            continue
        run_data, history = result
        runs_data.append(run_data)
        histories.append(history)

# pd.concat and the column reordering below both fail on an empty export,
# so stop here with an explicit message.
if not histories:
    raise RuntimeError(f"No runs could be exported from '{ENTITY}/{PROJECT}'.")

# Convert the list of dictionaries to a DataFrame
runs_df = pd.DataFrame(runs_data)
hist_df = pd.concat(histories, keys=[f'run_{i}' for i in range(len(histories))])

# Reorder columns so identifying info is at the front
id_cols = ['id', 'name', 'created_at', 'state']
cols = id_cols + [col for col in runs_df.columns if col not in id_cols]
runs_df = runs_df[cols]

# Export both tables to CSV; the history keeps its (run key, row) index
runs_df.to_csv(f"{PROJECT}.csv", index=False)
hist_df.to_csv(f"{PROJECT}_history.csv", index=True)

print(f"Data has been successfully exported to '{PROJECT}.csv'.")

Data has been successfully exported to 'ww_test.csv'.


In [2]:
import pandas as pd
import numpy as np

import plotly.express as px
import plotly.graph_objects as go

import wandb

In [20]:
import wandb
import numpy as np
import pandas as pd

# Module-level W&B API client; wandb_ingest below reads this global.
api = wandb.Api()

def wandb_ingest(path, agent_filter=lambda _: False, max_timestep=200):
    """Flatten the episode-reward history of a W&B project into one table.

    Uses the module-level `api` client.

    Args:
        path: project path passed to `api.runs`.
        agent_filter: callable taking a run's `num_agents`; the run is skipped
            when it returns a truthy value. The default keeps every run.
        max_timestep: rows whose `timestep` (history position plus
            `steps_pretrained`) would exceed this value are not emitted.

    Returns:
        DataFrame with one row per kept history entry and the columns `id`,
        `num_agents`, `timestep`, `episode_reward_mean`, `per_agent_erm`
        and `pretrained_steps`.

    A run whose rows cannot be computed is reported on stdout and the loop
    moves on; rows emitted for that run before the failure are kept.
    """
    data = []

    for run in api.runs(path):
        if run.state == 'running': continue # Skip currently in-progress run
        run_id = run.id
        n = run.config['num_agents']
        pre_steps = run.config['steps_pretrained']

        if agent_filter(n): continue

        # Replace 'NaN' placeholders, then back-fill gaps from the next valid value.
        # NOTE(review): replace(..., None) has behaved differently across pandas
        # versions -- confirm it does what is intended on the installed one.
        erm = (run.history()['env_runners/episode_reward_mean']
            .replace('NaN',None)
            .bfill()
            )

        try:
            for step, reward in enumerate(erm):
                if step + pre_steps > max_timestep: break
                data.append({
                    'id': run_id,
                    'num_agents': n,
                    'timestep': step + pre_steps,
                    'episode_reward_mean': reward,
                    'per_agent_erm': reward / n,
                    'pretrained_steps': pre_steps,
                })
        except Exception as err:
            # Exception (not a bare except) so Ctrl-C still interrupts the
            # ingest; the cause is printed so bad runs can be diagnosed.
            print(f"Problem with run: {run.id} ({err!r})")

    return pd.DataFrame(data)

def baseline_prediction_interval(num_agents,path='ww-baseline.csv'):
    """Build the outline of a ~95% prediction band for baseline runs.

    Reads the CSV at `path`, keeps the rows with the requested `num_agents`,
    and summarises `per_agent_erm` per `timestep`.

    Returns:
        `(x, y)` arrays tracing a closed polygon: the upper bound over
        ascending timesteps followed by the lower bound over descending
        timesteps. The x values are timesteps multiplied by `num_agents`.
    """
    baseline = pd.read_csv(path)
    selected = baseline.loc[baseline["num_agents"] == num_agents]
    per_step = selected.groupby('timestep')['per_agent_erm']

    centre = per_step.mean()
    spread = per_step.std()
    sample_size = per_step.count()

    # Half-width of the interval: 1.96 * s * sqrt(1 + 1/n), i.e. z=1.96 for
    # roughly 95% coverage.
    half_width = 1.96 * spread * np.sqrt(1 + 1/sample_size)
    upper = (centre + half_width).to_numpy()
    lower = (centre - half_width).to_numpy()

    # Walk forward along the top edge, then back along the bottom edge
    steps = centre.index.to_numpy()
    x = np.concatenate([steps, steps[::-1]]) * num_agents
    y = np.concatenate([upper, lower[::-1]])

    return x,y

In [55]:
# The actual retraining test: pull the retraining runs from W&B and cache them
# locally as 'ww-out.csv', the default input file of plot_avg_retrain.
wandb_ingest("no-organization-for-signup/ww_test").to_csv('ww-out.csv', index=False)

Problem with run: f1095_00000
Problem with run: 0cd08_00000
Problem with run: 2c6c1_00000
Problem with run: 513e2_00000
Problem with run: 7a053_00000
Problem with run: a7c63_00000


In [56]:
# Data from previous training, for baselining: cached as 'ww-baseline.csv',
# the default input file of baseline_prediction_interval.
wandb_ingest("no-organization-for-signup/mini-test-waterworld").to_csv('ww-baseline.csv', index=False)

In [60]:
# Multiwalker runs, cached as 'walker-out.csv', the default input file of plot_walker_retrain.
wandb_ingest("no-organization-for-signup/multiwalker").to_csv('walker-out.csv', index=False)

# Baseline and Retraining Plots

In [15]:
import plotly.graph_objects as go

# Outline of the 95% prediction band for the 7-agent baseline runs
base_x,base_y = baseline_prediction_interval(7)

# Plotting
fig = go.Figure()

# Draw the band as a single closed shape: fill='toself' shades the inside of
# the outline, and the fully transparent line hides the outline itself.
fig.add_trace(go.Scatter(
    x=base_x,
    y=base_y,
    fill='toself',
    fillcolor='rgba(0,100,80,0.2)',  # semi-transparent fill
    line=dict(color='rgba(255,255,255,0)'),
    name='95% Prediction Interval',
    hoverinfo="skip"
))

fig.show()

The prediction interval is based on a sample of baseline runs, in which the number of agents trained for the task remains the same for the duration of training.
Timesteps are adjusted to reflect the relative compute required to complete a round of training for each set of agents. For example, because the overhead of managing the environment is negligible, training four agents takes twice as many operations as training two.


In [None]:
import plotly.express as px

def plot_avg_retrain(n, path='ww-out.csv', pre_agents=2):
    """Plot mean per-agent reward of retrained runs against the baseline band.

    Args:
        n: number of agents in the target task; only rows with this
            `num_agents` are plotted.
        path: CSV with `num_agents`, `pretrained_steps`, `timestep` and
            `per_agent_erm` columns.
        pre_agents: number of agents used during pretraining (default 2).

    Shows the figure and also writes it to '<n>task.png'.
    """
    task_agents = n

    ts_df = pd.read_csv(path)
    # Average the runs that share a pretraining length, per timestep
    d1=(ts_df[ts_df["num_agents"]==task_agents]
        .groupby(['pretrained_steps','timestep'])["per_agent_erm"]
        .mean()
        .reset_index()
        )
    # Convert timesteps to agent-steps: steps spent pretraining count
    # `pre_agents` each, the remaining steps count `task_agents` each.
    d1['timestep'] = pre_agents*d1['pretrained_steps'] +\
                     task_agents*(d1['timestep']-d1['pretrained_steps'])

    fig = px.line(
        d1,
        y="per_agent_erm", 
        x="timestep", color="pretrained_steps", line_group="pretrained_steps",
        color_discrete_sequence=px.colors.qualitative.G10, line_shape="spline", 
        render_mode="svg", 
        title=f"{task_agents} Agent Task with {pre_agents} Agent Pretraining",
        labels={
            "per_agent_erm" : "Mean Episode Reward per Agent", 
            "timestep": "Agent-steps",
            "pretrained_steps": "Pretraining Length"})

    # Overlay the baseline prediction band for the same task size
    base_x,base_y = baseline_prediction_interval(task_agents)
    fig.add_trace(go.Scatter(
        x=base_x,
        y=base_y,
        fill='toself',
        fillcolor='rgba(0,100,80,0.2)',  # semi-transparent fill
        line=dict(color='rgba(255,255,255,0)'),
        name=f'Baseline:<br>{task_agents} Agent 95%<br>Prediction<br>Interval',
        hoverinfo="skip"
    ))

    fig.update_layout(width=700, height=450,)

    fig.show()
    fig.write_image(f"{n}task.png", width=700, height=450)

plot_avg_retrain(4)

In [26]:
# Same comparison for the 6-agent task (also writes '6task.png')
plot_avg_retrain(6)

In [27]:
# Same comparison for the 8-agent task (also writes '8task.png')
plot_avg_retrain(8)

In [4]:
import wandb
import pandas as pd

api = wandb.Api()
runs = api.runs("no-organization-for-signup/ww_test")

# One record per logged training iteration: the run's agent count and the
# logged iteration time (milliseconds, going by the metric key).
# NOTE(review): run.history() may return only a sampled subset of rows by
# default -- confirm against the W&B API if every iteration is required.
data = []
for run in runs:
    for time in run.history()['timers/training_iteration_time_ms']:
        data.append({
                    'num_agents': run.config['num_agents'],
                    'time': time,
                    #'mean_time': hist.mean(),
                    #'steps': len(hist)
                    })

# Cache locally so the plotting cell can run without calling the API again
time_costs = pd.DataFrame(data)
time_costs.to_csv('ww-timecosts.csv', index=False)

import statsmodels.formula.api as smf 

# Ordinary least squares fit of iteration time against the number of agents
model = smf.ols(formula='time ~ num_agents', data=time_costs).fit() 
print(model.summary())



                            OLS Regression Results                            
Dep. Variable:                   time   R-squared:                       0.999
Model:                            OLS   Adj. R-squared:                  0.999
Method:                 Least Squares   F-statistic:                 2.827e+07
Date:                Mon, 10 Feb 2025   Prob (F-statistic):               0.00
Time:                        22:22:50   Log-Likelihood:            -2.6299e+05
No. Observations:               34901   AIC:                         5.260e+05
Df Residuals:                   34899   BIC:                         5.260e+05
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept    324.2441      7.543     42.984      0.0

In [37]:
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd

time_costs = pd.read_csv('ww-timecosts.csv')

# One row per agent count: mean iteration time and its standard deviation.
# Both come from a single aggregation so they cannot be misaligned.
tc = (time_costs
    .groupby("num_agents")["time"]
    .agg(time="mean", error="std")
    .reset_index()
)
# String copy of the agent count, used to colour the bars as discrete categories
tc["n_agents"] = tc["num_agents"].astype(str)

# `tc` is already aggregated, so it is plotted directly; grouping it again and
# averaging would also try to average the string column `n_agents`.
fig = (px.bar(
    tc,
    y="time", x="num_agents", color="n_agents", 
    error_y="error",
    title="Time Cost Per Training Iteration",
    labels={"num_agents":"Number of Agents", "time":"Time (ms)"},
    color_discrete_sequence=px.colors.qualitative.Prism,
    opacity=0.65,
    )
    .update_layout(
        showlegend=False,
        width=700, height=450,
        barcornerradius=5,
        )
    .update_xaxes(range=[0, 8.5])
)

#fig.add_trace(go.Line( x=[0, 8], y=[324.2441, 324.2441 + 8 * 7004.7673] ) )

fig.show()
fig.write_image("time_costs.png", width=700, height=450)

In [89]:
def plot_walker_retrain(n, path='walker-out.csv', pre_agents=3):
    """Plot mean per-agent reward of retrained multiwalker runs.

    Args:
        n: number of agents in the target task; only rows with this
            `num_agents` are plotted.
        path: CSV with `num_agents`, `pretrained_steps`, `timestep` and
            `per_agent_erm` columns.
        pre_agents: number of agents used during pretraining (default 3).

    Shows the figure and also writes it to 'walker_<n>task.png'.
    """
    task_agents = n

    ts_df = pd.read_csv(path)
    # Average the runs that share a pretraining length, per timestep
    d1=(ts_df[ts_df["num_agents"]==task_agents]
        .groupby(['pretrained_steps','timestep'])["per_agent_erm"]
        .mean()
        .reset_index()
        )
    # Re-express time in task-sized steps: each pretraining step is counted as
    # pre_agents/task_agents of a step, so a curve moves left by
    # pretrained_steps * (1 - pre_agents/task_agents).
    d1['timestep'] = d1['timestep'] - (d1['pretrained_steps']*(1-pre_agents/task_agents))

    fig = px.line(
        d1,
        y="per_agent_erm", 
        x="timestep", color="pretrained_steps", line_group="pretrained_steps",
        color_discrete_sequence=px.colors.qualitative.G10, line_shape="spline", 
        render_mode="svg", 
        title=f"{task_agents} Agent Task with {pre_agents} Agent Pretraining",
        labels={
            "per_agent_erm" : "Mean Episode Reward per Agent", 
            "timestep": "Adjusted Timesteps",
            "pretrained_steps": "Pretraining<br>Length"})

    # No baseline prediction band is overlaid on this plot.

    fig.update_layout(width=700, height=450,)

    fig.show()
    fig.write_image(f"walker_{n}task.png", width=700, height=450)

plot_walker_retrain(6)

In [100]:
import plotly.graph_objects as go
# Imported here so the cell does not rely on another cell having run first
import numpy as np
import pandas as pd

ts_df = pd.read_csv('walker-out.csv')

# Runs with no pretraining, summarised per timestep
seq = (
    ts_df[ts_df["pretrained_steps"]==0]
    .groupby(['timestep'])["per_agent_erm"]
)

# ~95% prediction interval per timestep: mean +/- 1.96 * s * sqrt(1 + 1/n)
mean = seq.mean()
std_dev = seq.std()
n = seq.count()
z = 1.96
margin_of_error = z * std_dev * np.sqrt(1 + 1/n)
lower_bound = mean - margin_of_error
upper_bound = mean + margin_of_error

# Closed outline: upper bound left-to-right, then lower bound right-to-left
x = np.concatenate([mean.index, mean.index[::-1]])
y = np.concatenate([upper_bound, lower_bound[::-1]])


fig = go.Figure()

# The outline is one closed polygon, so it must be filled with 'toself';
# 'tonexty' fills relative to a previous trace, and there is none here.
fig.add_trace(go.Scatter(
    x=x,
    y=y,
    fill='toself',
    fillcolor='rgba(0,100,80,0.2)',  # semi-transparent fill
    line=dict(color='rgba(255,255,255,0)'),
    name='95% Prediction Interval',
    hoverinfo="skip",
    ))

fig.show()


In [28]:
# Mean per agent count. This result is discarded: only the last expression
# of a cell is displayed.
time_costs.groupby(["num_agents"]).mean().reset_index()
# Standard deviation of the iteration time per agent count (the displayed value)
time_costs.groupby(["num_agents"]).std().reset_index()["time"]

0    171.883260
1    232.042801
2    357.261298
3    362.324095
4    467.984644
5    430.636087
6    664.235734
Name: time, dtype: float64

In [220]:
import numpy as np

def diffuse(arr:np.ndarray, n_steps:int=1):
    """Spread the mass of a grid over neighbouring cells for `n_steps` rounds.

    In each round, every cell holding a positive value splits it equally among
    its distinct reachable cells: itself and the cells one step away along a
    single axis. Moves that would leave the grid are clipped back onto the
    edge; clipped duplicates are merged, so an edge or corner cell has fewer
    targets. The total mass is preserved.

    Args:
        arr: grid of non-negative values, any number of dimensions (>= 1).
        n_steps: number of diffusion rounds; must be >= 0.

    Returns:
        `arr` itself when `n_steps` is 0, otherwise a new array of the same
        shape. Floating-point inputs keep their dtype; any other dtype is
        promoted to float so fractional shares are not truncated.

    Raises:
        ValueError: if `n_steps` is negative.
    """
    if n_steps < 0:
        raise ValueError(f"n_steps must be non-negative, got {n_steps}")
    if n_steps == 0:
        return arr

    arr = np.asarray(arr)
    # An integer output buffer would silently truncate every share to 0
    out_dtype = arr.dtype if np.issubdtype(arr.dtype, np.floating) else float

    # Stay put, or move by +/-1 along exactly one axis
    unit_moves = np.eye(arr.ndim, dtype=int)
    movespace = np.vstack([np.zeros((1, arr.ndim), dtype=int), unit_moves, -unit_moves])
    max_index = np.subtract(arr.shape, 1)

    current = arr
    # Plain loop instead of recursion: no recursion-depth limit on n_steps
    for _ in range(n_steps):
        new_arr = np.zeros(current.shape, dtype=out_dtype)

        # Iterate across entire array
        for index, n in np.ndenumerate(current):
            # Check for existing probability
            if n > 0:
                # Get adjacent indices
                possible_moves = np.array(index) + movespace
                # Clip for out of bound moves
                valid_moves = np.unique(np.clip(possible_moves, 0, max_index), axis=0)
                # Calculate probability
                p = n/len(valid_moves)
                # Add probability to new space
                for move in valid_moves:
                    new_arr[tuple(move)] += p

        current = new_arr

    return current

# EXAMPLE: put all the mass on the centre cell of a 5x5 grid and print the
# distribution after 8 diffusion steps.

a = np.zeros((5,5))
a[2,2] = 1

print(diffuse(a,8))

#print(a)
#for _ in range(3):
#    a = diffuse(a)
#    print()
#    print(a)

[[0.02527156 0.03550806 0.03796476 0.03550806 0.02527156]
 [0.03550806 0.04977457 0.0518548  0.04977457 0.03550806]
 [0.03796476 0.0518548  0.05647276 0.0518548  0.03796476]
 [0.03550806 0.04977457 0.0518548  0.04977457 0.03550806]
 [0.02527156 0.03550806 0.03796476 0.03550806 0.02527156]]


In [6]:
import pandas as pd 
import wandb
api = wandb.Api()

# Project is specified by <entity/project-name>
runs = api.runs("no-organization-for-signup/mini-test-waterworld")

# Three parallel lists: entry i of each list describes the same run
summary_list, config_list, name_list = [], [], []
for run in runs: 
    # .summary contains the output keys/values for metrics like accuracy.
    #  We call ._json_dict to omit large files 
    summary_list.append(run.summary._json_dict)

    # .config contains the hyperparameters.
    #  We remove special values that start with _.
    config_list.append(
        {k: v for k,v in run.config.items()
          if not k.startswith('_')})

    # .name is the human-readable name of the run.
    name_list.append(run.name)

# Each row keeps the whole summary dict and the whole config dict in a single cell
runs_df = pd.DataFrame({
    "summary": summary_list,
    "config": config_list,
    "name": name_list
    })

# NOTE(review): this writes nested 'summary'/'config' columns (plus the index)
# to 'mini-test-waterworld.csv'. Other cells read that file and filter on flat
# 'num_agents' / 'steps_pretrained' / 'id' columns -- confirm which layout the
# file is meant to have before re-running this cell.
runs_df.to_csv("mini-test-waterworld.csv")

# Graphing Functions

Rewriting graphing functions to fit the full wandb ingest.

info/learner/pursuer_2/diff_num_grad_updates_vs_sampler_policy,
info/learner/pursuer_1/learner_stats/vf_explained_var,
info/learner/pursuer_2/learner_stats/policy_loss,
info/learner/pursuer_4/diff_num_grad_updates_vs_sampler_policy,
info/learner/pursuer_5/learner_stats/entropy_coeff,
info/learner/pursuer_5/learner_stats/cur_lr,
info/learner/pursuer_2/learner_stats/total_loss,
env_runners/policy_reward_max/pursuer_2,
info/learner/pursuer_5/learner_stats/entropy,
info/learner/pursuer_4/learner_stats/vf_loss,
env_runners/policy_reward_max/pursuer_3,
num_agents,env_runners/sampler_perf/mean_raw_obs_processing_ms,
env_runners/sampler_perf/mean_env_wait_ms,
info/learner/pursuer_0/learner_stats/total_loss,
info/num_env_steps_trained,
env_runners/policy_reward_max/pursuer_1,
counters/num_agent_steps_trained,
info/learner/pursuer_2/learner_stats/vf_loss,env_runners/episode_reward_max,
num_env_steps_sampled_throughput_per_sec,
iterations_since_restore,
info/learner/pursuer_4/learner_stats/cur_kl_coeff,
info/learner/pursuer_3/learner_stats/kl,
info/learner/pursuer_4/learner_stats/entropy_coeff,
info/learner/pursuer_0/num_grad_updates_lifetime,
info/learner/pursuer_6/learner_stats/kl,
num_agent_steps_sampled,time_this_iter_s,
time_total_s,info/learner/pursuer_1/num_agent_steps_trained,
env_runners/policy_reward_max/pursuer_5,
info/learner/pursuer_3/learner_stats/grad_gnorm,
timers/restore_workers_time_ms,
timers/training_iteration_time_ms,
info/learner/pursuer_6/learner_stats/total_loss,
info/learner/pursuer_0/diff_num_grad_updates_vs_sampler_policy,
info/learner/pursuer_2/num_agent_steps_trained,
info/learner/pursuer_0/learner_stats/cur_lr,
info/learner/pursuer_5/learner_stats/allreduce_latency,
info/learner/pursuer_2/learner_stats/grad_gnorm,
num_env_steps_trained_throughput_per_sec,
info/learner/pursuer_4/learner_stats/policy_loss,
timestamp,info/learner/pursuer_4/num_grad_updates_lifetime,
env_runners/policy_reward_mean/pursuer_2,
num_env_steps_trained_this_iter,
info/learner/pursuer_5/learner_stats/total_loss,
env_runners/policy_reward_max/pursuer_7,
env_runners/connector_metrics/StateBufferConnector_ms,
info/learner/pursuer_3/learner_stats/cur_kl_coeff,
info/learner/pursuer_6/learner_stats/entropy_coeff,
env_runners/policy_reward_min/pursuer_2,
_step,
info/learner/pursuer_0/learner_stats/policy_loss,
_runtime,
info/learner/pursuer_0/learner_stats/entropy_coeff,
info/learner/pursuer_7/num_agent_steps_trained,
info/learner/pursuer_2/learner_stats/vf_explained_var,
info/num_agent_steps_sampled,time_since_restore,
info/learner/pursuer_2/learner_stats/entropy,
info/learner/pursuer_1/learner_stats/total_loss,
env_runners/policy_reward_mean/pursuer_0,
env_runners/policy_reward_mean/pursuer_3,
env_runners/policy_reward_min/pursuer_5,
info/learner/pursuer_7/learner_stats/total_loss,
info/learner/pursuer_6/diff_num_grad_updates_vs_sampler_policy,
info/learner/pursuer_7/learner_stats/vf_explained_var,
info/learner/pursuer_1/learner_stats/cur_kl_coeff,
info/learner/pursuer_3/num_grad_updates_lifetime,
env_runners/policy_reward_mean/pursuer_4,
info/learner/pursuer_2/learner_stats/cur_lr,
info/learner/pursuer_4/learner_stats/allreduce_latency,
env_runners/episodes_this_iter,
num_remote_worker_restarts,
env_runners/num_episodes,
info/num_env_steps_sampled,
info/learner/pursuer_3/learner_stats/entropy,
info/learner/pursuer_1/learner_stats/vf_loss,
env_runners/episode_reward_min,
info/learner/pursuer_6/learner_stats/entropy,
info/learner/pursuer_4/learner_stats/grad_gnorm,
info/learner/pursuer_3/diff_num_grad_updates_vs_sampler_policy,
env_runners/policy_reward_min/pursuer_4,
env_runners/policy_reward_mean/pursuer_5,
num_pretrained,
info/learner/pursuer_5/diff_num_grad_updates_vs_sampler_policy,
info/learner/pursuer_4/learner_stats/kl,
env_runners/episode_reward_mean,
counters/num_env_steps_trained,
info/learner/pursuer_2/learner_stats/cur_kl_coeff,
info/learner/pursuer_4/num_agent_steps_trained,
num_agent_steps_trained,
info/learner/pursuer_5/learner_stats/policy_loss,
env_runners/policy_reward_min/pursuer_3,
env_runners/policy_reward_max/pursuer_4,
env_runners/episodes_timesteps_total,
info/learner/pursuer_4/learner_stats/cur_lr,
num_agent_steps_sampled_lifetime,
agent_timesteps_total,
num_env_steps_sampled_lifetime,
env_runners/episode_return_mean,
info/learner/pursuer_3/learner_stats/allreduce_latency,
info/learner/pursuer_1/learner_stats/kl,
timers/synch_weights_time_ms,
timers/training_step_time_ms,
perf/ram_util_percent,
env_runners/sampler_perf/mean_inference_ms,
info/learner/pursuer_3/learner_stats/policy_loss,
env_runners/num_faulty_episodes,
info/learner/pursuer_3/learner_stats/vf_loss,
info/learner/pursuer_1/learner_stats/entropy,
info/learner/pursuer_1/learner_stats/allreduce_latency,
info/learner/pursuer_6/learner_stats/vf_loss,
env_runners/sampler_perf/mean_env_render_ms,
info/learner/pursuer_6/learner_stats/policy_loss,
info/learner/pursuer_3/learner_stats/vf_explained_var,
info/learner/pursuer_3/learner_stats/entropy_coeff,
info/learner/pursuer_0/num_agent_steps_trained,
info/num_agent_steps_trained,
info/learner/pursuer_7/learner_stats/allreduce_latency,
info/learner/pursuer_0/learner_stats/entropy,
info/learner/pursuer_5/num_agent_steps_trained,
timesteps_total,
info/learner/pursuer_6/learner_stats/cur_kl_coeff,
info/learner/pursuer_7/learner_stats/cur_kl_coeff,
env_runners/sampler_perf/mean_action_processing_ms,
info/learner/pursuer_0/learner_stats/vf_explained_var,
timers/learn_time_ms,info/learner/pursuer_5/num_grad_updates_lifetime,
env_runners/policy_reward_mean/pursuer_7,
info/learner/pursuer_1/diff_num_grad_updates_vs_sampler_policy,
info/learner/pursuer_3/learner_stats/cur_lr,
info/learner/pursuer_4/learner_stats/total_loss,
timers/sample_time_ms,perf/cpu_util_percent,
env_runners/policy_reward_mean/pursuer_1,
info/learner/pursuer_7/num_grad_updates_lifetime,
info/learner/pursuer_7/learner_stats/cur_lr,
info/learner/pursuer_6/learner_stats/allreduce_latency,
info/learner/pursuer_7/learner_stats/vf_loss,
num_env_steps_sampled,
info/learner/pursuer_2/learner_stats/kl,
num_env_steps_sampled_this_iter,
env_runners/connector_metrics/ViewRequirementAgentConnector_ms,
env_runners/policy_reward_max/pursuer_6,
env_runners/policy_reward_max/pursuer_0,
info/learner/pursuer_3/num_agent_steps_trained,
env_runners/policy_reward_min/pursuer_7,
info/learner/pursuer_4/learner_stats/vf_explained_var,
counters/num_env_steps_sampled,_timestamp,
info/learner/pursuer_0/learner_stats/cur_kl_coeff,
info/learner/pursuer_2/learner_stats/entropy_coeff,
timers/learn_throughput,
info/learner/pursuer_7/learner_stats/kl,
info/learner/pursuer_7/learner_stats/entropy,
num_steps_trained_this_iter,
info/learner/pursuer_0/learner_stats/vf_loss,
info/learner/pursuer_5/learner_stats/cur_kl_coeff,
counters/num_agent_steps_sampled,
info/learner/pursuer_1/learner_stats/cur_lr,
info/learner/pursuer_0/learner_stats/kl,
num_healthy_workers,
info/learner/pursuer_7/learner_stats/policy_loss,
info/learner/pursuer_1/learner_stats/policy_loss,
info/learner/pursuer_6/num_agent_steps_trained,
info/learner/pursuer_6/learner_stats/grad_gnorm,
info/learner/pursuer_6/num_grad_updates_lifetime,
info/learner/pursuer_0/learner_stats/allreduce_latency,
info/learner/pursuer_3/learner_stats/total_loss,
training_iteration,env_runners/policy_reward_min/pursuer_1,
info/learner/pursuer_7/learner_stats/entropy_coeff,
env_runners/episode_return_max,
info/learner/pursuer_5/learner_stats/vf_loss,
info/learner/pursuer_4/learner_stats/entropy,
info/learner/pursuer_7/diff_num_grad_updates_vs_sampler_policy,
env_runners/connector_metrics/ObsPreprocessorConnector_ms,
info/learner/pursuer_2/learner_stats/allreduce_latency,
env_runners/policy_reward_min/pursuer_6,
info/learner/pursuer_5/learner_stats/kl,
env_runners/episode_return_min,
info/learner/pursuer_0/learner_stats/grad_gnorm,
info/learner/pursuer_7/learner_stats/grad_gnorm,
env_runners/policy_reward_min/pursuer_0,
info/learner/pursuer_1/learner_stats/grad_gnorm,
info/learner/pursuer_1/learner_stats/entropy_coeff,
env_runners/policy_reward_mean/pursuer_6,
info/learner/pursuer_2/num_grad_updates_lifetime,
info/learner/pursuer_6/learner_stats/cur_lr,
num_env_steps_trained,env_runners/episode_len_mean,
info/learner/pursuer_1/num_grad_updates_lifetime,
info/learner/pursuer_5/learner_stats/vf_explained_var,
info/learner/pursuer_5/learner_stats/grad_gnorm,
info/learner/pursuer_6/learner_stats/vf_explained_var,
num_in_flight_async_sample_reqs,
episode_reward_mean

In [157]:
import numpy as np
import pandas as pd


def baseline_prediction_interval(num_agents, path):
    """Debugging stub: print row counts for each export prefix in `path`.

    For every prefix `p`, reads `<p>.csv` (run metadata) and `<p>_history.csv`,
    selects the metadata rows with the requested `num_agents` and
    `steps_pretrained == 0`, and prints the number of selected runs, the
    number of history rows, and their ratio.

    Returns None: no interval is computed, and `data` is never filled.
    NOTE(review): this redefines the name of the earlier
    `baseline_prediction_interval(num_agents, path='ww-baseline.csv')` with a
    different signature and return value -- confirm that is intended.
    """
    data = []
    for p in path:
        # Read each table
        run_info = pd.read_csv(f"{p}.csv")
        run_info = run_info[run_info['num_agents']==num_agents]
        run_info = run_info[run_info['steps_pretrained']==0]
        ids = run_info['id']

        #print(ids)

        # The history is NOT restricted to the selected runs (filter is commented out)
        hist = pd.read_csv(f"{p}_history.csv")
        #hist = hist[hist['id'].isin(ids)]
        print(len(ids))
        print(len(hist))

        # Raises ZeroDivisionError when no run matches the filters above
        print(len(hist)/len(ids))

        #_df['env_runners/episode_reward_mean'] = (
        #    _df['env_runners/episode_reward_mean'].div(num_agents))
        #data.append(_df)
        #print(hist.head)





# `path` is a list of export prefixes, without the '.csv' suffix
a = baseline_prediction_interval(4, path=['mini-test-waterworld'])
type(a)

32
40967
1280.21875


NoneType

In [142]:
import numpy as np
import pandas as pd


def baseline_prediction_interval(num_agents, path):
    """Build a ~95% prediction band from exported baseline histories.

    Args:
        num_agents: only rows with this `num_agents` are used; rewards are
            divided by it to obtain a per-agent reward.
        path: export prefix, or list of prefixes; `<prefix>_history.csv` is
            read for each one. The file needs `num_agents`, `_step` and
            `env_runners/episode_reward_mean` columns. When a
            `steps_pretrained` (or `pretrained_steps`) column exists, only
            rows where it equals 0 are kept.

    Returns:
        `(x, y, mean)`: `x`/`y` trace a closed polygon (upper bound over
        ascending steps, then lower bound over descending steps, with x
        multiplied by `num_agents`); `mean` is the per-step mean per-agent
        reward as a Series indexed by `_step`.

    Raises:
        ValueError: if `path` contains no prefix.
    """
    reward_col = 'env_runners/episode_reward_mean'
    # A bare string would otherwise be iterated character by character
    if isinstance(path, str):
        path = [path]

    data = []
    for p in path:
        # Only the history table is needed; it is read once per prefix
        _df = pd.read_csv(f"{p}_history.csv")
        # Filter for number of agents
        _df = _df[_df['num_agents']==num_agents]
        # Filter for tabula rasa
        if 'steps_pretrained' in _df.columns:
            _df = _df[_df['steps_pretrained']==0]
        elif 'pretrained_steps' in _df.columns:
            _df = _df[_df['pretrained_steps']==0]
        # assign() builds a new frame rather than writing into a filtered slice
        _df = _df.assign(**{reward_col: _df[reward_col].div(num_agents)})
        data.append(_df)

    if not data:
        raise ValueError("path must contain at least one export prefix")

    seq = (pd.concat(data, ignore_index=True)
           .groupby('_step')[reward_col]
           )

    # Mean and standard deviation per timestep
    mean = seq.mean()
    std_dev = seq.std()
    n = seq.count()

    # Prediction interval half-width: z * s * sqrt(1 + 1/n), with z=1.96
    # for roughly 95% coverage.
    z = 1.96
    margin_of_error = z * std_dev * np.sqrt(1 + 1/n)

    # Lower and upper bounds of the PI
    lower_bound = mean - margin_of_error
    upper_bound = mean + margin_of_error

    x = np.concatenate([mean.index, mean.index[::-1]]) * num_agents
    y = np.concatenate([upper_bound, lower_bound[::-1]])

    return x, y, mean


def get_avg_retrain(task_agents, path, pre_agents=2):
    """Average the per-agent reward of pretrained (non tabula-rasa) runs.

    Args:
        task_agents: only rows with this `num_agents` are used; the mean
            reward is divided by it to obtain a per-agent reward.
        path: CSV file path, or list of paths. Each file needs `num_agents`,
            `_step` and `env_runners/episode_reward_mean` columns plus a
            `steps_pretrained` or `pretrained_steps` column (the latter takes
            precedence when both exist).
        pre_agents: number of agents used during pretraining (default 2).

    Returns:
        DataFrame with `steps_pretrained`, `_step`, the per-agent
        `env_runners/episode_reward_mean`, and `timestep` in agent-steps
        (pretraining steps count `pre_agents` each, later steps count
        `task_agents` each).

    Raises:
        KeyError: if a file has neither pretraining column.
        ValueError: if `path` contains no file.
    """
    reward_col = 'env_runners/episode_reward_mean'
    # A bare string would otherwise be iterated character by character
    if isinstance(path, str):
        path = [path]

    data = []
    for p in path:
        # Read each table
        _df = pd.read_csv(p)
        # Filter for number of agents
        _df = _df[_df['num_agents']==task_agents]
        # Correct possible naming disparity
        if 'pretrained_steps' in _df.columns:
            # assign() builds a new frame rather than writing into a filtered slice
            _df = _df.assign(steps_pretrained=_df['pretrained_steps'])
        elif 'steps_pretrained' not in _df.columns:
            raise KeyError(
                f"steps_pretrained: {p} has neither a 'steps_pretrained' "
                f"nor a 'pretrained_steps' column")
        # Filter for NON tabula rasa
        _df = _df[_df['steps_pretrained']>0]
        data.append(_df)

    if not data:
        raise ValueError("path must contain at least one CSV file")

    d1 = (pd.concat(data, ignore_index=True)
          .groupby(['steps_pretrained','_step'])[reward_col]
          .mean()
          .reset_index()
          )
    # Normalise ONLY the reward. Dividing the whole frame would also divide
    # the step columns and corrupt the agent-step conversion below.
    d1[reward_col] = d1[reward_col] / task_agents

    d1['timestep'] = pre_agents*d1['steps_pretrained'] +\
                     task_agents*(d1['_step']-d1['steps_pretrained'])

    return d1

"""
import plotly.express as px

def plot_avg_retrain(task_agents, path, pre_agents=2):
    df = get_avg_retrain(task_agents, path)

    base_x, base_y, mean = baseline_prediction_interval(task_agents, path)

    fig = px.line(
        df,
        y="per_agent_erm", 
        x="timestep", color="pretrained_steps", line_group="pretrained_steps",
        color_discrete_sequence=px.colors.qualitative.G10, line_shape="spline", 
        render_mode="svg", 
        title=f"{task_agents} Agent Task with {pre_agents} Agent Pretraining",
        labels={
            "per_agent_erm" : "Mean Episode Reward per Agent", 
            "timestep": "Agent-steps",
            "pretrained_steps": "Pretraining Length"})

    fig.add_trace(go.Scatter(
        x=base_x,
        y=base_y,
        fill='toself',
        fillcolor='rgba(0,100,80,0.2)',  # semi-transparent fill
        line=dict(color='rgba(255,255,255,0)'),
        name=f'Baseline:<br>{task_agents} Agent 95%<br>Prediction<br>Interval',
        hoverinfo="skip"
    ))

    fig.update_layout(width=700, height=450,)

    fig.show()
    #fig.write_image(f"{n}task.png", width=700, height=450)

# ['mini-test-waterworld.csv', 'mini-test-waterworld_history.csv']
#x, y, mean = baseline_prediction_interval(3, path=['mini-test-waterworld_history.csv'])

plot_avg_retrain(4, path=['mini-test-waterworld_history.csv'])
"""

#base_x, base_y, mean = baseline_prediction_interval(4, path=['mini-test-waterworld_history.csv'])
# Smoke test of the retraining aggregation on the waterworld history export.
# It raises KeyError: 'steps_pretrained' when the CSV has neither a
# 'steps_pretrained' nor a 'pretrained_steps' column.
a = get_avg_retrain(4, path=['mini-test-waterworld_history.csv'])
type(a)

KeyError: 'steps_pretrained'

In [3]:
# Load the per-run export and the per-step history export
info = pd.read_csv("mini-test-waterworld.csv")
hist = pd.read_csv("mini-test-waterworld_history.csv")

# Bare expression in the middle of a cell: evaluated, but not displayed
info['steps_pretrained']

# Add the column the retraining helpers filter on; every history row is
# marked as not pretrained (0).
hist['steps_pretrained'] = 0

# Written under a new name, so the original history export is not overwritten
hist.to_csv(f"mini-test-waterworld_history2.csv", index=False)