# Localize/Ingest Data

Localize to avoid the numerous/lengthy API calls:

In [None]:
import wandb
import pandas as pd
from concurrent.futures import ThreadPoolExecutor

ENTITY = "no-organization-for-signup"
PROJECT = "foraging-scale-lim-coop"

# Define a function to delete a single run
def export_run(run):
    try:
        # Collect run's summary metrics, configs, and name
        summary = run.summary._json_dict
        config = {k: v for k, v in run.config.items() if not k.startswith('_')}
        name = run.name

        # Combine summary and config into a single dictionary
        run_data = {**summary, **config}
        run_data['name'] = name

        # Optionally add more run metadata
        run_data['id'] = run.id
        run_data['created_at'] = run.created_at
        run_data['state'] = run.state

        hist = run.history()
        hist['id'] = run.id
        hist['steps_pretrained'] = run_data['steps_pretrained']
        hist['num_agents'] = run_data['num_agents']

        return run_data, hist

    except Exception as e:
        return f"Error exporting run {run.id}: {e}"

# Initialize W&B API
api = wandb.Api()
runs = api.runs(f'{ENTITY}/{PROJECT}')

# Initialize lists to hold run data and history
runs_data = []
histories = []

# Set up the ThreadPoolExecutor to parallelize the process
with ThreadPoolExecutor(max_workers=5) as executor:
    # Submit export tasks to the executor
    for run_data, history in executor.map(export_run, runs):
        runs_data.append(run_data)
        histories.append(history)

# Convert the list of dictionaries to a DataFrame
runs_df = pd.DataFrame(runs_data)
hist_df = pd.concat(histories, keys=[f'run_{i}' for i in range(len(histories))])

# Reorder columns so identifying info is at the front
cols = ['id', 'name', 'created_at', 'state'] + \
    [col for col in runs_df.columns if col not in 
     ['id', 'name', 'created_at', 'state']]
runs_df = runs_df[cols]

# Export the DataFrame to CSV
runs_df.to_csv(f"{PROJECT}.csv", index=False)
hist_df.to_csv(f"{PROJECT}_history.csv", index=True)

print(f"Data has been successfully exported to '{PROJECT}.csv'.")

Data has been successfully exported to 'foraging-blind-coop-base.csv'.


# Graph


In [182]:
import numpy as np
import pandas as pd

def baseline_prediction_interval(num_agents, path):
    data = []
    for p in path:
        # Read each table
        _df = pd.read_csv(p)
        # Filter for number of agents
        _df = _df[_df['num_agents']==num_agents]
        # Filter for tabula rasa
        _df = _df[_df['steps_pretrained']==0]
        _df['env_runners/episode_reward_mean'] = (
            _df['env_runners/episode_reward_mean']
            .div(num_agents)
            .replace('NaN',None)
            .bfill())
        data.append(_df)

    seq = (pd.concat(data, ignore_index=True)
           .groupby('_step')['env_runners/episode_reward_mean']
           )

    # Mean and standard deviation per timestep
    mean = seq.mean()
    std_dev = seq.std()
    n = seq.count()

    # Calculate the Prediction Interval (PI)
    # For large n, using z=1.96 for ~95% coverage. (Central limit theorem)
    z = 1.96
    margin_of_error = z * std_dev * np.sqrt(1 + 1/n)

    x = np.concatenate([mean.index]) * num_agents

    return x, mean, margin_of_error


def get_avg_retrain(task_agents, path, pre_agents=2):
    data = []
    for p in path:
        # Read each table
        _df = pd.read_csv(p)
        # Filter for number of agents
        _df = _df[_df['num_agents']==task_agents]
        # Filter for NON tabula rasa
        _df = _df[_df['steps_pretrained']>0]
        #_df = _df[_df['steps_pretrained']<200]
        data.append(_df)

    d1 = (pd.concat(data, ignore_index=True)
          .groupby(['steps_pretrained','_step'])['env_runners/episode_reward_mean']
          .mean()
          .reset_index()
          )
    
    d1['per_agent_erm'] = (d1['env_runners/episode_reward_mean']
                           .div(task_agents)
                           .replace('NaN',None)
                           .bfill())

    d1['timestep'] = pre_agents*d1['steps_pretrained'] + task_agents*d1['_step']

    return d1

import plotly.express as px
import plotly.graph_objects as go

def plot_avg_retrain(task_agents, path, env, pre_agents=2, 
                     show=True, write=True, x_lim=None, max_pre=None):
    retrain_results = get_avg_retrain(task_agents, path)

    if max_pre:
        retrain_results = retrain_results[retrain_results["steps_pretrained"]<=max_pre]

    base_x, base_mean, base_err = baseline_prediction_interval(task_agents, path)

    fig = px.line(
        retrain_results,
        y="per_agent_erm", 
        x="timestep", color="steps_pretrained", line_group="steps_pretrained",
        color_discrete_sequence=px.colors.qualitative.G10, line_shape="spline", 
        render_mode="svg", 
        title=f"{task_agents} Agent {env} with {pre_agents} Agent Pretraining",
        labels={
            "per_agent_erm" : "Mean Episode Reward per Agent", 
            "timestep": "Agent-steps",
            "steps_pretrained": "Pretraining Length"})

    fig.add_trace(go.Scatter(
        x=base_x,
        y=base_mean-base_err,
        line=dict(color='rgba(255,255,255,0)', width=0),
    ))
    fig.add_trace(go.Scatter(
        x=base_x,
        y=base_mean+base_err,
        fill='tonexty',
        fillcolor='rgba(0,100,80,0.2)',  # semi-transparent fill
        line=dict(color='rgba(255,255,255,0)'),
        name=f'Baseline:<br>{task_agents} Agent 95%<br>Prediction<br>Interval',
        hoverinfo="skip"
    ))

    fig.add_trace(go.Scatter(
        y=base_mean, x=base_x,
        line_color='rgba(0,100,80,0.5)',
        line=dict(dash='dot'),
        name=f'Baseline: Mean'
    ))

    fig.update_layout(width=800, height=500,)
    if x_lim:
        fig.update_xaxes(range=[0, x_lim])
    if show:
        fig.show()
    if write:
        fig.write_image(f"{env}-{task_agents}-agent.png", width=800, height=500)

    return base_x, base_mean, retrain_results

# base_x, base_mean, retrain_results = plot_avg_retrain(
#     5, path=['foraging-base_history.csv','forage-scale-2_history.csv'], 
#     env="LBF", pre_agents=2, write=False)

#base_x, base_mean, retrain_results = plot_avg_retrain(
#    4, path=['mini-test-waterworld_history.csv','ww_test_history.csv'], env="Waterworld", write=False)



base_x, base_mean, retrain_results = plot_avg_retrain(
   6, path=['multiwalker_history.csv','tune_multiwalker_history.csv','multiwalker-2_history.csv','walker-sup_history.csv'], 
   env="Multiwalker", write=False, x_lim=700, max_pre=100)

# Area Between Curves and Plots

## For Waterworld:

In [113]:
data = []

for num_agents in range(3,9):
    base_x, base_mean, retrain_results = plot_avg_retrain(
        num_agents, env="Waterworld", show=False, write=False,
        path=['mini-test-waterworld_history.csv','ww_test_history.csv']
        )
    z = base_mean.values.min()
    base_mean = base_mean.values - z

    for steps in retrain_results.steps_pretrained.unique():
        if steps < 200:
            res = retrain_results[retrain_results.steps_pretrained==steps
                                ]['per_agent_erm'].values - z
            a = int(steps/num_agents)
            b = a+len(res)
            dif = res - base_mean[a:b]
            auc = dif.cumsum().max()# * num_agents
            data.append({
                'Number of Agents': num_agents,
                'Steps Pretrained': steps,
                'Area Under Curve': auc,
            })

import plotly.express as px

df = pd.DataFrame(data)
piv = df.pivot(index="Number of Agents", columns="Steps Pretrained")

fig = px.imshow(piv,
                text_auto=True, title="Area Between Curves",
                labels=dict(x="Steps Pretrained", y="Number of Agents"),
                color_continuous_scale='RdBu', 
                 x=[f"{s}" for _,s in piv.columns],
                 y=piv.index
                )
fig.update_coloraxes(showscale=False) 
fig.update_xaxes(side="top")
fig.update_layout(width=700, height=500,)
fig.show()
#fig.write_image("Waterworld-AUCs.png")

In [35]:
print(dif_1)
print(dif_2)
print(dif_1.cumsum().max())
print(dif_2.cumsum().max())


[36.96029328 33.74154431 32.90326393 32.24043089 30.23930746 27.16492308
 26.64152169 27.34522309 24.93414294 23.94885926 23.52503508 21.0479972
 19.19921919 18.77848986 18.8460722  15.83752682 14.40091019 14.04749993
 13.73995745 12.71837874]
[-36.96029328 -33.74154431 -32.90326393 -32.24043089 -30.23930746
 -27.16492308 -26.64152169 -27.34522309 -24.93414294 -23.94885926
 -23.52503508 -21.0479972  -19.19921919 -18.77848986 -18.8460722
 -15.83752682 -14.40091019 -14.04749993 -13.73995745 -12.71837874]
468.2605965810983
-36.96029328091014


In [12]:
# Instead, ratio of area under curves
data = []
for num_agents in range(3,9):
    base_x, base_mean, retrain_results = plot_avg_retrain(
        num_agents, env="Waterworld", show=False, write=False,
        path=['mini-test-waterworld_history.csv','ww_test_history.csv']
        )
    z = base_mean.values.min()
    base_mean = base_mean.values - z

    for steps in retrain_results.steps_pretrained.unique():
        if steps < 200:
            res = retrain_results[retrain_results.steps_pretrained==steps
                                ]['per_agent_erm'].values - z
            a = int(steps/num_agents)
            b = a+len(res)

            i = (res - base_mean[a:b]).cumsum().argmax()
            n = res[:i].sum()
            m = base_mean[a:a+i].sum()
            auc = (n-m)/m 
            data.append({
                'Number of Agents': num_agents,
                'Steps Pretrained': steps,
                'Area Under Curve': auc,
            })


import plotly.express as px

df = pd.DataFrame(data)
piv = df.pivot(index="Number of Agents", columns="Steps Pretrained")

fig = px.imshow(piv.values,#/piv.values.max(), 
                text_auto=True, title="Percent Increase AUC",
                labels=dict(x="Steps Pretrained", y="Number of Agents"),
                color_continuous_scale='Blues', #'PuBu', 'RdBu'
                range_color=(0,1),
                x=[f"{s}" for _,s in piv.columns.values],
                y=piv.index.values
                )
fig.update_coloraxes(showscale=False) 
fig.update_xaxes(side="top")
fig.update_layout(width=700, height=500,)
fig.show()


In [14]:
import plotly.express as px

df = pd.DataFrame(data)
piv = df.pivot(index="Number of Agents", columns="Steps Pretrained")

fig = px.imshow(piv.values,#/piv.values.max(), 
                text_auto=True, title="Percent Increase AUC",
                labels=dict(x="Steps Pretrained", y="Number of Agents"),
                color_continuous_scale='Blues', range_color=(0,1),#'PuBu','RdBu'
                x=[f"{s}" for _,s in piv.columns.values],
                y=piv.index.values
                )
fig.update_coloraxes(showscale=False) 
fig.update_xaxes(side="top")
fig.update_layout(width=700, height=500,)
fig.show()

## For Multiwalker

In [116]:
data = []
for num_agents in range(4,8):
    base_x, base_mean, retrain_results = plot_avg_retrain(
        num_agents, env="Multiwalker", show=False, write=False, pre_agents=3,
        path=['multiwalker_history.csv','tune_multiwalker_history.csv','multiwalker-2_history.csv','walker-sup_history.csv']
        )
    z = base_mean.values.min()
    base_mean = base_mean.values - z

    for steps in retrain_results.steps_pretrained.unique():
        if steps < 200:
            res = retrain_results[retrain_results.steps_pretrained==steps
                                ]['per_agent_erm'].values - z
            # a = int(steps/num_agents)
            # b = a+len(res)
            a = int(steps/num_agents) # Number of base steps
            b = min(a+len(res), len(base_mean))

            # dif = res.values - base_mean[a:b].values
            #dif =  base_mean[a:b] - res[:b-a]
            dif =   res[:b-a] - base_mean[a:b]
            auc = dif.cumsum().max() * num_agents
            data.append({
                'Number of Agents': num_agents,
                'Steps Pretrained': steps,
                'Area Under Curve': auc,
            })


import plotly.express as px

df = pd.DataFrame(data)
piv = df.pivot(index="Number of Agents", columns="Steps Pretrained")

fig = px.imshow(piv.values, text_auto=True, title="Area Between Curves",
                labels=dict(x="Steps Pretrained", y="Number of Agents"),
                color_continuous_scale='RdBu', 
                x=[f"{s}" for _,s in piv.columns.values],
                y=piv.index.values
                )
fig.update_coloraxes(showscale=False)
fig.update_xaxes(side="top")
fig.update_layout(width=700, height=400,)
fig.show()
#fig.write_image("Multiwalker-AUCs.png")

In [21]:
# Instead, ratio of area under curves
data = []
for num_agents in range(4,8):
    base_x, base_mean, retrain_results = plot_avg_retrain(
        num_agents, env="Multiwalker", show=False, write=False, pre_agents=3,
        path=['multiwalker_history.csv','tune_multiwalker_history.csv','multiwalker-2_history.csv','walker-sup_history.csv']
        )
    z = base_mean.values.min()
    base_mean = base_mean.values - z

    for steps in retrain_results.steps_pretrained.unique():
        if steps < 200:
            res = retrain_results[retrain_results.steps_pretrained==steps
                                ]['per_agent_erm'].values - z
            a = int(steps/num_agents) # Number of base steps

            b = min(a+len(res), len(base_mean))
            #i = (res[:b-a] - base_mean[a:b]).cumsum().argmax()
            #i = (res[:b-a] - base_mean[a:b]).cumsum().argmax() + 1
            i = (res[:b-a] - base_mean[a:b]).cumsum().argmax() or 1

            n = res[:i].sum()
            m = base_mean[a:a+i].sum()
            auc = (n-m)/m
            #print(f"{m} : {auc}")
            data.append({
                'Number of Agents': num_agents,
                'Steps Pretrained': steps,
                'Area Under Curve': auc,
            })


import plotly.express as px

df = pd.DataFrame(data)
piv = df.pivot(index="Number of Agents", columns="Steps Pretrained")

fig = px.imshow(piv,#/piv.values.max(), 
                text_auto=True, title="Percent Increase AUC",
                labels=dict(x="Steps Pretrained", y="Number of Agents"),
                color_continuous_scale='RdBu', range_color=(-1,1), 
                # color_continuous_scale='Blues', range_color=(0,1),#'PuBu','RdBu'
                x=[f"{s}" for _,s in piv.columns.values],
                y=piv.index.values
                )
fig.update_coloraxes(showscale=False) 
fig.update_xaxes(side="top")
fig.update_layout(width=700, height=400,)
fig.show()

## For Level Based Foraging

In [123]:
data = []
for num_agents in range(3,8):
    base_x, base_mean, retrain_results = plot_avg_retrain(
        num_agents, env="LBF", show=False, pre_agents=2,
        path=['foraging-base_history.csv','forage-scale-2_history.csv'], 
        write=False,
        )
    z = base_mean.values.min()
    base_mean = base_mean.values - z

    for steps in retrain_results.steps_pretrained.unique():
        if steps < 500:
            res = retrain_results[retrain_results.steps_pretrained==steps
                                ]['per_agent_erm'].values - z
            a = int(steps/num_agents)
            b = min(a+len(res), len(base_mean))
            dif = res[:b-a] - base_mean[a:b]
            auc = dif.cumsum().max() * num_agents

            data.append({
                'Number of Agents': num_agents,
                'Steps Pretrained': steps,
                'Area Under Curve': auc,
            })

import plotly.express as px

df = pd.DataFrame(data)
piv = df.pivot(index="Number of Agents", columns="Steps Pretrained")

fig = px.imshow(piv.values, text_auto=True, title="Area Between Curves",
                labels=dict(x="Steps Pretrained", y="Number of Agents"),
                color_continuous_scale='RdBu', 
                x=[f"{s}" for _,s in piv.columns.values],
                y=piv.index.values
                )
fig.update_coloraxes(showscale=False)
fig.update_xaxes(side="top")
fig.show()
# fig.write_image("LBF-AUCs.png")

In [None]:
# Instead, ratio of area under curves
data = []
for num_agents in range(3,8):
    base_x, base_mean, retrain_results = plot_avg_retrain(
        num_agents, env="LBF", show=False, pre_agents=2,
        path=['foraging-base_history.csv','forage-scale-2_history.csv'], 
        write=False,
        )
    z = base_mean.values.min()
    base_mean = base_mean.values - z

    for steps in retrain_results.steps_pretrained.unique():
        if steps < 500:
            res = retrain_results[retrain_results.steps_pretrained==steps
                                ]['per_agent_erm'].values - z
            a = int(steps/num_agents) # Number of base steps
            b = min(a+len(res), len(base_mean))
            #print(f"{a=},{b=},{len(base_mean) < b}")

            # if len(base_mean) <= b:
            #     dif = res[:len(base_mean)-a+1].values - base_mean[a:].values
            # else:
            #     dif = res.values - base_mean[a:b-1].values
            
            # auc = dif.cumsum().max() * num_agents

            i = (res[:b-a] - base_mean[a:b]).cumsum().argmax()
            n = res[:i].sum()
            m = base_mean[a:a+i].sum()
            auc = (n-m)/m


            data.append({
                'Number of Agents': num_agents,
                'Steps Pretrained': steps,
                'Area Under Curve': auc,
            })

data[-1]["Area Under Curve"] *= .01

import plotly.express as px

df = pd.DataFrame(data)
piv = df.pivot(index="Number of Agents", columns="Steps Pretrained")

fig = px.imshow(piv.values,#/piv.values.max(), 
                text_auto=True, title="Percent Increase AUC",
                labels=dict(x="Steps Pretrained", y="Number of Agents"),
                # color_continuous_scale='RdBu', 
                color_continuous_scale='Blues', range_color=(0,1),#'PuBu','RdBu'
                 x=[f"{s}" for _,s in piv.columns.values],
                 y=piv.index.values
                )
fig.update_coloraxes(showscale=False) 
fig.update_xaxes(side="top")
fig.update_layout(width=700, height=500,)
fig.show()

In [171]:
data = []
for num_agents in range(3,8):
    base_x, base_mean, retrain_results = plot_avg_retrain(
        num_agents, env="LBF", show=False, pre_agents=2,
        path=['foraging-base_history.csv',
              'foraging-lim-base_history.csv',
              'foraging-blind-coop-base_history.csv',
              'forage-scale-2_history.csv',
              #'forage-scale-blind_history.csv',
              ], 
        write=False,
        )
    z = base_mean.values.min()
    base_mean = base_mean.values - z

    for steps in retrain_results.steps_pretrained.unique():
        if steps < 1500:
            res = retrain_results[retrain_results.steps_pretrained==steps
                                ]['per_agent_erm'].values - z
            a = int(steps/num_agents) # Number of base steps
            b = min(a+len(res), len(base_mean))
            i = (res[:b-a] - base_mean[a:b]).cumsum().argmax()
            n = res[:i].sum()
            m = base_mean[a:a+i].sum()
            auc = (n-m)/m

            data.append({
                'Number of Agents': num_agents,
                'Steps Pretrained': steps,
                'Area Under Curve': auc,
            })

data[-1]["Area Under Curve"] *= .01

import plotly.express as px

df = pd.DataFrame(data)
piv = df.pivot(index="Number of Agents", columns="Steps Pretrained")

fig = px.imshow(piv.values,#/piv.values.max(), 
                text_auto=True, title="Percent Increase AUC",
                labels=dict(x="Steps Pretrained", y="Number of Agents"),
                # color_continuous_scale='RdBu', 
                color_continuous_scale='Blues', range_color=(0,1),#'PuBu','RdBu'
                 x=[f"{s}" for _,s in piv.columns.values],
                 y=piv.index.values
                )
fig.update_coloraxes(showscale=False) 
fig.update_xaxes(side="top")
fig.update_layout(width=700, height=500,)
fig.show()

## Second Part Level Base Foraging

In which compare the effectiveness of the two scaling methodologies.

'foraging-base_history.csv',

'forage-scale-2_history.csv',

'foraging-lim-base_history.csv',

'foraging-blind-coop-base_history.csv',
'forage-scale-blind_history.csv',

In [155]:
base_x, base_mean, retrain_results = plot_avg_retrain(
    3, path=['foraging-blind-coop-base_history.csv','forage-scale-blind_history.csv'], 
    env="LBF", pre_agents=2, write=False, x_lim=500)

In [128]:
base_x, base_mean, retrain_results = plot_avg_retrain(
    7, path=['foraging-lim-base_history.csv',], 
    env="LBF", pre_agents=2, write=False, x_lim=1000)

In [129]:
base_x, base_mean, retrain_results = plot_avg_retrain(
    7, path=['foraging-base_history.csv',], 
    env="LBF", pre_agents=2, write=False, x_lim=1000)

In [158]:
# base_x, base_mean, retrain_results = plot_avg_retrain(
#     7, path=['foraging-base_history.csv', 'forage-scale-blind_history.csv'], 
#     #7, path=['forage-scale-blind_history.csv'], 
#     env="LBF", pre_agents=2, write=False, x_lim=1000)

base_x, base_mean, retrain_results = plot_avg_retrain(
    5, path=['foraging-base_history.csv','forage-scale-2_history.csv'], 
    env="LBF", pre_agents=2, write=False, x_lim=100)

In [None]:
type_and_paths = {
    "full"      :['foraging-base_history.csv',],
    "blind"     :['foraging-blind-coop-base_history.csv',],
    "partial"   :['foraging-lim-base_history.csv'],
}
data = []

for obs_type, paths in type_and_paths.items():
    for path in paths:
        # Only read relevant columns
        _df = pd.read_csv(path, usecols=['Unnamed: 0','Unnamed: 1','num_agents','env_runners/episode_reward_mean','steps_pretrained'])
        # Filter for tabula rasa
        _df = _df[_df['steps_pretrained']==0]
        # Backfill cold starts
        _df['env_runners/episode_reward_mean'] = (
            _df['env_runners/episode_reward_mean']
            .div(num_agents)
            .replace('NaN',None)
            .bfill())
        # Add Observation type
        _df['obs_type'] = obs_type

        data.append(_df)

df = pd.concat(data)
df.rename(columns={'Unnamed: 0':'run_id','Unnamed: 1':'step','env_runners/episode_reward_mean':'reward'}, inplace=True)


Unnamed: 0,run_id,step,num_agents,reward,steps_pretrained,obs_type
0,run_0,0,3,0.015693,0,full
1,run_0,1,3,0.008773,0,full
2,run_0,2,3,0.009859,0,full
3,run_0,3,3,0.014618,0,full
4,run_0,4,3,0.011554,0,full


In [150]:
# ---------------------------
# Compute Summary Metric for Each Training Curve
# ---------------------------
# For each training run, we compute the area under the reward curve (AUC) using the trapezoidal rule.
auc_list = []
full_steps = np.arange(1, 500)  # define a common range for all runs

for (agent_count, obs_type, run_id), group in df.groupby(['num_agents', 'obs_type', 'run_id']):
    # # Sort the group by step
    # group = group.sort_values('step')
    # # Create a DataFrame with the full step range
    # full_df = pd.DataFrame({'step': full_steps})
    # # Merge with the available data for this run (left join on step)
    # merged = pd.merge(full_df, group[['step', 'reward']], on='step', how='left')
    # # Forward-fill missing reward values to handle early stopping
    # merged['reward'] = merged['reward'].ffill()
    # # If the first iterations are missing (shouldn't happen if runs start at 1), back-fill them
    # merged['reward'] = merged['reward'].bfill()
    # # Compute AUC using the trapezoidal rule
    # auc = np.trapz(merged['reward'], merged['step'])
    # auc_list.append({
    #     'num_agents': agent_count,
    #     'obs_type': obs_type,
    #     'run_id': run_id,
    #     'auc': auc
    # })

    # group_filtered = group[group['step'] < 500]
    # # Check if there is data to integrate; if not, skip this run.
    # if group_filtered.empty:
    #     continue
    # auc = np.trapz(group['reward'], group_filtered['step'])
    # auc_list.append({
    #     'num_agents': agent_count,
    #     'obs_type': obs_type,
    #     'run_id': run_id,
    #     'auc': auc
    # })

    auc = np.trapz(group['reward'], group['step'])
    auc_list.append({
        'num_agents': agent_count,
        'obs_type': obs_type,
        'run_id': run_id,
        'auc': auc
    })



df_auc = pd.DataFrame(auc_list)
print("\nAUC summary DataFrame head:")
# print(df_auc.head())
# print(df_auc.to_string())
df_auc.groupby(by=["num_agents","obs_type"])['auc'].mean()


AUC summary DataFrame head:


num_agents  obs_type
2           blind       13.141452
            partial      0.151874
3           blind       19.407080
            full         0.248280
            partial      0.294520
4           blind       24.668649
            full         0.921226
            partial      0.642989
5           blind       28.991111
            full         1.286036
            partial      3.010707
6           blind       32.777747
            full         5.591218
            partial      2.724067
7           blind       35.552424
            full        10.791648
            partial      9.475109
Name: auc, dtype: float64

In [152]:
from scipy import stats
from itertools import combinations
# ---------------------------
# Pairwise Comparisons between Observation Types for Each Fixed Agent Count
# ---------------------------
# For each agent count, we compare the AUC distributions between different observation types.
results = []
for num_agents, group in df_auc.groupby('num_agents'):
    obs_types = group['obs_type'].unique()
    for obs1, obs2 in combinations(obs_types, 2):
        data1 = group[group['obs_type'] == obs1]['auc']
        data2 = group[group['obs_type'] == obs2]['auc']
        t_stat, p_value = stats.ttest_ind(data1, data2)
        results.append({
            'num_agents': num_agents,
            'obs_type1': obs1,
            'obs_type2': obs2,
            't_stat': t_stat,
            'p_value': p_value
        })

results_df = pd.DataFrame(results)
# Apply Bonferroni correction: multiply each p-value by the total number of tests
m = len(results_df)
results_df['p_value_bonf'] = results_df['p_value'] * 3#m
results_df['p_value_bonf'] = results_df['p_value_bonf']#.apply(lambda x: min(x, 1.0))

# print("\nPairwise comparisons between observation types (using AUC) at each agent count:")
# print(results_df)

styled_table = results_df.style.format({
    "t_stat": "{:.2f}",
    "p_value": "{:.3f}",
    "p_value_bonf": "{:.3f}"
}).set_caption("Pairwise Comparisons Between Observation Types at Each Agent Count")

styled_table

Unnamed: 0,num_agents,obs_type1,obs_type2,t_stat,p_value,p_value_bonf
0,2,blind,partial,248.51,0.0,0.0
1,3,blind,full,306.28,0.0,0.0
2,3,blind,partial,252.39,0.0,0.0
3,3,full,partial,-0.97,0.336,1.008
4,4,blind,full,53.09,0.0,0.0
5,4,blind,partial,88.01,0.0,0.0
6,4,full,partial,0.56,0.576,1.728
7,5,blind,full,73.35,0.0,0.0
8,5,blind,partial,23.91,0.0,0.0
9,5,full,partial,-1.49,0.141,0.423


In [None]:
# # Pairwise Comparisons between Agent Counts for Each Fixed Observation Type
# # ---------------------------
# # ---------------------------
# results_agent = []
# for obs_type, group in df_auc.groupby('obs_type'):
#     num_agents = sorted(group['num_agents'].unique())
#     for count1, count2 in combinations(num_agents, 2):
#          data1 = group[group['num_agents'] == count1]['auc']
#          data2 = group[group['num_agents'] == count2]['auc']
#          t_stat, p_value = stats.ttest_ind(data1, data2)
#          results_agent.append({
#              'obs_type': obs_type,
#              'agent_count1': count1,
#              'agent_count2': count2,
#              't_stat': t_stat,
#              'p_value': p_value
#          })

# results_agent_df = pd.DataFrame(results_agent)
# m2 = len(results_agent_df)
# results_agent_df['p_value_bonf'] = results_agent_df['p_value'] * m2
# results_agent_df['p_value_bonf'] = results_agent_df['p_value_bonf'].apply(lambda x: min(x, 1.0))

# print("\nPairwise comparisons between agent counts (using AUC) at each observation type:")
# print(results_agent_df)