# Results

In [1]:
! pip install wandb matplotlib pandas numpy



In [2]:
import wandb
import pandas as pd
import numpy as np
from datetime import datetime
import yaml

In [3]:
wandb.login()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


[34m[1mwandb[0m: Currently logged in as: [33mbiermann-carla[0m ([33mbiermann-carla-university-of-cambridge[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [4]:
def get_runs(entity, project):
    api = wandb.Api(timeout=30)
    wandb_filters = {
        "config.algorithms": ["insertion_sort"],
        "state": "finished",
    }
    runs = api.runs(f"{entity}/{project}", filters=wandb_filters) 
    print(f"Found {len(runs)} runs.")
    history = []
    filtered_runs = []
    for run in runs:
        created_at = datetime.strptime(run.created_at, "%Y-%m-%dT%H:%M:%SZ")
        runtime = run.summary.get("_runtime", 0)
        if (
            created_at >= datetime(2025, 3, 24) # started on March 24
            and 10 * 60 <= runtime <= 11 * 3600 # between 10 minutes and 11 hours
        ):
            history_data = run.history()  # Fetch full history
            history_data = history_data[history_data["_step"] <= 80000]  # Keep only <= 80k steps

            history.append({"config": dict(run.config), **history_data.iloc[-1].to_dict()})  # Append only last valid step
            filtered_runs.append(run)

    print(f"{len(filtered_runs)} filtered runs")
    df = pd.json_normalize(history)
    return filtered_runs, df

In [5]:
# Takes about 7 minutes
runs, df = get_runs("biermann-carla-university-of-cambridge", "L65-project")

Found 49 runs.
18 filtered runs


In [6]:
df

Unnamed: 0,insertion_sort_test_aggregated_msg_avg_norm,insertion_sort_val_j_entropy,insertion_sort_aggregated_msg_mean,insertion_sort_test_post_subtraction_premlpmsg_mean,insertion_sort_postmlpmsg_avg_norm,insertion_sort_val_aggregated_msg_combined_avg_norm,insertion_sort_test_j_entropy,insertion_sort_test_aggregated_msg_combined_avg_norm,model_weights_norm,insertion_sort_test_no_subtraction_premlpmsg_avg_norm,...,config.mpnn_processor_aggregator,insertion_sort_val_msg_kappa_subtrahend_mean,insertion_sort_val_msg_kappa_subtrahend_avg_norm,insertion_sort_msg_kappa_subtrahend_avg_std,insertion_sort_test_msg_kappa_subtrahend_mean,insertion_sort_test_msg_kappa_subtrahend_avg_norm,insertion_sort_msg_kappa_subtrahend_mean,insertion_sort_val_msg_kappa_subtrahend_avg_std,insertion_sort_msg_kappa_subtrahend_avg_norm,insertion_sort_test_msg_kappa_subtrahend_avg_std
0,,,13.793355,,652.441772,,,,,,...,max,,,,,,,,,
1,,,-3.173209,,766.517761,,,,,,...,max,,,,,,,,,
2,,,0.106677,,1790.064697,,,,,,...,max,,,21.88328,,,10.213956,,276.785736,
3,,,7.041901,,3133.040039,,,,,,...,max,,,,,,,,,
4,,,-0.506094,,657.984436,,,,,,...,max,,,22.446213,,,3.21835,,261.925903,
5,,,5.251039,,430.78775,,,,,,...,max,,,,,,,,,
6,,,8.265682,,2113.494873,,,,,,...,max,,,34.384384,,,0.873057,,395.2659,
7,,,-0.577068,,2686.196045,,,,,,...,max,,,,,,,,,
8,,,11.852423,,393.684204,,,,,,...,max,,,,,,,,,
9,,,2.633747,,1937.247437,,,,,,...,max,,,,,,,,,


In [7]:
full_df = df

In [8]:
full_df["config.algorithms"] = full_df["config.algorithms"].apply(lambda x: x[0])

In [9]:
full_df.pivot_table(
    index=[
        "config.processor_type",
        "config.hint_teacher_forcing",
        "config.mpnn_processor_aggregator",
    ],
    values=[
        "test_score_current_model_insertion_sort",
        "insertion_sort_test_score",
    ],
    aggfunc=np.mean,
)

  full_df.pivot_table(


config.processor_type,config.hint_teacher_forcing,config.mpnn_processor_aggregator


In [10]:
algorithm = full_df["config.algorithms"]
print(algorithm)
print(
    full_df[full_df["config.algorithms"] == algorithm][
        ["config.processor_type", f"test_score_current_model_{algorithm}"]
    ].to_markdown()
)
print()

0     insertion_sort
1     insertion_sort
2     insertion_sort
3     insertion_sort
4     insertion_sort
5     insertion_sort
6     insertion_sort
7     insertion_sort
8     insertion_sort
9     insertion_sort
10    insertion_sort
11    insertion_sort
12    insertion_sort
13    insertion_sort
14    insertion_sort
15    insertion_sort
16    insertion_sort
17    insertion_sort
Name: config.algorithms, dtype: object


KeyError: "['test_score_current_model_0     insertion_sort\\n1     insertion_sort\\n2     insertion_sort\\n3     insertion_sort\\n4     insertion_sort\\n5     insertion_sort\\n6     insertion_sort\\n7     insertion_sort\\n8     insertion_sort\\n9     insertion_sort\\n10    insertion_sort\\n11    insertion_sort\\n12    insertion_sort\\n13    insertion_sort\\n14    insertion_sort\\n15    insertion_sort\\n16    insertion_sort\\n17    insertion_sort\\nName: config.algorithms, dtype: object'] not in index"

# Results Analysis

In [11]:
# Extract configurations from matching runs
filtered_configs = {run.name: run.config for run in runs}

# Save to a config file
with open("filtered_configs_insertion_sort.yaml", "w") as f:
    yaml.dump(filtered_configs, f)

print(f"Saved {len(filtered_configs)} filtered configs.")

Saved 15 filtered configs.


In [14]:
# Extract scores for all runs
all_scores = {}

score_field = "charts/eval_avg_episodic_reward_new"
for run in runs:
    cfg = run.config
    summary = run.summary

    print(run.name, "\n", cfg, "\n", summary)
    break

2025-03-24 14:08:21-['insertion_sort']-differential_mpnn_maxmax-ablate_subtract 
 {'seed': 42, 'use_ln': True, 'nb_heads': 1, 'use_lstm': False, 'hint_mode': 'encoded_decoded', 'algorithms': ['insertion_sort'], 'batch_size': 32, 'eval_every': 500, 'random_pos': True, 'test_every': 500, 'hidden_size': 128, 'train_steps': 100000, 'chunk_length': 16, 'dataset_path': '/tmp/CLRS30', 'dropout_prob': 0, 'encoder_init': 'xavier_on_scalars', 'learning_rate': 0.001, 'length_needle': -8, 'train_lengths': ['4', '7', '11', '13', '16'], 'nb_triplet_fts': 8, 'processor_type': 'differential_mpnn_maxmax-ablate_subtract', 'checkpoint_path': '/tmp/CLRS30', 'chunked_training': False, 'freeze_processor': False, 'hint_repred_mode': 'soft', 'grad_clip_max_norm': 1, 'enforce_permutations': True, 'hint_teacher_forcing': 0, 'nb_msg_passing_steps': 1, 'enforce_pred_as_input': True, 'mpnn_processor_aggregator': 'max'} 
 {'_runtime': 23814.54557141, '_step': 100000, '_timestamp': 1742849117.4234157, '_wandb': {'ru