# Analyzing Runs

In [1]:
import os
import json
import itertools
import numpy as np
import pandas as pd

# Root directory of the sweep; its immediate subdirectories are scanned for runs below.
MODEL_DIR = "models/good_sweep_1"

In [2]:
# Collect the run directories that belong to the "mu" plotting runs.
# Run directories are named "<model>-plotting_type=<type>-<key>=<value>-...",
# so the second '-'-separated field carries the plotting type. Slicing with
# [1:2] yields an empty list (instead of raising IndexError) for directories
# whose name contains no '-'.
mu_dirs = [
    entry.path
    for entry in os.scandir(MODEL_DIR)
    if entry.is_dir() and entry.name.split('-')[1:2] == ["plotting_type=mu"]
]

print(len(mu_dirs))

# Load each run's plot_data.json, keyed by the run directory name.
mu_data = {}
for mu_dir in mu_dirs:
    try:
        with open(os.path.join(mu_dir, "plot_data.json"), "r") as f:
            name = os.path.basename(mu_dir)
            mu_data[name] = json.load(f)
    except FileNotFoundError:
        print(f"Could not find plot_data.json in {mu_dir}")
    except (OSError, ValueError) as err:
        # Still best-effort: keep loading the remaining runs, but report the
        # real cause (unreadable file, invalid JSON, bad encoding) rather than
        # mislabelling it as a missing file. json.JSONDecodeError and
        # UnicodeDecodeError are both ValueError subclasses.
        print(f"Could not load plot_data.json in {mu_dir}: {err!r}")

print(len(mu_data))
print(mu_data.keys())

768
Could not find plot_data.json in models/good_sweep_1/transformer-plotting_type=mu-family=bert-n_positions=64-pretraining_sigma_tasks=1-pretraining_mu_tasks=2-default_sigma=0.2-family=bert-hid_dim=4-mlp=0-layer=2-head=8-tokenizer=learnable
Could not find plot_data.json in models/good_sweep_1/transformer-plotting_type=mu-family=bert-n_positions=64-pretraining_sigma_tasks=1-pretraining_mu_tasks=4-default_sigma=0.2-family=bert-hid_dim=4-mlp=0-layer=2-head=8-tokenizer=learnable
Could not find plot_data.json in models/good_sweep_1/transformer-plotting_type=mu-family=bert-n_positions=64-pretraining_sigma_tasks=1-pretraining_mu_tasks=8-default_sigma=1-family=bert-hid_dim=4-mlp=0-layer=8-head=8-tokenizer=learnable
Could not find plot_data.json in models/good_sweep_1/transformer-plotting_type=mu-family=bert-n_positions=64-pretraining_sigma_tasks=1-pretraining_mu_tasks=4-default_sigma=1-family=bert-hid_dim=4-mlp=0-layer=8-head=8-tokenizer=learnable
Could not find plot_data.json in models/good

In [4]:
# Flatten every loaded run into one table row: the swept hyperparameters parsed
# from the run name, a convergence flag, the last logged epoch, and the raw
# plot data.
keys_to_show = ("pretraining_mu_tasks", "hid_dim", "layer", "head", "default_sigma")
data_dict = {column: [] for column in (*keys_to_show, "converged", "converged_epoch", "data")}

for model, plot_data in mu_data.items():
    # Run names look like "<prefix>-key=value-key=value-..."; the prefix is skipped.
    parsed_args = {}
    for model_arg in model.split('-')[1:]:
        fields = model_arg.split('=')
        parsed_args[fields[0]] = fields[1]
    model_params = {key: value for key, value in parsed_args.items() if key in keys_to_show}

    # Hyperparameter values are stored exactly as they appear in the name (strings).
    for key in keys_to_show:
        data_dict[key].append(model_params[key])

    # A run with 100000//50 + 1 logged entries is flagged as not converged
    # (presumably 100000 epochs logged every 50, plus epoch 0 -- TODO confirm).
    logged_epochs = plot_data.keys()
    data_dict["converged"].append(len(logged_epochs) != 100000//50 + 1)
    data_dict["converged_epoch"].append(max(int(epoch) for epoch in logged_epochs))
    data_dict["data"].append(plot_data)

df = pd.DataFrame(data_dict)
df

Unnamed: 0,pretraining_mu_tasks,hid_dim,layer,head,default_sigma,converged,converged_epoch,data
0,4,16,1,1,0.2,True,67710,"{'0': {'d_m_bd': 52.55672014053681, 'd_m_bc': ..."
1,2,16,2,1,0.2,True,5690,"{'0': {'d_m_bd': 12.208204541829668, 'd_m_bc':..."
2,32,4,8,1,1,False,100000,"{'0': {'d_m_bd': 1.2458030804020015, 'd_m_bc':..."
3,1,16,1,1,1,True,2100,"{'0': {'d_m_bd': 0.8799767237827255, 'd_m_bc':..."
4,2,16,8,8,1,True,2400,"{'0': {'d_m_bd': 1.2398886814151278, 'd_m_bc':..."
...,...,...,...,...,...,...,...,...
715,1,256,1,1,1,True,2000,"{'0': {'d_m_bd': 1.3728572046925982, 'd_m_bc':..."
716,2,64,1,8,0.2,True,4380,"{'0': {'d_m_bd': 26.312910009824712, 'd_m_bc':..."
717,2,16,4,2,0.2,True,4980,"{'0': {'d_m_bd': 26.598974338000975, 'd_m_bc':..."
718,32,256,8,2,1,True,83400,"{'0': {'d_m_bd': 8.974372054153871, 'd_m_bc': ..."


In [34]:
# Summary table without the bulky per-run "data" column.
df_no_data = df.drop(columns=["data"])
# Sort by pretraining_mu_tasks, then hid_dim, then layer, then head, then default_sigma.
# These columns hold strings parsed from the run names, so they are compared
# numerically via `key`; a plain string sort would place "16" before "2".
# The stored values themselves are left untouched.
df_no_data = df_no_data.sort_values(
    by=["pretraining_mu_tasks", "hid_dim", "layer", "head", "default_sigma"],
    key=pd.to_numeric,
)

df_no_data.to_csv("mu_data.csv", index=False)

## Models that didn't converge

In [24]:
# Collect the runs that logged the full 100000//50 + 1 entries; these are the
# runs counted as having failed to converge.
full_run_entries = 100000//50 + 1
models_that_failed_to_converge = [
    run_name for run_name in mu_data.keys()
    if len(mu_data[run_name].keys()) == full_run_entries
]

print(f"Out of the {len(mu_data)} trained models, {len(models_that_failed_to_converge)} failed to converge.")

keys_to_show = ("pretraining_mu_tasks", "hid_dim", "layer", "head", "default_sigma")
for model in models_that_failed_to_converge:
    # Run names look like "<prefix>-key=value-key=value-..."; the prefix is skipped.
    parsed_args = {}
    for model_arg in model.split('-')[1:]:
        fields = model_arg.split('=')
        parsed_args[fields[0]] = fields[1]
    model_params = {key: value for key, value in parsed_args.items() if key in keys_to_show}

    # Only runs whose default_sigma is not "1" are listed; the count printed
    # above still includes every non-converged run.
    if model_params["default_sigma"] != "1":
        print(model_params)

Out of the 651 trained models, 220 failed to converge.
{'pretraining_mu_tasks': '16', 'default_sigma': '0.2', 'hid_dim': '4', 'layer': '4', 'head': '4'}
{'pretraining_mu_tasks': '16', 'default_sigma': '0.2', 'hid_dim': '4', 'layer': '8', 'head': '1'}
{'pretraining_mu_tasks': '16', 'default_sigma': '0.2', 'hid_dim': '256', 'layer': '8', 'head': '8'}
{'pretraining_mu_tasks': '8', 'default_sigma': '0.2', 'hid_dim': '256', 'layer': '2', 'head': '4'}
{'pretraining_mu_tasks': '4', 'default_sigma': '0.2', 'hid_dim': '4', 'layer': '4', 'head': '1'}
{'pretraining_mu_tasks': '16', 'default_sigma': '0.2', 'hid_dim': '4', 'layer': '8', 'head': '4'}
{'pretraining_mu_tasks': '4', 'default_sigma': '0.2', 'hid_dim': '256', 'layer': '4', 'head': '2'}
{'pretraining_mu_tasks': '8', 'default_sigma': '0.2', 'hid_dim': '256', 'layer': '4', 'head': '1'}
{'pretraining_mu_tasks': '32', 'default_sigma': '0.2', 'hid_dim': '16', 'layer': '1', 'head': '4'}
{'pretraining_mu_tasks': '8', 'default_sigma': '0.2', 'hid