# (Currently chess only) Dataframe comparing SAE statistics

In [1]:
# Imports
import sys
sys.path.append("../")

from tqdm import tqdm
import pickle
import torch
import einops
from datasets import load_dataset
from typing import Callable, Optional
import math
import os
import itertools
import json
import gc
from joblib import Parallel, delayed

import pandas as pd

from dataclasses import dataclass
import torch
from nnsight import NNsight
import json
from typing import Any
from datasets import load_dataset
from einops import rearrange
from jaxtyping import Int, Float, jaxtyped
from torch import Tensor
import os
from tqdm import tqdm
from transformers import GPT2LMHeadModel
from transformer_lens import HookedTransformer

from circuits.dictionary_learning import AutoEncoder
from circuits.chess_utils import encode_string
from circuits.dictionary_learning import ActivationBuffer
from circuits.dictionary_learning.dictionary import AutoEncoder, GatedAutoEncoder
from circuits.dictionary_learning.trainers.gated_anneal import GatedAnnealTrainer
from circuits.dictionary_learning.trainers.gdm import GatedSAETrainer
from circuits.dictionary_learning.trainers.p_anneal import PAnnealTrainer
from circuits.dictionary_learning.trainers.standard import StandardTrainer
from circuits.dictionary_learning.evaluation import evaluate
from circuits.nanogpt_to_hf_transformers import NanogptTokenizer, convert_nanogpt_model
from circuits.eval_sae_as_classifier import (
    initialize_results_dict, 
    get_data_batch, 
    apply_indexing_function,
    construct_eval_dataset,
    construct_othello_dataset,
    prep_firing_rate_data,
)
from circuits.utils import (
    get_model, 
    get_submodule,
    get_ae_bundle,
    collect_activations_batch,
    get_nested_folders,
    get_firing_features,
    to_device,
    AutoEncoderBundle,
)
import circuits.chess_utils as chess_utils
import circuits.othello_utils as othello_utils
import circuits.othello_engine_utils as othello_engine_utils

from circuits.dictionary_learning.evaluation import evaluate

from IPython import embed

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Globals

# Dimension key (from https://medium.com/@NoamShazeer/shape-suffixes-good-coding-style-f836e72e24fd):
# F = features, or the feature-minibatch size, depending on the context
# B = batch_size
# L = seq length (context length)
# T = thresholds
# R = rows (or cols)
# C = classes for one hot encoding

# NOTE: machine-specific absolute path; adjust when running on another system.
home_dir = '/project/pi_mccallum_umass_edu/rangell_umass_edu/'
# home_dir already ends with '/', so plain string formatting produced a doubled
# separator ("...edu//chess-gpt-circuits"); os.path.join yields a clean path.
repo_dir = os.path.join(home_dir, 'chess-gpt-circuits')

#DEVICE = 'cuda:0'
# Disable autograd for this process; the cells below only evaluate models.
torch.set_grad_enabled(False)
batch_size = 32
# Features are processed in minibatches of the same size as the game batches.
feature_batch_size = batch_size
n_inputs = 2048 # Length of the eval dataset
GAME = "chess" # "chess" or "othello"

models_path = repo_dir + "/models/"

In [3]:
# Load dataset and init game specific variables

# Reject unsupported games first so the branches below only see valid values.
if GAME not in ("chess", "othello"):
    raise ValueError("Invalid game")

# Flag forwarded to prep_firing_rate_data further down.
othello = GAME == "othello"

if othello:
    autoencoder_group_paths = ["/autoencoders/othello_layer0/"]
    # autoencoder_group_paths = ["autoencoders/othello_layer0/", "autoencoders/othello_layer5_ef4/"]
    custom_functions = [
        # othello_utils.games_batch_no_last_move_to_state_stack_BLRRC,
        othello_utils.games_batch_to_state_stack_BLRRC,
        othello_utils.games_batch_to_state_stack_mine_yours_BLRRC,
    ]
    model_name = "Baidicoot/Othello-GPT-Transformer-Lens"
    # data = construct_othello_dataset(custom_functions, n_inputs, models_path=models_path, device=DEVICE)
    indexing_functions = [None]  # I'm experimenting with these for Othello
else:
    autoencoder_group_paths = ["/autoencoders/group-2024-05-11/"]
    custom_functions = [chess_utils.board_to_piece_state] #, chess_utils.board_to_pin_state]
    model_name = "adamkarvonen/8LayerChessGPT2"
    # data = construct_eval_dataset(custom_functions, n_inputs, models_path=models_path, device=DEVICE)
    indexing_functions = [chess_utils.find_dots_indices]

## General dataset statistics

These statistics depend only on the dataset, not on the SAE, so they can be calculated once after loading the dataset.

In [4]:
def get_true_board_state_counts(pgn_strings, device):
    """Count how often each one-hot board state occurs in ``pgn_strings``.

    Builds the ``board_to_piece_state`` state stacks for the given PGN strings,
    one-hot encodes them and sums the result over its first two dimensions.

    Args:
        pgn_strings: PGN strings handed to ``chess_utils.create_state_stacks``.
        device: Device handed to ``chess_utils.state_stack_to_one_hot``.

    Returns:
        The one-hot tensor summed over dims 0 and 1. Presumably shaped
        [R, R, C] (rows, cols, classes), as the original inline note stated;
        TODO confirm against ``chess_utils.state_stack_to_one_hot``.
    """
    # This could be calculated within the board_to_piece_state evaluation!
    piece_state_fn = chess_utils.board_to_piece_state
    state_stacks = chess_utils.create_state_stacks(pgn_strings, piece_state_fn)
    one_hot_states = chess_utils.state_stack_to_one_hot(
        chess_utils.config_lookup[piece_state_fn.__name__],
        device,
        state_stacks,
    )
    # Collapse the two leading dimensions so only the per-board-state counts remain.
    return one_hot_states.sum(dim=(0, 1))

## SAE-specific statistics

In [5]:
# Standard evals
def do_standard_evals(results, ae_bundle, device):
    """Run the dictionary-learning ``evaluate`` routine and merge its metrics into ``results``.

    Args:
        results: Mapping that receives one entry per metric returned by ``evaluate``.
        ae_bundle: Bundle providing ``ae``, ``buffer`` and ``context_length``.
        device: Device forwarded to ``evaluate``.

    Returns:
        The same ``results`` object, updated in place.
    """
    standard_metrics = evaluate(
        ae_bundle.ae,
        ae_bundle.buffer,
        max_len=ae_bundle.context_length,
        # min(n_eval_samples, activation_buffer_out_batch_size) matters
        batch_size=min(512, batch_size),
        io="out",
        device=device,
        #n_batches=1000
    )
    # Copy every returned metric under its own key.
    results.update(standard_metrics)
    return results

In [6]:
# Evaluation of custom functions
def eval_custom_fn(
    data,
    results,
    n_act_threshs,
    alive_features_F,
    max_activations_F,
    ae_bundle,
    pgn_strings,
    custom_functions,
    encoded_inputs,
    firing_rate_n_inputs,
    indexing_function,
    device
):
    """Accumulate firing counts and board-state sums per threshold and feature.

    Processes the first ``n_inputs`` games in batches of ``batch_size`` (both
    notebook globals). For every batch the activations of the alive features
    are compared against ``n_act_threshs`` thresholds, each a fraction in
    [0, 1] of the feature's maximum activation, and the following entries of
    ``results`` are incremented in place:

    * ``results["on_count"]`` / ``results["off_count"]``: number of positions
      where a feature is above / not above each threshold.
    * ``results[fn.__name__]["on"]`` / ``["off"]`` for each custom function:
      the board-state tensors summed over the positions where the feature is
      above / not above each threshold.

    Args:
        data: Dataset object forwarded to ``get_data_batch``.
        results: Accumulator dict with the entries listed above.
        n_act_threshs: Number of activation thresholds (T).
        alive_features_F: Alive features; its length defines F.
        max_activations_F: Per-feature maximum activation used to scale thresholds.
        ae_bundle: Autoencoder bundle forwarded to ``collect_activations_batch``.
        pgn_strings: Game strings; at least ``n_inputs`` entries are required.
        custom_functions: Board-state functions; their ``__name__`` keys
            ``results`` and the batch data.
        encoded_inputs: Encoded games, indexed in parallel with ``pgn_strings``.
        firing_rate_n_inputs: Only used in the progress message.
        indexing_function: Optional function forwarded to
            ``apply_indexing_function``; skipped when ``None``.
        device: Device for the thresholds and the batch tensors.

    Returns:
        ``results``, updated in place.
    """
    num_features = len(alive_features_F)
    print(
        f"Out of {ae_bundle.dictionary_size} features, on {firing_rate_n_inputs} activations, {num_features} are alive."
    )

    assert len(pgn_strings) >= n_inputs
    assert n_inputs % batch_size == 0

    n_iters = n_inputs // batch_size
    # We round up to ensure we don't ignore the remainder of features
    num_feature_iters = math.ceil(num_features / feature_batch_size)

    # Thresholds are fractions of each feature's maximum activation.
    thresholds_T = torch.linspace(0, 1, n_act_threshs).to(device)
    thresholds_TF11 = einops.repeat(thresholds_T, "T -> T F 1 1", F=num_features)
    max_activations_1F11 = einops.repeat(max_activations_F, "F -> 1 F 1 1")
    thresholds_TF11 = thresholds_TF11 * max_activations_1F11

    for i in tqdm(range(n_iters), desc="Aggregating statistics"):
        start = i * batch_size
        end = (i + 1) * batch_size
        pgn_strings_BL = pgn_strings[start:end]
        encoded_inputs_BL = encoded_inputs[start:end]
        encoded_inputs_BL = torch.tensor(encoded_inputs_BL).to(device)

        batch_data = get_data_batch(data, pgn_strings_BL, start, end, custom_functions, device)

        # The second return value (encoded token inputs) is not needed here.
        all_activations_FBL, _ = collect_activations_batch(
            ae_bundle, encoded_inputs_BL, alive_features_F
        )

        if indexing_function is not None:
            all_activations_FBL, batch_data = apply_indexing_function(
                pgn_strings_BL, all_activations_FBL, batch_data, device, indexing_function
            )

        # Board-state totals over all positions of this batch. They do not depend
        # on the feature minibatch, so compute them once per batch. They let us
        # derive the "off" sums as (total - on) instead of running a second
        # full-size masked reduction.
        board_totals_RRC = {
            custom_function.__name__: einops.reduce(
                batch_data[custom_function.__name__],
                "B L R1 R2 C -> R1 R2 C",
                "sum",
            )
            for custom_function in custom_functions
        }

        # For thousands of features, this would be many GB of memory. So, we minibatch.
        for feature_iter in range(num_feature_iters):
            f_start = feature_iter * feature_batch_size
            f_end = min(f_start + feature_batch_size, num_features)

            activations_FBL = all_activations_FBL[f_start:f_end]
            thresholds_TF11_slice = thresholds_TF11[:, f_start:f_end, :, :]
            # NOTE: Now F == feature_batch_size (or the remainder in the last minibatch)

            ### Aggregate batch statistics
            active_indices_TFBL = activations_FBL > thresholds_TF11_slice
            active_counts_TF = einops.reduce(active_indices_TFBL, "T F B L -> T F", "sum")
            off_counts_TF = einops.reduce(~active_indices_TFBL, "T F B L -> T F", "sum")

            results["on_count"][:, f_start:f_end] += active_counts_TF
            results["off_count"][:, f_start:f_end] += off_counts_TF

            # Trailing singleton axes let the mask broadcast over R1, R2 and C.
            active_mask_TFBL111 = active_indices_TFBL[:, :, :, :, None, None, None]

            for custom_function in custom_functions:
                fn_name = custom_function.__name__
                boards_BLRRC = batch_data[fn_name]

                # This reduction dominates the compute. Broadcasting adds the
                # T and F axes to the boards without an explicit repeat.
                active_boards_sum_TFRRC = einops.reduce(
                    boards_BLRRC[None, None] * active_mask_TFBL111,
                    "T F B L R1 R2 C -> T F R1 R2 C",
                    "sum",
                )
                # The mask is boolean, so boards * ~mask == boards - boards * mask
                # elementwise; summing over B and L gives total - on.
                off_boards_sum_TFRRC = board_totals_RRC[fn_name] - active_boards_sum_TFRRC

                # In-place accumulation into the tensors stored in `results`.
                results[fn_name]["on"][:, f_start:f_end] += active_boards_sum_TFRRC
                results[fn_name]["off"][:, f_start:f_end] += off_boards_sum_TFRRC

    return results

In [7]:
# Precision, recall, and F1

def get_classification_metrics(results, true_board_states_counts):
    """Summarise how well features classify board states and store it in ``results``.

    Precision, recall and F1 are computed for every (threshold, feature,
    board state) triple from ``results['board_to_piece_state']['on']`` (true
    positives, [T, F, R, R, C]), ``results['on_count']`` (all positives,
    [T, F]) and ``true_board_states_counts`` (all true occurrences, [R, R, C]).
    Each metric is compared against its fixed cut-off (precision 0.9,
    recall 0.01, F1 0.01) and four fractions per metric are written to ``results``:

    * ``frac_any_board_per_feature_act-nonzero_*``: share of features that pass
      the cut-off on at least one board state, using only the first threshold.
    * ``frac_any_board_per_feature_act-best_*``: same, using any threshold.
    * ``frac_any_feature_per_board_act-nonzero_*``: share of non-empty board
      states for which at least one feature passes, first threshold only.
    * ``frac_any_feature_per_board_act-best_*``: same, using any threshold.

    Args:
        results: Accumulator dict holding the tensors named above.
        true_board_states_counts: Occurrence count of every board state.

    Returns:
        ``results`` with the twelve summary floats added.
    """
    precision_thresh = 0.9
    recall_thresh = 0.01
    f1_thresh = 0.01
    names = ['precision', 'recall', 'f1']
    threshs = [precision_thresh, recall_thresh, f1_thresh]
    eps = 1e-8  # guards against division by zero
    # Class index that is excluded below; per the original comment this is the
    # "empty square" class of the piece-state encoding (TODO confirm in chess_utils).
    empty_square_class_idx = 6

    true_pos_TFRRC = results['board_to_piece_state']['on']
    pos_all_TF = results['on_count']
    # NOTE(review): recall is only meaningful if these counts cover exactly the
    # games/positions that were accumulated into the 'on' tracker - verify
    # against the caller, especially when an indexing function is used.
    true_all_RRC = true_board_states_counts

    # Read the board dimensions from the data instead of hard-coding 8x8x13.
    n_rows, n_cols, n_classes = true_pos_TFRRC.shape[-3:]
    num_board_states = n_rows * n_cols * (n_classes - 1)  # one class is dropped

    # Note that a feature which always fires (piece present/absent) will have a precision of 1
    precision = true_pos_TFRRC / (pos_all_TF[:, :, None, None, None] + eps)
    recall = true_pos_TFRRC / (true_all_RRC[None, None, :, :, :] + eps)
    f1 = 2 * (precision * recall) / (precision + recall + eps)
    metrics_TFRRC = [precision, recall, f1]

    # Apply threshold
    passes_TFRRC = [metric > thresh for metric, thresh in zip(metrics_TFRRC, threshs)]

    # Drop empty square state counts
    for passes in passes_TFRRC:
        passes[..., empty_square_class_idx] = False

    ### Fraction of features with high metric on at least one board state
    # Flatten the board axes so a single-dim `any` suffices; tuple-valued `dim`
    # for Tensor.any is only available in recent torch releases.
    passes_any_board_TF = [passes.flatten(start_dim=2).any(dim=-1) for passes in passes_TFRRC]

    # Report fraction of all features for count_as_firing_threshold = 0
    frac_any_board_nonzero = [passes[0].float().mean() for passes in passes_any_board_TF]

    # Report fraction of all features for any threshold (choose threshold per feature that maximizes ratio)
    frac_any_board_best = [passes.any(dim=0).float().mean() for passes in passes_any_board_TF]

    ### Fraction of board states that have at least one feature with high metric
    # Check for each board state whether at least one feature has a high metric (using count_as_firing_threshold = 0)
    passes_any_feature_nonzero_RRC = [passes[0].any(dim=0) for passes in passes_TFRRC]

    # Check for each board state whether at least one feature has a high metric (for any count_as_firing threshold)
    passes_any_feature_best_RRC = [passes.flatten(0, 1).any(dim=0) for passes in passes_TFRRC]

    # Fraction of individual board states at least one feature has a high metric
    frac_any_feature_nonzero = [passes.sum() / num_board_states for passes in passes_any_feature_nonzero_RRC]
    frac_any_feature_best = [passes.sum() / num_board_states for passes in passes_any_feature_best_RRC]

    print(frac_any_board_nonzero)
    print(frac_any_board_best)
    print(frac_any_feature_nonzero)
    print(frac_any_feature_best)

    for i, (name, t) in enumerate(zip(names, threshs)):
        results[f'frac_any_board_per_feature_act-nonzero_{name}-{t}'] = frac_any_board_nonzero[i].item()
        results[f'frac_any_board_per_feature_act-best_{name}-{t}'] = frac_any_board_best[i].item()
        results[f'frac_any_feature_per_board_act-nonzero_{name}-{t}'] = frac_any_feature_nonzero[i].item()
        results[f'frac_any_feature_per_board_act-best_{name}-{t}'] = frac_any_feature_best[i].item()

    return results

## Loop over SAEs

In [None]:
# Choose aes and indexing functions

# This could be computed once before the loop if adapting loading pgn_strings
# true_board_state_counts = get_true_board_state_counts(pgn_strings)

sweep_results = {}

# Result keys of interest for the sweep table: the four standard SAE metrics,
# followed by the classification summaries. For each metric cut-off there are
# four entries: {per-feature, per-board} x {first threshold only, best threshold}.
sweep_result_keys = ['l0', 'frac_variance_explained', 'cossim', 'l2_ratio'] + [
    f'{scope}_act-{activation_mode}_{metric_and_cutoff}'
    for metric_and_cutoff in ('precision-0.9', 'recall-0.01', 'f1-0.01')
    for scope in ('frac_any_board_per_feature', 'frac_any_feature_per_board')
    for activation_mode in ('nonzero', 'best')
]

# Gather every autoencoder folder nested under each configured group path.
all_autoencoder_paths = []
for group_path in autoencoder_group_paths:
    nested_folders = get_nested_folders(repo_dir + group_path)
    all_autoencoder_paths.extend(nested_folders)

# Every autoencoder is evaluated with every indexing function.
param_combinations = list(
    itertools.product(all_autoencoder_paths, indexing_functions)
)

#for ae_dir, idx_fn in param_combinations:
#    print(f'ae_dir: {ae_dir}')
#    print(f'idx_fn: {idx_fn}\n')

# autoencoder_path, indexing_function = param_combinations[1]

#for autoencoder_path, indexing_function in tqdm(param_combinations, desc="Autoencoder loop", total=len(param_combinations)):
def compute_results(device_id, autoencoder_path, indexing_function):
    """Evaluate one autoencoder with one indexing function on one GPU.

    Loads the evaluation data and the autoencoder bundle, determines the alive
    features, runs the standard evaluation and the custom board-state
    evaluation, and finally derives the classification summary metrics.

    Args:
        device_id: Index of the CUDA device to run on (used as ``cuda:{device_id}``).
        autoencoder_path: Folder of the autoencoder to evaluate.
        indexing_function: Optional indexing function passed on to
            ``eval_custom_fn``; ``None`` disables indexing.

    Returns:
        Tuple ``(ae_name, results)`` where ``ae_name`` joins the last two path
        components of ``autoencoder_path`` with ``-`` and ``results`` is the
        full results dict.
    """
    # When launched through joblib's process-based backend this function runs in
    # a separate worker process, where the notebook-level
    # torch.set_grad_enabled(False) has not been executed. Disable autograd here
    # as well so no graph is kept during evaluation. Harmless when run inline.
    torch.set_grad_enabled(False)

    device = f"cuda:{device_id}"

    torch.cuda.empty_cache()
    gc.collect()

    indexing_function_name = "None"
    if indexing_function is not None:
        indexing_function_name = indexing_function.__name__

    print(f"Autoencoder: {autoencoder_path}")
    print(f"Indexing function: {indexing_function_name}")

    # TODO Function below manipulates the loaded data. If we change that, we can load data once and for all at the top of the file
    data = construct_eval_dataset(custom_functions, n_inputs, models_path=models_path, device=device)
    data, ae_bundle, pgn_strings, encoded_inputs = prep_firing_rate_data(
        autoencoder_path, batch_size, models_path, model_name, data, device, n_inputs, othello
    )

    firing_rate_n_inputs = min(int(n_inputs * 0.5), 1000) * ae_bundle.context_length
    # TODO: Custom thresholds per feature based on max activations
    alive_features_F, max_activations_F = get_firing_features(
        ae_bundle, firing_rate_n_inputs, batch_size, device
    )
    # eval_custom_fn only processes the first n_inputs games, so the true counts
    # (the recall denominator) must be restricted to the same games.
    # NOTE(review): if indexing_function restricts the evaluated positions, these
    # counts still cover all positions of each game - confirm this is intended.
    true_board_states_counts = get_true_board_state_counts(pgn_strings[:n_inputs], device)
    assert true_board_states_counts is not None

    # initialize result dictionary
    n_act_threshs = 10
    results = initialize_results_dict(custom_functions, n_act_threshs, alive_features_F, device)

    # Standard evaluation metrics
    print('do_standard_evals')
    results = do_standard_evals(results, ae_bundle, device)

    # The buffer is only needed for the standard evals; drop the reference before
    # the custom evaluation runs.
    del ae_bundle.buffer

    # Do custom eval metrics
    print('do custom eval metrics')
    results = eval_custom_fn(
        data,
        results,
        n_act_threshs,
        alive_features_F,
        max_activations_F,
        ae_bundle,
        pgn_strings,
        custom_functions,
        encoded_inputs,
        firing_rate_n_inputs,
        indexing_function,
        device
    )

    torch.cuda.empty_cache()
    gc.collect()

    results = get_classification_metrics(results, true_board_states_counts)

    # "<group folder>-<trainer folder>". Stripping trailing slashes first keeps
    # the name correct whether or not the path ends with '/'.
    ae_name = "-".join(autoencoder_path.rstrip('/').split('/')[-2:])
    # NOTE(review): `results` still holds the full per-feature tensors; if only
    # the entries in sweep_result_keys are needed downstream, returning just
    # those would make the hand-off between processes much lighter.
    return ae_name, results

# Fan the (autoencoder, indexing function) pairs out over 16 worker jobs.
# Jobs are assigned round-robin to device ids 0-3, i.e. this assumes 4 visible GPUs.
job_progress = tqdm(param_combinations, desc="Autoencoder loop", total=len(param_combinations))
delayed_jobs = (
    delayed(compute_results)(job_idx % 4, ae_path, idx_fn)
    for job_idx, (ae_path, idx_fn) in enumerate(job_progress)
)
results_list = Parallel(n_jobs=16)(delayed_jobs)
#results_list = [compute_results(i%4, autoencoder_path, indexing_function) for i, (autoencoder_path, indexing_function) in enumerate(tqdm(param_combinations, desc="Autoencoder loop", total=len(param_combinations)))]

#autoencoder_path, indexing_function = param_combinations[1]
#compute_results(0, autoencoder_path, indexing_function)




Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer4/
Indexing function: find_dots_indices


Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer0/
Indexing function: find_dots_indices




Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer8/
Indexing function: find_dots_indices


Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer12/
Indexing function: find_dots_indices


Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer6/
Indexing function: find_dots_indices
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer14/
Indexing function: find_dots_indices
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer1/
Indexing function: find_dots_indices
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer5/
Indexing function: find_dots_indices


Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer11/
Indexing function: find_dots_indices
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer7/
Indexing function: find_dots_indices
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer15/
Indexing function: find_dots_indices
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer9/
Indexing function: find_dots_indices


Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer3/
Indexing function: find_dots_indices
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer13/
Indexing function: find_dots_indices
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer2/
Indexing function: find_dots_indices
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer10/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [00:33<00:00, 237.69it/s]
Collecting features: 100%|██████████| 8000/8000 [00:33<00:00, 238.87it/s]
Collecting features: 100%|██████████| 8000/8000 [00:32<00:00, 244.57it/s]
Collecting features: 100%|██████████| 8000/8000 [00:31<00:00, 252.29it/s]
Collecting features: 100%|██████████| 8000/8000 [00:32<00:00, 245.93it/s]
Collecting features: 100%|██████████| 8000/8000 [00:32<00:00, 246.08it/s]
Collecting features: 100%|██████████| 8000/8000 [00:32<00:00, 245.59it/s]
Collecting features: 100%|██████████| 8000/8000 [00:32<00:00, 246.19it/s]
Collecting features: 100%|██████████| 8000/8000 [00:31<00:00, 250.25it/s]
Collecting features: 100%|██████████| 8000/8000 [00:31<00:00, 250.11it/s]
Collecting features: 100%|██████████| 8000/8000 [00:32<00:00, 247.16it/s]
Collecting features: 100%|██████████| 8000/8000 [00:32<00:00, 245.13it/s]
Collecting features: 100%|██████████| 8000/8000 [00:32<00:00, 246.74it/s]
Collecting features: 100%|██████████| 

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4091 are alive.
do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4094 are alive.
do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4093 are alive.
do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 8136 are alive.


Aggregating statistics:   0%|          | 0/64 [00:00<?, ?it/s]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4091 are alive.
do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4083 are alive.
do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 8131 are alive.


Aggregating statistics:   0%|          | 0/64 [00:00<?, ?it/s]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4091 are alive.
do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4094 are alive.
do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4096 are alive.
do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 8133 are alive.


Aggregating statistics:   0%|          | 0/64 [00:00<?, ?it/s]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 8146 are alive.
do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4091 are alive.
do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 8108 are alive.


Aggregating statistics:   0%|          | 0/64 [00:00<?, ?it/s]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4087 are alive.


Aggregating statistics:   2%|▏         | 1/64 [00:02<02:24,  2.30s/it]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 8140 are alive.


Aggregating statistics: 100%|██████████| 64/64 [03:42<00:00,  3.48s/it]
Aggregating statistics: 100%|██████████| 64/64 [03:42<00:00,  3.48s/it]
Aggregating statistics: 100%|██████████| 64/64 [03:43<00:00,  3.49s/it]
Aggregating statistics: 100%|██████████| 64/64 [03:43<00:00,  3.50s/it]
Aggregating statistics: 100%|██████████| 64/64 [03:45<00:00,  3.53s/it]
Aggregating statistics: 100%|██████████| 64/64 [03:46<00:00,  3.54s/it]
Aggregating statistics: 100%|██████████| 64/64 [03:44<00:00,  3.51s/it]
Aggregating statistics: 100%|██████████| 64/64 [03:45<00:00,  3.52s/it]
Aggregating statistics: 100%|██████████| 64/64 [03:45<00:00,  3.53s/it]
Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s]

[tensor(0.4386, device='cuda:2'), tensor(0.7935, device='cuda:2'), tensor(0.8229, device='cuda:2')]
[tensor(0.9566, device='cuda:2'), tensor(0.7935, device='cuda:2'), tensor(0.8288, device='cuda:2')]
[tensor(0.2318, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.7422, device='cuda:2')]
[tensor(0.6536, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.8125, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer17/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [03:47<00:00,  3.55s/it]
Collecting features:   0%|          | 1/8000 [00:01<3:29:21,  1.57s/it]

[tensor(0.4302, device='cuda:2'), tensor(0.6111, device='cuda:2'), tensor(0.6470, device='cuda:2')]
[tensor(0.8511, device='cuda:2'), tensor(0.6111, device='cuda:2'), tensor(0.6495, device='cuda:2')]
[tensor(0.3958, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.6276, device='cuda:2')]
[tensor(0.6211, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.7812, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer18/
Indexing function: find_dots_indices
[tensor(0.4168, device='cuda:1'), tensor(0.5649, device='cuda:1'), tensor(0.6065, device='cuda:1')]
[tensor(0.8289, device='cuda:1'), tensor(0.5649, device='cuda:1'), tensor(0.6101, device='cuda:1')]
[tensor(0.4388, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.6198, device='cuda:1')]
[tensor(0.6094, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.7604, device='cuda:1')]
Autoencoder

Collecting features:   0%|          | 22/8000 [00:01<06:28, 20.53it/s] 

[tensor(0.4251, device='cuda:0'), tensor(0.8216, device='cuda:0'), tensor(0.8527, device='cuda:0')]
[tensor(0.9717, device='cuda:0'), tensor(0.8216, device='cuda:0'), tensor(0.8559, device='cuda:0')]
[tensor(0.1680, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.6849, device='cuda:0')]
[tensor(0.6341, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.8021, device='cuda:0')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer20/
Indexing function: find_dots_indices


Collecting features:   0%|          | 27/8000 [00:01<04:52, 27.29it/s]]

[tensor(0.3999, device='cuda:0'), tensor(0.5397, device='cuda:0'), tensor(0.5725, device='cuda:0')]
[tensor(0.7927, device='cuda:0'), tensor(0.5397, device='cuda:0'), tensor(0.5754, device='cuda:0')]
[tensor(0.4388, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.5964, device='cuda:0')]
[tensor(0.6133, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.7448, device='cuda:0')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer16/
Indexing function: find_dots_indices


Aggregating statistics:  56%|█████▋    | 36/64 [03:50<02:43,  5.85s/it]

[tensor(0.4191, device='cuda:0'), tensor(0.6771, device='cuda:0'), tensor(0.7049, device='cuda:0')]
[tensor(0.8749, device='cuda:0'), tensor(0.6771, device='cuda:0'), tensor(0.7084, device='cuda:0')]
[tensor(0.3464, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.6875, device='cuda:0')]
[tensor(0.6120, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.7891, device='cuda:0')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer21/
Indexing function: find_dots_indices




[tensor(0.4358, device='cuda:1'), tensor(0.8145, device='cuda:1'), tensor(0.8476, device='cuda:1')]
[tensor(0.9631, device='cuda:1'), tensor(0.8145, device='cuda:1'), tensor(0.8525, device='cuda:1')]
[tensor(0.2161, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.7018, device='cuda:1')]
[tensor(0.6484, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.8008, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer22/
Indexing function: find_dots_indices


Collecting features:   0%|          | 37/8000 [00:01<03:25, 38.73it/s]]

[tensor(0.3840, device='cuda:3'), tensor(0.8273, device='cuda:3'), tensor(0.8522, device='cuda:3')]
[tensor(0.9709, device='cuda:3'), tensor(0.8273, device='cuda:3'), tensor(0.8561, device='cuda:3')]
[tensor(0.1758, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.6536, device='cuda:3')]
[tensor(0.6432, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.7930, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer23/
Indexing function: find_dots_indices


Collecting features:   2%|▏         | 172/8000 [00:03<01:11, 109.62it/s]

[tensor(0.4283, device='cuda:3'), tensor(0.6487, device='cuda:3'), tensor(0.6891, device='cuda:3')]
[tensor(0.8648, device='cuda:3'), tensor(0.6487, device='cuda:3'), tensor(0.6927, device='cuda:3')]
[tensor(0.3477, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.6771, device='cuda:3')]
[tensor(0.6042, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.7839, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer24/
Indexing function: find_dots_indices


Collecting features:   2%|▏         | 167/8000 [00:04<01:29, 87.11it/s]s]

[tensor(0.3789, device='cuda:2'), tensor(0.8284, device='cuda:2'), tensor(0.8547, device='cuda:2')]
[tensor(0.9729, device='cuda:2'), tensor(0.8284, device='cuda:2'), tensor(0.8579, device='cuda:2')]
[tensor(0.1341, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.6185, device='cuda:2')]
[tensor(0.6380, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.7969, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer25/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [00:44<00:00, 179.63it/s]
Collecting features: 100%|██████████| 8000/8000 [00:43<00:00, 184.15it/s] 
Collecting features: 100%|██████████| 8000/8000 [00:59<00:00, 134.03it/s]
Collecting features: 100%|██████████| 8000/8000 [00:59<00:00, 135.53it/s]
Collecting features:  57%|█████▋    | 4553/8000 [00:58<00:47, 72.66it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 8043 are alive.


Collecting features: 100%|██████████| 8000/8000 [00:57<00:00, 139.69it/s]
Collecting features:  53%|█████▎    | 4260/8000 [01:01<00:43, 85.73it/s] 

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 461 are alive.


Aggregating statistics: 100%|██████████| 64/64 [04:50<00:00,  4.53s/it]]
Collecting features:  63%|██████▎   | 5009/8000 [01:04<00:39, 75.76it/s]

[tensor(0.4581, device='cuda:2'), tensor(0.3599, device='cuda:2'), tensor(0.4050, device='cuda:2')]
[tensor(0.6956, device='cuda:2'), tensor(0.3599, device='cuda:2'), tensor(0.4070, device='cuda:2')]
[tensor(0.5625, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.6628, device='cuda:2')]
[tensor(0.6836, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.7839, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer26/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [04:56<00:00,  4.64s/it]]
Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s] 62.51it/s]

[tensor(0.4760, device='cuda:0'), tensor(0.4512, device='cuda:0'), tensor(0.4945, device='cuda:0')]
[tensor(0.7620, device='cuda:0'), tensor(0.4512, device='cuda:0'), tensor(0.4980, device='cuda:0')]
[tensor(0.5221, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.7214, device='cuda:0')]
[tensor(0.6823, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.8008, device='cuda:0')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer27/
Indexing function: find_dots_indices


Aggregating statistics:   0%|          | 0/64 [00:00<?, ?it/s]61.72it/s]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 314 are alive.


Collecting features:  71%|███████   | 5672/8000 [01:11<00:37, 61.54it/s]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 8130 are alive.


Collecting features:  63%|██████▎   | 5020/8000 [01:13<00:47, 62.42it/s]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 866 are alive.


Collecting features: 100%|██████████| 8000/8000 [00:32<00:00, 244.80it/s]
Collecting features: 100%|██████████| 8000/8000 [01:40<00:00, 79.55it/s] 
Collecting features: 100%|██████████| 8000/8000 [01:39<00:00, 80.39it/s] 
Aggregating statistics: 100%|██████████| 64/64 [00:40<00:00,  1.58it/s]]
Collecting features: 100%|██████████| 8000/8000 [01:37<00:00, 81.64it/s]
Collecting features:  88%|████████▊ | 7009/8000 [01:44<00:18, 53.73it/s]

[tensor(0.3189, device='cuda:2'), tensor(0.0933, device='cuda:2'), tensor(0.1106, device='cuda:2')]
[tensor(0.3471, device='cuda:2'), tensor(0.0933, device='cuda:2'), tensor(0.1106, device='cuda:2')]
[tensor(0.1510, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.4141, device='cuda:2')]
[tensor(0.2227, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.4492, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer28/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [05:42<00:00,  5.35s/it]]
Aggregating statistics: 100%|██████████| 64/64 [00:39<00:00,  1.63it/s]]]


do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 18 are alive.
do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 8085 are alive.


Aggregating statistics: 100%|██████████| 64/64 [05:43<00:00,  5.36s/it]s]
Collecting features: 100%|██████████| 8000/8000 [01:56<00:00, 68.77it/s] 
Collecting features: 100%|██████████| 8000/8000 [01:53<00:00, 70.22it/s] 
Collecting features:  18%|█▊        | 1436/8000 [00:13<00:54, 121.05it/s]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 14 are alive.


Collecting features:  69%|██████▊   | 5493/8000 [00:46<00:01, 1747.15it/s]

[tensor(0.3439, device='cuda:0'), tensor(0.1274, device='cuda:0'), tensor(0.1561, device='cuda:0')]
[tensor(0.4013, device='cuda:0'), tensor(0.1274, device='cuda:0'), tensor(0.1561, device='cuda:0')]
[tensor(0.2188, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.4232, device='cuda:0')]
[tensor(0.2578, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.4688, device='cuda:0')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer29/
Indexing function: find_dots_indices
[tensor(0.5352, device='cuda:3'), tensor(0.5426, device='cuda:3'), tensor(0.5782, device='cuda:3')]
[tensor(0.9055, device='cuda:3'), tensor(0.5426, device='cuda:3'), tensor(0.5862, device='cuda:3')]
[tensor(0.4076, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.7539, device='cuda:3')]
[tensor(0.7096, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.8177, device='cuda:3')]
Autoencoder

Collecting features:  23%|██▎       | 1857/8000 [00:15<00:38, 158.34it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 34 are alive.


Collecting features: 100%|██████████| 8000/8000 [00:48<00:00, 165.44it/s] 
Collecting features:  24%|██▍       | 1917/8000 [00:15<00:50, 119.85it/s]

[tensor(0.4684, device='cuda:3'), tensor(0.4091, device='cuda:3'), tensor(0.4512, device='cuda:3')]
[tensor(0.7324, device='cuda:3'), tensor(0.4091, device='cuda:3'), tensor(0.4544, device='cuda:3')]
[tensor(0.5560, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.7044, device='cuda:3')]
[tensor(0.6836, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.7917, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer1/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [05:51<00:00,  5.49s/it]s]]
Aggregating statistics: 100%|██████████| 64/64 [05:52<00:00,  5.50s/it]s]]
Collecting features: 100%|██████████| 8000/8000 [00:05<00:00, 1438.22it/s]
Aggregating statistics:  31%|███▏      | 20/64 [00:10<00:20,  2.18it/s]s]

[tensor(0.4246, device='cuda:1'), tensor(0.3325, device='cuda:1'), tensor(0.3701, device='cuda:1')]
[tensor(0.6464, device='cuda:1'), tensor(0.3325, device='cuda:1'), tensor(0.3720, device='cuda:1')]
[tensor(0.5768, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.6393, device='cuda:1')]
[tensor(0.6797, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.7682, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer2/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [00:52<00:00,  1.23it/s]s]
Aggregating statistics:  33%|███▎      | 21/64 [00:11<00:20,  2.14it/s]s]

[tensor(0.4739, device='cuda:1'), tensor(0.4969, device='cuda:1'), tensor(0.5354, device='cuda:1')]
[tensor(0.7875, device='cuda:1'), tensor(0.4969, device='cuda:1'), tensor(0.5383, device='cuda:1')]
[tensor(0.5260, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.7253, device='cuda:1')]
[tensor(0.6966, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.8086, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer3/
Indexing function: find_dots_indices


Collecting features:  47%|████▋     | 3747/8000 [00:27<00:30, 141.08it/s]

[tensor(0.4515, device='cuda:0'), tensor(0.1905, device='cuda:0'), tensor(0.2356, device='cuda:0')]
[tensor(0.5427, device='cuda:0'), tensor(0.1905, device='cuda:0'), tensor(0.2367, device='cuda:0')]
[tensor(0.2812, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.6380, device='cuda:0')]
[tensor(0.3802, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.6966, device='cuda:0')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer4/
Indexing function: find_dots_indices


Aggregating statistics:   0%|          | 0/64 [00:00<?, ?it/s].33it/s]s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7984 are alive.


Aggregating statistics:  42%|████▏     | 27/64 [00:14<00:17,  2.10it/s]]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 421 are alive.


Collecting features:   5%|▍         | 388/8000 [00:04<00:47, 161.69it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 34 are alive.


Collecting features:  69%|██████▉   | 5533/8000 [00:38<00:20, 118.29it/s]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4050 are alive.


Aggregating statistics: 100%|██████████| 64/64 [00:29<00:00,  2.14it/s]s]
Collecting features:  76%|███████▌  | 6094/8000 [00:28<00:15, 123.27it/s]

[tensor(0.1111, device='cuda:2'), tensor(0.1111, device='cuda:2'), tensor(0.1111, device='cuda:2')]
[tensor(0.1111, device='cuda:2'), tensor(0.1111, device='cuda:2'), tensor(0.1111, device='cuda:2')]
[tensor(0.0508, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.2539, device='cuda:2')]
[tensor(0.0508, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.2617, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer5/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [00:31<00:00,  2.03it/s]s]
Collecting features:   0%|          | 23/8000 [00:03<09:21, 14.20it/s]]s]

[tensor(0.1429, device='cuda:1'), tensor(0.2143, device='cuda:1'), tensor(0.2143, device='cuda:1')]
[tensor(0.2857, device='cuda:1'), tensor(0.2143, device='cuda:1'), tensor(0.2143, device='cuda:1')]
[tensor(0.0326, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.2539, device='cuda:1')]
[tensor(0.0443, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.2721, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer6/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [00:32<00:00,  1.99it/s]] 
Collecting features:  36%|███▌      | 2850/8000 [00:23<00:48, 105.62it/s]

[tensor(0.1176, device='cuda:1'), tensor(0.1471, device='cuda:1'), tensor(0.1471, device='cuda:1')]
[tensor(0.1176, device='cuda:1'), tensor(0.1471, device='cuda:1'), tensor(0.1471, device='cuda:1')]
[tensor(0.0417, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.2539, device='cuda:1')]
[tensor(0.0417, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.2682, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer7/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [00:39<00:00, 203.40it/s]
Collecting features: 100%|██████████| 8000/8000 [00:54<00:00, 146.91it/s]
Collecting features: 100%|██████████| 8000/8000 [00:37<00:00, 213.07it/s]
Aggregating statistics: 100%|██████████| 64/64 [00:33<00:00,  1.90it/s]s]
Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s] 99.70it/s]]

[tensor(0.1176, device='cuda:3'), tensor(0.0882, device='cuda:3'), tensor(0.0882, device='cuda:3')]
[tensor(0.1471, device='cuda:3'), tensor(0.0882, device='cuda:3'), tensor(0.0882, device='cuda:3')]
[tensor(0.0651, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.2760, device='cuda:3')]
[tensor(0.0924, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.3125, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer8/
Indexing function: find_dots_indices


Collecting features:  10%|▉         | 777/8000 [00:02<00:06, 1192.68it/s]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4050 are alive.


Collecting features:  24%|██▍       | 1958/8000 [00:26<01:28, 68.19it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7 are alive.


Collecting features: 100%|██████████| 8000/8000 [00:49<00:00, 160.87it/s]
Aggregating statistics: 100%|██████████| 64/64 [00:46<00:00,  1.38it/s]s]
Collecting features: 100%|██████████| 8000/8000 [01:01<00:00, 130.79it/s]
Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s] 4.19s/it]s]

[tensor(0.2542, device='cuda:3'), tensor(0.0736, device='cuda:3'), tensor(0.0903, device='cuda:3')]
[tensor(0.2637, device='cuda:3'), tensor(0.0736, device='cuda:3'), tensor(0.0903, device='cuda:3')]
[tensor(0.1120, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.3255, device='cuda:3')]
[tensor(0.1341, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.3568, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer9/
Indexing function: find_dots_indices


Collecting features:   2%|▏         | 138/8000 [00:03<01:52, 70.10it/s]s]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4051 are alive.


Collecting features: 100%|██████████| 8000/8000 [00:38<00:00, 210.15it/s]
Collecting features: 100%|██████████| 8000/8000 [00:59<00:00, 133.84it/s]
Collecting features:  32%|███▏      | 2569/8000 [00:22<01:21, 66.26it/s]  

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4063 are alive.


Collecting features:  45%|████▌     | 3623/8000 [00:48<00:56, 77.61it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 27 are alive.


Collecting features: 100%|██████████| 8000/8000 [00:43<00:00, 182.60it/s]
Aggregating statistics: 100%|██████████| 64/64 [00:29<00:00,  2.15it/s]] 
Collecting features:  25%|██▍       | 1993/8000 [00:23<01:04, 92.83it/s]

[tensor(0., device='cuda:0'), tensor(0.1429, device='cuda:0'), tensor(0.1429, device='cuda:0')]
[tensor(0.1429, device='cuda:0'), tensor(0.1429, device='cuda:0'), tensor(0.1429, device='cuda:0')]
[tensor(0., device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.2539, device='cuda:0')]
[tensor(0.0143, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.2617, device='cuda:0')]
do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4073 are alive.


Collecting features:  56%|█████▌    | 4485/8000 [00:58<00:56, 62.10it/s]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4047 are alive.


Aggregating statistics:  14%|█▍        | 9/64 [00:19<02:11,  2.39s/it]/s]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4078 are alive.


Collecting features:  46%|████▋     | 3703/8000 [00:43<01:31, 46.80it/s]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer10/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [00:31<00:00,  2.01it/s]] 
Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s] 82.44it/s]

[tensor(0.3704, device='cuda:1'), tensor(0.1111, device='cuda:1'), tensor(0.1111, device='cuda:1')]
[tensor(0.4074, device='cuda:1'), tensor(0.1111, device='cuda:1'), tensor(0.1111, device='cuda:1')]
[tensor(0.0573, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.2617, device='cuda:1')]
[tensor(0.0573, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.2643, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer11/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [01:38<00:00, 81.00it/s] 
Aggregating statistics: 100%|██████████| 64/64 [03:16<00:00,  3.08s/it]]]
Collecting features:  89%|████████▉ | 7124/8000 [01:20<00:08, 99.43it/s]]

[tensor(0.5127, device='cuda:2'), tensor(0.6015, device='cuda:2'), tensor(0.6328, device='cuda:2')]
[tensor(0.8871, device='cuda:2'), tensor(0.6015, device='cuda:2'), tensor(0.6398, device='cuda:2')]
[tensor(0.4141, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.7695, device='cuda:2')]
[tensor(0.7292, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.8177, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer12/
Indexing function: find_dots_indices


Collecting features:   0%|          | 1/8000 [00:01<4:09:19,  1.87s/it]] 

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4067 are alive.


Collecting features: 100%|██████████| 8000/8000 [01:28<00:00, 90.25it/s] 
Collecting features: 100%|██████████| 8000/8000 [01:43<00:00, 77.09it/s] 
Collecting features:  45%|████▌     | 3612/8000 [01:11<01:11, 61.46it/s]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4078 are alive.


Aggregating statistics:   0%|          | 0/64 [00:00<?, ?it/s]48.54it/s]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4072 are alive.


Aggregating statistics: 100%|██████████| 64/64 [02:32<00:00,  2.38s/it]]]
Aggregating statistics: 100%|██████████| 64/64 [03:38<00:00,  3.42s/it]]
Collecting features:  44%|████▍     | 3559/8000 [01:09<01:25, 51.70it/s] 

[tensor(0.2919, device='cuda:3'), tensor(0.2867, device='cuda:3'), tensor(0.3131, device='cuda:3')]
[tensor(0.5091, device='cuda:3'), tensor(0.2867, device='cuda:3'), tensor(0.3136, device='cuda:3')]
[tensor(0.3828, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.5143, device='cuda:3')]
[tensor(0.5117, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.6875, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer13/
Indexing function: find_dots_indices


Collecting features:  88%|████████▊ | 7041/8000 [00:38<00:12, 78.57it/s]

[tensor(0.5357, device='cuda:0'), tensor(0.5555, device='cuda:0'), tensor(0.5907, device='cuda:0')]
[tensor(0.9010, device='cuda:0'), tensor(0.5555, device='cuda:0'), tensor(0.5982, device='cuda:0')]
[tensor(0.4102, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.7500, device='cuda:0')]
[tensor(0.7122, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.8125, device='cuda:0')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer14/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [00:48<00:00, 165.01it/s]
Collecting features:  25%|██▌       | 2005/8000 [00:26<01:24, 70.92it/s]  

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7790 are alive.


Aggregating statistics: 100%|██████████| 64/64 [02:44<00:00,  2.57s/it]] 
Collecting features:  41%|████▏     | 3301/8000 [00:43<00:48, 96.98it/s]

[tensor(0.2800, device='cuda:2'), tensor(0.3175, device='cuda:2'), tensor(0.3385, device='cuda:2')]
[tensor(0.5267, device='cuda:2'), tensor(0.3175, device='cuda:2'), tensor(0.3393, device='cuda:2')]
[tensor(0.3945, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.5286, device='cuda:2')]
[tensor(0.5404, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.6927, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer15/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [02:09<00:00, 61.55it/s]]
Aggregating statistics: 100%|██████████| 64/64 [02:32<00:00,  2.39s/it]] 
Aggregating statistics:  34%|███▍      | 22/64 [01:04<02:01,  2.88s/it]]]

[tensor(0.2998, device='cuda:0'), tensor(0.2656, device='cuda:0'), tensor(0.2897, device='cuda:0')]
[tensor(0.4913, device='cuda:0'), tensor(0.2656, device='cuda:0'), tensor(0.2899, device='cuda:0')]
[tensor(0.4062, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.5156, device='cuda:0')]
[tensor(0.5208, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.6875, device='cuda:0')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer16/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [02:29<00:00,  2.34s/it]]] 
Aggregating statistics: 100%|██████████| 64/64 [02:49<00:00,  2.65s/it]
Collecting features: 100%|██████████| 8000/8000 [00:55<00:00, 144.71it/s]
Collecting features:   3%|▎         | 222/8000 [00:05<01:19, 97.93it/s] 

[tensor(0.3317, device='cuda:0'), tensor(0.3079, device='cuda:0'), tensor(0.3376, device='cuda:0')]
[tensor(0.5610, device='cuda:0'), tensor(0.3079, device='cuda:0'), tensor(0.3383, device='cuda:0')]
[tensor(0.4180, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.5612, device='cuda:0')]
[tensor(0.5443, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.7109, device='cuda:0')]


Collecting features:  83%|████████▎ | 6628/8000 [02:09<00:30, 44.97it/s]]

[tensor(0.3607, device='cuda:2'), tensor(0.2333, device='cuda:2'), tensor(0.2659, device='cuda:2')]
[tensor(0.5246, device='cuda:2'), tensor(0.2333, device='cuda:2'), tensor(0.2664, device='cuda:2')]
[tensor(0.4076, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.5430, device='cuda:2')]
[tensor(0.5182, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.6940, device='cuda:2')]


Collecting features:  84%|████████▎ | 6699/8000 [02:11<00:36, 35.45it/s] 

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7596 are alive.


Aggregating statistics: 100%|██████████| 64/64 [04:08<00:00,  3.88s/it]]]
Aggregating statistics:  89%|████████▉ | 57/64 [03:54<00:33,  4.78s/it]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer18/
Indexing function: find_dots_indices


Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s] 184.23it/s]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer17/
Indexing function: find_dots_indices


Collecting features:  68%|██████▊   | 5479/8000 [01:09<00:41, 60.79it/s]]

[tensor(0.5363, device='cuda:1'), tensor(0.5790, device='cuda:1'), tensor(0.6172, device='cuda:1')]
[tensor(0.8973, device='cuda:1'), tensor(0.5790, device='cuda:1'), tensor(0.6255, device='cuda:1')]
[tensor(0.4023, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.7656, device='cuda:1')]
[tensor(0.7161, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.8177, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer19/
Indexing function: find_dots_indices


Collecting features:  21%|██▏       | 1715/8000 [00:04<00:07, 804.02it/s]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7849 are alive.


Aggregating statistics: 100%|██████████| 64/64 [02:52<00:00,  2.69s/it]] 
Collecting features: 100%|██████████| 8000/8000 [02:29<00:00, 53.50it/s]]
Collecting features:  78%|███████▊  | 6240/8000 [01:23<00:24, 73.25it/s]]

[tensor(0.3225, device='cuda:1'), tensor(0.2459, device='cuda:1'), tensor(0.2728, device='cuda:1')]
[tensor(0.4947, device='cuda:1'), tensor(0.2459, device='cuda:1'), tensor(0.2730, device='cuda:1')]
[tensor(0.3984, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.5378, device='cuda:1')]
[tensor(0.5117, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.6862, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer20/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [02:50<00:00,  2.67s/it]s]
Collecting features:  46%|████▌     | 3650/8000 [00:35<01:02, 69.45it/s]]

[tensor(0.3404, device='cuda:1'), tensor(0.2869, device='cuda:1'), tensor(0.3195, device='cuda:1')]
[tensor(0.5459, device='cuda:1'), tensor(0.2869, device='cuda:1'), tensor(0.3207, device='cuda:1')]
[tensor(0.3932, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.5365, device='cuda:1')]
[tensor(0.5052, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.6862, device='cuda:1')]


Collecting features: 100%|██████████| 8000/8000 [00:50<00:00, 158.51it/s]
Collecting features: 100%|██████████| 8000/8000 [00:26<00:00, 302.17it/s] 
Collecting features:  87%|████████▋ | 6935/8000 [01:36<00:12, 82.52it/s] 

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7684 are alive.


Collecting features:  18%|█▊        | 1426/8000 [00:28<01:42, 64.41it/s] 

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer21/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [04:31<00:00,  4.25s/it]] 
Collecting features:  97%|█████████▋| 7747/8000 [01:50<00:01, 174.73it/s]

[tensor(0.4964, device='cuda:3'), tensor(0.6146, device='cuda:3'), tensor(0.6477, device='cuda:3')]
[tensor(0.8839, device='cuda:3'), tensor(0.6146, device='cuda:3'), tensor(0.6536, device='cuda:3')]
[tensor(0.4375, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.7812, device='cuda:3')]
[tensor(0.7344, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.8164, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer22/
Indexing function: find_dots_indices


Collecting features:  62%|██████▏   | 4955/8000 [00:42<00:50, 59.76it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7875 are alive.


Aggregating statistics:  14%|█▍        | 9/64 [00:15<01:44,  1.90s/it]s] 

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7985 are alive.


Collecting features: 100%|██████████| 8000/8000 [01:52<00:00, 71.19it/s] 
Collecting features:  36%|███▌      | 2841/8000 [00:32<00:58, 88.90it/s] 

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7835 are alive.


Collecting features: 100%|██████████| 8000/8000 [01:26<00:00, 92.96it/s] 
Aggregating statistics: 100%|██████████| 64/64 [02:33<00:00,  2.40s/it]]]
Aggregating statistics: 100%|██████████| 64/64 [03:02<00:00,  2.86s/it]]]
Collecting features:  32%|███▏      | 2563/8000 [00:38<01:23, 65.25it/s]s]

[tensor(0.3779, device='cuda:2'), tensor(0.2716, device='cuda:2'), tensor(0.3035, device='cuda:2')]
[tensor(0.5661, device='cuda:2'), tensor(0.2716, device='cuda:2'), tensor(0.3043, device='cuda:2')]
[tensor(0.4036, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.5664, device='cuda:2')]
[tensor(0.5221, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.7044, device='cuda:2')]


Aggregating statistics:  53%|█████▎    | 34/64 [02:04<01:22,  2.74s/it]]]

[tensor(0.3137, device='cuda:3'), tensor(0.3317, device='cuda:3'), tensor(0.3607, device='cuda:3')]
[tensor(0.5707, device='cuda:3'), tensor(0.3317, device='cuda:3'), tensor(0.3619, device='cuda:3')]
[tensor(0.3958, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.5651, device='cuda:3')]
[tensor(0.5260, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.7096, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer23/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [01:09<00:00, 115.24it/s]
Collecting features: 100%|██████████| 8000/8000 [01:25<00:00, 93.61it/s] 
Collecting features:  55%|█████▌    | 4405/8000 [00:57<01:06, 54.42it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7958 are alive.


Collecting features:  43%|████▎     | 3403/8000 [00:49<01:07, 68.39it/s]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer24/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [02:57<00:00,  2.78s/it]] 
Collecting features:  52%|█████▏    | 4155/8000 [00:58<00:53, 72.46it/s]]

[tensor(0.4183, device='cuda:3'), tensor(0.2680, device='cuda:3'), tensor(0.3109, device='cuda:3')]
[tensor(0.5988, device='cuda:3'), tensor(0.2680, device='cuda:3'), tensor(0.3124, device='cuda:3')]
[tensor(0.4466, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.5938, device='cuda:3')]
[tensor(0.5573, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.7148, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer25/
Indexing function: find_dots_indices


Aggregating statistics:   0%|          | 0/64 [00:00<?, ?it/s]8.44it/s]]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4088 are alive.


Collecting features:  10%|▉         | 770/8000 [00:17<02:18, 52.28it/s]s]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7986 are alive.


Collecting features: 100%|██████████| 8000/8000 [01:45<00:00, 75.92it/s] 
Collecting features:  15%|█▌        | 1232/8000 [00:26<02:17, 49.08it/s]]

In [9]:
# Echo the collected autoencoder paths as this cell's output so the sweep's
# inputs can be inspected. NOTE(review): the variable is defined in an earlier
# cell not visible here -- presumably one path per trained SAE; confirm.
all_autoencoder_paths

['/project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-09/chess-gated_trainer0/',
 '/project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-09/chess-gated_trainer1/',
 '/project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-09/chess-gated_trainer10/',
 '/project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-09/chess-gated_trainer11/',
 '/project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-09/chess-gated_trainer12/',
 '/project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-09/chess-gated_trainer13/',
 '/project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-09/chess-gated_trainer14/',
 '/project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-09/chess-gated_trainer

In [9]:
# For every (autoencoder name, results) pair, keep only the entries listed in
# `sweep_result_keys` and store them under that autoencoder's name.
for ae_name, results in results_list:
    sweep_results[ae_name] = {
        sweep_key: results[sweep_key] for sweep_key in sweep_result_keys
    }

In [10]:
# Build a table with one row per key of `sweep_results`, order the rows by the
# 'l0' column, and round all values to three decimals before displaying it.
df = (
    pd.DataFrame.from_dict(sweep_results, orient='index')
    .sort_values('l0')
    .round(3)
)
df
# Optional: show only a hand-picked subset of the metric columns.
#df[['frac_variance_explained', 'l0', 'frac_any_board_per_feature_act-nonzero_precision-0.9', 'frac_any_board_per_feature_act-best_precision-0.9', 'frac_any_feature_per_board_act-nonzero_precision-0.9', 'frac_any_feature_per_board_act-best_precision-0.9', 'frac_any_board_per_feature_act-nonzero_recall-0.01', 'frac_any_board_per_feature_act-best_recall-0.01', 'frac_any_feature_per_board_act-nonzero_recall-0.01', 'frac_any_feature_per_board_act-best_recall-0.01', 'frac_any_board_per_feature_act-nonzero_f1-0.01', 'frac_any_board_per_feature_act-best_f1-0.01', 'frac_any_feature_per_board_act-nonzero_f1-0.01', 'frac_any_feature_per_board_act-best_f1-0.01']]
# Optional: persist the table to disk.
#df.to_csv("small_sae_sweep.csv")

Unnamed: 0,l0,frac_variance_explained,cossim,l2_ratio,frac_any_board_per_feature_act-nonzero_precision-0.9,frac_any_board_per_feature_act-best_precision-0.9,frac_any_feature_per_board_act-nonzero_precision-0.9,frac_any_feature_per_board_act-best_precision-0.9,frac_any_board_per_feature_act-nonzero_recall-0.01,frac_any_board_per_feature_act-best_recall-0.01,frac_any_feature_per_board_act-nonzero_recall-0.01,frac_any_feature_per_board_act-best_recall-0.01,frac_any_board_per_feature_act-nonzero_f1-0.01,frac_any_board_per_feature_act-best_f1-0.01,frac_any_feature_per_board_act-nonzero_f1-0.01,frac_any_feature_per_board_act-best_f1-0.01
trainer49,2.906,0.978,0.895,0.886,0.337,0.417,0.316,0.438,0.2,0.2,0.762,0.762,0.233,0.234,0.746,0.772
trainer59,2.969,0.979,0.903,0.894,0.295,0.36,0.322,0.449,0.19,0.19,0.814,0.814,0.221,0.223,0.706,0.73
trainer54,3.031,0.979,0.9,0.883,0.345,0.414,0.341,0.457,0.205,0.205,0.745,0.745,0.242,0.243,0.742,0.771
trainer44,3.031,0.98,0.905,0.889,0.332,0.395,0.332,0.448,0.205,0.205,0.809,0.809,0.238,0.239,0.728,0.746
trainer53,4.375,0.982,0.92,0.91,0.335,0.436,0.376,0.505,0.21,0.21,0.807,0.807,0.249,0.251,0.781,0.799
trainer58,4.438,0.981,0.913,0.908,0.309,0.397,0.324,0.492,0.213,0.213,0.818,0.818,0.248,0.248,0.762,0.783
trainer48,4.469,0.982,0.919,0.912,0.335,0.431,0.345,0.47,0.223,0.223,0.809,0.809,0.256,0.258,0.758,0.792
trainer43,4.656,0.981,0.914,0.911,0.294,0.386,0.335,0.496,0.222,0.222,0.818,0.818,0.246,0.247,0.78,0.801
trainer47,6.844,0.985,0.93,0.928,0.306,0.424,0.329,0.467,0.229,0.229,0.818,0.818,0.26,0.261,0.777,0.797
trainer42,6.875,0.985,0.934,0.919,0.305,0.426,0.332,0.495,0.223,0.223,0.818,0.818,0.253,0.253,0.793,0.805


[tensor(0.3307, device='cuda:0'), tensor(0.3707, device='cuda:0'), tensor(0.4022, device='cuda:0')]
[tensor(0.6056, device='cuda:0'), tensor(0.3707, device='cuda:0'), tensor(0.4029, device='cuda:0')]
[tensor(0.3060, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.6901, device='cuda:0')]
[tensor(0.4492, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.8047, device='cuda:0')]
[tensor(0.3799, device='cuda:2'), tensor(0.3160, device='cuda:2'), tensor(0.3588, device='cuda:2')]
[tensor(0.5892, device='cuda:2'), tensor(0.3160, device='cuda:2'), tensor(0.3596, device='cuda:2')]
[tensor(0.2982, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.7630, device='cuda:2')]
[tensor(0.4427, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.7956, device='cuda:2')]
[tensor(0.3675, device='cuda:2'), tensor(0.2975, device='cuda:2'), tensor(0.3436, device='cuda:2')]
[tensor(0.5673, device='cuda:2'), tensor(0.2975, device='cuda:2'), tensor(0.3436, device='cuda:2')]
