# (Currently chess only) Dataframe comparing SAE statistics

In [1]:
# Imports
import sys
sys.path.append("../")

from tqdm import tqdm
import pickle
import torch
import einops
from datasets import load_dataset
from typing import Callable, Optional
import math
import os
import itertools
import json
import gc
from joblib import Parallel, delayed

import pandas as pd

from dataclasses import dataclass
import torch
from nnsight import NNsight
import json
from typing import Any
from datasets import load_dataset
from einops import rearrange
from jaxtyping import Int, Float, jaxtyped
from torch import Tensor
import os
from tqdm import tqdm
from transformers import GPT2LMHeadModel
from transformer_lens import HookedTransformer

from circuits.dictionary_learning import AutoEncoder
from circuits.chess_utils import encode_string
from circuits.dictionary_learning import ActivationBuffer
from circuits.dictionary_learning.dictionary import AutoEncoder, GatedAutoEncoder
from circuits.dictionary_learning.trainers.gated_anneal import GatedAnnealTrainer
from circuits.dictionary_learning.trainers.gdm import GatedSAETrainer
from circuits.dictionary_learning.trainers.p_anneal import PAnnealTrainer
from circuits.dictionary_learning.trainers.standard import StandardTrainer
from circuits.dictionary_learning.evaluation import evaluate
from circuits.nanogpt_to_hf_transformers import NanogptTokenizer, convert_nanogpt_model
from circuits.eval_sae_as_classifier import (
    initialize_results_dict, 
    get_data_batch, 
    apply_indexing_function,
    construct_eval_dataset,
    construct_othello_dataset,
    prep_firing_rate_data,
)
from circuits.utils import (
    get_model, 
    get_submodule,
    get_ae_bundle,
    collect_activations_batch,
    get_nested_folders,
    get_firing_features,
    to_device,
    AutoEncoderBundle,
)
import circuits.chess_utils as chess_utils
import circuits.othello_utils as othello_utils
import circuits.othello_engine_utils as othello_engine_utils

from circuits.dictionary_learning.evaluation import evaluate

from IPython import embed

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Globals

# Dimension key (from https://medium.com/@NoamShazeer/shape-suffixes-good-coding-style-f836e72e24fd):
# F = features, or the size of a feature minibatch, depending on the context
# B = batch_size
# L = seq length (context length)
# T = thresholds
# R = rows (or cols)
# C = classes for one hot encoding

# NOTE: machine-specific absolute path; change this when running elsewhere.
home_dir = '/project/pi_mccallum_umass_edu/rangell_umass_edu/'
# home_dir already ends with '/', so joining with os.path.join avoids the
# doubled slash that plain string formatting would put into every path.
repo_dir = os.path.join(home_dir, 'chess-gpt-circuits')

#DEVICE = 'cuda:0'
torch.set_grad_enabled(False)  # disable autograd globally for the cells below
batch_size = 32
feature_batch_size = batch_size  # number of features processed per minibatch in eval_custom_fn
n_inputs = 2048 # Length of the eval dataset
GAME = "chess" # "chess" or "othello"

models_path = repo_dir + "/models/"

In [3]:
# Game-specific configuration: which autoencoder groups to sweep, which
# board-state functions to score features against, which base model to use,
# and which indexing functions select the token positions that are evaluated.
# (The dataset itself is constructed later, inside compute_results.)

if GAME not in ("chess", "othello"):
    raise ValueError("Invalid game")

othello = GAME == "othello"

if othello:
    model_name = "Baidicoot/Othello-GPT-Transformer-Lens"
    autoencoder_group_paths = ["/autoencoders/othello_layer0/"]
    # Another group that has been used: "autoencoders/othello_layer5_ef4/"
    custom_functions = [
        # othello_utils.games_batch_no_last_move_to_state_stack_BLRRC is a further option
        othello_utils.games_batch_to_state_stack_BLRRC,
        othello_utils.games_batch_to_state_stack_mine_yours_BLRRC,
    ]
    # Indexing functions are still experimental for Othello
    indexing_functions = [None]
else:
    model_name = "adamkarvonen/8LayerChessGPT2"
    autoencoder_group_paths = ["/autoencoders/group-2024-05-11/"]
    # chess_utils.board_to_pin_state can be appended here as well
    custom_functions = [chess_utils.board_to_piece_state]
    indexing_functions = [chess_utils.find_dots_indices]

## General dataset statistic

This statistic depends only on the dataset, not on the SAE, so it can be computed once after the dataset is loaded.

In [4]:
def get_true_board_state_counts(pgn_strings, device):
    """Count how often each one-hot board state occurs in ``pgn_strings``.

    The state stack is built with ``chess_utils.board_to_piece_state``, one-hot
    encoded using that function's entry in ``chess_utils.config_lookup``, and
    then summed over its first two axes (presumably games and positions, so
    the result is shaped [R, R, C] -- TODO confirm against chess_utils).

    NOTE: the same counts could be gathered during the board_to_piece_state
    evaluation instead of in a separate pass.
    """
    state_fn = chess_utils.board_to_piece_state
    state_stacks = chess_utils.create_state_stacks(pgn_strings, state_fn)
    one_hot_states = chess_utils.state_stack_to_one_hot(
        chess_utils.config_lookup[state_fn.__name__],
        device,
        state_stacks,
    )
    return one_hot_states.sum(dim=(0, 1))

## SAE specific statistic

In [5]:
# Standard evals
def do_standard_evals(results, ae_bundle, device):
    """Run the standard ``evaluate`` metrics for one autoencoder and merge them into ``results``.

    ``evaluate`` is called on ``ae_bundle.ae`` with ``ae_bundle.buffer`` as the
    activation source. Every key/value pair it returns is copied into
    ``results`` (overwriting existing keys), and ``results`` is returned.
    Reads the module-level ``batch_size``.
    """
    eval_kwargs = dict(
        max_len=ae_bundle.context_length,
        # min(n_eval_samples, activation_buffer_out_batch_size) matters
        batch_size=min(512, batch_size),
        io="out",
        device=device,
        # n_batches=1000
    )
    standard_metrics = evaluate(ae_bundle.ae, ae_bundle.buffer, **eval_kwargs)
    results.update(standard_metrics)
    return results

In [6]:
# Evaluation of custom functions
def eval_custom_fn(
    data,
    results,
    n_act_threshs,
    alive_features_F,
    max_activations_F,
    ae_bundle,
    pgn_strings,
    custom_functions,
    encoded_inputs,
    firing_rate_n_inputs,
    indexing_function,
    device
):
    """Accumulate per-threshold, per-feature firing statistics into ``results``.

    For each batch of ``batch_size`` games (``n_inputs`` games in total) the
    activations of the alive features are collected and compared against
    ``n_act_threshs`` thresholds. The thresholds are ``linspace(0, 1)`` scaled
    by each feature's entry in ``max_activations_F``. For every
    (threshold, feature) pair this adds:

    * ``results["on_count"]`` / ``results["off_count"]``: the number of
      positions where the activation is / is not strictly above the threshold;
    * ``results[fn.__name__]["on"]`` / ``["off"]`` for each custom function:
      the board-state tensors summed over the positions where the feature is
      on / off.

    All trackers are updated in place and ``results`` is returned.

    Reads the module-level ``n_inputs``, ``batch_size`` and
    ``feature_batch_size``. ``n_inputs`` must be a multiple of ``batch_size``
    and must not exceed ``len(pgn_strings)``.

    If ``indexing_function`` is not None, activations and board data are first
    passed through ``apply_indexing_function``.
    """
    num_features = len(alive_features_F)
    print(
        f"Out of {ae_bundle.dictionary_size} features, on {firing_rate_n_inputs} activations, {num_features} are alive."
    )

    assert len(pgn_strings) >= n_inputs
    assert n_inputs % batch_size == 0

    n_iters = n_inputs // batch_size
    # We round up to ensure we don't ignore the remainder of features
    num_feature_iters = math.ceil(num_features / feature_batch_size)

    # Per-feature thresholds: fractions in [0, 1] of that feature's max activation
    thresholds_T = torch.linspace(0, 1, n_act_threshs).to(device)
    thresholds_TF11 = einops.repeat(thresholds_T, "T -> T F 1 1", F=num_features)
    max_activations_1F11 = einops.repeat(max_activations_F, "F -> 1 F 1 1")
    thresholds_TF11 = thresholds_TF11 * max_activations_1F11

    for i in tqdm(range(n_iters), desc="Aggregating statistics"):
        start = i * batch_size
        end = (i + 1) * batch_size
        pgn_strings_BL = pgn_strings[start:end]
        encoded_inputs_BL = encoded_inputs[start:end]
        encoded_inputs_BL = torch.tensor(encoded_inputs_BL).to(device)

        batch_data = get_data_batch(data, pgn_strings_BL, start, end, custom_functions, device)

        # The second return value (the encoded token inputs) is not needed here
        all_activations_FBL, _ = collect_activations_batch(
            ae_bundle, encoded_inputs_BL, alive_features_F
        )

        if indexing_function is not None:
            all_activations_FBL, batch_data = apply_indexing_function(
                pgn_strings_BL, all_activations_FBL, batch_data, device, indexing_function
            )
        # For thousands of features, this would be many GB of memory. So, we minibatch.
        for feature in range(num_feature_iters):
            f_start = feature * feature_batch_size
            f_end = min((feature + 1) * feature_batch_size, num_features)
            f_batch_size = f_end - f_start

            activations_FBL = all_activations_FBL[f_start:f_end]

            thresholds_TF11_slice = thresholds_TF11[:, f_start:f_end, :, :]
            # NOTE: from here on F == f_batch_size (the current feature minibatch),
            # not the total number of alive features.

            ### Aggregate batch statistics
            active_indices_TFBL = activations_FBL > thresholds_TF11_slice
            active_counts_TF = einops.reduce(active_indices_TFBL, "T F B L -> T F", "sum")
            off_counts_TF = einops.reduce(~active_indices_TFBL, "T F B L -> T F", "sum")

            results["on_count"][:, f_start:f_end] += active_counts_TF
            results["off_count"][:, f_start:f_end] += off_counts_TF

            for custom_function in custom_functions:
                fn_name = custom_function.__name__
                on_tracker_TFRRC = results[fn_name]["on"]
                off_tracker_TFRRC = results[fn_name]["off"]

                boards_BLRRC = batch_data[fn_name]
                boards_TFBLRRC = einops.repeat(
                    boards_BLRRC,
                    "B L R1 R2 C -> T F B L R1 R2 C",
                    F=f_batch_size,
                    T=thresholds_TF11_slice.shape[0],
                )

                # TODO The next 2 operations consume almost all of the compute. I don't think it will work,
                # but maybe we can only do 1 of these operations?
                active_boards_sum_TFRRC = einops.reduce(
                    boards_TFBLRRC * active_indices_TFBL[:, :, :, :, None, None, None],
                    "T F B L R1 R2 C -> T F R1 R2 C",
                    "sum",
                )
                off_boards_sum_TFRRC = einops.reduce(
                    boards_TFBLRRC * ~active_indices_TFBL[:, :, :, :, None, None, None],
                    "T F B L R1 R2 C -> T F R1 R2 C",
                    "sum",
                )

                # Sliced `+=` updates the tracker objects stored in `results`
                # in place, so nothing needs to be written back.
                on_tracker_TFRRC[:, f_start:f_end, :, :, :] += active_boards_sum_TFRRC
                off_tracker_TFRRC[:, f_start:f_end, :, :, :] += off_boards_sum_TFRRC

    return results

In [7]:
# Precision, recall, and F1

def get_classification_metrics(results, true_board_states_counts):
    """Summarise how well SAE features act as board-state classifiers.

    Treats "feature active above threshold" as a prediction of each
    (row, col, class) board state and computes precision, recall and F1 per
    (threshold, feature, board state) from:

    * ``results['board_to_piece_state']['on']`` -- true positives, [T, F, R, R, C]
    * ``results['on_count']`` -- number of positions the feature was on, [T, F]
    * ``true_board_states_counts`` -- occurrences of each board state, [R, R, C]

    Each metric is thresholded (precision > 0.9, recall > 0.01, F1 > 0.01) and
    class index 6 (the empty-square class, per the original comment) is
    excluded. Four scalar fractions per metric are written into ``results``:

    * ``frac_any_board_per_feature_act-nonzero_*``: fraction of features with
      a high metric on at least one board state, at threshold index 0;
    * ``frac_any_board_per_feature_act-best_*``: same, at any threshold index;
    * ``frac_any_feature_per_board_act-nonzero_*``: fraction of non-empty
      board states for which at least one feature has a high metric, at
      threshold index 0;
    * ``frac_any_feature_per_board_act-best_*``: same, at any threshold index.

    ``results`` is modified in place and returned. The four lists of fractions
    are also printed.
    """
    precision_thresh = 0.9
    recall_thresh = 0.01
    f1_thresh = 0.01
    names = ['precision', 'recall', 'f1']
    threshs = [precision_thresh, recall_thresh, f1_thresh]
    eps = 1e-8
    empty_class_idx = 6  # one-hot class that is dropped as "empty square"

    true_pos_TFRRC = results['board_to_piece_state']['on']
    pos_all_TF = results['on_count']
    true_all_RRC = true_board_states_counts

    # Take the board dimensions from the data instead of hard-coding 8 x 8 x 13.
    n_rows, n_cols, n_classes = true_pos_TFRRC.shape[-3:]
    num_board_states = n_rows * n_cols * (n_classes - 1)  # the empty class is excluded

    # Note that a feature which always fires (piece present/absent) will have a precision of 1
    precision_TFRRC = true_pos_TFRRC / (pos_all_TF[:, :, None, None, None] + eps)
    recall_TFRRC = true_pos_TFRRC / (true_all_RRC[None, None, :, :, :] + eps)
    f1_TFRRC = 2 * (precision_TFRRC * recall_TFRRC) / (precision_TFRRC + recall_TFRRC + eps)
    metrics_TFRRC = [precision_TFRRC, recall_TFRRC, f1_TFRRC]

    # Apply threshold
    high_TFRRC = [metric > thresh for metric, thresh in zip(metrics_TFRRC, threshs)]

    # Drop empty square state counts
    for high in high_TFRRC:
        high[..., empty_class_idx] = False

    ### Fraction of features with high metric on at least one board state
    # Flatten the three board axes and reduce over one dim; unlike passing a
    # tuple of dims to `any`, this also works on older PyTorch versions.
    high_any_board_TF = [high.flatten(start_dim=-3).any(dim=-1) for high in high_TFRRC]

    # Report fraction of all features for count_as_firing_threshold = 0
    frac_any_board_nonzero = [high[0].float().mean() for high in high_any_board_TF]

    # Report fraction of all features for any threshold (choose threshold per feature that maximizes ratio)
    frac_any_board_best = [high.any(dim=0).float().mean() for high in high_any_board_TF]

    ### Fraction of board states that have at least one feature with high metric
    # Check for each board state whether at least one feature has a high metric (using count_as_firing_threshold = 0)
    any_feature_nonzero_RRC = [high[0].any(dim=0) for high in high_TFRRC]

    # Check for each board state whether at least one feature has a high metric (for any count_as_firing threshold)
    any_feature_best_RRC = [high.flatten(start_dim=0, end_dim=1).any(dim=0) for high in high_TFRRC]

    # Fraction of individual board states at least one feature has a high metric
    frac_any_feature_nonzero = [mask.sum() / num_board_states for mask in any_feature_nonzero_RRC]
    frac_any_feature_best = [mask.sum() / num_board_states for mask in any_feature_best_RRC]

    print(frac_any_board_nonzero)
    print(frac_any_board_best)
    print(frac_any_feature_nonzero)
    print(frac_any_feature_best)

    for i, (name, t) in enumerate(zip(names, threshs)):
        results[f'frac_any_board_per_feature_act-nonzero_{name}-{t}'] = frac_any_board_nonzero[i].item()
        results[f'frac_any_board_per_feature_act-best_{name}-{t}'] = frac_any_board_best[i].item()
        results[f'frac_any_feature_per_board_act-nonzero_{name}-{t}'] = frac_any_feature_nonzero[i].item()
        results[f'frac_any_feature_per_board_act-best_{name}-{t}'] = frac_any_feature_best[i].item()

    return results

## Loop over SAEs

In [8]:
# Build the list of (autoencoder path, indexing function) pairs to sweep over.

# true_board_state_counts does not depend on the autoencoder; it could be
# computed once here if the loading of pgn_strings were moved out of
# compute_results.

sweep_results = {}
sweep_result_keys = [
    'l0',
    'frac_variance_explained',
    'cossim',
    'l2_ratio',
    'frac_any_board_per_feature_act-nonzero_precision-0.9',
    'frac_any_board_per_feature_act-best_precision-0.9',
    'frac_any_feature_per_board_act-nonzero_precision-0.9',
    'frac_any_feature_per_board_act-best_precision-0.9',
    'frac_any_board_per_feature_act-nonzero_recall-0.01',
    'frac_any_board_per_feature_act-best_recall-0.01',
    'frac_any_feature_per_board_act-nonzero_recall-0.01',
    'frac_any_feature_per_board_act-best_recall-0.01',
    'frac_any_board_per_feature_act-nonzero_f1-0.01',
    'frac_any_board_per_feature_act-best_f1-0.01',
    'frac_any_feature_per_board_act-nonzero_f1-0.01',
    'frac_any_feature_per_board_act-best_f1-0.01',
]

# Every nested autoencoder folder of every configured group
all_autoencoder_paths = []
for group_path in autoencoder_group_paths:
    all_autoencoder_paths.extend(get_nested_folders(repo_dir + group_path))

param_combinations = list(
    itertools.product(all_autoencoder_paths, indexing_functions)
)

# To inspect the sweep, print each (ae_dir, idx_fn) pair in param_combinations.

#for autoencoder_path, indexing_function in tqdm(param_combinations, desc="Autoencoder loop", total=len(param_combinations)):
def compute_results(device_id, autoencoder_path, indexing_function):
    """Evaluate a single autoencoder on one GPU and return ``(ae_name, results)``.

    Args:
        device_id: CUDA device index; the work runs on ``f"cuda:{device_id}"``.
        autoencoder_path: folder of the autoencoder to evaluate.
        indexing_function: optional function forwarded to ``eval_custom_fn``;
            ``None`` disables indexing.

    Returns:
        ``ae_name``: the last two components of ``autoencoder_path`` joined by
        ``-``, and ``results``: the dictionary filled by ``do_standard_evals``,
        ``eval_custom_fn`` and ``get_classification_metrics``.

    Reads the module-level ``custom_functions``, ``n_inputs``, ``models_path``,
    ``model_name``, ``batch_size`` and ``othello``.
    """
    device = f"cuda:{device_id}"

    torch.cuda.empty_cache()
    gc.collect()

    indexing_function_name = "None"
    if indexing_function is not None:
        indexing_function_name = indexing_function.__name__

    print(f"Autoencoder: {autoencoder_path}")
    print(f"Indexing function: {indexing_function_name}")

    # TODO Function below manipulates the loaded data. If we change that, we can load data once and for all at the top of the file
    data = construct_eval_dataset(custom_functions, n_inputs, models_path=models_path, device=device)
    data, ae_bundle, pgn_strings, encoded_inputs = prep_firing_rate_data(
        autoencoder_path, batch_size, models_path, model_name, data, device, n_inputs, othello
    )

    # Number of activations used to estimate firing rates: at most 1000 games
    # (or half of n_inputs, if that is smaller), times the context length
    firing_rate_n_inputs = min(int(n_inputs * 0.5), 1000) * ae_bundle.context_length
    # TODO: Custom thresholds per feature based on max activations
    alive_features_F, max_activations_F = get_firing_features(
        ae_bundle, firing_rate_n_inputs, batch_size, device
    )
    true_board_states_counts = get_true_board_state_counts(pgn_strings, device)
    assert true_board_states_counts is not None

    # initialize result dictionary
    n_act_threshs = 10
    results = initialize_results_dict(custom_functions, n_act_threshs, alive_features_F, device)

    # Standard evaluation metrics
    print('do_standard_evals')
    results = do_standard_evals(results, ae_bundle, device)

    # The activation buffer is only used by the standard evals; drop it
    # before the custom evaluation.
    del ae_bundle.buffer

    # Do custom eval metrics
    print('do custom eval metrics')
    results = eval_custom_fn(
        data,
        results,
        n_act_threshs,
        alive_features_F,
        max_activations_F,
        ae_bundle,
        pgn_strings,
        custom_functions,
        encoded_inputs,
        firing_rate_n_inputs,
        indexing_function,
        device
    )

    torch.cuda.empty_cache()
    gc.collect()

    results = get_classification_metrics(results, true_board_states_counts)

    # Name the run "<parent folder>-<autoencoder folder>". Stripping trailing
    # slashes first gives the same name whether or not the path ends in '/'.
    ae_name = "-".join(autoencoder_path.rstrip('/').split('/')[-2:])
    # NOTE: the full results dict is returned; callers that only need the
    # scalar summaries can select the entries listed in sweep_result_keys.
    return ae_name, results

# Dispatch one compute_results call per (autoencoder, indexing function) pair
# through joblib; job number k is assigned to GPU k % 4.
# For debugging, run a single configuration serially instead, e.g.
# compute_results(0, *param_combinations[1]).
sweep_jobs = (
    delayed(compute_results)(job_idx % 4, ae_path, idx_fn)
    for job_idx, (ae_path, idx_fn) in enumerate(
        tqdm(param_combinations, desc="Autoencoder loop", total=len(param_combinations))
    )
)
results_list = Parallel(n_jobs=16)(sweep_jobs)




Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer0/
Indexing function: find_dots_indices




Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer11/
Indexing function: find_dots_indices




Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer10/
Indexing function: find_dots_indices
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer12/
Indexing function: find_dots_indices
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer1/
Indexing function: find_dots_indices
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer2/
Indexing function: find_dots_indices
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer4/
Indexing function: find_dots_indices
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_

Collecting features:   0%|          | 1/8000 [00:01<3:11:14,  1.43s/it]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer9/
Indexing function: find_dots_indices
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer8/
Indexing function: find_dots_indices


Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer13/
Indexing function: find_dots_indices
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer5/
Indexing function: find_dots_indices
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer3/
Indexing function: find_dots_indices


Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s] 1161.95it/s]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer15/
Indexing function: find_dots_indices
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer7/
Indexing function: find_dots_indices


Collecting features:  18%|█▊        | 1440/8000 [00:03<00:20, 319.58it/s] 

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer6/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [00:27<00:00, 293.29it/s] 
Collecting features: 100%|██████████| 8000/8000 [00:27<00:00, 286.61it/s]
Collecting features: 100%|██████████| 8000/8000 [00:31<00:00, 257.22it/s]
Collecting features: 100%|██████████| 8000/8000 [00:30<00:00, 258.11it/s]
Collecting features: 100%|██████████| 8000/8000 [00:30<00:00, 264.81it/s]
Collecting features: 100%|██████████| 8000/8000 [00:30<00:00, 261.40it/s]
Collecting features: 100%|██████████| 8000/8000 [00:30<00:00, 260.99it/s]
Collecting features: 100%|██████████| 8000/8000 [00:30<00:00, 260.44it/s]
Collecting features: 100%|██████████| 8000/8000 [00:32<00:00, 246.52it/s]
Collecting features: 100%|██████████| 8000/8000 [00:32<00:00, 246.12it/s]
Collecting features: 100%|██████████| 8000/8000 [00:32<00:00, 245.84it/s]
Collecting features: 100%|██████████| 8000/8000 [00:30<00:00, 262.05it/s]
Collecting features: 100%|██████████| 8000/8000 [00:32<00:00, 243.51it/s]
Collecting features: 100%|██████████|

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4091 are alive.
do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4094 are alive.


Aggregating statistics:   5%|▍         | 3/64 [00:03<01:03,  1.05s/it]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 8137 are alive.
do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4093 are alive.
do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4094 are alive.


Aggregating statistics:   0%|          | 0/64 [00:00<?, ?it/s].07s/it]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4091 are alive.
do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 8150 are alive.


Aggregating statistics:   0%|          | 0/64 [00:00<?, ?it/s].11s/it]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 8139 are alive.
do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4083 are alive.
do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4096 are alive.


Aggregating statistics:   0%|          | 0/64 [00:00<?, ?it/s]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4092 are alive.
do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 8134 are alive.


Aggregating statistics:   0%|          | 0/64 [00:00<?, ?it/s]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4090 are alive.
do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4087 are alive.
do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 8105 are alive.


Aggregating statistics:   2%|▏         | 1/64 [00:02<02:21,  2.25s/it]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 8132 are alive.


Aggregating statistics: 100%|██████████| 64/64 [03:35<00:00,  3.37s/it]
Aggregating statistics: 100%|██████████| 64/64 [03:35<00:00,  3.37s/it]
Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s] 3.20s/it]

[tensor(0.3999, device='cuda:0'), tensor(0.5397, device='cuda:0'), tensor(0.5725, device='cuda:0')]
[tensor(0.7927, device='cuda:0'), tensor(0.5397, device='cuda:0'), tensor(0.5754, device='cuda:0')]
[tensor(0.4388, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.5964, device='cuda:0')]
[tensor(0.6133, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.7422, device='cuda:0')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer16/
Indexing function: find_dots_indices


Aggregating statistics:  53%|█████▎    | 34/64 [03:38<02:58,  5.94s/it]

[tensor(0.3840, device='cuda:3'), tensor(0.8273, device='cuda:3'), tensor(0.8522, device='cuda:3')]
[tensor(0.9707, device='cuda:3'), tensor(0.8273, device='cuda:3'), tensor(0.8564, device='cuda:3')]
[tensor(0.1758, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.6536, device='cuda:3')]
[tensor(0.6445, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.7943, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer17/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [03:41<00:00,  3.46s/it]
Aggregating statistics: 100%|██████████| 64/64 [03:43<00:00,  3.49s/it]]
Aggregating statistics: 100%|██████████| 64/64 [03:43<00:00,  3.49s/it] 
Aggregating statistics: 100%|██████████| 64/64 [03:42<00:00,  3.48s/it]
Aggregating statistics: 100%|██████████| 64/64 [03:45<00:00,  3.52s/it]s] 
Aggregating statistics: 100%|██████████| 64/64 [03:45<00:00,  3.53s/it]
Aggregating statistics: 100%|██████████| 64/64 [03:46<00:00,  3.54s/it]s]


[tensor(0.4251, device='cuda:0'), tensor(0.8216, device='cuda:0'), tensor(0.8527, device='cuda:0')]
[tensor(0.9712, device='cuda:0'), tensor(0.8216, device='cuda:0'), tensor(0.8559, device='cuda:0')]
[tensor(0.1680, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.6849, device='cuda:0')]
[tensor(0.6393, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.8021, device='cuda:0')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer18/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [03:47<00:00,  3.55s/it]
Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s]40.59it/s]

[tensor(0.4191, device='cuda:0'), tensor(0.6771, device='cuda:0'), tensor(0.7049, device='cuda:0')]
[tensor(0.8747, device='cuda:0'), tensor(0.6771, device='cuda:0'), tensor(0.7088, device='cuda:0')]
[tensor(0.3464, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.6875, device='cuda:0')]
[tensor(0.6068, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.7904, device='cuda:0')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer19/
Indexing function: find_dots_indices
[tensor(0.4283, device='cuda:3'), tensor(0.6487, device='cuda:3'), tensor(0.6891, device='cuda:3')]
[tensor(0.8629, device='cuda:3'), tensor(0.6487, device='cuda:3'), tensor(0.6927, device='cuda:3')]
[tensor(0.3477, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.6771, device='cuda:3')]
[tensor(0.5990, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.7812, device='cuda:3')]
Autoencoder

Collecting features:   3%|▎         | 271/8000 [00:10<02:19, 55.52it/s]s]

[tensor(0.4386, device='cuda:2'), tensor(0.7935, device='cuda:2'), tensor(0.8229, device='cuda:2')]
[tensor(0.9549, device='cuda:2'), tensor(0.7935, device='cuda:2'), tensor(0.8288, device='cuda:2')]
[tensor(0.2318, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.7422, device='cuda:2')]
[tensor(0.6523, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.8112, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer21/
Indexing function: find_dots_indices


Collecting features:   0%|          | 29/8000 [00:02<05:35, 23.73it/s]]]]

[tensor(0.3789, device='cuda:2'), tensor(0.8284, device='cuda:2'), tensor(0.8547, device='cuda:2')]
[tensor(0.9741, device='cuda:2'), tensor(0.8284, device='cuda:2'), tensor(0.8574, device='cuda:2')]
[tensor(0.1341, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.6185, device='cuda:2')]
[tensor(0.6354, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.7995, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer22/
Indexing function: find_dots_indices


Collecting features:  34%|███▍      | 2719/8000 [00:15<00:34, 152.40it/s]

[tensor(0.4169, device='cuda:1'), tensor(0.5650, device='cuda:1'), tensor(0.6066, device='cuda:1')]
[tensor(0.8289, device='cuda:1'), tensor(0.5650, device='cuda:1'), tensor(0.6100, device='cuda:1')]
[tensor(0.4388, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.6198, device='cuda:1')]
[tensor(0.6172, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.7578, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer23/
Indexing function: find_dots_indices


Collecting features:  37%|███▋      | 2957/8000 [00:16<00:20, 252.05it/s]

[tensor(0.4358, device='cuda:1'), tensor(0.8145, device='cuda:1'), tensor(0.8476, device='cuda:1')]
[tensor(0.9638, device='cuda:1'), tensor(0.8145, device='cuda:1'), tensor(0.8525, device='cuda:1')]
[tensor(0.2161, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.7018, device='cuda:1')]
[tensor(0.6536, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.8008, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer24/
Indexing function: find_dots_indices


Aggregating statistics:  59%|█████▉    | 38/64 [03:55<02:00,  4.62s/it]]]

[tensor(0.4301, device='cuda:2'), tensor(0.6109, device='cuda:2'), tensor(0.6469, device='cuda:2')]
[tensor(0.8502, device='cuda:2'), tensor(0.6109, device='cuda:2'), tensor(0.6493, device='cuda:2')]
[tensor(0.3958, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.6276, device='cuda:2')]
[tensor(0.6159, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.7760, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer25/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [00:54<00:00, 147.83it/s]
Collecting features: 100%|██████████| 8000/8000 [00:44<00:00, 179.79it/s]
Collecting features: 100%|██████████| 8000/8000 [00:43<00:00, 184.77it/s] 
Collecting features: 100%|██████████| 8000/8000 [00:55<00:00, 143.58it/s]
Collecting features: 100%|██████████| 8000/8000 [00:52<00:00, 151.58it/s] 
Collecting features:  52%|█████▏    | 4150/8000 [01:08<00:48, 79.07it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 8129 are alive.


Collecting features:  48%|████▊     | 3833/8000 [00:59<00:57, 72.66it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 8045 are alive.


Collecting features:  44%|████▍     | 3504/8000 [00:54<01:29, 50.14it/s]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 460 are alive.


Aggregating statistics: 100%|██████████| 64/64 [04:50<00:00,  4.54s/it]]
Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s] 53.04it/s] 

[tensor(0.4580, device='cuda:2'), tensor(0.3598, device='cuda:2'), tensor(0.4050, device='cuda:2')]
[tensor(0.6956, device='cuda:2'), tensor(0.3598, device='cuda:2'), tensor(0.4068, device='cuda:2')]
[tensor(0.5625, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.6628, device='cuda:2')]
[tensor(0.6823, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.7812, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer26/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [04:58<00:00,  4.66s/it]]
Collecting features:  57%|█████▋    | 4523/8000 [01:10<00:41, 84.07it/s] 

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 311 are alive.


Collecting features:   9%|▉         | 757/8000 [00:05<00:43, 166.90it/s]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 868 are alive.


Collecting features:  83%|████████▎ | 6658/8000 [01:11<00:25, 52.14it/s] 

[tensor(0.4760, device='cuda:0'), tensor(0.4511, device='cuda:0'), tensor(0.4944, device='cuda:0')]
[tensor(0.7626, device='cuda:0'), tensor(0.4511, device='cuda:0'), tensor(0.4980, device='cuda:0')]
[tensor(0.5221, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.7214, device='cuda:0')]
[tensor(0.6836, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.8021, device='cuda:0')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer27/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [01:27<00:00, 91.49it/s]]
Collecting features: 100%|██████████| 8000/8000 [01:25<00:00, 93.71it/s] 
Collecting features: 100%|██████████| 8000/8000 [00:31<00:00, 256.19it/s] 
Aggregating statistics: 100%|██████████| 64/64 [00:39<00:00,  1.60it/s]]]
Collecting features:  88%|████████▊ | 7030/8000 [01:53<00:21, 45.86it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7981 are alive.


Collecting features:  85%|████████▍ | 6795/8000 [01:44<00:17, 67.06it/s]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 423 are alive.


Collecting features:  52%|█████▏    | 4198/8000 [00:32<00:55, 68.30it/s]

[tensor(0.3196, device='cuda:2'), tensor(0.0935, device='cuda:2'), tensor(0.1109, device='cuda:2')]
[tensor(0.3478, device='cuda:2'), tensor(0.0935, device='cuda:2'), tensor(0.1109, device='cuda:2')]
[tensor(0.1510, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.4141, device='cuda:2')]
[tensor(0.2214, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.4492, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer28/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [00:38<00:00,  1.66it/s]]
Aggregating statistics:  98%|█████████▊| 63/64 [05:43<00:05,  5.16s/it]]]

[tensor(0.3505, device='cuda:0'), tensor(0.1286, device='cuda:0'), tensor(0.1576, device='cuda:0')]
[tensor(0.4084, device='cuda:0'), tensor(0.1286, device='cuda:0'), tensor(0.1576, device='cuda:0')]
[tensor(0.2214, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.4232, device='cuda:0')]
[tensor(0.2669, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.4701, device='cuda:0')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated/trainer29/
Indexing function: find_dots_indices


Collecting features:  93%|█████████▎| 7451/8000 [01:55<00:10, 53.77it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 18 are alive.


Collecting features: 100%|██████████| 8000/8000 [02:08<00:00, 62.36it/s]]
Aggregating statistics: 100%|██████████| 64/64 [05:47<00:00,  5.42s/it]s]
Aggregating statistics: 100%|██████████| 64/64 [05:48<00:00,  5.44s/it]]]
Aggregating statistics: 100%|██████████| 64/64 [05:49<00:00,  5.46s/it]]]
Aggregating statistics: 100%|██████████| 64/64 [05:50<00:00,  5.47s/it]s]
Collecting features: 100%|██████████| 8000/8000 [02:01<00:00, 65.90it/s] 
Collecting features: 100%|██████████| 8000/8000 [01:55<00:00, 69.50it/s] 
Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s] 1568.08it/s]

[tensor(0.4248, device='cuda:1'), tensor(0.3326, device='cuda:1'), tensor(0.3703, device='cuda:1')]
[tensor(0.6475, device='cuda:1'), tensor(0.3326, device='cuda:1'), tensor(0.3721, device='cuda:1')]
[tensor(0.5768, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.6393, device='cuda:1')]
[tensor(0.6810, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.7695, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer0/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [00:52<00:00,  1.22it/s]/s]
Collecting features: 100%|██████████| 8000/8000 [00:11<00:00, 685.99it/s] 
Collecting features:  48%|████▊     | 3868/8000 [00:20<00:06, 600.56it/s]

[tensor(0.4738, device='cuda:1'), tensor(0.4968, device='cuda:1'), tensor(0.5353, device='cuda:1')]
[tensor(0.7871, device='cuda:1'), tensor(0.4968, device='cuda:1'), tensor(0.5382, device='cuda:1')]
[tensor(0.5260, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.7253, device='cuda:1')]
[tensor(0.6914, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.8086, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer1/
Indexing function: find_dots_indices


Collecting features:  53%|█████▎    | 4239/8000 [00:21<00:07, 520.80it/s]

[tensor(0.5350, device='cuda:3'), tensor(0.5423, device='cuda:3'), tensor(0.5779, device='cuda:3')]
[tensor(0.9044, device='cuda:3'), tensor(0.5423, device='cuda:3'), tensor(0.5859, device='cuda:3')]
[tensor(0.4076, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.7539, device='cuda:3')]
[tensor(0.7135, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.8177, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer2/
Indexing function: find_dots_indices


Collecting features:  55%|█████▌    | 4438/8000 [00:23<00:18, 187.65it/s]

[tensor(0.4685, device='cuda:3'), tensor(0.4091, device='cuda:3'), tensor(0.4513, device='cuda:3')]
[tensor(0.7331, device='cuda:3'), tensor(0.4091, device='cuda:3'), tensor(0.4545, device='cuda:3')]
[tensor(0.5560, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.7044, device='cuda:3')]
[tensor(0.6823, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.7917, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer3/
Indexing function: find_dots_indices


Aggregating statistics:  52%|█████▏    | 33/64 [00:25<00:19,  1.56it/s]s]

[tensor(0.4505, device='cuda:0'), tensor(0.1901, device='cuda:0'), tensor(0.2350, device='cuda:0')]
[tensor(0.5426, device='cuda:0'), tensor(0.1901, device='cuda:0'), tensor(0.2362, device='cuda:0')]
[tensor(0.2878, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.6380, device='cuda:0')]
[tensor(0.3776, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.6966, device='cuda:0')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer4/
Indexing function: find_dots_indices


Collecting features:  66%|██████▌   | 5257/8000 [00:28<00:21, 125.64it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 8085 are alive.


Collecting features:  89%|████████▉ | 7111/8000 [01:03<00:06, 134.23it/s] 

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 14 are alive.


Collecting features:  14%|█▍        | 1135/8000 [00:11<01:06, 102.51it/s]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 33 are alive.


Collecting features: 100%|██████████| 8000/8000 [00:09<00:00, 853.10it/s]
Aggregating statistics:  81%|████████▏ | 52/64 [00:37<00:07,  1.60it/s]s]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 28 are alive.


Collecting features: 100%|██████████| 8000/8000 [01:09<00:00, 115.16it/s]
Aggregating statistics: 100%|██████████| 64/64 [00:29<00:00,  2.18it/s]s]
Aggregating statistics:  33%|███▎      | 21/64 [00:09<00:18,  2.27it/s]s]

[tensor(0.1111, device='cuda:2'), tensor(0.1111, device='cuda:2'), tensor(0.1111, device='cuda:2')]
[tensor(0.1111, device='cuda:2'), tensor(0.1111, device='cuda:2'), tensor(0.1111, device='cuda:2')]
[tensor(0.0508, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.2539, device='cuda:2')]
[tensor(0.0508, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.2617, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer5/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [00:43<00:00, 183.93it/s]
Aggregating statistics: 100%|██████████| 64/64 [00:45<00:00,  1.41it/s]]]
Collecting features:  37%|███▋      | 2994/8000 [00:25<00:35, 142.89it/s]

[tensor(0.2530, device='cuda:3'), tensor(0.0733, device='cuda:3'), tensor(0.0898, device='cuda:3')]
[tensor(0.2600, device='cuda:3'), tensor(0.0733, device='cuda:3'), tensor(0.0898, device='cuda:3')]
[tensor(0.1107, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.3255, device='cuda:3')]
[tensor(0.1263, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.3594, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer6/
Indexing function: find_dots_indices


Aggregating statistics:  61%|██████    | 39/64 [00:17<00:12,  2.05it/s]s]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4047 are alive.


Aggregating statistics:   0%|          | 0/64 [00:00<?, ?it/s]164.74it/s]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 32 are alive.


Collecting features:  84%|████████▍ | 6730/8000 [00:38<00:14, 89.29it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7 are alive.


Aggregating statistics: 100%|██████████| 64/64 [00:30<00:00,  2.11it/s]s]
Aggregating statistics: 100%|██████████| 64/64 [00:31<00:00,  2.05it/s]s]
Collecting features: 100%|██████████| 8000/8000 [00:45<00:00, 177.25it/s]
Aggregating statistics:  91%|█████████ | 58/64 [00:28<00:03,  1.96it/s]s]

[tensor(0.1429, device='cuda:1'), tensor(0.2143, device='cuda:1'), tensor(0.2143, device='cuda:1')]
[tensor(0.2143, device='cuda:1'), tensor(0.2143, device='cuda:1'), tensor(0.2143, device='cuda:1')]
[tensor(0.0326, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.2539, device='cuda:1')]
[tensor(0.0443, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.2682, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer7/
Indexing function: find_dots_indices


Collecting features:  97%|█████████▋| 7746/8000 [00:44<00:02, 117.38it/s]

[tensor(0.1212, device='cuda:1'), tensor(0.1515, device='cuda:1'), tensor(0.1515, device='cuda:1')]
[tensor(0.1212, device='cuda:1'), tensor(0.1515, device='cuda:1'), tensor(0.1515, device='cuda:1')]
[tensor(0.0417, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.2539, device='cuda:1')]
[tensor(0.0417, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.2682, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer8/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [00:45<00:00, 175.90it/s]
Aggregating statistics: 100%|██████████| 64/64 [00:31<00:00,  2.02it/s]]]
Aggregating statistics:  50%|█████     | 32/64 [00:14<00:14,  2.16it/s]s]

[tensor(0.3571, device='cuda:1'), tensor(0.1071, device='cuda:1'), tensor(0.1071, device='cuda:1')]
[tensor(0.3929, device='cuda:1'), tensor(0.1071, device='cuda:1'), tensor(0.1071, device='cuda:1')]
[tensor(0.0573, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.2617, device='cuda:1')]
[tensor(0.0573, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.2643, device='cuda:1')]


Collecting features: 100%|██████████| 8000/8000 [00:49<00:00, 162.97it/s] 
Collecting features: 100%|██████████| 8000/8000 [00:57<00:00, 139.89it/s]]
Collecting features:  76%|███████▌  | 6085/8000 [00:38<00:02, 933.36it/s] 

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer9/
Indexing function: find_dots_indices


Aggregating statistics:  69%|██████▉   | 44/64 [00:20<00:09,  2.20it/s]s] 

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4050 are alive.


Collecting features: 100%|██████████| 8000/8000 [00:35<00:00, 226.17it/s] 
Collecting features:   4%|▍         | 312/8000 [00:04<00:53, 144.56it/s]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4063 are alive.


Aggregating statistics: 100%|██████████| 64/64 [00:30<00:00,  2.12it/s]]]
Collecting features:  95%|█████████▌| 7624/8000 [00:47<00:01, 192.33it/s]]

[tensor(0.1250, device='cuda:3'), tensor(0.0938, device='cuda:3'), tensor(0.0938, device='cuda:3')]
[tensor(0.1562, device='cuda:3'), tensor(0.0938, device='cuda:3'), tensor(0.0938, device='cuda:3')]
[tensor(0.0651, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.2760, device='cuda:3')]
[tensor(0.0924, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.3125, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer10/
Indexing function: find_dots_indices


Collecting features:  97%|█████████▋| 7732/8000 [00:48<00:02, 133.74it/s]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4051 are alive.


Aggregating statistics: 100%|██████████| 64/64 [00:29<00:00,  2.16it/s]s]
Collecting features: 100%|██████████| 8000/8000 [00:48<00:00, 163.39it/s]
Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s] 140.90it/s]

[tensor(0., device='cuda:0'), tensor(0.1429, device='cuda:0'), tensor(0.1429, device='cuda:0')]
[tensor(0.1429, device='cuda:0'), tensor(0.1429, device='cuda:0'), tensor(0.1429, device='cuda:0')]
[tensor(0., device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.2539, device='cuda:0')]
[tensor(0.0143, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.2617, device='cuda:0')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer11/
Indexing function: find_dots_indices


Collecting features:  28%|██▊       | 2257/8000 [00:14<00:45, 127.38it/s]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4050 are alive.


Aggregating statistics:   0%|          | 0/64 [00:00<?, ?it/s]55.56it/s]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4073 are alive.


Collecting features:  71%|███████   | 5668/8000 [00:40<00:27, 84.35it/s]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4067 are alive.


Aggregating statistics: 100%|██████████| 64/64 [03:01<00:00,  2.84s/it]]]
Collecting features:  49%|████▉     | 3932/8000 [00:49<00:46, 87.50it/s]]

[tensor(0.5127, device='cuda:2'), tensor(0.6014, device='cuda:2'), tensor(0.6327, device='cuda:2')]
[tensor(0.8878, device='cuda:2'), tensor(0.6014, device='cuda:2'), tensor(0.6399, device='cuda:2')]
[tensor(0.4141, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.7695, device='cuda:2')]
[tensor(0.7201, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.8164, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer12/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [01:19<00:00, 100.88it/s]
Collecting features: 100%|██████████| 8000/8000 [01:21<00:00, 98.59it/s]
Aggregating statistics: 100%|██████████| 64/64 [03:26<00:00,  3.23s/it]s]
Collecting features:  98%|█████████▊| 7842/8000 [01:21<00:01, 92.40it/s] 

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4078 are alive.
do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4072 are alive.


Collecting features: 100%|██████████| 8000/8000 [01:24<00:00, 94.37it/s] 
Collecting features:  84%|████████▍ | 6744/8000 [01:17<00:08, 140.72it/s]

[tensor(0.5356, device='cuda:0'), tensor(0.5555, device='cuda:0'), tensor(0.5907, device='cuda:0')]
[tensor(0.9016, device='cuda:0'), tensor(0.5555, device='cuda:0'), tensor(0.5979, device='cuda:0')]
[tensor(0.4102, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.7500, device='cuda:0')]
[tensor(0.7135, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.8112, device='cuda:0')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer13/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [01:20<00:00, 99.64it/s] 
Aggregating statistics:   0%|          | 0/64 [00:00<?, ?it/s]54.62it/s]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4078 are alive.


Collecting features:  92%|█████████▏| 7372/8000 [01:31<00:12, 50.90it/s]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7596 are alive.


Aggregating statistics: 100%|██████████| 64/64 [02:25<00:00,  2.28s/it]]
Collecting features: 100%|██████████| 8000/8000 [01:42<00:00, 77.70it/s]
Collecting features:  21%|██        | 1678/8000 [00:35<03:01, 34.91it/s]

[tensor(0.3225, device='cuda:1'), tensor(0.2459, device='cuda:1'), tensor(0.2728, device='cuda:1')]
[tensor(0.4947, device='cuda:1'), tensor(0.2459, device='cuda:1'), tensor(0.2730, device='cuda:1')]
[tensor(0.3984, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.5378, device='cuda:1')]
[tensor(0.5117, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.6862, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer14/
Indexing function: find_dots_indices


Collecting features:  57%|█████▋    | 4528/8000 [01:10<01:07, 51.06it/s]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7684 are alive.


Aggregating statistics: 100%|██████████| 64/64 [02:39<00:00,  2.49s/it]] 
Collecting features:  41%|████▏     | 3318/8000 [01:18<01:56, 40.02it/s]]

[tensor(0.2800, device='cuda:2'), tensor(0.3175, device='cuda:2'), tensor(0.3385, device='cuda:2')]
[tensor(0.5267, device='cuda:2'), tensor(0.3175, device='cuda:2'), tensor(0.3393, device='cuda:2')]
[tensor(0.3945, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.5286, device='cuda:2')]
[tensor(0.5404, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.6927, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer15/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [02:42<00:00,  2.53s/it]]]
Aggregating statistics: 100%|██████████| 64/64 [02:38<00:00,  2.48s/it]]]
Collecting features: 100%|██████████| 8000/8000 [01:47<00:00, 74.62it/s]  
Collecting features:  38%|███▊      | 3059/8000 [00:49<00:24, 205.55it/s]

[tensor(0.3607, device='cuda:2'), tensor(0.2333, device='cuda:2'), tensor(0.2659, device='cuda:2')]
[tensor(0.5246, device='cuda:2'), tensor(0.2333, device='cuda:2'), tensor(0.2664, device='cuda:2')]
[tensor(0.4076, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.5430, device='cuda:2')]
[tensor(0.5182, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.6940, device='cuda:2')]


Collecting features:  44%|████▍     | 3551/8000 [01:24<02:03, 35.96it/s]

[tensor(0.2998, device='cuda:0'), tensor(0.2656, device='cuda:0'), tensor(0.2897, device='cuda:0')]
[tensor(0.4913, device='cuda:0'), tensor(0.2656, device='cuda:0'), tensor(0.2899, device='cuda:0')]
[tensor(0.4062, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.5156, device='cuda:0')]
[tensor(0.5208, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.6875, device='cuda:0')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer16/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [02:42<00:00,  2.53s/it]]] 
Collecting features:  10%|█         | 814/8000 [00:17<02:11, 54.77it/s]]]]

[tensor(0.3317, device='cuda:0'), tensor(0.3079, device='cuda:0'), tensor(0.3376, device='cuda:0')]
[tensor(0.5610, device='cuda:0'), tensor(0.3079, device='cuda:0'), tensor(0.3383, device='cuda:0')]
[tensor(0.4180, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.5612, device='cuda:0')]
[tensor(0.5443, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.7109, device='cuda:0')]


Collecting features:  11%|█▏        | 907/8000 [00:19<02:32, 46.40it/s]]s]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer17/
Indexing function: find_dots_indices


Collecting features:  51%|█████▏    | 4118/8000 [01:39<02:08, 30.14it/s]] 

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7790 are alive.


Aggregating statistics: 100%|██████████| 64/64 [04:30<00:00,  4.23s/it]]]
Collecting features: 100%|██████████| 8000/8000 [00:17<00:00, 445.74it/s]
Collecting features: 100%|██████████| 8000/8000 [01:10<00:00, 113.39it/s]
Collecting features:  56%|█████▋    | 4509/8000 [01:49<01:13, 47.64it/s]

[tensor(0.4966, device='cuda:3'), tensor(0.6148, device='cuda:3'), tensor(0.6479, device='cuda:3')]
[tensor(0.8842, device='cuda:3'), tensor(0.6148, device='cuda:3'), tensor(0.6537, device='cuda:3')]
[tensor(0.4375, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.7812, device='cuda:3')]
[tensor(0.7318, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.8164, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer19/
Indexing function: find_dots_indices


Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s]46.77it/s]]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer18/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [04:13<00:00,  3.96s/it]]]
Collecting features:  25%|██▌       | 2014/8000 [00:40<01:21, 73.45it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7958 are alive.


Collecting features:  65%|██████▍   | 5169/8000 [02:02<01:01, 46.08it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7849 are alive.


Collecting features:  65%|██████▌   | 5216/8000 [02:02<00:43, 63.35it/s] 

[tensor(0.5365, device='cuda:1'), tensor(0.5790, device='cuda:1'), tensor(0.6172, device='cuda:1')]
[tensor(0.8983, device='cuda:1'), tensor(0.5790, device='cuda:1'), tensor(0.6257, device='cuda:1')]
[tensor(0.4023, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.7656, device='cuda:1')]
[tensor(0.7148, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.8177, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer20/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [03:16<00:00,  3.07s/it]]]
Collecting features:  71%|███████   | 5645/8000 [02:09<00:32, 71.65it/s] 

[tensor(0.2919, device='cuda:3'), tensor(0.2867, device='cuda:3'), tensor(0.3131, device='cuda:3')]
[tensor(0.5091, device='cuda:3'), tensor(0.2867, device='cuda:3'), tensor(0.3136, device='cuda:3')]
[tensor(0.3828, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.5143, device='cuda:3')]
[tensor(0.5117, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.6875, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer21/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [03:18<00:00,  3.10s/it]]]
Collecting features: 100%|██████████| 8000/8000 [00:31<00:00, 256.25it/s]
Aggregating statistics: 100%|██████████| 64/64 [02:26<00:00,  2.29s/it]] 
Collecting features:  86%|████████▌ | 6870/8000 [02:23<00:03, 283.99it/s]

[tensor(0.3137, device='cuda:3'), tensor(0.3317, device='cuda:3'), tensor(0.3607, device='cuda:3')]
[tensor(0.5707, device='cuda:3'), tensor(0.3317, device='cuda:3'), tensor(0.3619, device='cuda:3')]
[tensor(0.3958, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.5651, device='cuda:3')]
[tensor(0.5260, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.7096, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer22/
Indexing function: find_dots_indices


Collecting features:  38%|███▊      | 3048/8000 [00:46<00:20, 243.03it/s]

[tensor(0.3779, device='cuda:2'), tensor(0.2716, device='cuda:2'), tensor(0.3035, device='cuda:2')]
[tensor(0.5661, device='cuda:2'), tensor(0.2716, device='cuda:2'), tensor(0.3043, device='cuda:2')]
[tensor(0.4036, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.5664, device='cuda:2')]
[tensor(0.5221, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.7044, device='cuda:2')]


Collecting features: 100%|██████████| 8000/8000 [02:29<00:00, 53.50it/s] 
Collecting features:  63%|██████▎   | 5075/8000 [01:16<00:29, 99.21it/s] 

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer23/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [02:40<00:00,  2.51s/it]]]
Collecting features:  65%|██████▍   | 5188/8000 [01:18<00:47, 59.25it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7986 are alive.


Collecting features:  15%|█▍        | 1183/8000 [00:18<02:25, 46.73it/s]]

[tensor(0.3404, device='cuda:1'), tensor(0.2869, device='cuda:1'), tensor(0.3195, device='cuda:1')]
[tensor(0.5459, device='cuda:1'), tensor(0.2869, device='cuda:1'), tensor(0.3207, device='cuda:1')]
[tensor(0.3932, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.5365, device='cuda:1')]
[tensor(0.5052, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.6862, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer24/
Indexing function: find_dots_indices


Collecting features:  14%|█▍        | 1130/8000 [00:10<01:03, 108.85it/s]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7835 are alive.


Collecting features: 100%|██████████| 8000/8000 [01:15<00:00, 106.60it/s]
Collecting features: 100%|██████████| 8000/8000 [01:38<00:00, 81.45it/s] 
Aggregating statistics: 100%|██████████| 64/64 [02:47<00:00,  2.62s/it]]]
Collecting features: 100%|██████████| 8000/8000 [00:54<00:00, 146.04it/s] 
Collecting features: 100%|██████████| 8000/8000 [01:16<00:00, 104.56it/s]


[tensor(0.4183, device='cuda:3'), tensor(0.2680, device='cuda:3'), tensor(0.3109, device='cuda:3')]
[tensor(0.5988, device='cuda:3'), tensor(0.2680, device='cuda:3'), tensor(0.3124, device='cuda:3')]
[tensor(0.4466, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.5938, device='cuda:3')]
[tensor(0.5573, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.7148, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer25/
Indexing function: find_dots_indices


Aggregating statistics:   0%|          | 0/64 [00:00<?, ?it/s]340.03it/s] 

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7979 are alive.


Collecting features:  26%|██▋       | 2114/8000 [00:32<01:41, 58.10it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7875 are alive.


Collecting features:  63%|██████▎   | 5036/8000 [01:17<00:40, 73.57it/s] 

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4081 are alive.


Collecting features:  65%|██████▌   | 5206/8000 [01:19<00:42, 65.88it/s] 

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7985 are alive.


Collecting features: 100%|██████████| 8000/8000 [00:57<00:00, 139.00it/s]
Collecting features:  56%|█████▋    | 4508/8000 [01:26<01:15, 46.22it/s] 

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4059 are alive.


Collecting features: 100%|██████████| 8000/8000 [01:58<00:00, 67.74it/s] 
Collecting features:  62%|██████▏   | 4999/8000 [01:11<01:03, 47.20it/s] 

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4088 are alive.


Aggregating statistics: 100%|██████████| 64/64 [04:12<00:00,  3.94s/it]]
Aggregating statistics:  52%|█████▏    | 33/64 [02:32<02:30,  4.84s/it]]

[tensor(0.2216, device='cuda:0'), tensor(0.1768, device='cuda:0'), tensor(0.1959, device='cuda:0')]
[tensor(0.3585, device='cuda:0'), tensor(0.1768, device='cuda:0'), tensor(0.1960, device='cuda:0')]
[tensor(0.4388, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.5169, device='cuda:0')]
[tensor(0.5234, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.6966, device='cuda:0')]


Collecting features: 100%|██████████| 8000/8000 [02:15<00:00, 58.86it/s]  
Collecting features: 100%|██████████| 8000/8000 [01:58<00:00, 67.41it/s]
Collecting features:  82%|████████▏ | 6542/8000 [01:40<00:17, 82.78it/s]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer26/
Indexing function: find_dots_indices


Collecting features:  86%|████████▋ | 6901/8000 [01:49<00:23, 46.52it/s]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4063 are alive.


Aggregating statistics:  34%|███▍      | 22/64 [01:46<03:34,  5.10s/it]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4053 are alive.


Collecting features: 100%|██████████| 8000/8000 [02:09<00:00, 61.58it/s] 
Aggregating statistics: 100%|██████████| 64/64 [04:37<00:00,  4.33s/it]]
Aggregating statistics:  70%|███████   | 45/64 [03:25<01:32,  4.88s/it]]

[tensor(0.2266, device='cuda:1'), tensor(0.1581, device='cuda:1'), tensor(0.1786, device='cuda:1')]
[tensor(0.3446, device='cuda:1'), tensor(0.1581, device='cuda:1'), tensor(0.1788, device='cuda:1')]
[tensor(0.4297, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.5065, device='cuda:1')]
[tensor(0.5221, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.6875, device='cuda:1')]


Aggregating statistics:  72%|███████▏  | 46/64 [02:11<00:50,  2.80s/it]s]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4089 are alive.


Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s] 58.34it/s]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer27/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [04:39<00:00,  4.36s/it]] 
Collecting features:  34%|███▍      | 2742/8000 [00:44<01:47, 48.70it/s]

[tensor(0.2451, device='cuda:2'), tensor(0.1442, device='cuda:2'), tensor(0.1653, device='cuda:2')]
[tensor(0.3497, device='cuda:2'), tensor(0.1442, device='cuda:2'), tensor(0.1657, device='cuda:2')]
[tensor(0.4284, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.5104, device='cuda:2')]
[tensor(0.5065, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.6927, device='cuda:2')]


Collecting features:  95%|█████████▌| 7612/8000 [01:48<00:08, 47.18it/s]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer28/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [03:15<00:00,  3.05s/it]]
Collecting features:  47%|████▋     | 3789/8000 [01:02<01:26, 48.68it/s]

[tensor(0.4906, device='cuda:3'), tensor(0.3313, device='cuda:3'), tensor(0.3700, device='cuda:3')]
[tensor(0.7599, device='cuda:3'), tensor(0.3313, device='cuda:3'), tensor(0.3705, device='cuda:3')]
[tensor(0.4766, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.6237, device='cuda:3')]
[tensor(0.6458, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.7734, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer29/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [01:55<00:00, 69.35it/s]
Aggregating statistics: 100%|██████████| 64/64 [04:55<00:00,  4.61s/it]]] 
Collecting features:  58%|█████▊    | 4623/8000 [01:19<01:09, 48.59it/s]]

[tensor(0.2752, device='cuda:2'), tensor(0.1711, device='cuda:2'), tensor(0.1941, device='cuda:2')]
[tensor(0.3973, device='cuda:2'), tensor(0.1711, device='cuda:2'), tensor(0.1945, device='cuda:2')]
[tensor(0.4974, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.5430, device='cuda:2')]
[tensor(0.5807, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.6953, device='cuda:2')]


Collecting features:  19%|█▉        | 1502/8000 [00:17<01:36, 67.12it/s]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4085 are alive.


Aggregating statistics: 100%|██████████| 64/64 [04:59<00:00,  4.68s/it]]]
Collecting features:  24%|██▍       | 1910/8000 [00:25<01:54, 53.08it/s]]

[tensor(0.2948, device='cuda:0'), tensor(0.1303, device='cuda:0'), tensor(0.1593, device='cuda:0')]
[tensor(0.3818, device='cuda:0'), tensor(0.1303, device='cuda:0'), tensor(0.1594, device='cuda:0')]
[tensor(0.4818, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.5729, device='cuda:0')]
[tensor(0.5703, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.7174, device='cuda:0')]


Collecting features:  65%|██████▌   | 5234/8000 [01:31<00:51, 54.19it/s]]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer30/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [03:21<00:00,  3.14s/it]]]
Collecting features:   3%|▎         | 209/8000 [00:06<01:47, 72.67it/s]]]

[tensor(0.4550, device='cuda:1'), tensor(0.3003, device='cuda:1'), tensor(0.3311, device='cuda:1')]
[tensor(0.6911, device='cuda:1'), tensor(0.3003, device='cuda:1'), tensor(0.3316, device='cuda:1')]
[tensor(0.5026, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.6784, device='cuda:1')]
[tensor(0.6510, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.7839, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer32/
Indexing function: find_dots_indices


Collecting features:  73%|███████▎  | 5808/8000 [01:39<00:27, 78.62it/s]]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer31/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [03:05<00:00,  2.90s/it]]]
Collecting features:  37%|███▋      | 2993/8000 [00:44<01:35, 52.70it/s]]

[tensor(0.5421, device='cuda:2'), tensor(0.3733, device='cuda:2'), tensor(0.4166, device='cuda:2')]
[tensor(0.8469, device='cuda:2'), tensor(0.3733, device='cuda:2'), tensor(0.4181, device='cuda:2')]
[tensor(0.4674, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.6146, device='cuda:2')]
[tensor(0.6536, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.7721, device='cuda:2')]


Collecting features: 100%|██████████| 8000/8000 [01:01<00:00, 130.02it/s]
Aggregating statistics:  98%|█████████▊| 63/64 [02:45<00:01,  1.40s/it]]s]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer33/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [02:46<00:00,  2.60s/it]]] 
Collecting features: 100%|██████████| 8000/8000 [00:27<00:00, 293.16it/s] 
Collecting features:  52%|█████▏    | 4137/8000 [01:06<01:05, 58.64it/s]

[tensor(0.4954, device='cuda:2'), tensor(0.3257, device='cuda:2'), tensor(0.3676, device='cuda:2')]
[tensor(0.7404, device='cuda:2'), tensor(0.3257, device='cuda:2'), tensor(0.3681, device='cuda:2')]
[tensor(0.5039, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.7643, device='cuda:2')]
[tensor(0.6315, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.8021, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer34/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [02:10<00:00, 61.43it/s]
Collecting features:  29%|██▉       | 2352/8000 [00:42<02:05, 44.89it/s]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4061 are alive.


Aggregating statistics: 100%|██████████| 64/64 [05:16<00:00,  4.95s/it]] 
Aggregating statistics: 100%|██████████| 64/64 [05:10<00:00,  4.86s/it]]]
Aggregating statistics: 100%|██████████| 64/64 [03:02<00:00,  2.85s/it]s]
Collecting features:  36%|███▌      | 2888/8000 [00:42<01:02, 81.35it/s]]

[tensor(0.4585, device='cuda:0'), tensor(0.3160, device='cuda:0'), tensor(0.3495, device='cuda:0')]
[tensor(0.7115, device='cuda:0'), tensor(0.3160, device='cuda:0'), tensor(0.3500, device='cuda:0')]
[tensor(0.4948, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.6536, device='cuda:0')]
[tensor(0.6367, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.7786, device='cuda:0')]
do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7529 are alive.


Aggregating statistics:  88%|████████▊ | 56/64 [04:51<00:38,  4.86s/it]s]

[tensor(0.3183, device='cuda:0'), tensor(0.1546, device='cuda:0'), tensor(0.1888, device='cuda:0')]
[tensor(0.4194, device='cuda:0'), tensor(0.1546, device='cuda:0'), tensor(0.1890, device='cuda:0')]
[tensor(0.4505, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.5885, device='cuda:0')]
[tensor(0.5534, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.7148, device='cuda:0')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer35/
Indexing function: find_dots_indices


Collecting features:   1%|          | 99/8000 [00:03<02:09, 60.87it/s]s]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4071 are alive.


Collecting features:  17%|█▋        | 1389/8000 [00:24<01:33, 71.01it/s]]

[tensor(0.2560, device='cuda:3'), tensor(0.1303, device='cuda:3'), tensor(0.1555, device='cuda:3')]
[tensor(0.3484, device='cuda:3'), tensor(0.1303, device='cuda:3'), tensor(0.1557, device='cuda:3')]
[tensor(0.4844, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.5195, device='cuda:3')]
[tensor(0.5352, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.6979, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer37/
Indexing function: find_dots_indices


Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s] 122.31it/s]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer36/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [01:11<00:00, 111.29it/s]
Collecting features: 100%|██████████| 8000/8000 [00:38<00:00, 209.42it/s] 
Aggregating statistics: 100%|██████████| 64/64 [05:17<00:00,  4.96s/it]]] 
Aggregating statistics: 100%|██████████| 64/64 [05:23<00:00,  5.05s/it]] 
Collecting features:  37%|███▋      | 2923/8000 [00:26<00:58, 86.96it/s]]

[tensor(0.2602, device='cuda:1'), tensor(0.1872, device='cuda:1'), tensor(0.2076, device='cuda:1')]
[tensor(0.3964, device='cuda:1'), tensor(0.1872, device='cuda:1'), tensor(0.2079, device='cuda:1')]
[tensor(0.4753, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.5339, device='cuda:1')]
[tensor(0.5703, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.7005, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer38/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [01:56<00:00, 68.81it/s] 
Collecting features:  63%|██████▎   | 5004/8000 [01:20<00:33, 90.61it/s] 

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7879 are alive.


Aggregating statistics: 100%|██████████| 64/64 [03:05<00:00,  2.90s/it]s]
Collecting features:  64%|██████▍   | 5148/8000 [01:22<00:53, 53.65it/s]]

[tensor(0.2934, device='cuda:3'), tensor(0.1570, device='cuda:3'), tensor(0.1840, device='cuda:3')]
[tensor(0.4012, device='cuda:3'), tensor(0.1570, device='cuda:3'), tensor(0.1841, device='cuda:3')]
[tensor(0.4727, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.5391, device='cuda:3')]
[tensor(0.5560, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.6901, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer39/
Indexing function: find_dots_indices


Collecting features:   0%|          | 15/8000 [00:02<13:26,  9.90it/s]s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7552 are alive.


Aggregating statistics:  22%|██▏       | 14/64 [00:37<02:07,  2.55s/it]]]

[tensor(0.5571, device='cuda:3'), tensor(0.3788, device='cuda:3'), tensor(0.4290, device='cuda:3')]
[tensor(0.8721, device='cuda:3'), tensor(0.3788, device='cuda:3'), tensor(0.4314, device='cuda:3')]
[tensor(0.4336, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.6159, device='cuda:3')]
[tensor(0.6393, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.7708, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer40/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [05:22<00:00,  5.03s/it]]]
Collecting features: 100%|██████████| 8000/8000 [01:04<00:00, 123.25it/s]
Collecting features: 100%|██████████| 8000/8000 [00:41<00:00, 194.12it/s] 
Collecting features:  52%|█████▏    | 4148/8000 [00:37<00:44, 86.56it/s]]s]

[tensor(0.3560, device='cuda:1'), tensor(0.1527, device='cuda:1'), tensor(0.1889, device='cuda:1')]
[tensor(0.4518, device='cuda:1'), tensor(0.1527, device='cuda:1'), tensor(0.1895, device='cuda:1')]
[tensor(0.4987, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.6094, device='cuda:1')]
[tensor(0.5911, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.7474, device='cuda:1')]


Collecting features:  18%|█▊        | 1440/8000 [00:16<02:04, 52.60it/s]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4078 are alive.


Collecting features:  57%|█████▋    | 4548/8000 [00:57<00:20, 172.36it/s]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer41/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [01:43<00:00, 77.23it/s] 
Collecting features:  36%|███▌      | 2899/8000 [00:21<00:16, 302.69it/s]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7653 are alive.


Collecting features:   9%|▉         | 709/8000 [00:03<00:32, 224.72it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7834 are alive.


Collecting features: 100%|██████████| 8000/8000 [01:13<00:00, 108.91it/s]
Aggregating statistics: 100%|██████████| 64/64 [02:16<00:00,  2.14s/it]]
Collecting features:  83%|████████▎ | 6612/8000 [01:06<00:16, 86.64it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7688 are alive.


Collecting features:  84%|████████▍ | 6709/8000 [01:08<00:25, 50.46it/s]]

[tensor(0.5021, device='cuda:0'), tensor(0.3520, device='cuda:0'), tensor(0.3919, device='cuda:0')]
[tensor(0.7890, device='cuda:0'), tensor(0.3520, device='cuda:0'), tensor(0.3934, device='cuda:0')]
[tensor(0.4583, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.6146, device='cuda:0')]
[tensor(0.6172, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.7656, device='cuda:0')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer42/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [00:44<00:00, 180.47it/s]
Aggregating statistics:   0%|          | 0/64 [00:00<?, ?it/s]58.53it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7987 are alive.


Collecting features: 100%|██████████| 8000/8000 [01:22<00:00, 97.36it/s] 
Aggregating statistics:   0%|          | 0/64 [00:00<?, ?it/s]68.28it/s] 

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7820 are alive.


Collecting features:  70%|███████   | 5614/8000 [01:12<00:36, 64.78it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7857 are alive.


Aggregating statistics: 100%|██████████| 64/64 [02:09<00:00,  2.03s/it]] 
Collecting features: 100%|██████████| 8000/8000 [01:23<00:00, 95.39it/s]  
Collecting features:  87%|████████▋ | 6985/8000 [01:31<00:15, 66.42it/s]

[tensor(0.4821, device='cuda:2'), tensor(0.3095, device='cuda:2'), tensor(0.3430, device='cuda:2')]
[tensor(0.7252, device='cuda:2'), tensor(0.3095, device='cuda:2'), tensor(0.3430, device='cuda:2')]
[tensor(0.5026, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.6549, device='cuda:2')]
[tensor(0.6224, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.7812, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer43/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [01:45<00:00, 75.96it/s] 
Collecting features:  57%|█████▋    | 4552/8000 [01:03<00:46, 73.76it/s]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 19 are alive.


Collecting features:  13%|█▎        | 1011/8000 [00:30<03:19, 35.00it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7870 are alive.


Aggregating statistics: 100%|██████████| 64/64 [02:39<00:00,  2.50s/it]] 
Aggregating statistics: 100%|██████████| 64/64 [00:31<00:00,  2.05it/s]]
Collecting features:  81%|████████  | 6499/8000 [01:56<00:27, 55.59it/s]

[tensor(0.4741, device='cuda:1'), tensor(0.3245, device='cuda:1'), tensor(0.3670, device='cuda:1')]
[tensor(0.7327, device='cuda:1'), tensor(0.3245, device='cuda:1'), tensor(0.3680, device='cuda:1')]
[tensor(0.4831, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.6471, device='cuda:1')]
[tensor(0.6380, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.7747, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer44/
Indexing function: find_dots_indices


Collecting features:  86%|████████▌ | 6886/8000 [01:40<00:23, 47.69it/s]

[tensor(0.3158, device='cuda:2'), tensor(0.1053, device='cuda:2'), tensor(0.1053, device='cuda:2')]
[tensor(0.3158, device='cuda:2'), tensor(0.1053, device='cuda:2'), tensor(0.1053, device='cuda:2')]
[tensor(0.0586, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.2539, device='cuda:2')]
[tensor(0.0625, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.2539, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer45/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [02:02<00:00, 65.48it/s] 
Collecting features: 100%|██████████| 8000/8000 [02:25<00:00, 55.07it/s] 
Aggregating statistics:   0%|          | 0/64 [00:00<?, ?it/s]5.19s/it]] 

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 64 are alive.


Aggregating statistics: 100%|██████████| 64/64 [03:38<00:00,  3.42s/it]]
Collecting features:  27%|██▋       | 2188/8000 [00:41<02:09, 44.95it/s]s]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 39 are alive.


Aggregating statistics: 100%|██████████| 64/64 [02:53<00:00,  2.71s/it]
Collecting features:  31%|███       | 2484/8000 [00:47<02:05, 43.91it/s]]

[tensor(0.3869, device='cuda:2'), tensor(0.1853, device='cuda:2'), tensor(0.2088, device='cuda:2')]
[tensor(0.5322, device='cuda:2'), tensor(0.1853, device='cuda:2'), tensor(0.2095, device='cuda:2')]
[tensor(0.5755, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.7135, device='cuda:2')]
[tensor(0.6706, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.7956, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer46/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [00:56<00:00, 141.28it/s]
Collecting features:  57%|█████▋    | 4534/8000 [01:43<01:21, 42.60it/s]

[tensor(0.5449, device='cuda:3'), tensor(0.3443, device='cuda:3'), tensor(0.3892, device='cuda:3')]
[tensor(0.7933, device='cuda:3'), tensor(0.3443, device='cuda:3'), tensor(0.3901, device='cuda:3')]
[tensor(0.4974, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.7500, device='cuda:3')]
[tensor(0.6680, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.8008, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer47/
Indexing function: find_dots_indices


Collecting features:  10%|█         | 812/8000 [00:20<02:48, 42.67it/s]] 

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 28 are alive.


Aggregating statistics: 100%|██████████| 64/64 [00:33<00:00,  1.91it/s]]
Aggregating statistics: 100%|██████████| 64/64 [00:37<00:00,  1.70it/s]]
Aggregating statistics:  66%|██████▌   | 42/64 [03:16<01:24,  3.83s/it]]

[tensor(0.1026, device='cuda:3'), tensor(0.0769, device='cuda:3'), tensor(0.1026, device='cuda:3')]
[tensor(0.1026, device='cuda:3'), tensor(0.0769, device='cuda:3'), tensor(0.1026, device='cuda:3')]
[tensor(0.0521, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.2617, device='cuda:3')]
[tensor(0.0560, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.3047, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer48/
Indexing function: find_dots_indices


Collecting features:  15%|█▍        | 1162/8000 [00:26<02:31, 45.00it/s]

[tensor(0.0625, device='cuda:0'), tensor(0.0625, device='cuda:0'), tensor(0.0625, device='cuda:0')]
[tensor(0.0938, device='cuda:0'), tensor(0.0625, device='cuda:0'), tensor(0.0625, device='cuda:0')]
[tensor(0.0469, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.2591, device='cuda:0')]
[tensor(0.0521, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.2708, device='cuda:0')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer49/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [00:29<00:00,  2.19it/s]]]
Collecting features:  86%|████████▌ | 6868/8000 [02:34<00:25, 44.66it/s]]

[tensor(0.2500, device='cuda:2'), tensor(0.2500, device='cuda:2'), tensor(0.2500, device='cuda:2')]
[tensor(0.2857, device='cuda:2'), tensor(0.2500, device='cuda:2'), tensor(0.2500, device='cuda:2')]
[tensor(0.0417, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.2617, device='cuda:2')]
[tensor(0.0469, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.2643, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer50/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [00:25<00:00, 310.59it/s]
Aggregating statistics: 100%|██████████| 64/64 [02:59<00:00,  2.80s/it]] 
Collecting features:  94%|█████████▍| 7516/8000 [02:46<00:09, 51.69it/s] 

[tensor(0.4327, device='cuda:2'), tensor(0.1956, device='cuda:2'), tensor(0.2179, device='cuda:2')]
[tensor(0.5909, device='cuda:2'), tensor(0.1956, device='cuda:2'), tensor(0.2185, device='cuda:2')]
[tensor(0.5703, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.6979, device='cuda:2')]
[tensor(0.6888, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.7956, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer51/
Indexing function: find_dots_indices


Aggregating statistics:   0%|          | 0/64 [00:00<?, ?it/s].43it/s]s]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 520 are alive.


Collecting features: 100%|██████████| 8000/8000 [02:05<00:00, 63.68it/s] 
Collecting features: 100%|██████████| 8000/8000 [02:57<00:00, 44.99it/s]
Collecting features:  24%|██▍       | 1918/8000 [00:39<02:08, 47.44it/s] 

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 238 are alive.


Aggregating statistics:   2%|▏         | 1/64 [00:00<00:31,  2.03it/s]s]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 553 are alive.


Aggregating statistics: 100%|██████████| 64/64 [04:47<00:00,  4.50s/it]]
Aggregating statistics: 100%|██████████| 64/64 [00:31<00:00,  2.03it/s]]
Aggregating statistics: 100%|██████████| 64/64 [04:56<00:00,  4.63s/it]] 
Collecting features:  60%|█████▉    | 4797/8000 [01:41<01:30, 35.42it/s]

[tensor(0.4635, device='cuda:0'), tensor(0.2168, device='cuda:0'), tensor(0.2581, device='cuda:0')]
[tensor(0.6197, device='cuda:0'), tensor(0.2168, device='cuda:0'), tensor(0.2591, device='cuda:0')]
[tensor(0.5951, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.8073, device='cuda:0')]
[tensor(0.7057, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.8190, device='cuda:0')]


Collecting features:  24%|██▍       | 1902/8000 [00:40<02:06, 48.19it/s]]

[tensor(0.3365, device='cuda:2'), tensor(0.1058, device='cuda:2'), tensor(0.1385, device='cuda:2')]
[tensor(0.3577, device='cuda:2'), tensor(0.1058, device='cuda:2'), tensor(0.1385, device='cuda:2')]
[tensor(0.1862, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.4297, device='cuda:2')]
[tensor(0.2383, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.4674, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer52/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [04:37<00:00,  4.34s/it]/s]
Collecting features:  63%|██████▎   | 5049/8000 [01:48<01:29, 33.10it/s]]]

[tensor(0.4402, device='cuda:0'), tensor(0.2052, device='cuda:0'), tensor(0.2295, device='cuda:0')]
[tensor(0.6069, device='cuda:0'), tensor(0.2052, device='cuda:0'), tensor(0.2304, device='cuda:0')]
[tensor(0.5703, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.6940, device='cuda:0')]
[tensor(0.6875, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.7930, device='cuda:0')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer54/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [00:05<00:00, 1476.39it/s]
Aggregating statistics: 100%|██████████| 64/64 [04:41<00:00,  4.41s/it]] ]
Collecting features: 100%|██████████| 8000/8000 [01:27<00:00, 91.75it/s]  
Aggregating statistics:  69%|██████▉   | 44/64 [04:05<02:02,  6.15s/it]]]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer53/
Indexing function: find_dots_indices


Collecting features:   8%|▊         | 610/8000 [00:06<01:01, 120.86it/s]]

[tensor(0.3958, device='cuda:3'), tensor(0.1823, device='cuda:3'), tensor(0.2045, device='cuda:3')]
[tensor(0.5348, device='cuda:3'), tensor(0.1823, device='cuda:3'), tensor(0.2046, device='cuda:3')]
[tensor(0.5768, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.7630, device='cuda:3')]
[tensor(0.6875, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.7995, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer55/
Indexing function: find_dots_indices


Collecting features:  89%|████████▊ | 7084/8000 [02:00<00:08, 106.94it/s]]

[tensor(0.4332, device='cuda:3'), tensor(0.1894, device='cuda:3'), tensor(0.2132, device='cuda:3')]
[tensor(0.5802, device='cuda:3'), tensor(0.1894, device='cuda:3'), tensor(0.2134, device='cuda:3')]
[tensor(0.5781, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.7227, device='cuda:3')]
[tensor(0.6810, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.8021, device='cuda:3')]


Collecting features: 100%|██████████| 8000/8000 [00:07<00:00, 1083.31it/s]
Collecting features: 100%|██████████| 8000/8000 [02:06<00:00, 63.18it/s] 
Aggregating statistics: 100%|██████████| 64/64 [00:34<00:00,  1.83it/s]s]
Collecting features:  74%|███████▍  | 5957/8000 [01:15<00:17, 118.17it/s]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 32 are alive.
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer56/
Indexing function: find_dots_indices


Aggregating statistics:  14%|█▍        | 9/64 [00:03<00:23,  2.37it/s]s]s]

[tensor(0.2017, device='cuda:3'), tensor(0.0756, device='cuda:3'), tensor(0.1008, device='cuda:3')]
[tensor(0.2269, device='cuda:3'), tensor(0.0756, device='cuda:3'), tensor(0.1008, device='cuda:3')]
[tensor(0.1016, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.3411, device='cuda:3')]
[tensor(0.1419, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.3776, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer57/
Indexing function: find_dots_indices


Collecting features:   0%|          | 1/8000 [00:00<1:24:29,  1.58it/s]s]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 319 are alive.


Collecting features: 100%|██████████| 8000/8000 [00:06<00:00, 1195.01it/s]
Collecting features: 100%|██████████| 8000/8000 [00:07<00:00, 1120.70it/s]
Collecting features: 100%|██████████| 8000/8000 [01:26<00:00, 92.76it/s] 
Aggregating statistics:  25%|██▌       | 16/64 [00:07<00:22,  2.13it/s]]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 335 are alive.


Aggregating statistics:  75%|███████▌  | 48/64 [03:31<00:39,  2.48s/it]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 50 are alive.


Collecting features: 100%|██████████| 8000/8000 [00:40<00:00, 198.40it/s]
Collecting features:  15%|█▌        | 1217/8000 [00:34<02:58, 37.91it/s]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 47 are alive.


Aggregating statistics: 100%|██████████| 64/64 [00:26<00:00,  2.42it/s]]
Collecting features:  17%|█▋        | 1392/8000 [00:38<02:53, 38.12it/s]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 49 are alive.


Aggregating statistics:  45%|████▌     | 29/64 [00:15<00:19,  1.84it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 72 are alive.


Aggregating statistics: 100%|██████████| 64/64 [01:03<00:00,  1.00it/s]]
Aggregating statistics:  80%|███████▉  | 51/64 [00:26<00:06,  2.10it/s]s]

[tensor(0.1250, device='cuda:2'), tensor(0.0938, device='cuda:2'), tensor(0.0938, device='cuda:2')]
[tensor(0.1562, device='cuda:2'), tensor(0.0938, device='cuda:2'), tensor(0.0938, device='cuda:2')]
[tensor(0.0560, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.2539, device='cuda:2')]
[tensor(0.0560, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.2539, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer58/
Indexing function: find_dots_indices


Collecting features:  21%|██▏       | 1716/8000 [00:44<02:28, 42.43it/s]

[tensor(0.3834, device='cuda:1'), tensor(0.1501, device='cuda:1'), tensor(0.1754, device='cuda:1')]
[tensor(0.4394, device='cuda:1'), tensor(0.1501, device='cuda:1'), tensor(0.1754, device='cuda:1')]
[tensor(0.2435, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.4987, device='cuda:1')]
[tensor(0.2891, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.5547, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-gated_anneal/trainer59/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [00:32<00:00,  1.96it/s]]]
Collecting features:  54%|█████▍    | 4300/8000 [01:41<01:28, 41.60it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 4 are alive.


Collecting features:  54%|█████▍    | 4320/8000 [01:41<01:28, 41.57it/s]

[tensor(0.3009, device='cuda:3'), tensor(0.1442, device='cuda:3'), tensor(0.1755, device='cuda:3')]
[tensor(0.3668, device='cuda:3'), tensor(0.1442, device='cuda:3'), tensor(0.1755, device='cuda:3')]
[tensor(0.1458, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.4427, device='cuda:3')]
[tensor(0.2122, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.4935, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer0/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [00:30<00:00,  2.08it/s]s] 
Collecting features:   4%|▍         | 317/8000 [00:10<03:07, 41.01it/s]]]]

[tensor(0.2400, device='cuda:0'), tensor(0.1000, device='cuda:0'), tensor(0.1000, device='cuda:0')]
[tensor(0.2800, device='cuda:0'), tensor(0.1000, device='cuda:0'), tensor(0.1000, device='cuda:0')]
[tensor(0.0859, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.2591, device='cuda:0')]
[tensor(0.1029, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.2682, device='cuda:0')]


Aggregating statistics: 100%|██████████| 64/64 [00:32<00:00,  1.95it/s]]s]
Collecting features: 100%|██████████| 8000/8000 [00:05<00:00, 1363.28it/s]
Collecting features:   6%|▌         | 492/8000 [00:14<02:22, 52.84it/s]]

[tensor(0.1343, device='cuda:3'), tensor(0.0388, device='cuda:3'), tensor(0.0418, device='cuda:3')]
[tensor(0.1343, device='cuda:3'), tensor(0.0388, device='cuda:3'), tensor(0.0418, device='cuda:3')]
[tensor(0.0990, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.2552, device='cuda:3')]
[tensor(0.1133, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.2552, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer2/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [00:26<00:00,  2.40it/s]]]
Collecting features:  60%|██████    | 4830/8000 [01:55<01:17, 40.83it/s]]

[tensor(0.1277, device='cuda:2'), tensor(0.1277, device='cuda:2'), tensor(0.1277, device='cuda:2')]
[tensor(0.1489, device='cuda:2'), tensor(0.1277, device='cuda:2'), tensor(0.1277, device='cuda:2')]
[tensor(0.0677, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.2539, device='cuda:2')]
[tensor(0.0677, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.3138, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer3/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [00:27<00:00,  2.37it/s]]]
Aggregating statistics:  53%|█████▎    | 34/64 [00:16<00:13,  2.16it/s]]]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer1/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [04:14<00:00,  3.98s/it]s]
Collecting features: 100%|██████████| 8000/8000 [03:02<00:00, 43.88it/s]]]
Collecting features:  11%|█         | 865/8000 [00:23<02:55, 40.70it/s]]]]

[tensor(0.0694, device='cuda:3'), tensor(0.0278, device='cuda:3'), tensor(0.0417, device='cuda:3')]
[tensor(0.0972, device='cuda:3'), tensor(0.0278, device='cuda:3'), tensor(0.0417, device='cuda:3')]
[tensor(0.0716, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.2578, device='cuda:3')]
[tensor(0.0807, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.2578, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer4/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [00:31<00:00,  2.01it/s]/s]
Collecting features:   2%|▏         | 130/8000 [00:06<03:09, 41.62it/s]/s]

[tensor(0.4731, device='cuda:0'), tensor(0.1997, device='cuda:0'), tensor(0.2286, device='cuda:0')]
[tensor(0.6159, device='cuda:0'), tensor(0.1997, device='cuda:0'), tensor(0.2291, device='cuda:0')]
[tensor(0.6016, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.7734, device='cuda:0')]
[tensor(0.7083, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.8034, device='cuda:0')]


Collecting features: 100%|██████████| 8000/8000 [00:05<00:00, 1540.98it/s]
Aggregating statistics:  72%|███████▏  | 46/64 [00:21<00:07,  2.34it/s]

[tensor(0.0612, device='cuda:0'), tensor(0.0816, device='cuda:0'), tensor(0.0816, device='cuda:0')]
[tensor(0.0612, device='cuda:0'), tensor(0.0816, device='cuda:0'), tensor(0.0816, device='cuda:0')]
[tensor(0.0417, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.2539, device='cuda:0')]
[tensor(0.0417, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.2539, device='cuda:0')]


Collecting features:  96%|█████████▌| 7661/8000 [00:04<00:00, 1981.02it/s]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4064 are alive.


Collecting features: 100%|██████████| 8000/8000 [00:04<00:00, 1644.27it/s]
Collecting features: 100%|██████████| 8000/8000 [00:31<00:00, 250.16it/s]
Collecting features: 100%|██████████| 8000/8000 [00:17<00:00, 467.52it/s] 
Collecting features:  39%|███▉      | 3102/8000 [01:18<01:54, 42.60it/s]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer5/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [00:28<00:00,  2.22it/s]]
Collecting features:  17%|█▋        | 1396/8000 [00:35<02:38, 41.68it/s]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer6/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [05:49<00:00,  5.46s/it]]
Collecting features:  70%|███████   | 5605/8000 [02:13<00:46, 51.70it/s]s]

[tensor(0., device='cuda:0'), tensor(0.2500, device='cuda:0'), tensor(0.2500, device='cuda:0')]
[tensor(0.2500, device='cuda:0'), tensor(0.2500, device='cuda:0'), tensor(0.2500, device='cuda:0')]
[tensor(0., device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.2617, device='cuda:0')]
[tensor(0.0104, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.2786, device='cuda:0')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer7/
Indexing function: find_dots_indices


Collecting features:  90%|█████████ | 7225/8000 [00:05<00:00, 1621.33it/s]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 6 are alive.


Collecting features: 100%|██████████| 8000/8000 [00:05<00:00, 1477.37it/s]
Collecting features: 100%|██████████| 8000/8000 [00:05<00:00, 1442.09it/s]
Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s]50.71it/s]]

[tensor(0.4024, device='cuda:1'), tensor(0.1955, device='cuda:1'), tensor(0.2181, device='cuda:1')]
[tensor(0.5632, device='cuda:1'), tensor(0.1955, device='cuda:1'), tensor(0.2187, device='cuda:1')]
[tensor(0.5703, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.7109, device='cuda:1')]
[tensor(0.6979, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.7917, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer8/
Indexing function: find_dots_indices


Collecting features:   2%|▏         | 169/8000 [00:05<02:13, 58.51it/s]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4009 are alive.


Aggregating statistics:   0%|          | 0/64 [00:00<?, ?it/s]1.82it/s]]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 1764 are alive.


Collecting features:   4%|▍         | 310/8000 [00:07<02:29, 51.50it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 34 are alive.


Collecting features:  24%|██▍       | 1905/8000 [00:05<00:19, 311.32it/s]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 3677 are alive.


Collecting features:  30%|███       | 2415/8000 [00:55<01:53, 49.12it/s]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4086 are alive.


Aggregating statistics:  42%|████▏     | 27/64 [00:11<00:16,  2.28it/s]]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4071 are alive.


Aggregating statistics: 100%|██████████| 64/64 [06:07<00:00,  5.74s/it]]]
Aggregating statistics: 100%|██████████| 64/64 [06:01<00:00,  5.64s/it]]]
Collecting features: 100%|██████████| 8000/8000 [00:28<00:00, 276.30it/s]
Aggregating statistics: 100%|██████████| 64/64 [00:32<00:00,  1.98it/s]s]
Collecting features:  28%|██▊       | 2217/8000 [00:36<00:26, 214.41it/s]

[tensor(0.4753, device='cuda:1'), tensor(0.2106, device='cuda:1'), tensor(0.2373, device='cuda:1')]
[tensor(0.6463, device='cuda:1'), tensor(0.2106, device='cuda:1'), tensor(0.2379, device='cuda:1')]
[tensor(0.5547, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.6693, device='cuda:1')]
[tensor(0.6875, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.7865, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer9/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [02:49<00:00, 47.19it/s] 
Aggregating statistics: 100%|██████████| 64/64 [00:30<00:00,  2.11it/s]s]
Collecting features:  36%|███▌      | 2848/8000 [00:38<00:24, 206.18it/s]

[tensor(0.0294, device='cuda:0'), tensor(0.0588, device='cuda:0'), tensor(0.0588, device='cuda:0')]
[tensor(0.0588, device='cuda:0'), tensor(0.0588, device='cuda:0'), tensor(0.0588, device='cuda:0')]
[tensor(0.0417, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.2539, device='cuda:0')]
[tensor(0.0417, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.2539, device='cuda:0')]


Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s] 245.67it/s]

[tensor(0., device='cuda:1'), tensor(0.3333, device='cuda:1'), tensor(0.3333, device='cuda:1')]
[tensor(0.1667, device='cuda:1'), tensor(0.3333, device='cuda:1'), tensor(0.3333, device='cuda:1')]
[tensor(0., device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.2539, device='cuda:1')]
[tensor(0.0091, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.2656, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer11/
Indexing function: find_dots_indices


Collecting features:  79%|███████▉  | 6311/8000 [02:01<00:10, 155.86it/s]

[tensor(0.5279, device='cuda:1'), tensor(0.2399, device='cuda:1'), tensor(0.2793, device='cuda:1')]
[tensor(0.6921, device='cuda:1'), tensor(0.2399, device='cuda:1'), tensor(0.2804, device='cuda:1')]
[tensor(0.5990, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.8073, device='cuda:1')]
[tensor(0.7148, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.8177, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer10/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [02:09<00:00, 61.88it/s] 
Collecting features:  67%|██████▋   | 5358/8000 [00:49<00:08, 300.46it/s]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 3611 are alive.


Collecting features:  75%|███████▍  | 5977/8000 [00:52<00:09, 209.88it/s]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer12/
Indexing function: find_dots_indices


Aggregating statistics:   0%|          | 0/64 [00:00<?, ?it/s]283.18it/s]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 301 are alive.


Collecting features: 100%|██████████| 8000/8000 [01:31<00:00, 87.86it/s] 
Collecting features: 100%|██████████| 8000/8000 [01:14<00:00, 107.18it/s]
Collecting features: 100%|██████████| 8000/8000 [00:58<00:00, 136.73it/s]
Collecting features: 100%|██████████| 8000/8000 [00:21<00:00, 365.76it/s] 
Collecting features:  80%|███████▉  | 6399/8000 [00:26<00:10, 146.73it/s]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 38 are alive.


Collecting features:  73%|███████▎  | 5862/8000 [00:30<00:03, 616.65it/s] 

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 39 are alive.


Collecting features: 100%|██████████| 8000/8000 [00:34<00:00, 229.65it/s]
Aggregating statistics:  38%|███▊      | 24/64 [00:09<00:16,  2.36it/s]s]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 2844 are alive.


Aggregating statistics:   0%|          | 0/64 [00:00<?, ?it/s]2.32it/s]s]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 3973 are alive.


Collecting features:  27%|██▋       | 2129/8000 [00:24<01:40, 58.63it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7745 are alive.


Collecting features: 100%|██████████| 8000/8000 [00:42<00:00, 189.60it/s]
Aggregating statistics: 100%|██████████| 64/64 [01:16<00:00,  1.20s/it]]
Aggregating statistics:  95%|█████████▌| 61/64 [00:34<00:02,  1.19it/s]s]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 2741 are alive.


Aggregating statistics:  98%|█████████▊| 63/64 [00:36<00:00,  1.18it/s]s]

[tensor(0.3651, device='cuda:2'), tensor(0.1412, device='cuda:2'), tensor(0.1740, device='cuda:2')]
[tensor(0.4229, device='cuda:2'), tensor(0.1412, device='cuda:2'), tensor(0.1746, device='cuda:2')]
[tensor(0.3841, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.6042, device='cuda:2')]
[tensor(0.4310, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.6589, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer13/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [00:37<00:00,  1.72it/s]s]
Collecting features:   2%|▏         | 138/8000 [00:04<02:14, 58.59it/s]s]

[tensor(0.2890, device='cuda:1'), tensor(0.0465, device='cuda:1'), tensor(0.0532, device='cuda:1')]
[tensor(0.2957, device='cuda:1'), tensor(0.0465, device='cuda:1'), tensor(0.0532, device='cuda:1')]
[tensor(0.0951, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.2617, device='cuda:1')]
[tensor(0.1289, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.2982, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer14/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [00:32<00:00,  1.95it/s]]]
Collecting features:   5%|▍         | 378/8000 [00:08<02:14, 56.87it/s]] 

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 8047 are alive.


Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s]54.97it/s]s]

[tensor(0.1579, device='cuda:1'), tensor(0.1053, device='cuda:1'), tensor(0.1053, device='cuda:1')]
[tensor(0.1579, device='cuda:1'), tensor(0.1053, device='cuda:1'), tensor(0.1053, device='cuda:1')]
[tensor(0.0651, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.2539, device='cuda:1')]
[tensor(0.0651, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.2839, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer15/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [00:35<00:00,  1.81it/s]] 
Aggregating statistics: 100%|██████████| 64/64 [01:54<00:00,  1.80s/it]]
Collecting features:  20%|█▉        | 1571/8000 [00:14<01:42, 62.79it/s]s]

[tensor(0.5367, device='cuda:2'), tensor(0.5832, device='cuda:2'), tensor(0.6410, device='cuda:2')]
[tensor(0.8937, device='cuda:2'), tensor(0.5832, device='cuda:2'), tensor(0.6430, device='cuda:2')]
[tensor(0.3529, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.7383, device='cuda:2')]
[tensor(0.6263, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.8138, device='cuda:2')]


Collecting features: 100%|██████████| 8000/8000 [00:55<00:00, 143.14it/s] 
Collecting features:  14%|█▍        | 1107/8000 [00:21<02:05, 54.99it/s]

[tensor(0.0513, device='cuda:1'), tensor(0.0769, device='cuda:1'), tensor(0.0769, device='cuda:1')]
[tensor(0.1026, device='cuda:1'), tensor(0.0769, device='cuda:1'), tensor(0.0769, device='cuda:1')]
[tensor(0.0417, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.3190, device='cuda:1')]
[tensor(0.0651, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.3555, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer16/
Indexing function: find_dots_indices


Collecting features:  14%|█▍        | 1100/8000 [00:20<02:00, 57.50it/s]] 

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer17/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [00:13<00:00, 578.16it/s]
Collecting features:  24%|██▍       | 1917/8000 [00:35<02:04, 48.89it/s]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 6617 are alive.


Aggregating statistics: 100%|██████████| 64/64 [01:57<00:00,  1.83s/it]] 
Aggregating statistics: 100%|██████████| 64/64 [02:05<00:00,  1.96s/it]]]
Collecting features:  29%|██▉       | 2332/8000 [00:44<01:12, 78.06it/s]]

[tensor(0.5064, device='cuda:0'), tensor(0.2665, device='cuda:0'), tensor(0.3266, device='cuda:0')]
[tensor(0.6554, device='cuda:0'), tensor(0.2665, device='cuda:0'), tensor(0.3283, device='cuda:0')]
[tensor(0.4596, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.7891, device='cuda:0')]
[tensor(0.5885, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.8125, device='cuda:0')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer18/
Indexing function: find_dots_indices


Collecting features:  14%|█▎        | 1090/8000 [00:16<01:10, 97.34it/s] 

[tensor(0.5612, device='cuda:3'), tensor(0.4724, device='cuda:3'), tensor(0.5278, device='cuda:3')]
[tensor(0.8401, device='cuda:3'), tensor(0.4724, device='cuda:3'), tensor(0.5301, device='cuda:3')]
[tensor(0.4102, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.7812, device='cuda:3')]
[tensor(0.6029, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.8164, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer19/
Indexing function: find_dots_indices


Collecting features:  26%|██▌       | 2066/8000 [00:39<05:01, 19.70it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 8050 are alive.


Aggregating statistics: 100%|██████████| 64/64 [02:14<00:00,  2.09s/it]] 
Aggregating statistics: 100%|██████████| 64/64 [02:18<00:00,  2.17s/it]s]
Aggregating statistics:  92%|█████████▏| 59/64 [01:48<00:12,  2.44s/it]s]

[tensor(0.5254, device='cuda:0'), tensor(0.6116, device='cuda:0'), tensor(0.6701, device='cuda:0')]
[tensor(0.9059, device='cuda:0'), tensor(0.6116, device='cuda:0'), tensor(0.6740, device='cuda:0')]
[tensor(0.3620, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.7721, device='cuda:0')]
[tensor(0.6445, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.8125, device='cuda:0')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer20/
Indexing function: find_dots_indices


Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s].03it/s]  s]

[tensor(0.4995, device='cuda:3'), tensor(0.6691, device='cuda:3'), tensor(0.7110, device='cuda:3')]
[tensor(0.9209, device='cuda:3'), tensor(0.6691, device='cuda:3'), tensor(0.7151, device='cuda:3')]
[tensor(0.3047, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.7318, device='cuda:3')]
[tensor(0.6367, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.8164, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer21/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [01:17<00:00, 102.83it/s]
Aggregating statistics: 100%|██████████| 64/64 [02:01<00:00,  1.91s/it]]]
Collecting features: 100%|██████████| 8000/8000 [01:24<00:00, 95.00it/s] 
Collecting features:  12%|█▏        | 949/8000 [00:18<01:25, 82.55it/s]s]

[tensor(0.4675, device='cuda:2'), tensor(0.2218, device='cuda:2'), tensor(0.2736, device='cuda:2')]
[tensor(0.5954, device='cuda:2'), tensor(0.2218, device='cuda:2'), tensor(0.2742, device='cuda:2')]
[tensor(0.4557, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.7956, device='cuda:2')]
[tensor(0.5573, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.8151, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer22/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [00:59<00:00, 135.08it/s]
Collecting features: 100%|██████████| 8000/8000 [00:49<00:00, 160.31it/s]]
Collecting features: 100%|██████████| 8000/8000 [00:24<00:00, 325.68it/s] 
Aggregating statistics: 100%|██████████| 64/64 [01:39<00:00,  1.55s/it]]]
Collecting features:  57%|█████▋    | 4542/8000 [01:27<01:10, 49.36it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 2460 are alive.


Collecting features:  24%|██▍       | 1936/8000 [00:29<01:26, 69.96it/s]s]

[tensor(0.3747, device='cuda:3'), tensor(0.1478, device='cuda:3'), tensor(0.1857, device='cuda:3')]
[tensor(0.4469, device='cuda:3'), tensor(0.1478, device='cuda:3'), tensor(0.1861, device='cuda:3')]
[tensor(0.4349, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.7057, device='cuda:3')]
[tensor(0.5052, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.7565, device='cuda:3')]


Collecting features:  31%|███▏      | 2515/8000 [00:51<01:48, 50.79it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 4429 are alive.


Collecting features:  51%|█████     | 4067/8000 [00:19<00:48, 80.28it/s]  

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7649 are alive.


Collecting features:  56%|█████▌    | 4461/8000 [00:22<00:31, 112.46it/s]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 6338 are alive.


Collecting features:  56%|█████▌    | 4491/8000 [00:22<00:43, 80.06it/s] 

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer23/
Indexing function: find_dots_indices


Collecting features:  58%|█████▊    | 4611/8000 [00:25<01:07, 50.37it/s] 

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 3973 are alive.


Aggregating statistics: 100%|██████████| 64/64 [02:21<00:00,  2.21s/it]] 
Collecting features:  70%|██████▉   | 5596/8000 [00:42<00:30, 79.40it/s] 

[tensor(0.3959, device='cuda:1'), tensor(0.1695, device='cuda:1'), tensor(0.2159, device='cuda:1')]
[tensor(0.4856, device='cuda:1'), tensor(0.1695, device='cuda:1'), tensor(0.2159, device='cuda:1')]
[tensor(0.4297, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.7227, device='cuda:1')]
[tensor(0.4974, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.7617, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer24/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [02:21<00:00, 56.70it/s] 
Aggregating statistics: 100%|██████████| 64/64 [02:53<00:00,  2.71s/it]]]
Collecting features:  31%|███       | 2485/8000 [00:34<01:24, 65.09it/s]]

[tensor(0.5686, device='cuda:1'), tensor(0.3904, device='cuda:1'), tensor(0.4614, device='cuda:1')]
[tensor(0.8054, device='cuda:1'), tensor(0.3904, device='cuda:1'), tensor(0.4636, device='cuda:1')]
[tensor(0.4206, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.8008, device='cuda:1')]
[tensor(0.6198, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.8203, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer25/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [01:36<00:00, 82.61it/s] 
Collecting features: 100%|██████████| 8000/8000 [01:59<00:00, 66.82it/s] 
Collecting features:   1%|          | 95/8000 [00:04<02:10, 60.65it/s]s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 8131 are alive.


Collecting features: 100%|██████████| 8000/8000 [01:24<00:00, 95.09it/s]]
Collecting features:  11%|█         | 853/8000 [00:18<01:13, 97.18it/s]] 

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 2853 are alive.


Aggregating statistics:   0%|          | 0/64 [00:00<?, ?it/s]50.96it/s] 

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 4177 are alive.


Collecting features:  58%|█████▊    | 4613/8000 [00:57<01:10, 48.36it/s]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 3557 are alive.


Aggregating statistics: 100%|██████████| 64/64 [02:01<00:00,  1.90s/it]]  
Collecting features: 100%|██████████| 8000/8000 [01:51<00:00, 71.94it/s]
Collecting features:  88%|████████▊ | 7054/8000 [01:32<00:02, 450.97it/s]

[tensor(0.3492, device='cuda:0'), tensor(0.1220, device='cuda:0'), tensor(0.1508, device='cuda:0')]
[tensor(0.4033, device='cuda:0'), tensor(0.1220, device='cuda:0'), tensor(0.1512, device='cuda:0')]
[tensor(0.3906, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.6562, device='cuda:0')]
[tensor(0.4453, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.7018, device='cuda:0')]


Collecting features:  45%|████▍     | 3594/8000 [01:09<01:08, 64.73it/s]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer26/
Indexing function: find_dots_indices


Collecting features:   1%|          | 41/8000 [00:03<04:25, 30.01it/s]]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 2630 are alive.


Collecting features: 100%|██████████| 8000/8000 [01:50<00:00, 72.45it/s]
Collecting features:  17%|█▋        | 1375/8000 [00:23<02:09, 51.14it/s]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 1570 are alive.


Aggregating statistics: 100%|██████████| 64/64 [03:42<00:00,  3.48s/it]]
Collecting features:  28%|██▊       | 2251/8000 [00:31<01:11, 80.70it/s]

[tensor(0.4476, device='cuda:2'), tensor(0.1898, device='cuda:2'), tensor(0.2317, device='cuda:2')]
[tensor(0.5581, device='cuda:2'), tensor(0.1898, device='cuda:2'), tensor(0.2324, device='cuda:2')]
[tensor(0.5117, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.7969, device='cuda:2')]
[tensor(0.6315, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.8177, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer27/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [02:54<00:00,  2.72s/it]] 
Aggregating statistics: 100%|██████████| 64/64 [04:30<00:00,  4.22s/it]]] 
Collecting features:  38%|███▊      | 3046/8000 [00:45<01:46, 46.60it/s]]

[tensor(0.3524, device='cuda:3'), tensor(0.1373, device='cuda:3'), tensor(0.1732, device='cuda:3')]
[tensor(0.4272, device='cuda:3'), tensor(0.1373, device='cuda:3'), tensor(0.1736, device='cuda:3')]
[tensor(0.4453, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.7604, device='cuda:3')]
[tensor(0.5443, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.7956, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer28/
Indexing function: find_dots_indices


Collecting features:   1%|          | 54/8000 [00:03<02:51, 46.39it/s]/s] 

[tensor(0.5986, device='cuda:0'), tensor(0.3893, device='cuda:0'), tensor(0.4448, device='cuda:0')]
[tensor(0.8332, device='cuda:0'), tensor(0.3893, device='cuda:0'), tensor(0.4475, device='cuda:0')]
[tensor(0.5260, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.7734, device='cuda:0')]
[tensor(0.7018, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.8177, device='cuda:0')]


Aggregating statistics: 100%|██████████| 64/64 [02:50<00:00,  2.66s/it]]
Aggregating statistics: 100%|██████████| 64/64 [04:58<00:00,  4.67s/it]s] 
Collecting features: 100%|██████████| 8000/8000 [02:02<00:00, 65.19it/s] 
Collecting features:  12%|█▏        | 971/8000 [00:08<00:05, 1390.82it/s]

[tensor(0.5434, device='cuda:3'), tensor(0.3846, device='cuda:3'), tensor(0.4329, device='cuda:3')]
[tensor(0.8014, device='cuda:3'), tensor(0.3846, device='cuda:3'), tensor(0.4342, device='cuda:3')]
[tensor(0.4388, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.8164, device='cuda:3')]
[tensor(0.6784, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.8229, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer30/
Indexing function: find_dots_indices


Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s] 50.91it/s]]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer29/
Indexing function: find_dots_indices


Collecting features:  14%|█▎        | 1083/8000 [00:28<02:02, 56.44it/s]]

[tensor(0.5385, device='cuda:1'), tensor(0.3096, device='cuda:1'), tensor(0.3610, device='cuda:1')]
[tensor(0.7301, device='cuda:1'), tensor(0.3096, device='cuda:1'), tensor(0.3628, device='cuda:1')]
[tensor(0.5065, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.7995, device='cuda:1')]
[tensor(0.6888, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.8177, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer31/
Indexing function: find_dots_indices


Collecting features:   5%|▍         | 395/8000 [00:09<01:51, 68.36it/s]] 

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 2930 are alive.


Aggregating statistics: 100%|██████████| 64/64 [02:10<00:00,  2.05s/it]]]
Collecting features:  72%|███████▏  | 5733/8000 [00:25<00:15, 141.92it/s]

[tensor(0.1924, device='cuda:2'), tensor(0.3789, device='cuda:2'), tensor(0.3870, device='cuda:2')]
[tensor(0.5352, device='cuda:2'), tensor(0.3789, device='cuda:2'), tensor(0.3870, device='cuda:2')]
[tensor(0.3802, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.4089, device='cuda:2')]
[tensor(0.5664, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.8086, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer32/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [04:30<00:00,  4.23s/it]]]
Collecting features: 100%|██████████| 8000/8000 [01:31<00:00, 87.38it/s]]
Aggregating statistics:  89%|████████▉ | 57/64 [01:11<00:05,  1.21it/s]]]

[tensor(0.5734, device='cuda:2'), tensor(0.4066, device='cuda:2'), tensor(0.4610, device='cuda:2')]
[tensor(0.8276, device='cuda:2'), tensor(0.4066, device='cuda:2'), tensor(0.4634, device='cuda:2')]
[tensor(0.5130, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.7982, device='cuda:2')]
[tensor(0.7044, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.8177, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer33/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [00:53<00:00, 149.90it/s]
Aggregating statistics: 100%|██████████| 64/64 [01:16<00:00,  1.20s/it]]]
Collecting features: 100%|██████████| 8000/8000 [00:42<00:00, 189.07it/s]]
Collecting features: 100%|██████████| 8000/8000 [00:17<00:00, 448.06it/s] 
Aggregating statistics: 100%|██████████| 64/64 [02:29<00:00,  2.33s/it]] 
Collecting features:  29%|██▉       | 2318/8000 [00:45<02:04, 45.66it/s]

[tensor(0.4186, device='cuda:0'), tensor(0.2910, device='cuda:0'), tensor(0.3337, device='cuda:0')]
[tensor(0.6036, device='cuda:0'), tensor(0.2910, device='cuda:0'), tensor(0.3343, device='cuda:0')]
[tensor(0.4102, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.8177, device='cuda:0')]
[tensor(0.5964, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.8229, device='cuda:0')]


Collecting features:  29%|██▉       | 2338/8000 [00:46<02:05, 45.01it/s]] 

[tensor(0.3414, device='cuda:2'), tensor(0.2363, device='cuda:2'), tensor(0.2790, device='cuda:2')]
[tensor(0.4484, device='cuda:2'), tensor(0.2363, device='cuda:2'), tensor(0.2796, device='cuda:2')]
[tensor(0.3099, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.7734, device='cuda:2')]
[tensor(0.4206, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.8008, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer34/
Indexing function: find_dots_indices


Collecting features:   1%|          | 85/8000 [00:02<01:36, 81.76it/s]s]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4033 are alive.


Aggregating statistics: 100%|██████████| 64/64 [03:54<00:00,  3.66s/it]s]
Collecting features:  37%|███▋      | 2998/8000 [00:55<01:16, 65.06it/s]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 3241 are alive.


Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s] 50.37it/s]]

[tensor(0.4008, device='cuda:0'), tensor(0.1562, device='cuda:0'), tensor(0.1949, device='cuda:0')]
[tensor(0.4905, device='cuda:0'), tensor(0.1562, device='cuda:0'), tensor(0.1952, device='cuda:0')]
[tensor(0.4961, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.8008, device='cuda:0')]
[tensor(0.6172, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.8190, device='cuda:0')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer36/
Indexing function: find_dots_indices


Collecting features:  54%|█████▍    | 4306/8000 [01:26<01:21, 45.53it/s]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 2295 are alive.


Collecting features: 100%|██████████| 8000/8000 [01:03<00:00, 126.46it/s]
Collecting features:   0%|          | 14/8000 [00:01<09:46, 13.61it/s] ]]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer35/
Indexing function: find_dots_indices


Collecting features:  40%|███▉      | 3162/8000 [00:58<01:44, 46.28it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 5705 are alive.


Aggregating statistics: 100%|██████████| 64/64 [04:06<00:00,  3.85s/it]]]
Collecting features: 100%|██████████| 8000/8000 [00:22<00:00, 354.77it/s] 
Aggregating statistics:  89%|████████▉ | 57/64 [02:06<00:15,  2.18s/it]]

[tensor(0.5282, device='cuda:3'), tensor(0.2620, device='cuda:3'), tensor(0.3181, device='cuda:3')]
[tensor(0.6864, device='cuda:3'), tensor(0.2620, device='cuda:3'), tensor(0.3191, device='cuda:3')]
[tensor(0.5182, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.8086, device='cuda:3')]
[tensor(0.6771, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.8203, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer37/
Indexing function: find_dots_indices


Collecting features:   8%|▊         | 639/8000 [00:16<02:49, 43.52it/s]]] 

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 4327 are alive.


Collecting features: 100%|██████████| 8000/8000 [00:41<00:00, 192.43it/s]
Collecting features:  15%|█▌        | 1224/8000 [00:27<02:34, 43.77it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 2069 are alive.


Aggregating statistics: 100%|██████████| 64/64 [02:21<00:00,  2.21s/it]]]
Aggregating statistics: 100%|██████████| 64/64 [03:17<00:00,  3.08s/it]s]
Collecting features:  25%|██▌       | 2001/8000 [00:34<00:21, 276.75it/s]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 3762 are alive.


Aggregating statistics:  36%|███▌      | 23/64 [00:37<01:11,  1.73s/it]

[tensor(0.3669, device='cuda:1'), tensor(0.2540, device='cuda:1'), tensor(0.2989, device='cuda:1')]
[tensor(0.5110, device='cuda:1'), tensor(0.2540, device='cuda:1'), tensor(0.2992, device='cuda:1')]
[tensor(0.3828, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.8112, device='cuda:1')]
[tensor(0.5521, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.8164, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer38/
Indexing function: find_dots_indices


Collecting features:  66%|██████▌   | 5288/8000 [01:32<00:18, 146.38it/s]

[tensor(0.3321, device='cuda:1'), tensor(0.1199, device='cuda:1'), tensor(0.1492, device='cuda:1')]
[tensor(0.3907, device='cuda:1'), tensor(0.1199, device='cuda:1'), tensor(0.1496, device='cuda:1')]
[tensor(0.4362, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.7305, device='cuda:1')]
[tensor(0.5130, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.7656, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer39/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [00:43<00:00, 183.83it/s] 
Collecting features: 100%|██████████| 8000/8000 [02:15<00:00, 59.03it/s] 
Collecting features: 100%|██████████| 8000/8000 [01:55<00:00, 69.32it/s] 
Aggregating statistics: 100%|██████████| 64/64 [01:46<00:00,  1.67s/it]]]
Collecting features:  36%|███▌      | 2840/8000 [00:23<00:23, 220.86it/s]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7284 are alive.


Collecting features:  80%|████████  | 6427/8000 [00:47<00:06, 236.36it/s]

[tensor(0.2055, device='cuda:3'), tensor(0.3765, device='cuda:3'), tensor(0.3809, device='cuda:3')]
[tensor(0.5389, device='cuda:3'), tensor(0.3765, device='cuda:3'), tensor(0.3809, device='cuda:3')]
[tensor(0.3685, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.4049, device='cuda:3')]
[tensor(0.5560, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.8034, device='cuda:3')]


Collecting features:  81%|████████▏ | 6500/8000 [01:05<00:06, 226.97it/s]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 3849 are alive.


Collecting features: 100%|██████████| 8000/8000 [00:54<00:00, 147.26it/s]
Collecting features:  38%|███▊      | 3013/8000 [00:37<01:40, 49.74it/s]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer40/
Indexing function: find_dots_indices


Collecting features:  61%|██████    | 4895/8000 [00:38<00:26, 117.03it/s]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 6952 are alive.


Collecting features:  46%|████▋     | 3711/8000 [00:49<01:11, 59.66it/s] 

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 6518 are alive.


Aggregating statistics: 100%|██████████| 64/64 [01:28<00:00,  1.38s/it]]
Collecting features: 100%|██████████| 8000/8000 [01:30<00:00, 88.79it/s]
Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s] 63.13it/s]

[tensor(0.3634, device='cuda:3'), tensor(0.2388, device='cuda:3'), tensor(0.2854, device='cuda:3')]
[tensor(0.4871, device='cuda:3'), tensor(0.2388, device='cuda:3'), tensor(0.2858, device='cuda:3')]
[tensor(0.3490, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.8099, device='cuda:3')]
[tensor(0.5182, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.8190, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer41/
Indexing function: find_dots_indices


Collecting features:  68%|██████▊   | 5446/8000 [01:11<00:07, 343.69it/s] 

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 4717 are alive.


Aggregating statistics: 100%|██████████| 64/64 [02:01<00:00,  1.90s/it]]]
Aggregating statistics: 100%|██████████| 64/64 [05:03<00:00,  4.74s/it]s]
Collecting features:  91%|█████████ | 7245/8000 [01:28<00:15, 49.40it/s]] 

[tensor(0.3999, device='cuda:2'), tensor(0.2629, device='cuda:2'), tensor(0.3085, device='cuda:2')]
[tensor(0.5532, device='cuda:2'), tensor(0.2629, device='cuda:2'), tensor(0.3089, device='cuda:2')]
[tensor(0.4102, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.8164, device='cuda:2')]
[tensor(0.6016, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.8216, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer42/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [02:17<00:00,  2.14s/it]]]
Collecting features:  93%|█████████▎| 7439/8000 [01:32<00:11, 50.35it/s]]

[tensor(0.5917, device='cuda:1'), tensor(0.4687, device='cuda:1'), tensor(0.5254, device='cuda:1')]
[tensor(0.8824, device='cuda:1'), tensor(0.4687, device='cuda:1'), tensor(0.5290, device='cuda:1')]
[tensor(0.4883, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.7760, device='cuda:1')]
[tensor(0.7057, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.8203, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer43/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [01:42<00:00,  1.60s/it]]]
Collecting features:   9%|▉         | 721/8000 [00:07<00:22, 322.52it/s]]

[tensor(0.3320, device='cuda:0'), tensor(0.2059, device='cuda:0'), tensor(0.2465, device='cuda:0')]
[tensor(0.4273, device='cuda:0'), tensor(0.2059, device='cuda:0'), tensor(0.2465, device='cuda:0')]
[tensor(0.3281, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.7826, device='cuda:0')]
[tensor(0.4557, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.8112, device='cuda:0')]


Collecting features: 100%|██████████| 8000/8000 [01:38<00:00, 81.59it/s] 
Collecting features:  58%|█████▊    | 4607/8000 [01:01<00:49, 68.70it/s]]

[tensor(0.4639, device='cuda:0'), tensor(0.3429, device='cuda:0'), tensor(0.3841, device='cuda:0')]
[tensor(0.7163, device='cuda:0'), tensor(0.3429, device='cuda:0'), tensor(0.3841, device='cuda:0')]
[tensor(0.4271, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.7461, device='cuda:0')]
[tensor(0.6315, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.8203, device='cuda:0')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer44/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [01:41<00:00, 78.59it/s]  
Collecting features:   9%|▊         | 696/8000 [00:09<01:39, 73.07it/s]]]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer45/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [00:20<00:00, 383.53it/s]
Collecting features:  73%|███████▎  | 5862/8000 [01:17<00:25, 82.95it/s]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 4989 are alive.


Collecting features:  87%|████████▋ | 6975/8000 [01:00<00:13, 74.13it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 3209 are alive.


Aggregating statistics: 100%|██████████| 64/64 [02:02<00:00,  1.92s/it]]]
Collecting features:  85%|████████▍ | 6769/8000 [01:28<00:20, 59.62it/s]]

[tensor(0.3586, device='cuda:3'), tensor(0.2065, device='cuda:3'), tensor(0.2475, device='cuda:3')]
[tensor(0.4806, device='cuda:3'), tensor(0.2065, device='cuda:3'), tensor(0.2488, device='cuda:3')]
[tensor(0.4062, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.8151, device='cuda:3')]
[tensor(0.5859, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.8229, device='cuda:3')]


Aggregating statistics: 100%|██████████| 64/64 [02:24<00:00,  2.25s/it]s]
Collecting features: 100%|██████████| 8000/8000 [01:08<00:00, 117.56it/s]
Collecting features:  86%|████████▌ | 6853/8000 [01:29<00:17, 64.49it/s]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 1671 are alive.


Collecting features:  90%|████████▉ | 7187/8000 [01:33<00:11, 68.33it/s]]

[tensor(0.1856, device='cuda:0'), tensor(0.2464, device='cuda:0'), tensor(0.2512, device='cuda:0')]
[tensor(0.4021, device='cuda:0'), tensor(0.2464, device='cuda:0'), tensor(0.2512, device='cuda:0')]
[tensor(0.3359, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.3984, device='cuda:0')]
[tensor(0.5482, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.8060, device='cuda:0')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer47/
Indexing function: find_dots_indices


Collecting features:   1%|          | 45/8000 [00:04<04:02, 32.76it/s]s]]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer46/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [01:42<00:00, 78.42it/s] 
Collecting features: 100%|██████████| 8000/8000 [00:31<00:00, 250.25it/s]
Collecting features:   3%|▎         | 272/8000 [00:11<03:44, 34.45it/s]]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 1641 are alive.


Aggregating statistics: 100%|██████████| 64/64 [03:04<00:00,  2.88s/it]]]
Collecting features:  70%|███████   | 5617/8000 [00:56<00:07, 328.95it/s]

[tensor(0.3893, device='cuda:2'), tensor(0.2233, device='cuda:2'), tensor(0.2647, device='cuda:2')]
[tensor(0.5302, device='cuda:2'), tensor(0.2233, device='cuda:2'), tensor(0.2654, device='cuda:2')]
[tensor(0.4635, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.8190, device='cuda:2')]
[tensor(0.6693, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.8229, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer48/
Indexing function: find_dots_indices


Collecting features:  35%|███▍      | 2762/8000 [01:02<01:43, 50.58it/s]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 1612 are alive.


Aggregating statistics:  23%|██▎       | 15/64 [00:14<00:47,  1.02it/s]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 1553 are alive.


Collecting features: 100%|██████████| 8000/8000 [00:30<00:00, 264.43it/s]
Collecting features: 100%|██████████| 8000/8000 [01:19<00:00, 100.55it/s]
Collecting features:  23%|██▎       | 1831/8000 [00:51<02:48, 36.54it/s]] 

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 1519 are alive.


Aggregating statistics:   0%|          | 0/64 [00:00<?, ?it/s]34.28it/s]] 

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 1417 are alive.


Aggregating statistics: 100%|██████████| 64/64 [01:09<00:00,  1.08s/it]]]
Aggregating statistics:  20%|██        | 13/64 [00:16<01:02,  1.22s/it]] 

[tensor(0.3052, device='cuda:0'), tensor(0.2226, device='cuda:0'), tensor(0.2525, device='cuda:0')]
[tensor(0.4261, device='cuda:0'), tensor(0.2226, device='cuda:0'), tensor(0.2525, device='cuda:0')]
[tensor(0.3320, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.7930, device='cuda:0')]
[tensor(0.4948, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.8047, device='cuda:0')]


Collecting features:  36%|███▌      | 2845/8000 [01:19<01:06, 77.79it/s]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer49/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [01:17<00:00,  1.22s/it]] 
Aggregating statistics:  95%|█████████▌| 61/64 [01:11<00:03,  1.29s/it]]]

[tensor(0.2718, device='cuda:3'), tensor(0.2492, device='cuda:3'), tensor(0.2718, device='cuda:3')]
[tensor(0.4205, device='cuda:3'), tensor(0.2492, device='cuda:3'), tensor(0.2730, device='cuda:3')]
[tensor(0.2943, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.7982, device='cuda:3')]
[tensor(0.4701, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.8125, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer50/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [02:59<00:00,  2.80s/it]]]
Collecting features: 100%|██████████| 8000/8000 [01:15<00:00, 106.35it/s] 
Aggregating statistics: 100%|██████████| 64/64 [01:15<00:00,  1.17s/it]]
Aggregating statistics: 100%|██████████| 64/64 [01:15<00:00,  1.19s/it]]s]
Aggregating statistics: 100%|██████████| 64/64 [03:22<00:00,  3.16s/it]s] 
Collecting features: 100%|██████████| 8000/8000 [00:23<00:00, 339.43it/s] 
Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s] 41.96it/s]

[tensor(0.4139, device='cuda:3'), tensor(0.2357, device='cuda:3'), tensor(0.2817, device='cuda:3')]
[tensor(0.5640, device='cuda:3'), tensor(0.2357, device='cuda:3'), tensor(0.2824, device='cuda:3')]
[tensor(0.4674, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.8203, device='cuda:3')]
[tensor(0.6901, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.8229, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer51/
Indexing function: find_dots_indices


Collecting features:  48%|████▊     | 3835/8000 [01:45<01:56, 35.74it/s]]

[tensor(0.3368, device='cuda:2'), tensor(0.2667, device='cuda:2'), tensor(0.2996, device='cuda:2')]
[tensor(0.5099, device='cuda:2'), tensor(0.2667, device='cuda:2'), tensor(0.3002, device='cuda:2')]
[tensor(0.3060, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.7904, device='cuda:2')]
[tensor(0.4779, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.8073, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer52/
Indexing function: find_dots_indices


Collecting features:  73%|███████▎  | 5810/8000 [02:23<00:57, 37.93it/s]]

[tensor(0.3220, device='cuda:3'), tensor(0.2717, device='cuda:3'), tensor(0.3200, device='cuda:3')]
[tensor(0.5106, device='cuda:3'), tensor(0.2717, device='cuda:3'), tensor(0.3207, device='cuda:3')]
[tensor(0.3333, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.7930, device='cuda:3')]
[tensor(0.4818, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.8099, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer53/
Indexing function: find_dots_indices


Collecting features:   1%|          | 78/8000 [00:04<02:39, 49.52it/s]t/s]

[tensor(0.4664, device='cuda:1'), tensor(0.3074, device='cuda:1'), tensor(0.3533, device='cuda:1')]
[tensor(0.6648, device='cuda:1'), tensor(0.3074, device='cuda:1'), tensor(0.3546, device='cuda:1')]
[tensor(0.4505, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.8216, device='cuda:1')]
[tensor(0.6523, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.8229, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer54/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [00:04<00:00, 1621.56it/s]
Aggregating statistics: 100%|██████████| 64/64 [02:09<00:00,  2.03s/it]]]
Collecting features:  79%|███████▉  | 6302/8000 [02:33<00:34, 49.15it/s]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 1455 are alive.


Aggregating statistics: 100%|██████████| 64/64 [01:05<00:00,  1.03s/it]]]
Collecting features:  55%|█████▌    | 4435/8000 [01:58<01:15, 47.24it/s]]

[tensor(0.3704, device='cuda:0'), tensor(0.2099, device='cuda:0'), tensor(0.2558, device='cuda:0')]
[tensor(0.4919, device='cuda:0'), tensor(0.2099, device='cuda:0'), tensor(0.2572, device='cuda:0')]
[tensor(0.4323, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.8203, device='cuda:0')]
[tensor(0.6458, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.8229, device='cuda:0')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer55/
Indexing function: find_dots_indices


Collecting features:  81%|████████  | 6444/8000 [02:37<00:33, 45.90it/s]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 1373 are alive.


Aggregating statistics: 100%|██████████| 64/64 [03:46<00:00,  3.53s/it]]]
Collecting features:  82%|████████▏ | 6553/8000 [02:40<00:32, 44.48it/s]]

[tensor(0.3028, device='cuda:0'), tensor(0.2475, device='cuda:0'), tensor(0.2870, device='cuda:0')]
[tensor(0.4562, device='cuda:0'), tensor(0.2475, device='cuda:0'), tensor(0.2877, device='cuda:0')]
[tensor(0.3138, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.7878, device='cuda:0')]
[tensor(0.4635, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.8099, device='cuda:0')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer56/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [00:26<00:00, 301.37it/s]
Collecting features:   1%|          | 67/8000 [00:03<02:41, 49.18it/s]t/s]

[tensor(0.5045, device='cuda:2'), tensor(0.2809, device='cuda:2'), tensor(0.3280, device='cuda:2')]
[tensor(0.6970, device='cuda:2'), tensor(0.2809, device='cuda:2'), tensor(0.3296, device='cuda:2')]
[tensor(0.5378, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.8203, device='cuda:2')]
[tensor(0.7253, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.8229, device='cuda:2')]


Collecting features: 100%|██████████| 8000/8000 [00:15<00:00, 502.90it/s] 
Collecting features:   3%|▎         | 241/8000 [00:08<02:35, 49.74it/s]s]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 1712 are alive.


Aggregating statistics: 100%|██████████| 64/64 [01:08<00:00,  1.08s/it]s]
Collecting features:  57%|█████▋    | 4560/8000 [00:28<00:23, 147.46it/s]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer57/
Indexing function: find_dots_indices


Collecting features:  18%|█▊        | 1466/8000 [00:33<02:18, 47.31it/s]]

[tensor(0.3324, device='cuda:2'), tensor(0.2054, device='cuda:2'), tensor(0.2378, device='cuda:2')]
[tensor(0.3952, device='cuda:2'), tensor(0.2054, device='cuda:2'), tensor(0.2385, device='cuda:2')]
[tensor(0.3320, device='cuda:2'), tensor(0.8086, device='cuda:2'), tensor(0.7279, device='cuda:2')]
[tensor(0.4479, device='cuda:2'), tensor(0.8086, device='cuda:2'), tensor(0.7461, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer58/
Indexing function: find_dots_indices


Collecting features:  91%|█████████▏| 7307/8000 [02:56<00:15, 46.05it/s]s]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 1656 are alive.


Collecting features:  85%|████████▌ | 6835/8000 [00:35<00:04, 267.87it/s] 

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 1540 are alive.


Collecting features: 100%|██████████| 8000/8000 [00:39<00:00, 203.74it/s] 
Collecting features: 100%|██████████| 8000/8000 [00:25<00:00, 309.80it/s] 
Collecting features: 100%|██████████| 8000/8000 [00:11<00:00, 691.69it/s]
Collecting features: 100%|██████████| 8000/8000 [03:10<00:00, 41.99it/s]]
Collecting features: 100%|██████████| 8000/8000 [00:19<00:00, 404.75it/s]
Collecting features:  32%|███▏      | 2573/8000 [00:56<01:45, 51.44it/s]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 1748 are alive.


Collecting features:  34%|███▍      | 2722/8000 [00:59<02:04, 42.36it/s]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 1582 are alive.


Collecting features:  35%|███▍      | 2776/8000 [01:01<02:06, 41.15it/s]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 1559 are alive.


Aggregating statistics: 100%|██████████| 64/64 [00:51<00:00,  1.25it/s]]
Collecting features:  36%|███▋      | 2916/8000 [01:04<01:50, 45.91it/s]

[tensor(0.3354, device='cuda:2'), tensor(0.2227, device='cuda:2'), tensor(0.2564, device='cuda:2')]
[tensor(0.4309, device='cuda:2'), tensor(0.2227, device='cuda:2'), tensor(0.2577, device='cuda:2')]
[tensor(0.3451, device='cuda:2'), tensor(0.8086, device='cuda:2'), tensor(0.7578, device='cuda:2')]
[tensor(0.4701, device='cuda:2'), tensor(0.8086, device='cuda:2'), tensor(0.7917, device='cuda:2')]
do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 1597 are alive.


Aggregating statistics:  53%|█████▎    | 34/64 [00:31<00:30,  1.03s/it]] 

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 1630 are alive.


Aggregating statistics: 100%|██████████| 64/64 [03:02<00:00,  2.85s/it]]
Aggregating statistics: 100%|██████████| 64/64 [00:52<00:00,  1.23it/s]]
Collecting features:  31%|███       | 2482/8000 [00:54<01:49, 50.57it/s]

[tensor(0.3618, device='cuda:1'), tensor(0.2113, device='cuda:1'), tensor(0.2502, device='cuda:1')]
[tensor(0.4637, device='cuda:1'), tensor(0.2113, device='cuda:1'), tensor(0.2505, device='cuda:1')]
[tensor(0.3763, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.8138, device='cuda:1')]
[tensor(0.5508, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.8216, device='cuda:1')]


Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s] 46.47it/s]

[tensor(0.3372, device='cuda:3'), tensor(0.2003, device='cuda:3'), tensor(0.2331, device='cuda:3')]
[tensor(0.4166, device='cuda:3'), tensor(0.2003, device='cuda:3'), tensor(0.2338, device='cuda:3')]
[tensor(0.3164, device='cuda:3'), tensor(0.7617, device='cuda:3'), tensor(0.7461, device='cuda:3')]
[tensor(0.4375, device='cuda:3'), tensor(0.7617, device='cuda:3'), tensor(0.7721, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-standard/trainer0/
Indexing function: find_dots_indices


Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s] 44.57it/s]]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-p_anneal/trainer59/
Indexing function: find_dots_indices


Collecting features:  38%|███▊      | 3002/8000 [01:04<02:16, 36.72it/s]]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-standard/trainer1/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [00:58<00:00,  1.10it/s]]]
Aggregating statistics: 100%|██████████| 64/64 [04:04<00:00,  3.82s/it]]
Collecting features:  46%|████▌     | 3680/8000 [00:05<00:03, 1339.07it/s]

[tensor(0.3353, device='cuda:3'), tensor(0.2103, device='cuda:3'), tensor(0.2494, device='cuda:3')]
[tensor(0.4357, device='cuda:3'), tensor(0.2103, device='cuda:3'), tensor(0.2506, device='cuda:3')]
[tensor(0.3763, device='cuda:3'), tensor(0.8073, device='cuda:3'), tensor(0.7812, device='cuda:3')]
[tensor(0.5052, device='cuda:3'), tensor(0.8073, device='cuda:3'), tensor(0.7995, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-standard/trainer2/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [03:10<00:00, 41.92it/s] ]
Collecting features:   6%|▌         | 446/8000 [00:09<01:00, 125.68it/s]s]

[tensor(0.2020, device='cuda:1'), tensor(0.2351, device='cuda:1'), tensor(0.2393, device='cuda:1')]
[tensor(0.4096, device='cuda:1'), tensor(0.2351, device='cuda:1'), tensor(0.2393, device='cuda:1')]
[tensor(0.3516, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.4388, device='cuda:1')]
[tensor(0.5508, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.8125, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-standard/trainer3/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [00:10<00:00, 779.32it/s] 
Collecting features: 100%|██████████| 8000/8000 [00:21<00:00, 375.34it/s]
Collecting features:  20%|██        | 1604/8000 [00:25<01:24, 76.10it/s]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 1541 are alive.


Aggregating statistics: 100%|██████████| 64/64 [01:07<00:00,  1.06s/it]]]
Aggregating statistics: 100%|██████████| 64/64 [01:11<00:00,  1.12s/it]]]
Collecting features:  60%|█████▉    | 4781/8000 [01:31<01:03, 50.73it/s]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4094 are alive.
[tensor(0.3448, device='cuda:0'), tensor(0.2052, device='cuda:0'), tensor(0.2422, device='cuda:0')]
[tensor(0.4143, device='cuda:0'), tensor(0.2052, device='cuda:0'), tensor(0.2429, device='cuda:0')]
[tensor(0.3411, device='cuda:0'), tensor(0.7448, device='cuda:0'), tensor(0.7422, device='cuda:0')]
[tensor(0.4570, device='cuda:0'), tensor(0.7448, device='cuda:0'), tensor(0.7708, device='cuda:0')]


Collecting features:  89%|████████▊ | 7091/8000 [00:24<00:01, 796.77it/s] 

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4096 are alive.


Collecting features: 100%|██████████| 8000/8000 [00:25<00:00, 316.89it/s] 
Collecting features:  72%|███████▏  | 5729/8000 [01:51<00:48, 46.73it/s]

[tensor(0.3001, device='cuda:0'), tensor(0.2572, device='cuda:0'), tensor(0.2893, device='cuda:0')]
[tensor(0.4789, device='cuda:0'), tensor(0.2572, device='cuda:0'), tensor(0.2893, device='cuda:0')]
[tensor(0.3333, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.7930, device='cuda:0')]
[tensor(0.4922, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.8151, device='cuda:0')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-standard/trainer4/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [00:50<00:00,  1.27it/s]] 
Collecting features:  67%|██████▋   | 5373/8000 [01:42<00:47, 55.88it/s]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-standard/trainer5/
Indexing function: find_dots_indices


Collecting features:  70%|███████   | 5601/8000 [01:46<00:54, 44.10it/s]] 

[tensor(0.2798, device='cuda:3'), tensor(0.2233, device='cuda:3'), tensor(0.2479, device='cuda:3')]
[tensor(0.3951, device='cuda:3'), tensor(0.2233, device='cuda:3'), tensor(0.2491, device='cuda:3')]
[tensor(0.3242, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.7734, device='cuda:3')]
[tensor(0.4714, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.7956, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-standard/trainer6/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [05:17<00:00,  4.97s/it]] 
Aggregating statistics: 100%|██████████| 64/64 [01:06<00:00,  1.04s/it]]]
Aggregating statistics: 100%|██████████| 64/64 [01:02<00:00,  1.02it/s]s]]
Aggregating statistics: 100%|██████████| 64/64 [01:04<00:00,  1.01s/it]/s]
Collecting features:  74%|███████▍  | 5959/8000 [01:51<00:23, 87.24it/s]]]

[tensor(0.4796, device='cuda:1'), tensor(0.2704, device='cuda:1'), tensor(0.3150, device='cuda:1')]
[tensor(0.6592, device='cuda:1'), tensor(0.2704, device='cuda:1'), tensor(0.3157, device='cuda:1')]
[tensor(0.5273, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.8203, device='cuda:1')]
[tensor(0.7096, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.8229, device='cuda:1')]


Collecting features:  65%|██████▍   | 5189/8000 [00:05<00:01, 1718.72it/s]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4088 are alive.


Autoencoder loop: 100%|██████████| 180/180 [38:28<00:00, 12.82s/it]72it/s]
Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s] 118.18it/s]

[tensor(0.3293, device='cuda:2'), tensor(0.2383, device='cuda:2'), tensor(0.2718, device='cuda:2')]
[tensor(0.4614, device='cuda:2'), tensor(0.2383, device='cuda:2'), tensor(0.2724, device='cuda:2')]
[tensor(0.3216, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.7773, device='cuda:2')]
[tensor(0.4714, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.8073, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-standard/trainer10/
Indexing function: find_dots_indices
[tensor(0.3085, device='cuda:0'), tensor(0.2130, device='cuda:0'), tensor(0.2476, device='cuda:0')]
[tensor(0.3970, device='cuda:0'), tensor(0.2130, device='cuda:0'), tensor(0.2476, device='cuda:0')]
[tensor(0.3242, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.7617, device='cuda:0')]
[tensor(0.4922, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.7826, device='cuda:0')]
Autoenco

Collecting features: 100%|██████████| 8000/8000 [00:15<00:00, 527.80it/s]
Collecting features: 100%|██████████| 8000/8000 [00:11<00:00, 721.24it/s]
Aggregating statistics:  36%|███▌      | 23/64 [00:31<00:47,  1.15s/it]] 

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-standard/trainer9/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [02:19<00:00, 57.26it/s]  
Collecting features: 100%|██████████| 8000/8000 [00:15<00:00, 528.75it/s]]
Collecting features: 100%|██████████| 8000/8000 [02:11<00:00, 60.83it/s]  
Collecting features: 100%|██████████| 8000/8000 [00:35<00:00, 226.84it/s]
Aggregating statistics:   0%|          | 0/64 [00:00<?, ?it/s]705.17it/s]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 8185 are alive.


Aggregating statistics: 100%|██████████| 64/64 [01:24<00:00,  1.32s/it]s]
Collecting features:  57%|█████▋    | 4582/8000 [00:18<00:08, 396.83it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 8170 are alive.


Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s] 182.20it/s]

[tensor(0.2937, device='cuda:1'), tensor(0.2217, device='cuda:1'), tensor(0.2461, device='cuda:1')]
[tensor(0.3863, device='cuda:1'), tensor(0.2217, device='cuda:1'), tensor(0.2467, device='cuda:1')]
[tensor(0.3346, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.7799, device='cuda:1')]
[tensor(0.4961, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.8008, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-standard/trainer11/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [00:23<00:00, 335.08it/s]
Aggregating statistics:  69%|██████▉   | 44/64 [00:52<00:18,  1.07it/s]s]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 1698 are alive.


Collecting features:  92%|█████████▏| 7363/8000 [00:24<00:08, 72.54it/s]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 3939 are alive.


Aggregating statistics:   0%|          | 0/64 [00:00<?, ?it/s]68.76it/s] 

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 1634 are alive.
do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 3641 are alive.


Collecting features: 100%|██████████| 8000/8000 [00:25<00:00, 309.35it/s] 
Aggregating statistics:   0%|          | 0/64 [00:00<?, ?it/s]104.11it/s]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7626 are alive.


Collecting features: 100%|██████████| 8000/8000 [01:34<00:00, 84.51it/s] 
Aggregating statistics:   0%|          | 0/64 [00:00<?, ?it/s]44.47it/s] 

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 6258 are alive.


Collecting features: 100%|██████████| 8000/8000 [01:33<00:00, 85.23it/s]
Aggregating statistics: 100%|██████████| 64/64 [01:18<00:00,  1.23s/it]s]
Aggregating statistics:  23%|██▎       | 15/64 [00:20<01:02,  1.27s/it]s]

[tensor(0.3063, device='cuda:1'), tensor(0.2291, device='cuda:1'), tensor(0.2602, device='cuda:1')]
[tensor(0.4244, device='cuda:1'), tensor(0.2291, device='cuda:1'), tensor(0.2609, device='cuda:1')]
[tensor(0.3294, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.7773, device='cuda:1')]
[tensor(0.4674, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.7969, device='cuda:1')]


Aggregating statistics:   0%|          | 0/64 [00:00<?, ?it/s]1.15s/it]s]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 1447 are alive.


Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s] 1.41s/it]s]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-standard/trainer12/
Indexing function: find_dots_indices


Collecting features:   0%|          | 31/8000 [00:03<06:06, 21.74it/s]s] 

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4029 are alive.


Collecting features: 100%|██████████| 8000/8000 [01:20<00:00, 99.09it/s] 
Aggregating statistics: 100%|██████████| 64/64 [01:59<00:00,  1.86s/it]]
Collecting features:  74%|███████▎  | 5886/8000 [01:13<00:56, 37.53it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 8083 are alive.


Aggregating statistics:  17%|█▋        | 11/64 [00:33<02:50,  3.22s/it]]

[tensor(0.3561, device='cuda:3'), tensor(0.8144, device='cuda:3'), tensor(0.8461, device='cuda:3')]
[tensor(0.9282, device='cuda:3'), tensor(0.8144, device='cuda:3'), tensor(0.8486, device='cuda:3')]
[tensor(0.2539, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.6940, device='cuda:3')]
[tensor(0.6406, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.8164, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-standard/trainer13/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [02:09<00:00,  2.02s/it]]
Collecting features:  81%|████████▏ | 6504/8000 [01:30<00:51, 29.15it/s]]

[tensor(0.3391, device='cuda:2'), tensor(0.8174, device='cuda:2'), tensor(0.8474, device='cuda:2')]
[tensor(0.9258, device='cuda:2'), tensor(0.8174, device='cuda:2'), tensor(0.8503, device='cuda:2')]
[tensor(0.1940, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.6901, device='cuda:2')]
[tensor(0.6328, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.8125, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-standard/trainer14/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [01:44<00:00,  1.63s/it]]]]
Collecting features:  49%|████▉     | 3956/8000 [00:36<00:38, 105.02it/s]

[tensor(0.3522, device='cuda:1'), tensor(0.2344, device='cuda:1'), tensor(0.2691, device='cuda:1')]
[tensor(0.4912, device='cuda:1'), tensor(0.2344, device='cuda:1'), tensor(0.2715, device='cuda:1')]
[tensor(0.3880, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.7917, device='cuda:1')]
[tensor(0.5365, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.8125, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-standard/trainer15/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [01:45<00:00,  1.64s/it]] 
Aggregating statistics: 100%|██████████| 64/64 [02:22<00:00,  2.23s/it]] 
Collecting features: 100%|██████████| 8000/8000 [01:19<00:00, 100.79it/s] 
Collecting features:  58%|█████▊    | 4628/8000 [00:43<00:29, 115.85it/s]

[tensor(0.3427, device='cuda:1'), tensor(0.2742, device='cuda:1'), tensor(0.3140, device='cuda:1')]
[tensor(0.5196, device='cuda:1'), tensor(0.2742, device='cuda:1'), tensor(0.3140, device='cuda:1')]
[tensor(0.3451, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.7812, device='cuda:1')]
[tensor(0.5117, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.8060, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-standard/trainer16/
Indexing function: find_dots_indices


Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s] 69.41it/s]]

[tensor(0.4156, device='cuda:0'), tensor(0.7485, device='cuda:0'), tensor(0.7852, device='cuda:0')]
[tensor(0.9095, device='cuda:0'), tensor(0.7485, device='cuda:0'), tensor(0.7886, device='cuda:0')]
[tensor(0.2656, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.7409, device='cuda:0')]
[tensor(0.6484, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.8203, device='cuda:0')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-standard/trainer17/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [02:07<00:00, 62.87it/s] 
Aggregating statistics: 100%|██████████| 64/64 [01:39<00:00,  1.55s/it]/s]
Collecting features:  20%|█▉        | 1581/8000 [00:22<01:12, 88.44it/s] ]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 3995 are alive.


Collecting features:  60%|█████▉    | 4795/8000 [00:49<00:19, 161.57it/s]

[tensor(0.2951, device='cuda:1'), tensor(0.1900, device='cuda:1'), tensor(0.2211, device='cuda:1')]
[tensor(0.3601, device='cuda:1'), tensor(0.1900, device='cuda:1'), tensor(0.2232, device='cuda:1')]
[tensor(0.3216, device='cuda:1'), tensor(0.8138, device='cuda:1'), tensor(0.7057, device='cuda:1')]
[tensor(0.4492, device='cuda:1'), tensor(0.8138, device='cuda:1'), tensor(0.7305, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-standard/trainer18/
Indexing function: find_dots_indices


Aggregating statistics:   0%|          | 0/64 [00:00<?, ?it/s]65.17it/s]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 4058 are alive.


Aggregating statistics: 100%|██████████| 64/64 [02:19<00:00,  2.17s/it]]]
Collecting features:  87%|████████▋ | 6998/8000 [01:14<00:15, 66.49it/s]] 

[tensor(0.6317, device='cuda:2'), tensor(0.4183, device='cuda:2'), tensor(0.4982, device='cuda:2')]
[tensor(0.8506, device='cuda:2'), tensor(0.4183, device='cuda:2'), tensor(0.5010, device='cuda:2')]
[tensor(0.3945, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.7878, device='cuda:2')]
[tensor(0.6120, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.8164, device='cuda:2')]


Aggregating statistics: 100%|██████████| 64/64 [02:34<00:00,  2.42s/it]]]
Collecting features: 100%|██████████| 8000/8000 [01:26<00:00, 92.47it/s] 
Collecting features:  26%|██▋       | 2117/8000 [00:25<00:53, 109.95it/s]

[tensor(0.3224, device='cuda:0'), tensor(0.4803, device='cuda:0'), tensor(0.5006, device='cuda:0')]
[tensor(0.7416, device='cuda:0'), tensor(0.4803, device='cuda:0'), tensor(0.5006, device='cuda:0')]
[tensor(0.3906, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.5859, device='cuda:0')]
[tensor(0.6367, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.8151, device='cuda:0')]


Collecting features:  48%|████▊     | 3869/8000 [00:53<01:29, 46.17it/s]]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-standard/trainer19/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [01:19<00:00, 100.11it/s]]
Aggregating statistics:  70%|███████   | 45/64 [03:00<00:48,  2.55s/it]s]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-standard/trainer20/
Indexing function: find_dots_indices


Collecting features:   6%|▌         | 493/8000 [00:12<02:02, 61.48it/s]]] 

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 3645 are alive.


Collecting features: 100%|██████████| 8000/8000 [00:43<00:00, 185.90it/s]
Collecting features:  60%|██████    | 4817/8000 [01:13<01:10, 45.33it/s]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 2901 are alive.


Collecting features: 100%|██████████| 8000/8000 [01:12<00:00, 110.04it/s]
Collecting features:  68%|██████▊   | 5438/8000 [01:25<00:51, 50.12it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 5810 are alive.


Collecting features:  84%|████████▍ | 6760/8000 [01:27<00:24, 51.59it/s]] 

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7539 are alive.


Collecting features: 100%|██████████| 8000/8000 [01:48<00:00, 73.54it/s] 
Aggregating statistics: 100%|██████████| 64/64 [03:11<00:00,  3.00s/it]]
Collecting features:  65%|██████▌   | 5228/8000 [01:06<00:59, 46.60it/s]]

[tensor(0.5495, device='cuda:1'), tensor(0.5959, device='cuda:1'), tensor(0.6600, device='cuda:1')]
[tensor(0.8938, device='cuda:1'), tensor(0.5959, device='cuda:1'), tensor(0.6642, device='cuda:1')]
[tensor(0.3177, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.7904, device='cuda:1')]
[tensor(0.6237, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.8190, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-standard/trainer21/
Indexing function: find_dots_indices


Collecting features: 100%|██████████| 8000/8000 [02:10<00:00, 61.25it/s] 
Aggregating statistics: 100%|██████████| 64/64 [03:38<00:00,  3.42s/it]]]
Aggregating statistics: 100%|██████████| 64/64 [04:09<00:00,  3.90s/it]]
Collecting features:  69%|██████▉   | 5517/8000 [01:11<00:52, 47.25it/s] 

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 7102 are alive.


Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s]3.39s/it]]]]

[tensor(0.6256, device='cuda:3'), tensor(0.2985, device='cuda:3'), tensor(0.3746, device='cuda:3')]
[tensor(0.7880, device='cuda:3'), tensor(0.2985, device='cuda:3'), tensor(0.3763, device='cuda:3')]
[tensor(0.5130, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.8021, device='cuda:3')]
[tensor(0.6706, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.8190, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-standard/trainer22/
Indexing function: find_dots_indices


Collecting features:   0%|          | 0/8000 [00:00<?, ?it/s]52.34it/s]] 

[tensor(0.5119, device='cuda:0'), tensor(0.5999, device='cuda:0'), tensor(0.6530, device='cuda:0')]
[tensor(0.9219, device='cuda:0'), tensor(0.5999, device='cuda:0'), tensor(0.6578, device='cuda:0')]
[tensor(0.3841, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.7656, device='cuda:0')]
[tensor(0.7161, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.8190, device='cuda:0')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-standard/trainer23/
Indexing function: find_dots_indices


Aggregating statistics:   0%|          | 0/64 [00:00<?, ?it/s]55.23it/s]] 

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 6819 are alive.


Aggregating statistics: 100%|██████████| 64/64 [04:27<00:00,  4.19s/it]]]
Collecting features:  84%|████████▍ | 6738/8000 [01:35<00:28, 44.29it/s]s]

[tensor(0.5014, device='cuda:3'), tensor(0.6220, device='cuda:3'), tensor(0.6756, device='cuda:3')]
[tensor(0.9324, device='cuda:3'), tensor(0.6220, device='cuda:3'), tensor(0.6800, device='cuda:3')]
[tensor(0.3841, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.7565, device='cuda:3')]
[tensor(0.7109, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.8177, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-standard/trainer24/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [04:14<00:00,  3.98s/it]]
Collecting features:  18%|█▊        | 1465/8000 [00:24<02:17, 47.64it/s]] 

[tensor(0.6087, device='cuda:2'), tensor(0.4056, device='cuda:2'), tensor(0.4739, device='cuda:2')]
[tensor(0.8562, device='cuda:2'), tensor(0.4056, device='cuda:2'), tensor(0.4785, device='cuda:2')]
[tensor(0.4948, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.8021, device='cuda:2')]
[tensor(0.6979, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.8203, device='cuda:2')]


Collecting features: 100%|██████████| 8000/8000 [01:51<00:00, 71.78it/s]] 
Aggregating statistics: 100%|██████████| 64/64 [02:32<00:00,  2.38s/it]] 
Collecting features:  59%|█████▉    | 4745/8000 [00:47<00:48, 67.22it/s]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-standard/trainer25/
Indexing function: find_dots_indices


Collecting features:  65%|██████▍   | 5179/8000 [00:52<00:33, 83.68it/s]]

[tensor(0.4891, device='cuda:2'), tensor(0.6558, device='cuda:2'), tensor(0.6964, device='cuda:2')]
[tensor(0.9359, device='cuda:2'), tensor(0.6558, device='cuda:2'), tensor(0.6986, device='cuda:2')]
[tensor(0.2643, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.7930, device='cuda:2')]
[tensor(0.6654, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.8229, device='cuda:2')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-standard/trainer26/
Indexing function: find_dots_indices


Aggregating statistics: 100%|██████████| 64/64 [01:50<00:00,  1.73s/it]]]
Collecting features: 100%|██████████| 8000/8000 [00:48<00:00, 163.55it/s]
Collecting features:  10%|█         | 823/8000 [00:10<00:22, 323.05it/s]]

[tensor(0.6067, device='cuda:0'), tensor(0.4867, device='cuda:0'), tensor(0.5474, device='cuda:0')]
[tensor(0.9114, device='cuda:0'), tensor(0.4867, device='cuda:0'), tensor(0.5502, device='cuda:0')]
[tensor(0.3438, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.8060, device='cuda:0')]
[tensor(0.6276, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.8216, device='cuda:0')]


Collecting features:  34%|███▍      | 2724/8000 [00:51<01:39, 53.19it/s]] 

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 1536 are alive.


Collecting features: 100%|██████████| 8000/8000 [00:33<00:00, 236.73it/s]
Collecting features: 100%|██████████| 8000/8000 [02:19<00:00, 57.26it/s] ]
Collecting features:  39%|███▉      | 3122/8000 [00:58<01:23, 58.35it/s]]

Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-standard/trainer27/
Indexing function: find_dots_indices


Collecting features:  42%|████▏     | 3336/8000 [01:03<01:29, 52.34it/s] 

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 1437 are alive.


Collecting features: 100%|██████████| 8000/8000 [00:21<00:00, 364.49it/s]
Aggregating statistics: 100%|██████████| 64/64 [02:55<00:00,  2.75s/it]] 
Aggregating statistics:   0%|          | 0/64 [00:00<?, ?it/s]113.08it/s]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 1430 are alive.


Collecting features:  97%|█████████▋| 7730/8000 [01:20<00:03, 79.07it/s] 

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 4206 are alive.


Aggregating statistics: 100%|██████████| 64/64 [02:25<00:00,  2.27s/it]] 
Collecting features: 100%|██████████| 8000/8000 [01:24<00:00, 95.09it/s]
Collecting features:  49%|████▉     | 3929/8000 [01:14<01:22, 49.15it/s]s]

[tensor(0.4707, device='cuda:1'), tensor(0.5483, device='cuda:1'), tensor(0.5917, device='cuda:1')]
[tensor(0.9039, device='cuda:1'), tensor(0.5483, device='cuda:1'), tensor(0.5944, device='cuda:1')]
[tensor(0.3346, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.6732, device='cuda:1')]
[tensor(0.6576, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.8177, device='cuda:1')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-standard/trainer28/
Indexing function: find_dots_indices


Collecting features:  52%|█████▏    | 4167/8000 [01:19<01:25, 44.80it/s]

[tensor(0.5739, device='cuda:3'), tensor(0.5610, device='cuda:3'), tensor(0.6123, device='cuda:3')]
[tensor(0.9270, device='cuda:3'), tensor(0.5610, device='cuda:3'), tensor(0.6154, device='cuda:3')]
[tensor(0.3372, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.8034, device='cuda:3')]
[tensor(0.6419, device='cuda:3'), tensor(0.8177, device='cuda:3'), tensor(0.8229, device='cuda:3')]
Autoencoder: /project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-11/group-2024-05-11_chess-standard/trainer29/
Indexing function: find_dots_indices


Collecting features:   3%|▎         | 279/8000 [00:06<02:13, 57.74it/s]]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 1546 are alive.


Collecting features:  58%|█████▊    | 4666/8000 [01:30<01:03, 52.10it/s]]

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 1470 are alive.


Collecting features: 100%|██████████| 8000/8000 [00:57<00:00, 139.20it/s]
Collecting features:  71%|███████▏  | 5710/8000 [01:51<00:47, 48.04it/s]]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 1607 are alive.


Aggregating statistics: 100%|██████████| 64/64 [02:46<00:00,  2.60s/it]] 
Aggregating statistics: 100%|██████████| 64/64 [01:02<00:00,  1.03it/s]]]
Aggregating statistics: 100%|██████████| 64/64 [01:15<00:00,  1.17s/it]]]
Collecting features: 100%|██████████| 8000/8000 [00:52<00:00, 152.79it/s]
Aggregating statistics: 100%|██████████| 64/64 [00:58<00:00,  1.09it/s]]] 
Aggregating statistics: 100%|██████████| 64/64 [01:14<00:00,  1.16s/it]]]
Collecting features: 100%|██████████| 8000/8000 [01:10<00:00, 113.29it/s]
Collecting features:  54%|█████▎    | 4287/8000 [01:28<01:17, 48.21it/s]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 1555 are alive.


Aggregating statistics: 100%|██████████| 64/64 [05:31<00:00,  5.18s/it]]
Collecting features: 100%|██████████| 8000/8000 [02:37<00:00, 50.94it/s]
Aggregating statistics:  58%|█████▊    | 37/64 [00:49<00:36,  1.35s/it]/s]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 1622 are alive.


Aggregating statistics: 100%|██████████| 64/64 [03:28<00:00,  3.26s/it]s] 
Aggregating statistics: 100%|██████████| 64/64 [01:17<00:00,  1.21s/it]s]
Aggregating statistics:  16%|█▌        | 10/64 [00:08<00:41,  1.30it/s]s]

[tensor(0.5486, device='cuda:2'), tensor(0.4670, device='cuda:2'), tensor(0.5166, device='cuda:2')]
[tensor(0.8793, device='cuda:2'), tensor(0.4670, device='cuda:2'), tensor(0.5205, device='cuda:2')]
[tensor(0.4414, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.7799, device='cuda:2')]
[tensor(0.7227, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.8216, device='cuda:2')]


Collecting features:  89%|████████▉ | 7129/8000 [01:54<00:09, 91.47it/s] 

do_standard_evals
do custom eval metrics
Out of 4096 features, on 256000 activations, 1449 are alive.


Collecting features: 100%|██████████| 8000/8000 [02:06<00:00, 63.24it/s] 
Aggregating statistics: 100%|██████████| 64/64 [01:23<00:00,  1.31s/it]
Aggregating statistics:   0%|          | 0/64 [00:00<?, ?it/s]1.50it/s]

do_standard_evals
do custom eval metrics
Out of 8192 features, on 256000 activations, 1470 are alive.


Aggregating statistics: 100%|██████████| 64/64 [00:44<00:00,  1.44it/s]
Aggregating statistics:  77%|███████▋  | 49/64 [02:15<00:43,  2.92s/it]

[tensor(0.3711, device='cuda:2'), tensor(0.2984, device='cuda:2'), tensor(0.3446, device='cuda:2')]
[tensor(0.5727, device='cuda:2'), tensor(0.2984, device='cuda:2'), tensor(0.3446, device='cuda:2')]
[tensor(0.3203, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.7539, device='cuda:2')]
[tensor(0.4583, device='cuda:2'), tensor(0.8177, device='cuda:2'), tensor(0.8034, device='cuda:2')]


Aggregating statistics: 100%|██████████| 64/64 [03:31<00:00,  3.30s/it]
Aggregating statistics: 100%|██████████| 64/64 [01:09<00:00,  1.09s/it]
Aggregating statistics: 100%|██████████| 64/64 [03:04<00:00,  2.88s/it]
Aggregating statistics: 100%|██████████| 64/64 [01:28<00:00,  1.39s/it]
Aggregating statistics:  66%|██████▌   | 42/64 [01:01<00:24,  1.11s/it]

[tensor(0.3879, device='cuda:1'), tensor(0.3306, device='cuda:1'), tensor(0.3596, device='cuda:1')]
[tensor(0.6094, device='cuda:1'), tensor(0.3306, device='cuda:1'), tensor(0.3596, device='cuda:1')]
[tensor(0.2878, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.7500, device='cuda:1')]
[tensor(0.4414, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.7982, device='cuda:1')]


Aggregating statistics: 100%|██████████| 64/64 [04:25<00:00,  4.14s/it]
Aggregating statistics:  88%|████████▊ | 56/64 [01:14<00:06,  1.27it/s]

[tensor(0.3883, device='cuda:1'), tensor(0.3574, device='cuda:1'), tensor(0.3786, device='cuda:1')]
[tensor(0.6873, device='cuda:1'), tensor(0.3574, device='cuda:1'), tensor(0.3791, device='cuda:1')]
[tensor(0.4792, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.6992, device='cuda:1')]
[tensor(0.6901, device='cuda:1'), tensor(0.8177, device='cuda:1'), tensor(0.8151, device='cuda:1')]


Aggregating statistics: 100%|██████████| 64/64 [01:19<00:00,  1.24s/it]


In [9]:
# Inspect the collected autoencoder paths (rendered as this cell's output).
all_autoencoder_paths

['/project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-09/chess-gated_trainer0/',
 '/project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-09/chess-gated_trainer1/',
 '/project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-09/chess-gated_trainer10/',
 '/project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-09/chess-gated_trainer11/',
 '/project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-09/chess-gated_trainer12/',
 '/project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-09/chess-gated_trainer13/',
 '/project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-09/chess-gated_trainer14/',
 '/project/pi_mccallum_umass_edu/rangell_umass_edu//chess-gpt-circuits/autoencoders/group-2024-05-09/chess-gated_trainer

In [9]:
# For every (autoencoder name, results) pair, keep only the metrics listed in
# `sweep_result_keys`, replacing any previous entry stored under that name.
for ae_name, results in results_list:
    sweep_results[ae_name] = {
        sweep_key: results[sweep_key] for sweep_key in sweep_result_keys
    }

In [11]:
# Build one row per autoencoder (dict keys become the index), sort the rows by
# the 'l0' column, and round numeric columns to 3 decimals.
df = (
    pd.DataFrame.from_dict(sweep_results, orient='index')
    .sort_values('l0')
    .round(3)
)

# Optional column subset, kept for reference:
#df[['frac_variance_explained', 'l0', 'frac_any_board_per_feature_act-nonzero_precision-0.9', 'frac_any_board_per_feature_act-best_precision-0.9', 'frac_any_feature_per_board_act-nonzero_precision-0.9', 'frac_any_feature_per_board_act-best_precision-0.9', 'frac_any_board_per_feature_act-nonzero_recall-0.01', 'frac_any_board_per_feature_act-best_recall-0.01', 'frac_any_feature_per_board_act-nonzero_recall-0.01', 'frac_any_feature_per_board_act-best_recall-0.01', 'frac_any_board_per_feature_act-nonzero_f1-0.01', 'frac_any_board_per_feature_act-best_f1-0.01', 'frac_any_feature_per_board_act-nonzero_f1-0.01', 'frac_any_feature_per_board_act-best_f1-0.01']]

# Persist the table first, then leave `df` as the final expression: Jupyter
# only renders the last expression of a cell, so a bare `df` placed before
# `to_csv` (which returns None when given a path) is never displayed.
df.to_csv("large_sae_sweep.csv")
df

[tensor(0.5990, device='cuda:0'), tensor(0.4802, device='cuda:0'), tensor(0.5356, device='cuda:0')]
[tensor(0.9046, device='cuda:0'), tensor(0.4802, device='cuda:0'), tensor(0.5379, device='cuda:0')]
[tensor(0.4310, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.8177, device='cuda:0')]
[tensor(0.6940, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.8229, device='cuda:0')]
[tensor(0.3521, device='cuda:0'), tensor(0.3570, device='cuda:0'), tensor(0.3876, device='cuda:0')]
[tensor(0.6068, device='cuda:0'), tensor(0.3570, device='cuda:0'), tensor(0.3883, device='cuda:0')]
[tensor(0.2630, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.7214, device='cuda:0')]
[tensor(0.4453, device='cuda:0'), tensor(0.8177, device='cuda:0'), tensor(0.8034, device='cuda:0')]
[tensor(0.3372, device='cuda:2'), tensor(0.3971, device='cuda:2'), tensor(0.4238, device='cuda:2')]
[tensor(0.6439, device='cuda:2'), tensor(0.3971, device='cuda:2'), tensor(0.4245, device='cuda:2')]
