In [1]:
import pickle
import pandas as pd
from typing import Callable
import torch

import circuits.eval_sae_as_classifier as eval_sae
import circuits.analysis as analysis
import circuits.test_board_reconstruction as test_board_reconstruction
import circuits.get_eval_results as get_eval_results

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# For multi-GPU evaluation
from collections import deque
from joblib import Parallel, delayed

from circuits.utils import to_device

N_GPUS = 1
RESOURCE_STACK = deque([f"cuda:{i}" for i in range(N_GPUS)])

In [3]:
def initialize_dataframe() -> pd.DataFrame:
    return pd.DataFrame(
        columns=[
            "autoencoder_group_path",
            "autoencoder_path",
            "reconstruction_file",
            "eval_results_n_inputs",
            "l0",
            "l1_loss",
            "l2_loss",
            "frac_alive",
            "frac_variance_explained",
            "cossim",
            "l2_ratio",
            "num_alive_features",
            "board_reconstruction_board_count",
            "best_idx",
            "zero_L0",
            "zero_f1_score",
            "best_L0",
            "best_f1_score",
            "zero_num_true_positive_squares",
            "best_num_true_positive_squares",
            "zero_num_false_positive_squares",
            "best_num_false_positive_squares",
            "zero_percent_active_classifiers",
            "best_percent_active_classifiers",
            "zero_classifiers_per_token",
            "best_classifiers_per_token",
            "zero_classified_per_token",
            "best_classified_per_token",
        ]
    )

def append_results(
    eval_results: dict,
    board_reconstruction_results: dict,
    custom_functions: list[Callable],
    df: pd.DataFrame,
    autoencoder_group_path: str,
    autoencoder_path: str,
    reconstruction_file: str,
) -> pd.DataFrame:
    
    print(eval_results)

    for custom_function in custom_functions:
        function_name = custom_function.__name__

        best_idx = board_reconstruction_results[custom_function.__name__]["f1_score"].argmax()

        new_row = {
            "autoencoder_group_path": autoencoder_group_path,
            "autoencoder_path": autoencoder_path,
            "reconstruction_file": reconstruction_file,
            "eval_results_n_inputs": eval_results["hyperparameters"]['n_inputs'],
            "l0": eval_results['eval_results']["l0"],
            "l1_loss": eval_results['eval_results']["l1_loss"],
            "l2_loss": eval_results['eval_results']["l2_loss"],
            "frac_alive": eval_results['eval_results']["frac_alive"],
            "frac_variance_explained": eval_results['eval_results']["frac_variance_explained"],
            "cossim": eval_results['eval_results']["cossim"],
            "l2_ratio": eval_results['eval_results']["l2_ratio"],
            "num_alive_features": board_reconstruction_results["alive_features"].shape[0],
            "board_reconstruction_board_count": board_reconstruction_results[function_name]["num_boards"],
            "best_idx": best_idx.item(),
            "zero_L0": board_reconstruction_results["active_per_token"][0].item(),
            "best_L0": board_reconstruction_results["active_per_token"][best_idx].item(),
            "zero_f1_score": board_reconstruction_results[function_name]["f1_score"][0].item(),
            "best_f1_score": board_reconstruction_results[function_name]["f1_score"][
                best_idx
            ].item(),
            "zero_num_true_positive_squares": board_reconstruction_results[function_name][
                "num_true_positive_squares"
            ][0].item(),
            "best_num_true_positive_squares": board_reconstruction_results[function_name][
                "num_true_positive_squares"
            ][best_idx].item(),
            "zero_num_false_positive_squares": board_reconstruction_results[function_name][
                "num_false_positive_squares"
            ][0].item(),
            "best_num_false_positive_squares": board_reconstruction_results[function_name][
                "num_false_positive_squares"
            ][best_idx].item(),
            "zero_percent_active_classifiers": (
                board_reconstruction_results[function_name]["classifiers_per_token"][0]
                / board_reconstruction_results["active_per_token"][0]
            ).item(),
            "best_percent_active_classifiers": (
                board_reconstruction_results[function_name]["classifiers_per_token"][best_idx]
                / board_reconstruction_results["active_per_token"][best_idx]
            ).item(),
            "zero_classifiers_per_token": board_reconstruction_results[function_name][
                "classifiers_per_token"
            ][0].item(),
            "best_classifiers_per_token": board_reconstruction_results[function_name][
                "classifiers_per_token"
            ][best_idx].item(),
            "zero_classified_per_token": board_reconstruction_results[function_name][
                "classified_per_token"
            ][0].item(),
            "best_classified_per_token": board_reconstruction_results[function_name][
                "classified_per_token"
            ][best_idx].item(),
        }

        new_row_df = pd.DataFrame([new_row])
        df = pd.concat([df, new_row_df], ignore_index=True)

    return df

Basically, just set `autoencoder_group_paths` and various hyperparameters and run it. If you already ran, for example, `eval_sae_as_classifier` and don't want to run it again, set `run_eval_sae` to False. Note that in this case, `eval_results_n_inputs` must match in order for it to load the file saved from the previous run.

By default, we `save_results`, which means each of the 4 functions saves a `.pkl` file. By default, we also aggregate and format some of the results into a csv `output_file`. If you already have results `.pkl` files and want a csv, you can set all `run_...` to False, and it will load the results and put them into a csv. 

In [4]:
import importlib
importlib.reload(eval_sae)
importlib.reload(analysis)
importlib.reload(test_board_reconstruction)
importlib.reload(get_eval_results)
import circuits.chess_utils as chess_utils
importlib.reload(chess_utils)

# NOTE: This script makes a major assumption here: That all autoencoders in a given group are trained on chess XOR Othello
# We do this so we don't have to reconstruct the dataset for each autoencoder in the group
# autoencoder_group_paths = ["../autoencoders/othello_layer5_ef4/"]
# autoencoder_group_paths = ["../autoencoders/chess_layer5/"]
autoencoder_group_paths = ["../autoencoders/group-2024-05-07/"]


eval_sae_n_inputs = 1000
batch_size = 100
#device = "cuda"
model_path = "../models/"
save_results = True

eval_results_n_inputs = 1000
board_reconstruction_n_inputs = 1000

analysis_high_threshold = 0.95
analysis_low_threshold = 0.1
analysis_significance_threshold = 10

run_eval_results = True # We don't check for this as eval_results are pretty quick to collect

# To skip any of the following steps, set the corresponding variable to False
# The results must have been saved previously
run_eval_sae = True
run_analysis = True
run_board_reconstruction = True

dataset_size = max(eval_sae_n_inputs, eval_results_n_inputs, board_reconstruction_n_inputs)

if dataset_size == eval_results_n_inputs:
    dataset_size *= 2

for autoencoder_group_path in autoencoder_group_paths:
    othello = eval_sae.check_if_autoencoder_is_othello(autoencoder_group_path)

    indexing_functions = eval_sae.get_recommended_indexing_functions(othello)
    indexing_function = indexing_functions[0]

    custom_functions = eval_sae.get_recommended_custom_functions(othello)

    model_name = eval_sae.get_model_name(othello)

    precompute = True

    print("Constructing evaluation dataset")
    device = RESOURCE_STACK.pop()
    data = eval_sae.construct_dataset(othello, custom_functions, dataset_size, device, models_path=model_path)
    RESOURCE_STACK.append(device)
    del device
    
    folders = eval_sae.get_nested_folders(autoencoder_group_path)

    def full_eval_pipeline(autoencoder_path):
        
        # Grab a GPU off the stack to use
        device = RESOURCE_STACK.pop()
        
        # For debugging
        # if "ef=4_lr=1e-03_l1=1e-01_layer=5" not in autoencoder_path:
        #     continue

        # If this is set, everything below should be reproducible
        # Then we can just save results from 1 run, make optimizations, and check that the results are the same
        # The determinism is only needed for getting activations from the activation buffer for finding alive features
        torch.manual_seed(0)
        eval_results = get_eval_results.get_evals(
            autoencoder_path,
            eval_results_n_inputs,
            device,
            model_path,
            model_name,
            to_device(data.copy(), device),
            othello=othello,
            save_results=save_results,
        )


        expected_aggregation_output_location = eval_sae.get_output_location(
                autoencoder_path, n_inputs=eval_sae_n_inputs, indexing_function=indexing_function
            )
        
        if run_eval_sae:
            print("Aggregating", autoencoder_path)
            aggregation_results = (
                eval_sae.aggregate_statistics(
                    custom_functions=custom_functions,
                    autoencoder_path=autoencoder_path,
                    n_inputs=eval_sae_n_inputs,
                    batch_size=batch_size,
                    device=device,
                    model_path=model_path,
                    model_name=model_name,
                    data=to_device(data.copy(), device),
                    indexing_function=indexing_function,
                    othello=othello,
                    save_results=save_results,
                    precomputed=precompute,
                )
            )
        else:
            with open(expected_aggregation_output_location, "rb") as f:
                aggregation_results = pickle.load(f)

        expected_feature_labels_output_location = expected_aggregation_output_location.replace(
            "results.pkl", "feature_labels.pkl"
        )
        if run_analysis:
            feature_labels = analysis.analyze_results_dict(
                aggregation_results,
                output_path=expected_feature_labels_output_location,
                device=device,
                high_threshold=analysis_high_threshold,
                low_threshold=analysis_low_threshold,
                significance_threshold=analysis_significance_threshold,
                verbose=False,
                print_results=False,
                save_results=save_results,
            )
        else:
            with open(expected_feature_labels_output_location, "rb") as f:
                feature_labels = pickle.load(f)

        expected_reconstruction_output_location = expected_aggregation_output_location.replace(
            "results.pkl", "reconstruction.pkl"
        )

        if run_board_reconstruction:
            print("Testing board reconstruction")
            board_reconstruction_results = test_board_reconstruction.test_board_reconstructions(
                    custom_functions=custom_functions,
                    autoencoder_path=autoencoder_path,
                    feature_labels=feature_labels,
                    output_file=expected_reconstruction_output_location,
                    n_inputs=board_reconstruction_n_inputs,
                    batch_size=batch_size,
                    device=device,
                    model_name=model_name,
                    data=to_device(data.copy(), device),
                    othello=othello,
                    print_results=False,
                    save_results=save_results,
            )
        else:
            with open(expected_reconstruction_output_location, "rb") as f:
                board_reconstruction_results = pickle.load(f)

        df = initialize_dataframe()
        df = append_results(
            eval_results,
            board_reconstruction_results,
            custom_functions,
            df,
            autoencoder_group_path,
            autoencoder_path,
            expected_reconstruction_output_location,
        )

        print("Finished", autoencoder_path)

        # Save the dataframe after each autoencoder so we don't lose data if the script crashes
        output_file = autoencoder_path + "/" + "results.csv"
        df.to_csv(output_file)
        
        # Put the GPU back on the stack after we're done
        RESOURCE_STACK.append(device)
        return df
    
    dfs = Parallel(n_jobs=N_GPUS, require="sharedmem")(delayed(full_eval_pipeline)(autoencoder_path) for autoencoder_path in folders)

Constructing evaluation dataset
board_to_piece_state
Element size: 1 bytes
Number of elements: 425984000
Memory usage: 406.25 MB
board_to_pin_state
Element size: 1 bytes
Number of elements: 512000
Memory usage: 0.48828125 MB




AggregatingAggregating ../autoencoders/group-2024-05-07/PAnnealTrainer-chess-alpha0.04466836154460907L_p^p/
 ../autoencoders/group-2024-05-07/GatedSAETrainer-chess-alpha1.0/
Aggregating ../autoencoders/group-2024-05-07/StandardTrainer-chess-alpha0.07943282276391983/
Aggregating ../autoencoders/group-2024-05-07/GatedSAETrainer-chess-alpha0.7079457640647888/
Aggregating ../autoencoders/group-2024-05-07/StandardTrainer-chess-alpha0.09440608322620392/






Collecting features:   0%|          | 0/1280 [00:00<?, ?it/s]

Aggregating ../autoencoders/group-2024-05-07/PAnnealTrainer-chess-alpha0.05011872947216034L_p^p/
Aggregating ../autoencoders/group-2024-05-07/StandardTrainer-chess-alpha0.11220184713602066/
Aggregating ../autoencoders/group-2024-05-07/PAnnealTrainer-chess-alpha0.03981072083115578L_p^p/



Collecting features:   0%|          | 0/1280 [00:00<?, ?it/s][A





Collecting features:   0%|          | 1/1280 [00:02<1:01:52,  2.90s/it]



Collecting features:   6%|▋         | 81/1280 [00:03<00:32, 37.41it/s] 






Collecting features:  13%|█▎        | 168/1280 [00:03<00:18, 61.65it/s]
Collecting features:   0%|          | 1/1280 [00:03<1:03:54,  3.00s/it][A
Collecting features:  14%|█▍        | 184/1280 [00:04<00:17, 63.13it/s][A
Collecting features:  15%|█▌        | 197/1280 [00:04<00:15, 68.25it/s][A



Collecting features:   0%|          | 0/1280 [00:00<?, ?it/s][A[A[A[A




Collecting features:   0%|          | 0/1280 [00:00<?, ?it/s][A[A[A[A[A
Collecting features:   4%|▍         | 52/1280 [00:03<00:35, 34.49it/s][A





Collecting features:   0%|          | 0/1280 [00:00<?, ?it/s][A[A[A[A[A[A






Collecting features:  22%|██▏       | 284/1280 [00:04<00:05, 170.12it/s]A[A[A[A
Collecting features:  32%|███▏      | 413/1280 [00:04<00:02, 342.08it/s][A
Collecting features:  42%|████▏     | 542/1280 [00:04<00:01, 512.33it/s][A
Collecting features:  52%|█████▎    | 672/1280 [00:04<00:00, 672.58it/s][A
Collecting features:  63%|██████▎   | 802/1280 [00:04<00:00, 813.06

Out of 16384 features, on 128000 activations, 3792 are alive.
Out of 16384 features, on 128000 activations, 8115 are alive.


Aggregating statistics:   0%|          | 0/10 [00:00<?, ?it/s]

Collecting features:   5%|▍         | 61/1280 [00:02<00:40, 29.76it/s][A[A
Aggregating statistics:   0%|          | 0/10 [00:00<?, ?it/s][A

Collecting features:  15%|█▍        | 191/1280 [00:02<00:09, 112.88it/s][A[A

Collecting features:  21%|██        | 265/1280 [00:03<00:10, 97.98it/s] [A[A


Collecting features:   0%|          | 1/1280 [00:03<1:10:24,  3.30s/it][A[A[A

Collecting features:  29%|██▉       | 371/1280 [00:03<00:05, 162.45it/s][A[A


Collecting features:   8%|▊         | 108/1280 [00:03<00:26, 44.64it/s][A[A[A

Collecting features:  37%|███▋      | 477/1280 [00:04<00:03, 240.89it/s][A[A


Aggregating statistics:  10%|█         | 1/10 [00:01<00:11,  1.32s/it]s][A[A[A

Collecting features:  47%|████▋     | 605/1280 [00:04<00:01, 356.07it/s][A[A

Collecting features:  59%|█████▉    | 753/1280 [00:04<00:01, 509.62it/s][A[A


Collecting features:  23%|██▎       | 298/1280 [00:04<00:09, 

Out of 16384 features, on 128000 activations, 4981 are alive.




Aggregating statistics:   0%|          | 0/10 [00:00<?, ?it/s][A[A


Collecting features:  65%|██████▍   | 826/1280 [00:05<00:01, 407.46it/s][A[A[A




Collecting features:  30%|██▉       | 378/1280 [00:04<00:02, 323.07it/s][A[A[A[A[A



Collecting features:  22%|██▏       | 286/1280 [00:04<00:04, 206.56it/s][A[A[A[A






Collecting features:  37%|███▋      | 477/1280 [00:04<00:02, 342.62it/s][A[A[A[A[A[A[A





Collecting features:  35%|███▍      | 446/1280 [00:04<00:02, 329.30it/s][A[A[A[A[A[A




Collecting features:  35%|███▍      | 445/1280 [00:04<00:02, 397.25it/s][A[A[A[A[A



Collecting features:  26%|██▌       | 335/1280 [00:04<00:03, 260.11it/s][A[A[A[A






Collecting features:  43%|████▎     | 545/1280 [00:04<00:01, 414.47it/s][A[A[A[A[A[A[A





Collecting features:  40%|████      | 516/1280 [00:04<00:01, 408.11it/s][A[A[A[A[A[A




Collecting features:  41%|████      | 525/1280 [00:04<00:01, 491.49it/s][A[A[A[A[A



Out of 16384 features, on 128000 activations, 7202 are alive.





Aggregating statistics:   0%|          | 0/10 [00:00<?, ?it/s][A[A[A






Collecting features:  73%|███████▎  | 934/1280 [00:05<00:00, 358.25it/s][A[A[A[A[A[A[A





Collecting features:  70%|███████   | 899/1280 [00:05<00:01, 347.51it/s][A[A[A[A[A[A




Collecting features:  70%|███████   | 901/1280 [00:05<00:01, 364.61it/s][A[A[A[A[A






Collecting features:  83%|████████▎ | 1066/1280 [00:05<00:00, 524.61it/s][A[A[A[A[A[A[A





Collecting features:  80%|███████▉  | 1021/1280 [00:05<00:00, 497.30it/s][A[A[A[A[A[A




Collecting features:  80%|███████▉  | 1018/1280 [00:05<00:00, 508.75it/s][A[A[A[A[A






Collecting features:  93%|█████████▎| 1186/1280 [00:05<00:00, 657.68it/s][A[A[A[A[A[A[A





Collecting features:  89%|████████▉ | 1142/1280 [00:05<00:00, 637.47it/s][A[A[A[A[A[A




Collecting features: 100%|██████████| 1280/1280 [00:05<00:00, 239.31it/s][A[A[A[A[A






Collecting features:  97%|█████████▋| 1237/1280 [

Out of 16384 features, on 128000 activations, 3885 are alive.









Collecting features: 100%|██████████| 1280/1280 [00:05<00:00, 235.52it/s]A[A[A[A





Collecting features: 100%|██████████| 1280/1280 [00:05<00:00, 231.97it/s][A[A[A[A[A




Collecting features:  78%|███████▊  | 994/1280 [00:05<00:00, 461.31it/s][A[A[A[A

Out of 16384 features, on 128000 activations, 9180 are alive.







Aggregating statistics:   0%|          | 0/10 [00:00<?, ?it/s][A[A[A[A[A



Collecting features:  86%|████████▌ | 1096/1280 [00:05<00:00, 567.90it/s][A[A[A[A

Aggregating statistics:  10%|█         | 1/10 [00:01<00:14,  1.57s/it][A[A



Collecting features: 100%|██████████| 1280/1280 [00:05<00:00, 221.20it/s][A[A[A[A


Out of 16384 features, on 128000 activations, 5556 are alive.






Aggregating statistics:  40%|████      | 4/10 [00:04<00:07,  1.19s/it]A[A

Out of 16384 features, on 128000 activations, 2983 are alive.








Aggregating statistics:   0%|          | 0/10 [00:00<?, ?it/s][A[A[A[A[A[A

Aggregating statistics:  20%|██        | 2/10 [00:03<00:12,  1.53s/it][A[A
Aggregating statistics:  50%|█████     | 5/10 [00:05<00:05,  1.18s/it][A






Aggregating statistics:  10%|█         | 1/10 [00:01<00:17,  1.94s/it][A[A[A[A[A[A[A


Aggregating statistics:  10%|█         | 1/10 [00:02<00:21,  2.34s/it][A[A[A



Aggregating statistics:  10%|█         | 1/10 [00:02<00:18,  2.04s/it][A[A[A[A





Aggregating statistics:  60%|██████    | 6/10 [00:07<00:04,  1.17s/it][A[A[A[A[A[A






Aggregating statistics:  20%|██        | 2/10 [00:03<00:11,  1.50s/it][A[A[A[A[A[A[A

Aggregating statistics:  30%|███       | 3/10 [00:04<00:10,  1.52s/it][A[A




Aggregating statistics:  10%|█         | 1/10 [00:03<00:30,  3.34s/it][A[A[A[A[A





Aggregating statistics:  20%|██        | 2/10 [00:01<00:07,  1.02it/s][A[A[A[A[A[A



Aggregating statistics:  70%|███████  

Nonzero classifiers per feature per threshold: tensor([1238, 1202, 1173, 1158, 1116, 1065,  965,  812,  545,  184,    4],
       device='cuda:0')
Total classified squares per feature per threshold: tensor([14950, 14624, 14615, 14863, 15104, 15620, 14917, 13942, 11593,  6185,
          256], device='cuda:0')
Out of 3792 features, 1238 were classifiers.
The following are counts of squares classified per classifier per feature:
Min count: 1, average count: 12.075929641723633, max count: 64


Aggregating statistics:  30%|███       | 3/10 [00:08<00:20,  2.92s/it][A[A[A[A[A






Aggregating statistics:  70%|███████   | 7/10 [00:09<00:03,  1.24s/it][A[A[A[A[A[A[A





Aggregating statistics:  80%|████████  | 8/10 [00:07<00:01,  1.08it/s][A[A[A[A[A[A
Aggregating statistics:  50%|█████     | 5/10 [00:13<00:12,  2.52s/it][A



Aggregating statistics:  50%|█████     | 5/10 [00:08<00:08,  1.72s/it][A[A[A[A

Aggregating statistics:  70%|███████   | 7/10 [00:10<00:04,  1.50s/it][A[A

Testing board reconstruction








Aggregating statistics:  90%|█████████ | 9/10 [00:08<00:00,  1.10it/s][A[A[A[A[A[A










Aggregating statistics:   0%|          | 0/10 [00:00<?, ?it/s]


Aggregating statistics:  50%|█████     | 5/10 [00:10<00:10,  2.18s/it][A[A[A

Aggregating statistics:  80%|████████  | 8/10 [00:12<00:03,  1.50s/it][A[A



Aggregating statistics:  60%|██████    | 6/10 [00:10<00:06,  1.70s/it][A[A[A[A





Aggregating statistics: 100%|██████████| 10/10 [00:09<00:00,  1.08it/s][A[A[A[A[A[A
Aggregating statistics:  10%|█         | 1/10 [00:01<00:14,  1.61s/it]






Aggregating statistics:  90%|█████████ | 9/10 [00:12<00:01,  1.42s/it][A[A[A[A[A[A[A
Aggregating statistics:  60%|██████    | 6/10 [00:16<00:10,  2.69s/it][A




Aggregating statistics:  40%|████      | 4/10 [00:12<00:17,  2.98s/it][A[A[A[A[A

Aggregating statistics:  90%|█████████ | 9/10 [00:13<00:01,  1.52s/it][A[A



Aggregating statistics:  70%|███████   | 7/10 [00:12<00:05,  1.71s/it][A[A[A[A






Aggregating statistics: 100%|██████████| 10/10 [00:12<00:00,  1.26s/it][A[A[A[A[A[A[A

Nonzero classifiers per feature per threshold: tensor([920, 894, 906, 884, 858, 825, 771, 655, 477, 130,   0],
       device='cuda:5')
Total classified squares per feature per threshold: tensor([ 9769,  9030,  9839,  9886, 10560, 11459, 12014, 11754, 10485,  4697,
            0], device='cuda:5')
Out of 2983 features, 920 were classifiers.
The following are counts of squares classified per classifier per feature:
Min count: 1, average count: 10.618477821350098, max count: 64






Aggregating statistics:  60%|██████    | 6/10 [00:13<00:08,  2.18s/it][A[A[A

Testing board reconstruction


Aggregating statistics:  20%|██        | 2/10 [00:02<00:09,  1.21s/it]

Aggregating statistics: 100%|██████████| 10/10 [00:15<00:00,  1.51s/it][A[A

Nonzero classifiers per feature per threshold: tensor([1208, 1165, 1194, 1206, 1155, 1094,  998,  832,  542,  165,    0],
       device='cuda:6')
Total classified squares per feature per threshold: tensor([12688, 11671, 12457, 13531, 14121, 14691, 15247, 14249, 10976,  5290,
            0], device='cuda:6')
Out of 3885 features, 1208 were classifiers.
The following are counts of squares classified per classifier per feature:
Min count: 1, average count: 10.503311157226562, max count: 64




Aggregating statistics:  70%|███████   | 7/10 [00:18<00:07,  2.43s/it][A



Aggregating statistics:  80%|████████  | 8/10 [00:13<00:03,  1.70s/it][A[A[A[A

Testing board reconstruction







Aggregating statistics:  50%|█████     | 5/10 [00:14<00:13,  2.76s/it][A[A[A[A[A

Aggregating statistics:  30%|███       | 3/10 [00:04<00:10,  1.48s/it]


Aggregating statistics:  70%|███████   | 7/10 [00:15<00:06,  2.18s/it][A[A[A

Nonzero classifiers per feature per threshold: tensor([1618, 1503, 1536, 1543, 1482, 1378, 1256, 1025,  630,  174,    0],
       device='cuda:7')
Total classified squares per feature per threshold: tensor([17521, 15061, 15744, 16922, 17463, 18044, 18047, 16541, 11913,  5159,
            0], device='cuda:7')
Out of 4981 features, 1618 were classifiers.
The following are counts of squares classified per classifier per feature:
Min count: 1, average count: 10.828801155090332, max count: 64
Testing board reconstruction








Aggregating statistics:  10%|█         | 1/10 [00:01<00:12,  1.42s/it][A[A









Aggregating statistics:   0%|          | 0/10 [00:00<?, ?it/s][A[A[A[A[A[A
Aggregating statistics:  80%|████████  | 8/10 [00:20<00:04,  2.43s/it][A

Aggregating statistics:  40%|████      | 4/10 [00:06<00:10,  1.74s/it][A[A


Aggregating statistics:  80%|████████  | 8/10 [00:17<00:04,  2.17s/it][A[A[A




Aggregating statistics:  60%|██████    | 6/10 [00:17<00:11,  2.75s/it][A[A[A[A[A







Aggregating statistics: 100%|██████████| 10/10 [00:17<00:00,  1.74s/it][A[A[A[A






Aggregating statistics:  10%|█         | 1/10 [00:01<00:15,  1.76s/it][A[A[A[A[A[A



Aggregating statistics:  50%|█████     | 5/10 [00:07<00:07,  1.49s/it]A[A

Aggregating statistics:  30%|███       | 3/10 [00:03<00:08,  1.23s/it][A[A





Aggregating statistics:  20%|██        | 2/10 [00:02<00:10,  1.29s/it][A[A[A[A[A[A
Aggregating statistics:  90%|█████████ | 9/10 [00:22<00:02,  2.43s/it][A

Nonzero classifiers per feature per threshold: tensor([1684, 1642, 1630, 1578, 1540, 1416, 1231,  967,  609,  184,    4],
       device='cuda:1')
Total classified squares per feature per threshold: tensor([19489, 19326, 19774, 19309, 19918, 19529, 18722, 16326, 12351,  5740,
          256], device='cuda:1')
Out of 5556 features, 1684 were classifiers.
The following are counts of squares classified per classifier per feature:
Min count: 1, average count: 11.573040962219238, max count: 64





Aggregating statistics:  60%|██████    | 6/10 [00:09<00:06,  1.58s/it][A[A[A

Testing board reconstruction




Aggregating statistics:  40%|████      | 4/10 [00:05<00:08,  1.36s/it][A[A




Aggregating statistics:  70%|███████   | 7/10 [00:19<00:08,  2.75s/it][A[A[A[A[A



Aggregating statistics:  10%|█         | 1/10 [00:02<00:19,  2.13s/it][A[A[A[A
















Aggregating statistics:   0%|          | 0/10 [00:00<?, ?it/s][A[A[A[A[A[A[A

Aggregating statistics:  50%|█████     | 5/10 [00:06<00:06,  1.34s/it][A[A



Aggregating statistics:  20%|██        | 2/10 [00:03<00:12,  1.54s/it][A[A[A[A
Aggregating statistics: 100%|██████████| 10/10 [00:25<00:00,  2.54s/it][A
Aggregating statistics:  70%|███████   | 7/10 [00:10<00:04,  1.58s/it]


Aggregating statistics: 100%|██████████| 10/10 [00:21<00:00,  2.18s/it][A[A[A






Aggregating statistics:  40%|████      | 4/10 [00:05<00:08,  1.49s/it][A[A[A[A[A[A




Aggregating statistics:  80%|████████  | 8/10 [00:22<00:05,  2.75s/it][A[A[A[A[A

Aggregating statistics:  60%|██████    | 6/10 [00:08<00:05,  1.43s/it][A[A






Aggregating statistics:  10%|█         | 1/10 [00:02<00:19,  2.21s/it][A[A[A[A[A[A[A

Nonzero classifiers per feature per threshold: tensor([2227, 2188, 2091, 1937, 1782, 1553, 1301,  981,  571,  174,    1],
       device='cuda:3')
Total classified squares per feature per threshold: tensor([29929, 30141, 29693, 28322, 27043, 24070, 21194, 17137, 11470,  5073,
           11], device='cuda:3')
Out of 8115 features, 2227 were classifiers.
The following are counts of squares classified per classifier per feature:
Min count: 1, average count: 13.439156532287598, max count: 64
Nonzero classifiers per feature per threshold: tensor([1990, 1955, 1883, 1760, 1610, 1434, 1186,  927,  555,  174,    1],
       device='cuda:2')
Total classified squares per feature per threshold: tensor([27516, 28066, 28080, 27028, 25281, 23091, 19311, 16378, 11479,  5218,
            9], device='cuda:2')
Out of 7202 features, 1990 were classifiers.
The following are counts of squares classified per classifier per feature:
Min count: 1, average count: 13.82713508605957, max count: 64


Aggregating statistics:  80%|████████  | 8/10 [00:13<00:03,  1.88s/it]





Aggregating statistics:  50%|█████     | 5/10 [00:07<00:08,  1.63s/it][A[A[A[A[A[A



Aggregating statistics:  30%|███       | 3/10 [00:05<00:14,  2.06s/it][A[A[A[A

Testing board reconstruction
Testing board reconstruction












Aggregating statistics:   0%|          | 0/10 [00:00<?, ?it/s][A[A[A




Aggregating statistics:  90%|█████████ | 9/10 [00:25<00:02,  2.75s/it][A[A[A[A[A






Aggregating statistics:  90%|█████████ | 9/10 [00:15<00:01,  1.82s/it][A[A[A[A[A[A[A





Aggregating statistics:  60%|██████    | 6/10 [00:09<00:06,  1.66s/it][A[A[A[A[A[A

Aggregating statistics:  80%|████████  | 8/10 [00:11<00:02,  1.49s/it][A[A



Aggregating statistics:  40%|████      | 4/10 [00:08<00:12,  2.07s/it][A[A[A[A

Aggregating statistics: 100%|██████████| 10/10 [00:16<00:00,  1.69s/it][A[A



Aggregating statistics:  10%|█         | 1/10 [00:02<00:25,  2.84s/it][A[A[A





  df = pd.concat([df, new_row_df], ignore_index=True)

Aggregating statistics:  10%|█         | 1/10 [00:03<00:28,  3.22s/it][A






Aggregating statistics:  30%|███       | 3/10 [00:07<00:17,  2.55s/it][A[A[A[A[A[A[A

{'hyperparameters': {'n_inputs': 1000, 'context_length': 256}, 'eval_results': {'l2_loss': 1.869858980178833, 'l1_loss': 23.012340545654297, 'l0': 15.707000732421875, 'frac_alive': 0.0009586792439222336, 'frac_variance_explained': 0.9729184508323669, 'cossim': 0.968239426612854, 'l2_ratio': 0.9659726023674011}}
Finished ../autoencoders/group-2024-05-07/GatedSAETrainer-chess-alpha1.0/






Aggregating statistics:  50%|█████     | 5/10 [00:10<00:10,  2.20s/it][A[A[A[A




Aggregating statistics: 100%|██████████| 10/10 [00:28<00:00,  2.83s/it][A[A[A[A[A






Aggregating statistics: 100%|██████████| 10/10 [00:14<00:00,  1.49s/it][A[A
  df = pd.concat([df, new_row_df], ignore_index=True)






Aggregating statistics:  80%|████████  | 8/10 [00:14<00:03,  1.99s/it][A[A[A[A[A[A

{'hyperparameters': {'n_inputs': 1000, 'context_length': 256}, 'eval_results': {'l2_loss': 2.0228612422943115, 'l1_loss': 14.537116050720215, 'l0': 27.304000854492188, 'frac_alive': 0.0016665039584040642, 'frac_variance_explained': 0.9657015800476074, 'cossim': 0.9598084092140198, 'l2_ratio': 0.906217098236084}}
Finished ../autoencoders/group-2024-05-07/StandardTrainer-chess-alpha0.11220184713602066/
Aggregating ../autoencoders/group-2024-05-07/GatedSAETrainer-chess-alpha1.4125375747680664/
Nonzero classifiers per feature per threshold: tensor([2429, 2423, 2323, 2162, 1943, 1694, 1377, 1018,  567,  166,    0],
       device='cuda:4')
Total classified squares per feature per threshold: tensor([32830, 33863, 33453, 31616, 29298, 26598, 22602, 17622, 11377,  4809,
            0], device='cuda:4')
Out of 9180 features, 2429 were classifiers.
The following are counts of squares classified per classifier per feature:
Min count: 1, average count: 13.515850067138672, max count: 64





Aggregating statistics:  20%|██        | 2/10 [00:05<00:23,  2.95s/it][A[A[A
Aggregating statistics:  20%|██        | 2/10 [00:06<00:24,  3.05s/it][A



Aggregating statistics:  60%|██████    | 6/10 [00:13<00:09,  2.41s/it][A[A[A[A
















Aggregating statistics:  90%|█████████ | 9/10 [00:15<00:01,  1.91s/it][A[A[A[A[A[A

Testing board reconstruction






Aggregating statistics:   0%|          | 0/10 [00:00<?, ?it/s]

Collecting features:   0%|          | 0/1280 [00:00<?, ?it/s][A[A



Aggregating statistics:  70%|███████   | 7/10 [00:15<00:07,  2.50s/it][A[A[A[A





Aggregating statistics: 100%|██████████| 10/10 [00:17<00:00,  1.78s/it][A[A[A[A[A[A

Aggregating ../autoencoders/group-2024-05-07/GatedAnnealTrainer-chess-alpha0.3981071710586548L_p^p/



  df = pd.concat([df, new_row_df], ignore_index=True)







Aggregating statistics:  50%|█████     | 5/10 [00:13<00:15,  3.03s/it][A[A[A[A[A[A[A


Aggregating statistics:  30%|███       | 3/10 [00:09<00:23,  3.31s/it][A[A[A

{'hyperparameters': {'n_inputs': 1000, 'context_length': 256}, 'eval_results': {'l2_loss': 1.8137911558151245, 'l1_loss': 15.704079627990723, 'l0': 37.21500015258789, 'frac_alive': 0.0022714233491569757, 'frac_variance_explained': 0.9723506569862366, 'cossim': 0.9677205681800842, 'l2_ratio': 0.9240405559539795}}
Finished ../autoencoders/group-2024-05-07/StandardTrainer-chess-alpha0.09440608322620392/



Aggregating statistics:  30%|███       | 3/10 [00:09<00:23,  3.37s/it][A





Collecting features:   0%|          | 1/1280 [00:02<59:18,  2.78s/it][A[A



Aggregating statistics:  80%|████████  | 8/10 [00:18<00:04,  2.39s/it][A[A[A[A

Collecting features:   2%|▏         | 22/1280 [00:02<01:58, 10.58it/s][A[A




Collecting features:   0%|          | 0/1280 [00:00<?, ?it/s][A[A[A[A[A

Collecting features:  11%|█         | 135/1280 [00:02<00:13, 83.45it/s][A[A

Collecting features:  21%|██        | 269/1280 [00:03<00:05, 190.26it/s][A[A

Collecting features:  31%|███▏      | 402/1280 [00:03<00:02, 313.69it/s][A[A

Collecting features:  42%|████▏     | 535/1280 [00:03<00:01, 449.87it/s][A[A






Aggregating statistics:  10%|█         | 1/10 [00:03<00:33,  3.69s/it][A[A[A[A[A[A[A

Collecting features:  51%|█████     | 649/1280 [00:03<00:01, 484.14it/s][A[A

Collecting features:  61%|██████    | 779/1280 [00:03<00:00, 618.96it/s][A[A

Aggregating ../autoencoders/group-2024-05-07/GatedAnnealTrainer-chess-alpha0.5623413324356079L_p^p/





Aggregating statistics:  40%|████      | 4/10 [00:11<00:17,  2.89s/it][A[A[A



Collecting features:  75%|███████▌  | 965/1280 [00:04<00:00, 386.20it/s][A[A





Collecting features:  83%|████████▎ | 1063/1280 [00:04<00:00, 444.84it/s][A[A





Collecting features:   0%|          | 0/1280 [00:00<?, ?it/s][A[A[A[A[A[A

Collecting features:  89%|████████▉ | 1138/1280 [00:04<00:00, 436.63it/s][A[A
Aggregating statistics:  40%|████      | 4/10 [00:12<00:19,  3.23s/it][A

Collecting features: 100%|██████████| 1280/1280 [00:04<00:00, 260.82it/s][A[A


Out of 16384 features, on 128000 activations, 2505 are alive.




Aggregating statistics:   0%|          | 0/10 [00:00<?, ?it/s][A[A



Aggregating statistics:  90%|█████████ | 9/10 [00:20<00:02,  2.32s/it][A[A[A[A






Aggregating statistics:  70%|███████   | 7/10 [00:17<00:07,  2.46s/it][A[A[A[A[A[A[A




Collecting features:   0%|          | 1/1280 [00:02<59:39,  2.80s/it][A[A[A[A[A

Aggregating statistics:  20%|██        | 2/10 [00:00<00:03,  2.58it/s][A[A




Aggregating statistics:  20%|██        | 2/10 [00:06<00:24,  3.02s/it]][A[A[A[A[A




Collecting features:  15%|█▍        | 191/1280 [00:03<00:11, 95.28it/s][A[A[A[A[A




Collecting features:  25%|██▌       | 324/1280 [00:03<00:04, 195.34it/s][A[A[A[A[A




Collecting features:  35%|███▌      | 453/1280 [00:03<00:02, 308.90it/s][A[A[A[A[A




Collecting features:  46%|████▌     | 586/1280 [00:03<00:01, 440.49it/s][A[A[A[A[A




Collecting features:  55%|█████▌    | 705/1280 [00:03<00:01, 556.50it/s][A[A[A[A[A

Aggregating statistics:  30

{'hyperparameters': {'n_inputs': 1000, 'context_length': 256}, 'eval_results': {'l2_loss': 1.627392292022705, 'l1_loss': 16.864665985107422, 'l0': 49.8280029296875, 'frac_alive': 0.0030412599444389343, 'frac_variance_explained': 0.9777706861495972, 'cossim': 0.9741085171699524, 'l2_ratio': 0.9376888871192932}}
Finished ../autoencoders/group-2024-05-07/StandardTrainer-chess-alpha0.07943282276391983/









Aggregating statistics:  80%|████████  | 8/10 [00:20<00:04,  2.44s/it][A[A[A[A[A[A[A
Aggregating statistics:  50%|█████     | 5/10 [00:16<00:16,  3.23s/it][A




Collecting features:  77%|███████▋  | 985/1280 [00:05<00:01, 213.76it/s][A[A[A[A[A




Collecting features:  87%|████████▋ | 1117/1280 [00:05<00:00, 311.37it/s][A[A[A[A[A





Collecting features:   0%|          | 1/1280 [00:03<1:17:28,  3.63s/it][A[A[A[A[A[A




Collecting features: 100%|██████████| 1280/1280 [00:05<00:00, 234.08it/s][A[A[A[A[A






Collecting features:   8%|▊         | 98/1280 [00:03<00:31, 36.95it/s] [A[A[A[A[A[A

Out of 16384 features, on 128000 activations, 7152 are alive.






Aggregating statistics:   0%|          | 0/10 [00:00<?, ?it/s][A[A[A[A





Collecting features:  16%|█▋        | 209/1280 [00:03<00:11, 91.95it/s][A[A[A[A[A[A





Collecting features:  23%|██▎       | 291/1280 [00:04<00:08, 121.45it/s][A[A[A[A[A[A

Aggregating statistics:  50%|█████     | 5/10 [00:03<00:04,  1.11it/s][A[A

Aggregating statistics:  30%|███       | 3/10 [00:10<00:24,  3.53s/it][A[A



Aggregating statistics:  10%|█         | 1/10 [00:01<00:14,  1.61s/it][A[A[A[A


Aggregating statistics:  60%|██████    | 6/10 [00:17<00:12,  3.03s/it][A[A[A





Collecting features:  27%|██▋       | 350/1280 [00:05<00:11, 83.75it/s] [A[A[A[A[A[A





Collecting features:  37%|███▋      | 476/1280 [00:05<00:05, 148.83it/s][A[A[A[A[A[A





Collecting features:  48%|████▊     | 608/1280 [00:05<00:02, 234.50it/s][A[A[A[A[A[A





Collecting features:  58%|█████▊    | 740/1280 [00:05<00:01, 337.07it/s][A[A[A[A[A[A





Collecting featur









Collecting features:  93%|█████████▎| 1192/1280 [00:06<00:00, 512.17it/s][A[A[A[A[A[A

Collecting features: 100%|██████████| 1280/1280 [00:06<00:00, 194.49it/s][A


Out of 16384 features, on 128000 activations, 5491 are alive.







Aggregating statistics:   0%|          | 0/10 [00:00<?, ?it/s][A[A[A[A[A
Aggregating statistics:  60%|██████    | 6/10 [00:19<00:13,  3.41s/it][A

Aggregating statistics: 100%|██████████| 10/10 [00:07<00:00,  1.42it/s][A[A




Aggregating statistics:  20%|██        | 2/10 [00:03<00:14,  1.81s/it][A[A[A[A




Aggregating statistics:  10%|█         | 1/10 [00:00<00:07,  1.15it/s][A[A[A[A[A

Aggregating ../autoencoders/group-2024-05-07/GatedAnnealTrainer-chess-alpha0.7943282127380371L_p^p/





Aggregating statistics:  70%|███████   | 7/10 [00:20<00:08,  2.85s/it][A[A[A






Aggregating statistics: 100%|██████████| 10/10 [00:25<00:00,  2.54s/it][A[A[A[A[A[A[A
Aggregating statistics:  40%|████      | 4/10 [00:13<00:19,  3.32s/it]

{'hyperparameters': {'n_inputs': 1000, 'context_length': 256}, 'eval_results': {'l2_loss': 1.6255600452423096, 'l1_loss': 24.73455238342285, 'l0': 22.929000854492188, 'frac_alive': 0.0013994751498103142, 'frac_variance_explained': 0.9795665740966797, 'cossim': 0.9763676524162292, 'l2_ratio': 0.9762305021286011}}
Finished ../autoencoders/group-2024-05-07/GatedSAETrainer-chess-alpha0.7079457640647888/
Nonzero classifiers per feature per threshold: tensor([840, 828, 816, 805, 777, 752, 709, 611, 437, 178,   2],
       device='cuda:0')
Total classified squares per feature per threshold: tensor([ 9187,  9057,  9686, 10052, 10163, 10872, 11200, 10377,  9168,  5623,
          110], device='cuda:0')
Out of 2505 features, 840 were classifiers.
The following are counts of squares classified per classifier per feature:
Min count: 1, average count: 10.936904907226562, max count: 64


  df = pd.concat([df, new_row_df], ignore_index=True)





Aggregating statistics:  20%|██        | 2/10 [00:02<00:11,  1.49s/it][A[A[A[A[A



Aggregating statistics:  30%|███       | 3/10 [00:05<00:14,  2.01s/it][A[A[A[A

Testing board reconstruction








Collecting features:   0%|          | 0/1280 [00:00<?, ?it/s][A[A






Aggregating statistics:   0%|          | 0/10 [00:00<?, ?it/s][A[A[A[A[A[A





Aggregating statistics:  10%|█         | 1/10 [00:00<00:02,  3.08it/s][A[A[A[A[A[A




Aggregating statistics:  30%|███       | 3/10 [00:04<00:10,  1.45s/it][A[A[A[A[A





Aggregating statistics:  20%|██        | 2/10 [00:01<00:06,  1.27it/s][A[A[A[A[A[A


Aggregating statistics:  80%|████████  | 8/10 [00:24<00:06,  3.14s/it][A[A[A



Aggregating statistics:  50%|█████     | 5/10 [00:17<00:17,  3.55s/it][A[A[A[A




Aggregating statistics:  40%|████      | 4/10 [00:05<00:09,  1.54s/it][A[A[A[A[A

Collecting features:   0%|          | 1/1280 [00:02<1:03:41,  2.99s/it][A[A
Aggregating statistics:  80%|████████  | 8/10 [00:25<00:06,  3.18s/it][A





Aggregating statistics:  30%|███       | 3/10 [00:02<00:07,  1.11s/it][A[A[A[A[A[A

Collecting features:   9%|▉         | 117/1280 [00:03<00:21

Out of 16384 features, on 128000 activations, 3961 are alive.




Aggregating statistics:   0%|          | 0/10 [00:00<?, ?it/s][A[A


Aggregating statistics:  90%|█████████ | 9/10 [00:27<00:03,  3.09s/it][A[A[A

Aggregating statistics:  10%|█         | 1/10 [00:00<00:03,  2.36it/s][A[A





Aggregating statistics:  60%|██████    | 6/10 [00:20<00:13,  3.48s/it][A[A[A[A[A[A




Aggregating statistics:  60%|██████    | 6/10 [00:09<00:06,  1.61s/it][A[A[A[A[A



Aggregating statistics:  60%|██████    | 6/10 [00:12<00:08,  2.11s/it][A[A[A[A
Aggregating statistics:  90%|█████████ | 9/10 [00:28<00:03,  3.14s/it][A

Aggregating statistics:  20%|██        | 2/10 [00:01<00:07,  1.13it/s][A[A





Aggregating statistics:  60%|██████    | 6/10 [00:06<00:04,  1.14s/it][A[A[A[A[A[A


Aggregating statistics: 100%|██████████| 10/10 [00:29<00:00,  2.99s/it][A[A[A


  df = pd.concat([df, new_row_df], ignore_index=True)





Aggregating statistics:  70%|███████   | 7/10 [00:11<00:05,  1.84s/it][A[A[A[A[A



Aggregating statisti

{'hyperparameters': {'n_inputs': 1000, 'context_length': 256}, 'eval_results': {'l2_loss': 1.73408043384552, 'l1_loss': 18.087629318237305, 'l0': 12.952000617980957, 'frac_alive': 0.0007905273814685643, 'frac_variance_explained': 0.9730545878410339, 'cossim': 0.9698538184165955, 'l2_ratio': 0.9646522402763367}}
Finished ../autoencoders/group-2024-05-07/PAnnealTrainer-chess-alpha0.05011872947216034L_p^p/








Aggregating statistics:  70%|███████   | 7/10 [00:08<00:04,  1.40s/it][A[A[A[A[A[A

Aggregating statistics:  70%|███████   | 7/10 [00:24<00:10,  3.44s/it][A[A
Aggregating statistics: 100%|██████████| 10/10 [00:31<00:00,  3.19s/it][A





  df = pd.concat([df, new_row_df], ignore_index=True)
Aggregating statistics:  80%|████████  | 8/10 [00:13<00:03,  1.80s/it][A[A[A[A[A

Aggregating statistics:  50%|█████     | 5/10 [00:05<00:05,  1.15s/it][A[A

{'hyperparameters': {'n_inputs': 1000, 'context_length': 256}, 'eval_results': {'l2_loss': 1.699897050857544, 'l1_loss': 19.32272720336914, 'l0': 16.142000198364258, 'frac_alive': 0.0009852295042946935, 'frac_variance_explained': 0.9766021370887756, 'cossim': 0.9721130728721619, 'l2_ratio': 0.9680718779563904}}
Finished ../autoencoders/group-2024-05-07/PAnnealTrainer-chess-alpha0.04466836154460907L_p^p/






Aggregating statistics:  80%|████████  | 8/10 [00:16<00:04,  2.13s/it][A[A[A[A





Aggregating statistics:  80%|████████  | 8/10 [00:10<00:03,  1.55s/it][A[A[A[A[A[A




Aggregating statistics:  90%|█████████ | 9/10 [00:14<00:01,  1.54s/it][A[A[A[A[A

Aggregating statistics:  60%|██████    | 6/10 [00:06<00:04,  1.11s/it][A[A





Aggregating statistics:  90%|█████████ | 9/10 [00:11<00:01,  1.41s/it][A[A[A[A[A[A

Aggregating statistics:  70%|███████   | 7/10 [00:07<00:03,  1.14s/it][A[A



Aggregating statistics:  90%|█████████ | 9/10 [00:18<00:02,  2.14s/it][A[A[A[A




Aggregating statistics: 100%|██████████| 10/10 [00:15<00:00,  1.59s/it][A[A[A[A[A






Aggregating statistics: 100%|██████████| 10/10 [00:12<00:00,  1.24s/it][A[A[A[A[A[A
  df = pd.concat([df, new_row_df], ignore_index=True)


Aggregating statistics:  80%|████████  | 8/10 [00:08<00:02,  1.19s/it][A[A

{'hyperparameters': {'n_inputs': 1000, 'context_length': 256}, 'eval_results': {'l2_loss': 2.1417288780212402, 'l1_loss': 20.995946884155273, 'l0': 10.462000846862793, 'frac_alive': 0.0006385498563759029, 'frac_variance_explained': 0.9608203768730164, 'cossim': 0.9563363194465637, 'l2_ratio': 0.9557435512542725}}
Finished ../autoencoders/group-2024-05-07/GatedSAETrainer-chess-alpha1.4125375747680664/
Nonzero classifiers per feature per threshold: tensor([1233, 1232, 1217, 1194, 1156, 1101, 1008,  859,  582,  207,    4],
       device='cuda:6')
Total classified squares per feature per threshold: tensor([19218, 19235, 19212, 19021, 18779, 17957, 17016, 15091, 11725,  5771,
          237], device='cuda:6')
Out of 5491 features, 1233 were classifiers.
The following are counts of squares classified per classifier per feature:
Min count: 1, average count: 15.586374282836914, max count: 64






Aggregating statistics: 100%|██████████| 10/10 [00:20<00:00,  2.09s/it][A[A[A[A


Aggregating statistics:  90%|█████████ | 9/10 [00:10<00:01,  1.18s/it][A[A

Testing board reconstruction



Aggregating statistics:   0%|          | 0/10 [00:00<?, ?it/s][A

Aggregating statistics: 100%|██████████| 10/10 [00:11<00:00,  1.13s/it][A[A
Aggregating statistics:  90%|█████████ | 9/10 [00:31<00:03,  3.49s/it]

Nonzero classifiers per feature per threshold: tensor([1567, 1562, 1535, 1503, 1447, 1353, 1215,  986,  615,  185,    5],
       device='cuda:5')
Total classified squares per feature per threshold: tensor([22558, 22537, 22281, 22241, 21901, 21031, 19315, 16927, 12310,  5615,
          320], device='cuda:5')
Out of 7152 features, 1567 were classifiers.
The following are counts of squares classified per classifier per feature:
Min count: 1, average count: 14.395660400390625, max count: 64
Testing board reconstruction



Aggregating statistics:  10%|█         | 1/10 [00:01<00:12,  1.35s/it][A

Nonzero classifiers per feature per threshold: tensor([972, 971, 960, 944, 929, 897, 848, 731, 534, 201,   2],
       device='cuda:7')
Total classified squares per feature per threshold: tensor([15125, 15216, 15131, 15016, 15203, 15068, 14676, 13231, 10932,  5751,
          128], device='cuda:7')
Out of 3961 features, 972 were classifiers.
The following are counts of squares classified per classifier per feature:
Min count: 1, average count: 15.560699462890625, max count: 64
Testing board reconstruction




Aggregating statistics:   0%|          | 0/10 [00:00<?, ?it/s][A[A






Aggregating statistics:   0%|          | 0/10 [00:00<?, ?it/s][A[A[A


Aggregating statistics:  10%|█         | 1/10 [00:00<00:07,  1.22it/s][A[A[A
Aggregating statistics: 100%|██████████| 10/10 [00:34<00:00,  3.47s/it][A


  df = pd.concat([df, new_row_df], ignore_index=True)
  df = pd.concat([df, new_row_df], ignore_index=True)


{'hyperparameters': {'n_inputs': 1000, 'context_length': 256}, 'eval_results': {'l2_loss': 1.6193091869354248, 'l1_loss': 20.3377628326416, 'l0': 18.822999954223633, 'frac_alive': 0.0011488647432997823, 'frac_variance_explained': 0.9788535237312317, 'cossim': 0.9749823212623596, 'l2_ratio': 0.9709648489952087}}
Finished ../autoencoders/group-2024-05-07/PAnnealTrainer-chess-alpha0.03981072083115578L_p^p/





Aggregating statistics:  20%|██        | 2/10 [00:03<00:14,  1.76s/it][A[A[A
Aggregating statistics:  30%|███       | 3/10 [00:06<00:15,  2.26s/it][A

Aggregating statistics:  20%|██        | 2/10 [00:04<00:19,  2.39s/it][A[A


Aggregating statistics:  30%|███       | 3/10 [00:04<00:11,  1.68s/it][A[A[A
Aggregating statistics:  40%|████      | 4/10 [00:08<00:13,  2.20s/it][A


Aggregating statistics:  40%|████      | 4/10 [00:06<00:09,  1.65s/it][A[A[A

Aggregating statistics:  30%|███       | 3/10 [00:07<00:17,  2.51s/it][A[A
Aggregating statistics:  50%|█████     | 5/10 [00:10<00:10,  2.16s/it][A


Aggregating statistics:  50%|█████     | 5/10 [00:08<00:08,  1.62s/it][A[A[A

Aggregating statistics:  40%|████      | 4/10 [00:10<00:15,  2.57s/it][A[A


Aggregating statistics:  60%|██████    | 6/10 [00:09<00:06,  1.66s/it][A[A[A
Aggregating statistics:  60%|██████    | 6/10 [00:12<00:08,  2.19s/it][A


Aggregating statistics:  70%|███████   | 7/10 [00:11<00:0

{'hyperparameters': {'n_inputs': 1000, 'context_length': 256}, 'eval_results': {'l2_loss': 2.5791163444519043, 'l1_loss': 20.726863861083984, 'l0': 5.12000036239624, 'frac_alive': 0.00031250002211891115, 'frac_variance_explained': 0.9461272358894348, 'cossim': 0.9302018284797668, 'l2_ratio': 0.9287645220756531}}
Finished ../autoencoders/group-2024-05-07/GatedAnnealTrainer-chess-alpha0.7943282127380371L_p^p/




Aggregating statistics:  70%|███████   | 7/10 [00:18<00:07,  2.65s/it][A[A
Aggregating statistics: 100%|██████████| 10/10 [00:21<00:00,  2.11s/it][A


{'hyperparameters': {'n_inputs': 1000, 'context_length': 256}, 'eval_results': {'l2_loss': 2.1893229484558105, 'l1_loss': 23.305150985717773, 'l0': 7.521000385284424, 'frac_alive': 0.000459045433672145, 'frac_variance_explained': 0.9575729966163635, 'cossim': 0.950599193572998, 'l2_ratio': 0.9501383304595947}}
Finished ../autoencoders/group-2024-05-07/GatedAnnealTrainer-chess-alpha0.5623413324356079L_p^p/


  df = pd.concat([df, new_row_df], ignore_index=True)


Aggregating statistics:  80%|████████  | 8/10 [00:21<00:05,  2.91s/it][A[A

Aggregating statistics:  90%|█████████ | 9/10 [00:24<00:02,  2.83s/it][A[A

Aggregating statistics: 100%|██████████| 10/10 [00:27<00:00,  2.71s/it][A[A


{'hyperparameters': {'n_inputs': 1000, 'context_length': 256}, 'eval_results': {'l2_loss': 1.9210543632507324, 'l1_loss': 26.373027801513672, 'l0': 11.141000747680664, 'frac_alive': 0.0006799927214160562, 'frac_variance_explained': 0.9678363800048828, 'cossim': 0.9638010859489441, 'l2_ratio': 0.9648488759994507}}
Finished ../autoencoders/group-2024-05-07/GatedAnnealTrainer-chess-alpha0.3981071710586548L_p^p/


  df = pd.concat([df, new_row_df], ignore_index=True)


In [7]:
# TODO: merge results.csv from all of the folders
df = pd.concat(dfs, axis=0, ignore_index=True)
df

  df = pd.concat(dfs, axis=0, ignore_index=True)


Unnamed: 0,autoencoder_group_path,autoencoder_path,reconstruction_file,eval_results_n_inputs,l0,l1_loss,l2_loss,frac_alive,frac_variance_explained,cossim,...,zero_num_true_positive_squares,best_num_true_positive_squares,zero_num_false_positive_squares,best_num_false_positive_squares,zero_percent_active_classifiers,best_percent_active_classifiers,zero_classifiers_per_token,best_classifiers_per_token,zero_classified_per_token,best_classified_per_token
0,../autoencoders/group-2024-05-07/,../autoencoders/group-2024-05-07/StandardTrain...,../autoencoders/group-2024-05-07/StandardTrain...,1000,49.828003,16.864666,1.627392,0.003041,0.977771,0.974109,...,653280,0,34524,0,0.0,0.0,0.0,0.0,0.0,0.0
1,../autoencoders/group-2024-05-07/,../autoencoders/group-2024-05-07/StandardTrain...,../autoencoders/group-2024-05-07/StandardTrain...,1000,49.828003,16.864666,1.627392,0.003041,0.977771,0.974109,...,14,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
2,../autoencoders/group-2024-05-07/,../autoencoders/group-2024-05-07/StandardTrain...,../autoencoders/group-2024-05-07/StandardTrain...,1000,37.215,15.70408,1.813791,0.002271,0.972351,0.967721,...,609570,0,30270,0,0.0,0.0,0.0,0.0,0.0,0.0
3,../autoencoders/group-2024-05-07/,../autoencoders/group-2024-05-07/StandardTrain...,../autoencoders/group-2024-05-07/StandardTrain...,1000,37.215,15.70408,1.813791,0.002271,0.972351,0.967721,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
4,../autoencoders/group-2024-05-07/,../autoencoders/group-2024-05-07/StandardTrain...,../autoencoders/group-2024-05-07/StandardTrain...,1000,27.304001,14.537116,2.022861,0.001667,0.965702,0.959808,...,587846,0,27072,0,0.0,0.0,0.0,0.0,0.0,0.0
5,../autoencoders/group-2024-05-07/,../autoencoders/group-2024-05-07/StandardTrain...,../autoencoders/group-2024-05-07/StandardTrain...,1000,27.304001,14.537116,2.022861,0.001667,0.965702,0.959808,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
6,../autoencoders/group-2024-05-07/,../autoencoders/group-2024-05-07/PAnnealTraine...,../autoencoders/group-2024-05-07/PAnnealTraine...,1000,18.823,20.337763,1.619309,0.001149,0.978854,0.974982,...,816315,0,32057,0,0.0,0.0,0.0,0.0,0.0,0.0
7,../autoencoders/group-2024-05-07/,../autoencoders/group-2024-05-07/PAnnealTraine...,../autoencoders/group-2024-05-07/PAnnealTraine...,1000,18.823,20.337763,1.619309,0.001149,0.978854,0.974982,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
8,../autoencoders/group-2024-05-07/,../autoencoders/group-2024-05-07/PAnnealTraine...,../autoencoders/group-2024-05-07/PAnnealTraine...,1000,16.142,19.322727,1.699897,0.000985,0.976602,0.972113,...,812246,867426,29864,24811,0.0,0.0,0.0,0.0,0.0,0.0
9,../autoencoders/group-2024-05-07/,../autoencoders/group-2024-05-07/PAnnealTraine...,../autoencoders/group-2024-05-07/PAnnealTraine...,1000,16.142,19.322727,1.699897,0.000985,0.976602,0.972113,...,51,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0


Example of gathering top k contexts

In [None]:
import torch
import circuits.chess_interp as chess_interp
importlib.reload(chess_interp)

torch.set_grad_enabled(False)

autoencoder_group_path = autoencoder_group_paths[0]

othello = eval_sae.check_if_autoencoder_is_othello(autoencoder_group_path)

indexing_functions = eval_sae.get_recommended_indexing_functions(othello)
indexing_function = indexing_functions[0]

custom_functions = eval_sae.get_recommended_custom_functions(othello)

model_name = eval_sae.get_model_name(othello)

device = RESOURCE_STACK.pop()
print("Constructing evaluation dataset")
data = eval_sae.construct_dataset(othello, custom_functions, dataset_size, device, models_path=model_path)


dataset_size = dataset_size * 2  # x2 to make sure we have enough data for loss_recovered()

data, ae_bundle, pgn_strings, encoded_inputs = eval_sae.prep_firing_rate_data(
    autoencoder_path, dataset_size, model_path, model_name, data, device, dataset_size, othello
)

dims = torch.tensor([10], device=device)
chess_interp.examine_dimension_chess(ae_bundle, 100, dims)

RESOURCE_STACK.append(device)
del device

In [None]:
RESOURCE_STACK