In [None]:
import torch
import pickle
import einops
import importlib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
from matplotlib.colors import Normalize

import circuits.analysis as analysis
import circuits.eval_sae_as_classifier as eval_sae
import circuits.chess_utils as chess_utils
import circuits.utils as utils

In [None]:
# import torch
# # for testing purposes

# # Define a sample 3D tensor with random values
# # Dimensions: T x F x C (let's use 2 x 3 x 4 for simplicity)
# f1_TFC = torch.randn(2, 3, 4)
# print("Original Tensor (T x F x C):")
# print(f1_TFC)

# def best_f1_average(f1_TFC: torch.Tensor) -> torch.Tensor:
#     # Apply torch.max along the last dimension (dimension 2)
#     # Select only the values, ignoring the indices
#     f1_TF, _ = torch.max(f1_TFC, dim=1)
#     return f1_TF

# # Compute the maximum along the 'C' dimension and reduce to a 2D tensor
# f1_TF = best_f1_average(f1_TFC)
# print("\nReduced Tensor (T x F) with max values from 'C':")
# print(f1_TF)

In [None]:
def mask_all_blanks(results: dict, device) -> dict:
    """Apply ``analysis.mask_initial_board_state`` to the piece-state trackers.

    Only the entries belonging to ``chess_utils.board_to_piece_state`` and
    ``chess_utils.board_to_piece_color_state`` are touched; for each of them the
    ``'on'`` and ``'off'`` values are replaced by the masked version.

    Args:
        results: Mapping keyed by custom-function name; each affected entry must
            provide ``'on'`` and ``'off'`` values.
        device: Forwarded unchanged to ``analysis.mask_initial_board_state``.

    Returns:
        The same ``results`` object, which is modified in place.
    """
    piece_state_functions = (
        chess_utils.board_to_piece_state,
        chess_utils.board_to_piece_color_state,
    )

    for board_fn in analysis.get_all_custom_functions(results):
        entry_key = board_fn.__name__

        # Every other custom function keeps its trackers untouched.
        if board_fn not in piece_state_functions:
            continue

        on_tracker = results[entry_key]['on']
        off_tracker = results[entry_key]['off']
        results[entry_key].update(
            on=analysis.mask_initial_board_state(on_tracker, board_fn, device),
            off=analysis.mask_initial_board_state(off_tracker, board_fn, device),
        )

    return results

def best_f1_average(f1_TFRRC: torch.Tensor) -> torch.Tensor:
    """Average, per index of dim 0, the best value found along dim 1.

    The maximum is taken over dim 1 and the result is then averaged over every
    remaining trailing dimension, leaving one value per index of dim 0.
    The dimension-suffix naming suggests (thresholds, features, rows, rows,
    classes) -- TODO confirm against ``analysis.get_all_f1s``.

    Any number of trailing dimensions is accepted (e.g. ``T x F x C`` as well as
    ``T x F x R x R x C``); for a plain ``T x F`` input the per-row maximum is
    returned directly.

    Args:
        f1_TFRRC: Tensor with at least two dimensions.

    Returns:
        1-D tensor of length ``f1_TFRRC.shape[0]``.
    """
    best_over_dim1, _ = torch.max(f1_TFRRC, dim=1)

    trailing_shape = best_over_dim1.shape[1:]
    if len(trailing_shape) == 0:
        # Nothing left to average over (input was T x F).
        return best_over_dim1

    # Number of entries contributing to each average.
    max_possible = trailing_shape.numel()

    trailing_dims = tuple(range(1, best_over_dim1.ndim))
    return best_over_dim1.sum(dim=trailing_dims) / max_possible
    


In [None]:
# Pick up edits made to circuits.analysis without restarting the kernel.
importlib.reload(analysis)

device = "cuda"
autoencoder_group_paths = ["../autoencoders/chess_layer5_large_sweep/"]
autoencoder_group_path = autoencoder_group_paths[0]
folders = eval_sae.get_nested_folders(autoencoder_group_path)

custom_functions = []
custom_function_names = []

# sae_results holds two kinds of entries:
#   sae_results[autoencoder_path] -> {"l0", "frac_variance_explained", <func_name>: f1_T, ...}
#   sae_results[func_name]        -> {"f1_counter": running sum of f1_T over all loaded autoencoders}
sae_results = {}

csv_results_file = "results.csv"
df = pd.read_csv(autoencoder_group_path + csv_results_file)

# This is only necessary if you have multiple files with multiple n_inputs
# e.g. indexing_find_dots_indices_n_inputs_1000_results.pkl and indexing_find_dots_indices_n_inputs_5000_results.pkl
# In this case, if you want to view the results for n_inputs = 1000, you would set results_filter = "1000"
# (named results_filter rather than filter so the builtin is not shadowed)
results_filter = "1000"
eval_filter = None

for autoencoder_path in folders:

    print(f"Processing {autoencoder_path}")

    assert autoencoder_path in df["autoencoder_path"].values, f"{autoencoder_path} not in csv file"

    results_filenames = analysis.get_all_results_file_names(autoencoder_path, results_filter)
    if len(results_filenames) != 1:
        print(f"Skipping {autoencoder_path} because it has {len(results_filenames)} results files")
        print("This is most likely because there are results files from different n_inputs")
        continue
    results_filename = results_filenames[0]

    evals_filenames = analysis.get_all_evals_file_names(autoencoder_path, eval_filter)
    if len(evals_filenames) != 1:
        print(f"Skipping {autoencoder_path} because it has {len(evals_filenames)} evals files")
        print("This is most likely because there are evals files from different n_inputs")
        continue
    evals_filename = evals_filenames[0]

    # NOTE: pickle.load can execute arbitrary code; only load files produced by a trusted pipeline.
    with open(autoencoder_path + results_filename, "rb") as f:
        results = pickle.load(f)

    with open(autoencoder_path + evals_filename, "rb") as f:
        evals = pickle.load(f)

    results = utils.to_device(results, device)
    evals = utils.to_device(evals, device)

    custom_functions = analysis.get_all_custom_functions(results)
    for function in custom_functions:
        function_name = function.__name__
        # Every autoencoder reports its functions; record each name only once.
        if function_name not in custom_function_names:
            custom_function_names.append(function_name)

    results = analysis.add_off_tracker(results, custom_functions, device)
    results = mask_all_blanks(results, device)
    f1_dict_TFRRC = analysis.get_all_f1s(results, device)

    # Create the per-autoencoder entry only once loading succeeded, so skipped
    # autoencoders never leave an empty entry behind.
    sae_results[autoencoder_path] = {
        "l0": evals["eval_results"]["l0"],
        "frac_variance_explained": evals["eval_results"]["frac_variance_explained"],
    }

    # Lazily create the running F1 accumulator for functions seen for the first time.
    for func_name in custom_function_names:
        if func_name in sae_results:
            continue

        T = f1_dict_TFRRC[func_name].shape[0]
        f1_counter_T = torch.zeros(T, device=device)
        sae_results[func_name] = {"f1_counter": f1_counter_T}

    for func_name in f1_dict_TFRRC:
        config = chess_utils.config_lookup[func_name]
        custom_function = config.custom_board_state_function
        assert custom_function in custom_functions, f"Key {custom_function} not in custom_functions"
        f1_TFRRC = f1_dict_TFRRC[func_name]
        f1_T = best_f1_average(f1_TFRRC)
        # Accumulate across autoencoders; a later cell takes the argmax of this sum.
        sae_results[func_name]["f1_counter"] += f1_T

        sae_results[autoencoder_path][func_name] = f1_T

    # torch.cuda.empty_cache()


        


In [None]:
# For every custom function, pick the dim-0 index whose F1 summed over all loaded
# autoencoders is highest, then record each autoencoder's F1 at that shared index.
# dict.fromkeys drops repeated names while keeping their first-seen order.
for func_name in dict.fromkeys(custom_function_names):

    new_column_name = f"{func_name}_best_custom_metric"
    if new_column_name not in df.columns:
        df[new_column_name] = np.nan

    f1_counter_T = sae_results[func_name]["f1_counter"]
    best_idx = torch.argmax(f1_counter_T)

    for autoencoder_path in folders:
        f1_T = sae_results.get(autoencoder_path, {}).get(func_name)
        if f1_T is None:
            # This autoencoder was skipped while loading (or lacks this function);
            # its row keeps NaN instead of raising a KeyError.
            continue
        best_f1 = f1_T[best_idx]
        df.loc[df["autoencoder_path"] == autoencoder_path, new_column_name] = best_f1.item()

In [None]:
# Display the results table, including the "<func_name>_best_custom_metric" columns filled in by the previous cell.
df

In [None]:
# Keep just the float64/int64 columns; everything else is left out of the correlation.
numerical_data = df.select_dtypes(include=['float64', 'int64'])
numerical_columns = numerical_data.columns

# Pairwise correlation between all remaining columns.
correlation_matrix = numerical_data.corr()
axis_names = correlation_matrix.columns

# Render the matrix as an interactive plotly heatmap on a fixed [-1, 1] color scale.
fig = px.imshow(
    correlation_matrix,
    x=axis_names,
    y=axis_names,
    labels={"x": "Columns", "y": "Columns", "color": "Correlation"},
    color_continuous_scale='RdBu_r',
    zmin=-1,
    zmax=1,
)

# Large canvas so that every column label stays readable.
fig.update_layout(title='Correlation Matrix', width=2000, height=2000)

fig.show()

In [None]:
# List every column label, one per line (iterating a DataFrame yields its column labels).
for column_name in df:
    print(column_name)

In [None]:
# get unique trainer types (rows without a trainer class cannot be assigned a marker)
unique_trainers = df['trainer_class'].dropna().unique()

# map trainer types to marker shapes; shapes repeat cyclically so that no
# trainer type is silently dropped when there are more types than shapes
marker_shapes = ['o', 's', '^', 'D']
trainer_markers = {
    trainer: marker_shapes[i % len(marker_shapes)]
    for i, trainer in enumerate(unique_trainers)
}

# create the scatter plot
fig, ax = plt.subplots(figsize=(10, 6))

# one shared normalization so colors are comparable across trainer types
color_column = 'board_to_can_capture_queen_best_custom_metric'
norm = Normalize(vmin=df[color_column].min(), vmax=df[color_column].max())
cmap = 'viridis'

metric_1 = "l0"
metric_2 = "frac_variance_explained"

# plot data points for each trainer type separately
for trainer, marker in trainer_markers.items():
    # exact match: str.contains would treat the name as a regex / substring
    # and could pull in rows that belong to a different trainer type
    trainer_data = df[df['trainer_class'] == trainer]
    ax.scatter(
        trainer_data[metric_1],
        trainer_data[metric_2],
        c=trainer_data[color_column],
        cmap=cmap,
        marker=marker,
        s=100,
        label=trainer,
        norm=norm,
    )

# add colorbar, built from the shared norm/cmap so it does not depend on any
# particular scatter collection existing
cbar = fig.colorbar(plt.cm.ScalarMappable(norm=norm, cmap=cmap), ax=ax)
cbar.set_label(color_column)

# set labels and title
ax.set_xlabel(metric_1)
ax.set_ylabel(metric_2)
ax.set_title(f'{metric_1} vs. {metric_2}')

# add legend
ax.legend(title='Trainer Type', loc='upper right')

# set x range
ax.set_xlim(0, 1000)
# ax.set_ylim(0.8, 1)

# display the plot
plt.show()