# Generating Outputs for Neuronpedia Upload

We use Callum McDougall's `sae_vis` library for generating JSON data to upload to Neuronpedia.


## Set Up

In [None]:

# Model id; sent as "modelId" in the upload requests later in this notebook.
MODEL = "gpt2-small"
# Layer index: selects the checkpoint file and prefixes the source name.
LAYER = 8
# Suffix of the source name, which is built later as "<LAYER>-<SOURCE>".
SOURCE = "res-jb"
# Hugging Face Hub repository the checkpoint is downloaded from.
REPO_ID = "jbloom/GPT2-Small-SAEs"
# Checkpoint file inside REPO_ID; only the layer index is interpolated.
FILENAME = f"final_sparse_autoencoder_gpt2-small_blocks.{LAYER}.hook_resid_pre_24576.pt"

import torch
from huggingface_hub import hf_hub_download
from sae_vis.model_fns import AutoEncoder, AutoEncoderConfig
from sae_vis.utils_fns import get_device

# Device used later as `map_location` when the checkpoint is loaded.
# NOTE(review): presumably get_device() picks cuda/mps/cpu -- confirm in sae_vis.
device = get_device()
print(device)
# Disable autograd globally for the rest of the session; nothing in this
# notebook trains a model.
torch.set_grad_enabled(False)

## Filter to non-dead features to save time

In [None]:
# TODO(Joseph): replace this hard-coded sample with the real list of alive
# (non-dead) feature indexes. The list is passed to NeuronpediaRunner as
# `alive_indexes`.
alive_indexes = [1, 99, 40, 5, 3, 999, 123, 345]

## Load SAE

In [None]:
# Download the checkpoint from the Hugging Face Hub; `path` is the local file.
path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)

# NOTE(security): the checkpoint is a pickle that stores a config *object*
# (`obj["cfg"]` is read by attribute below), so it cannot be loaded with
# `weights_only=True`, which is the default from PyTorch 2.6 onwards.
# Full unpickling can execute arbitrary code -- only load checkpoints from a
# repository you trust.
obj = torch.load(path, map_location=device, weights_only=False)
state_dict = obj["state_dict"]

# Validate with an explicit exception instead of `assert` (asserts are
# stripped under `python -O`) and report which keys were actually found.
expected_keys = {"W_enc", "b_enc", "W_dec", "b_dec"}
if set(state_dict.keys()) != expected_keys:
    raise ValueError(
        f"Unexpected state_dict keys {sorted(state_dict.keys())}; "
        f"expected {sorted(expected_keys)}"
    )

# WARNING: If running on Mac, you need to change line 59 in model_fns to: self.to("mps")
# Rebuild the sae_vis config from the values stored in the checkpoint.
cfg = AutoEncoderConfig(
    d_in=obj["cfg"].d_in,
    dict_mult=obj["cfg"].expansion_factor,
)
gpt2_sae = AutoEncoder(cfg)
gpt2_sae.load_state_dict(state_dict)

## Save JSON to neuronpedia_outputs

In [None]:
from sae_analysis.neuronpedia_runner import NeuronpediaRunner

# Parent folder for the generated output, relative to the working directory.
NP_OUTPUT_FOLDER = "../neuronpedia_outputs"

# Configure the runner.
# NOTE(review): the sampling parameters are passed through as-is; their exact
# meaning is defined by NeuronpediaRunner -- confirm there before tuning them.
runner = NeuronpediaRunner(
    sae_path=path,  # local checkpoint path returned by hf_hub_download
    neuronpedia_parent_folder=NP_OUTPUT_FOLDER,
    init_session=True,
    n_batches_to_sample_from=2 ** 12,  # 4096
    n_prompts_to_select=4096 * 6,  # 24576
    n_features_at_a_time=256,
    buffer_tokens=8,
    alive_indexes=alive_indexes,  # hard-coded placeholder list defined earlier
)
# Run the job; the upload cell later reads its input files from
# `runner.neuronpedia_folder`.
runner.run()

## Upload to Neuronpedia
#### This currently only works if you have admin access to the Neuronpedia database via localhost.

In [None]:
# Helpers that replace NaN values (which are not valid JSON) before upload
from decimal import Decimal
from typing import Any
import math
import json
import os
import requests

# Folder the upload code reads `batch-*.json` and `dead.json` from.
folder_path = runner.neuronpedia_folder

def nanToNeg999(obj: Any) -> Any:
    """Return ``obj`` with every NaN replaced by the sentinel ``-999``.

    Dicts and lists are rebuilt recursively (dict keys are kept as they are,
    only values are visited). A ``float`` or ``Decimal`` NaN becomes ``-999``.
    Any other value, including tuples, is returned unchanged.
    """
    if isinstance(obj, dict):
        return {key: nanToNeg999(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [nanToNeg999(item) for item in obj]
    is_number = isinstance(obj, (float, Decimal))
    if is_number and math.isnan(obj):
        return -999
    return obj


class NanConverter(json.JSONEncoder):
    """JSON encoder that runs ``nanToNeg999`` on the object before encoding."""

    def encode(self, o: Any, *args: Any, **kwargs: Any):
        # Sanitise the whole object up front, then defer to the stock encoder.
        sanitized = nanToNeg999(o)
        return super().encode(sanitized, *args, **kwargs)

# Server info
host = "http://localhost:3000"
sourceName = str(LAYER) + "-" + SOURCE

# Upload alive features: every `batch-*.json` file in the output folder is
# sent as one POST request.
url = host + "/api/internal/upload-features"  # same endpoint for every batch
for file_name in os.listdir(folder_path):
    if not (file_name.startswith("batch-") and file_name.endswith(".json")):
        continue
    print("Uploading file: " + file_name)
    file_path = os.path.join(folder_path, file_name)
    # Context manager so the handle is closed even if parsing fails.
    with open(file_path, "r") as f:
        data = json.load(f)

    # Replace NaNs on the parsed object directly; this gives the same result
    # as dumping through NanConverter and re-parsing, without serialising the
    # whole batch twice.
    data = nanToNeg999(data)

    resp = requests.post(
        url,
        json={
            "modelId": MODEL,
            "layer": sourceName,
            "features": data,
        },
    )
    # Stop on a 4xx/5xx response instead of silently skipping a batch.
    resp.raise_for_status()

# Upload dead features (just makes blank features)
# We want this for completeness
dead_path = os.path.join(folder_path, "dead.json")
# Context manager so the handle is closed even if parsing fails.
with open(dead_path, "r") as f:
    data = json.load(f)
dead_indexes = data["dead_indexes"]
url = host + "/api/internal/upload-dead-features"
resp = requests.post(
    url,
    json={
        "modelId": MODEL,
        "layer": sourceName,
        "deadIndexes": dead_indexes,
    },
)
# Raise on a 4xx/5xx response so a failed upload does not pass unnoticed.
resp.raise_for_status()

### TODO: Automatically validate the uploaded data