# Generating Outputs for Neuronpedia Upload

We use Callum McDougall's `sae_vis` library for generating JSON data to upload to Neuronpedia.


## Set Up

In [12]:
# from huggingface_hub import hf_hub_download

# MODEL = "gpt2-small"
# LAYER = 0
# SOURCE = "res-jb"
# REPO_ID = "jbloom/GPT2-Small-SAEs"
# FILENAME = f"final_sparse_autoencoder_gpt2-small_blocks.{LAYER}.hook_resid_pre_24576.pt"
# SAE_PATH = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)

# Active configuration: which model / layer / SAE source this run targets.
MODEL = "pythia-70m-deduped"
LAYER = 0
SOURCE = "res-sm"

# Local checkpoint and sparsity files are keyed by the layer index.
SAE_PATH = f"../data/res-sm/sae_{LAYER}_resid.pt"
FEATURE_SPARSITY_PATH = f"../data/res-sm/feature_sparsity_{LAYER}_resid.pt"

## Save JSON to neuronpedia_outputs

In [13]:
from sae_analysis.neuronpedia_runner import NeuronpediaRunner

# Folder handed to the runner as its `neuronpedia_parent_folder`.
NP_OUTPUT_FOLDER = "../neuronpedia_outputs"

# Gather every run setting in one mapping so the tunables are easy to scan,
# then build the runner from it and start the run.
runner_settings = {
    "sae_path": SAE_PATH,
    "feature_sparsity_path": FEATURE_SPARSITY_PATH,
    "neuronpedia_parent_folder": NP_OUTPUT_FOLDER,
    "init_session": True,
    "n_batches_to_sample_from": 2 ** 12,
    "n_prompts_to_select": 4096 * 6,
    "n_features_at_a_time": 128,
    "buffer_tokens": 8,
}
runner = NeuronpediaRunner(**runner_settings)
runner.run()

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


config_tokenizer_class: GPTNeoXTokenizer
tokenizer_auto_map: None
tokenizer_class_candidate fast
tokenizer_class <class 'transformers.models.gpt_neox.tokenization_gpt_neox_fast.GPTNeoXTokenizerFast'>
pretrained_model_name_or_path EleutherAI/pythia-70m-deduped
vocab filenames {'vocab_file': 'vocab.json', 'merges_file': 'merges.txt', 'tokenizer_file': 'tokenizer.json'}
vocab.json
merges.txt
tokenizer.json
added_tokens.json
special_tokens_map.json
tokenizer_config.json
Loaded pretrained model pythia-70m-deduped into HookedTransformer
Moving model to device:  mps


Resolving data files:   0%|          | 0/1650 [00:00<?, ?it/s]

Run name: 32768-L1-0.001-LR-0.0003-Tokens-2.000e+06
n_tokens_per_buffer (millions): 0.04096
Lower bound: n_contexts_per_buffer (millions): 0.00032
Total training steps: 488
Total wandb updates: 48
n_tokens_per_feature_sampling_window (millions): 1048.576
n_tokens_per_dead_feature_window (millions): 524.288
We will reset the sparsity calculation 0 times.
Number tokens in sparsity calculation window: 8.19e+06
Run name: 32768-L1-0.001-LR-0.0003-Tokens-2.000e+06
n_tokens_per_buffer (millions): 0.04096
Lower bound: n_contexts_per_buffer (millions): 0.00032
Total training steps: 488
Total wandb updates: 48
n_tokens_per_feature_sampling_window (millions): 1048.576
n_tokens_per_dead_feature_window (millions): 524.288
We will reset the sparsity calculation 0 times.
Number tokens in sparsity calculation window: 8.19e+06
Total features to run: 19578
Total skipped: 13190
Total batches: 153
Hook Point Layer: 0
Hook Point: blocks.0.hook_resid_post
Writing files to: ../neuronpedia_outputs/pythia-70m-

100%|██████████| 4096/4096 [03:25<00:00, 19.90it/s]


Time to get tokens: 209.5396511554718


  0%|          | 0/153 [00:00<?, ?it/s]

Doing batch: 0


Forward passes to gather data:   0%|          | 0/384 [00:00<?, ?it/s]

Getting sequence data:   0%|          | 0/128 [00:00<?, ?it/s]

## Upload to Neuronpedia
#### This currently only works if you have admin access to the Neuronpedia database via localhost.

In [None]:
# Helpers that replace NaN values (which are not valid JSON) with -999 before upload
from decimal import Decimal
from typing import Any
import math
import json
import os
import requests

# Folder scanned for batch-*.json files by the upload loop in this cell;
# read from the runner's `neuronpedia_folder` attribute, so `runner` must exist.
folder_path = runner.neuronpedia_folder

def nanToNeg999(obj: Any) -> Any:
    """Return a copy of ``obj`` with every NaN replaced by ``-999``.

    Dicts, lists and tuples are traversed recursively (dict keys are left
    untouched); any other value is returned unchanged.

    Args:
        obj: Arbitrary, possibly nested, JSON-like data.

    Returns:
        A new container of the same kind as ``obj`` with NaN ``float`` /
        ``Decimal`` values replaced by ``-999``, or ``obj`` itself when it is
        not a container and not NaN.
    """
    if isinstance(obj, dict):
        return {k: nanToNeg999(v) for k, v in obj.items()}
    if isinstance(obj, list):
        return [nanToNeg999(v) for v in obj]
    if isinstance(obj, tuple):
        # json serializes tuples as arrays, so NaNs inside them must be
        # replaced as well.
        return tuple(nanToNeg999(v) for v in obj)
    if isinstance(obj, Decimal):
        # Decimal.is_nan() covers both quiet and signaling NaN; math.isnan()
        # raises ValueError on a signaling NaN.
        return -999 if obj.is_nan() else obj
    if isinstance(obj, float) and math.isnan(obj):
        return -999
    return obj


class NanConverter(json.JSONEncoder):
    """JSON encoder that replaces NaN values with -999 before serializing.

    Both entry points of ``json.JSONEncoder`` are overridden so the
    replacement happens whether the caller builds a string (``json.dumps``)
    or streams chunks (``json.dump`` uses ``iterencode``).
    """

    def encode(self, o: Any, *args: Any, **kwargs: Any):
        """Serialize ``o`` to a JSON string after replacing NaNs."""
        return super().encode(nanToNeg999(o), *args, **kwargs)

    def iterencode(self, o: Any, *args: Any, **kwargs: Any):
        """Yield JSON chunks for ``o`` after replacing NaNs.

        Without this override, streaming callers would bypass the NaN
        replacement done in ``encode``. The replacement is idempotent, so it
        is harmless if both methods run for the same object.
        """
        return super().iterencode(nanToNeg999(o), *args, **kwargs)

# Server info
host = "http://localhost:3000"
sourceName = str(LAYER) + "-" + SOURCE
# The endpoint is the same for every batch, so build it once.
url = host + "/api/internal/upload-features"

# Upload alive features
# sorted() gives a deterministic upload order; os.listdir() order is arbitrary.
for file_name in sorted(os.listdir(folder_path)):
    if not (file_name.startswith("batch-") and file_name.endswith(".json")):
        continue

    print("Uploading file: " + file_name)
    file_path = os.path.join(folder_path, file_name)
    # The context manager closes the handle even if parsing fails.
    with open(file_path, "r") as f:
        data = json.load(f)

    # Replace NaNs (json.load parses bare NaN tokens into float('nan'))
    data = nanToNeg999(data)

    resp = requests.post(
        url,
        json={
            "modelId": MODEL,
            "layer": sourceName,
            "features": data,
        },
    )
    # Fail loudly on an HTTP error instead of silently dropping this batch.
    resp.raise_for_status()

# Upload dead features (this just creates blank features)
# We want this for completeness
# skipped_path = os.path.join(folder_path, "skipped_indexes.json")
# f = open(skipped_path, "r")
# data = json.load(f)
# skipped_indexes = data["skipped_indexes"]
# url = host + "/api/internal/upload-dead-features"
# resp = requests.post(
#     url,
#     json={
#         "modelId": MODEL,
#         "layer": sourceName,
#         "deadIndexes": skipped_indexes,
#     },
# )

### TODO: Automatically validate the uploaded data