In [1]:
# Example usage
import json
import torch
from pathlib import Path
from sae_dashboard.neuronpedia.vector_set import VectorSet


# Input file layout: a JSON object whose "vectors" key holds a list of vectors,
# each vector being a plain list of floats:
# {
#     "vectors": [
#         [vector_1],
#         [vector_2],
#         ...
#     ]
# }
json_path = Path("test_vectors/logistic_direction.json")

# Build a VectorSet from the file, together with the model and hook metadata
# passed below.
vector_set = VectorSet.from_json(
    json_path=json_path,
    model_name="gpt2",
    d_model=768,  # Example dimension for GPT-2 Small
    hook_layer=7,
    hook_point="blocks.7.hook_resid_pre",
    names=["sentiment_vector"],  # Optional custom name
)

# Quick summary of what was loaded: first axis = number of vectors,
# second axis = vector dimension.
vector_matrix = vector_set.vectors
print(f"Number of vectors: {vector_matrix.shape[0]}")
print(f"Vector dimension: {vector_matrix.shape[1]}")
print(f"Vector names: {vector_set.names}")

Number of vectors: 1
Vector dimension: 768
Vector names: ['sentiment_vector']


In [3]:
# A VectorSet can also be persisted and restored as a VectorSet object,
# rather than as a bare list of lists of floats.
# The same path is used for both the save and the reload.
vector_set_path = Path("test_vectors/logistic_direction_vector_set.json")
vector_set.save(vector_set_path)
vector_set = VectorSet.load(vector_set_path)

In [4]:
from sae_dashboard.neuronpedia.neuronpedia_vector_runner import (
    NeuronpediaVectorRunner,
    NeuronpediaVectorRunnerConfig,
)

# Small test settings, gathered in one place so they are easy to tune.
# With these values the recorded run reports 16384 contexts (prompts) and
# 16384 * 128 = 2097152 tokens in total.
N_PROMPTS_TOTAL = 16384
N_TOKENS_IN_PROMPT = 128  # Shorter sequences
N_PROMPTS_IN_FORWARD_PASS = 256
N_VECTORS_AT_A_TIME = 1
DTYPE = "float32"  # shared by the vectors and the model

cfg = NeuronpediaVectorRunnerConfig(
    # Output location and source dataset
    outputs_dir="test_outputs/",
    huggingface_dataset_path="monology/pile-uncopyrighted",
    # Precision
    vector_dtype=DTYPE,
    model_dtype=DTYPE,
    # Run size
    n_prompts_total=N_PROMPTS_TOTAL,
    n_tokens_in_prompt=N_TOKENS_IN_PROMPT,
    n_prompts_in_forward_pass=N_PROMPTS_IN_FORWARD_PASS,
    n_vectors_at_a_time=N_VECTORS_AT_A_TIME,
    use_wandb=False,  # Disable wandb for testing
)

In [5]:
# Build the runner from the vector set and the config defined in the cells above,
# then execute it.
# In the recorded run below, the log shows the dataset being tokenized
# ("Tokens don't exist, making them.") and the result being written to
# test_outputs/gpt2_blocks.7.hook_resid_pre/batch-0.json.
# NOTE(review): the recorded run used the `mps` device; presumably the device is
# auto-detected, so logs may differ on other machines - confirm.
runner = NeuronpediaVectorRunner(vector_set, cfg)
runner.run()

Device Count: 1
Using specified vector dtype: float32
SAE Device: mps
Model Device: mps
Model Num Devices: 1
Activation Store Device: mps
Dataset Path: monology/pile-uncopyrighted
Forward Pass size: 128
Total number of tokens: 2097152
Total number of contexts (prompts): 16384
Vector DType: float32
Model DType: float32




Loaded pretrained model gpt2 into HookedTransformer


Resolving data files:   0%|          | 0/30 [00:00<?, ?it/s]

Tokens don't exist, making them.


  0%|          | 0/2048 [00:00<?, ?it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (3180 > 1024). Running this sequence through the model will result in indexing errors
100%|██████████| 2048/2048 [00:18<00:00, 108.67it/s]
0it [00:00, ?it/s]



Forward passes to cache data for vis:   0%|          | 0/60 [00:00<?, ?it/s]

Extracting vis data from cached data:   0%|          | 0/1 [00:00<?, ?it/s]

  return torch.load(


feature_indices: [0]


1it [00:02,  2.65s/it]

Output written to test_outputs/gpt2_blocks.7.hook_resid_pre/batch-0.json



