# Gelu-2L

In [2]:
import torch
import os 

from sae_training.config import LanguageModelSAERunnerConfig
from sae_training.lm_runner import language_model_sae_runner

import cProfile


# Turn off parallelism inside the HuggingFace tokenizers library for this kernel.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Run configuration for an SAE hooked at `blocks.0.hook_mlp_out` of gelu-2l.
cfg = LanguageModelSAERunnerConfig(
    # --- Data-generating function (model + training distribution) ---
    model_name="gelu-2l",
    hook_point="blocks.0.hook_mlp_out",
    hook_point_layer=0,
    d_in=512,
    dataset_path="NeelNanda/c4-tokenized-2b",
    is_dataset_tokenized=True,
    # --- SAE parameters ---
    expansion_factor=32,
    # --- Training parameters ---
    lr=1e-4,
    l1_coefficient=3e-4,
    train_batch_size=4096,
    context_size=128,
    # --- Activation store parameters ---
    n_batches_in_buffer=128,
    total_training_tokens=500 * 1_000_000,  # 500M tokens
    store_batch_size=32,
    # --- Resampling protocol ---
    feature_sampling_method="l2",
    feature_sampling_window=2500,
    feature_reinit_scale=0.2,
    dead_feature_window=1250,
    dead_feature_threshold=1e-8,
    # --- Weights & Biases logging ---
    log_to_wandb=True,
    wandb_project="mats_sae_training_language_models_gelu_2l",
    wandb_entity=None,
    wandb_log_frequency=10,
    # --- Misc ---
    device="mps",  # PyTorch's Apple-silicon (Metal) backend
    seed=42,
    n_checkpoints=10,
    checkpoint_path="checkpoints",
    dtype=torch.float32,
)

# Launch the run and keep the runner's return value for later cells.
sparse_autoencoder = language_model_sae_runner(cfg)


n_tokens_per_buffer (millions): 0.131072
Lower bound: n_contexts_per_buffer (millions): 0.001024
Total training steps: 2441
n_dead_feature_samples: 0


KeyboardInterrupt: 

# GPT2 - Small

In [None]:
import torch
import os

# Environment knobs, set before the project imports below:
#  - TOKENIZERS_PARALLELISM: turn off parallelism in HuggingFace tokenizers.
#  - WANDB__SERVICE_WAIT: wandb service wait, presumably in seconds — TODO confirm.
os.environ.update(
    TOKENIZERS_PARALLELISM="false",
    WANDB__SERVICE_WAIT="300",
)

from sae_training.config import LanguageModelSAERunnerConfig
from sae_training.lm_runner import language_model_sae_runner

# Run configuration for an SAE hooked at `blocks.10.hook_resid_pre` of GPT2-small.
cfg = LanguageModelSAERunnerConfig(
    # --- Data-generating function (model + training distribution) ---
    model_name="gpt2-small",
    hook_point="blocks.10.hook_resid_pre",
    # NOTE(review): hook_point names block 10 but hook_point_layer is 11 —
    # confirm against how the runner uses hook_point_layer.
    hook_point_layer=11,
    d_in=768,
    dataset_path="Skylion007/openwebtext",
    is_dataset_tokenized=False,
    # --- SAE parameters ---
    expansion_factor=64,  # determines the dimension of the SAE
    # --- Training parameters ---
    lr=1e-5,
    l1_coefficient=5e-4,
    lr_scheduler_name=None,
    train_batch_size=4096,
    context_size=128,
    # --- Activation store parameters ---
    n_batches_in_buffer=128,
    total_training_tokens=200 * 1_000_000,  # 200M tokens seems doable overnight
    store_batch_size=32,
    # --- Resampling protocol ---
    feature_sampling_method="l2",
    feature_sampling_window=1000,
    feature_reinit_scale=0.2,
    dead_feature_window=5000,
    dead_feature_threshold=1e-7,
    # --- Weights & Biases logging ---
    log_to_wandb=True,
    wandb_project="mats_sae_training_gpt2_small",
    wandb_entity=None,
    wandb_log_frequency=50,
    # --- Misc ---
    device="mps",  # PyTorch's Apple-silicon (Metal) backend
    seed=42,
    n_checkpoints=5,
    checkpoint_path="checkpoints",
    dtype=torch.float32,
)

# Launch the run and keep the runner's return value for later cells.
sparse_autoencoder = language_model_sae_runner(cfg)


# GPT2-Small Hook Q

In [None]:
import torch
import os

# Environment knobs, set before the project imports below:
#  - TOKENIZERS_PARALLELISM: turn off parallelism in HuggingFace tokenizers.
#  - WANDB__SERVICE_WAIT: wandb service wait, presumably in seconds — TODO confirm.
os.environ.update(
    TOKENIZERS_PARALLELISM="false",
    WANDB__SERVICE_WAIT="300",
)

from sae_training.config import LanguageModelSAERunnerConfig
from sae_training.lm_runner import language_model_sae_runner

# Run configuration for an SAE on a single attention head's queries
# (`blocks.10.attn.hook_q`, head index 7) of GPT2-small.
cfg = LanguageModelSAERunnerConfig(
    # --- Data-generating function (model + training distribution) ---
    model_name="gpt2-small",
    hook_point="blocks.10.attn.hook_q",
    # NOTE(review): hook_point names block 10 but hook_point_layer is 11 —
    # confirm against how the runner uses hook_point_layer.
    hook_point_layer=11,
    hook_point_head_index=7,
    d_in=64,
    dataset_path="Skylion007/openwebtext",
    is_dataset_tokenized=False,
    # --- SAE parameters ---
    expansion_factor=64,  # determines the dimension of the SAE
    # --- Training parameters ---
    lr=1e-3,
    l1_coefficient=1e-3,
    lr_scheduler_name=None,
    train_batch_size=4096,
    context_size=128,
    # --- Activation store parameters ---
    n_batches_in_buffer=512,
    total_training_tokens=3 * 1_000_000,  # 3M tokens (short run)
    store_batch_size=32,
    # --- Resampling protocol ---
    feature_sampling_method="l2",
    feature_sampling_window=1000,
    feature_reinit_scale=0.2,
    dead_feature_window=1000,
    dead_feature_threshold=1e-7,
    # --- Weights & Biases logging ---
    log_to_wandb=True,
    wandb_project="mats_sae_training_gpt2_small_hook_q",
    wandb_entity=None,
    wandb_log_frequency=50,
    # --- Misc ---
    device="mps",  # PyTorch's Apple-silicon (Metal) backend
    seed=42,
    n_checkpoints=5,
    checkpoint_path="checkpoints",
    dtype=torch.float32,
)

# Launch the run and keep the runner's return value for later cells.
sparse_autoencoder = language_model_sae_runner(cfg)


In [None]:
# Persist whatever `sparse_autoencoder` currently holds in the kernel.
# NOTE(review): the cell directly above trains on blocks.10.attn.hook_q, while this
# filename refers to resid_pre_10 — confirm which training cell is meant to run
# before this one so the saved file matches its name.
sparse_autoencoder.save_model("./overnight_sae_resid_pre_10_gpt_2_small.pt")

# Pythia-70M

In [None]:
import torch
import os 

from sae_training.config import LanguageModelSAERunnerConfig
from sae_training.lm_runner import language_model_sae_runner

import cProfile


# Turn off parallelism inside the HuggingFace tokenizers library for this kernel.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Short benchmark-style configuration: an SAE hooked at `blocks.0.hook_mlp_out`
# of pythia-70m, with no intermediate checkpoints.
cfg = LanguageModelSAERunnerConfig(
    # --- Data-generating function (model + training distribution) ---
    model_name="pythia-70m",
    hook_point="blocks.0.hook_mlp_out",
    hook_point_layer=0,
    d_in=512,
    dataset_path="EleutherAI/the_pile_deduplicated",
    is_dataset_tokenized=False,
    # --- SAE parameters ---
    expansion_factor=16,
    # --- Training parameters ---
    lr=3e-4,
    l1_coefficient=1e-3,
    train_batch_size=4096,
    context_size=128,
    # --- Activation store parameters ---
    n_batches_in_buffer=64,
    total_training_tokens=5 * 1_000_000,  # 5M tokens
    store_batch_size=32,
    # --- Resampling protocol ---
    feature_sampling_method="l2",
    feature_sampling_window=2500,  # doesn't currently matter
    feature_reinit_scale=0.2,
    dead_feature_window=1250,
    dead_feature_threshold=1e-8,
    # --- Weights & Biases logging ---
    log_to_wandb=True,
    wandb_project="mats_sae_training_language_benchmark_tests",
    wandb_entity=None,
    wandb_log_frequency=10,
    # --- Misc ---
    device="mps",  # PyTorch's Apple-silicon (Metal) backend
    seed=42,
    n_checkpoints=0,
    checkpoint_path="checkpoints",
    dtype=torch.float32,
)

def main():
    """Run the SAE training described by the cell-level `cfg`.

    The run is wrapped in a function so it can be handed to a profiler as a
    single call (see the commented-out cProfile recipe that follows this cell's
    code).

    Returns:
        Whatever `language_model_sae_runner(cfg)` returns. Previously the result
        was bound to an unused local and dropped when the function exited.
    """
    return language_model_sae_runner(cfg)


# Keep the result at cell level, as the other training cells in this notebook do.
sparse_autoencoder = main()
# os.environ["TOKENIZERS_PARALLELISM"] = "false"
# import cProfile, pstats, io
# from pstats import SortKey
# pr = cProfile.Profile()
# pr.enable()
# # ... do something ...
# main()
# pr.disable()
# s = io.StringIO()
# sortby = SortKey.CUMULATIVE
# ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
# ps.print_stats()
# print(s.getvalue())


# Tiny Stories

In [None]:
import torch
import os 

from sae_training.config import LanguageModelSAERunnerConfig
from sae_training.lm_runner import language_model_sae_runner



# Turn off parallelism inside the HuggingFace tokenizers library for this kernel.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Run configuration for an SAE hooked at `blocks.1.mlp.hook_post` of
# tiny-stories-2L-33M, with no intermediate checkpoints.
cfg = LanguageModelSAERunnerConfig(
    # --- Data-generating function (model + training distribution) ---
    model_name="tiny-stories-2L-33M",
    hook_point="blocks.1.mlp.hook_post",
    hook_point_layer=1,
    d_in=4096,
    dataset_path="roneneldan/TinyStories",
    is_dataset_tokenized=False,
    # --- SAE parameters ---
    expansion_factor=4,
    # --- Training parameters ---
    lr=1e-4,
    l1_coefficient=3e-4,
    train_batch_size=4096,
    context_size=128,
    # --- Activation store parameters ---
    n_batches_in_buffer=128,
    total_training_tokens=10 * 1_000_000,  # 10M for now; want 500M eventually
    store_batch_size=32,
    # --- Resampling protocol ---
    feature_sampling_method="l2",
    feature_sampling_window=2500,  # doesn't currently matter
    feature_reinit_scale=0.2,
    dead_feature_window=1250,
    dead_feature_threshold=5e-4,
    # --- Weights & Biases logging ---
    log_to_wandb=True,
    wandb_project="mats_sae_training_language_benchmark_tests",
    wandb_entity=None,
    wandb_log_frequency=10,
    # --- Misc ---
    device="mps",  # PyTorch's Apple-silicon (Metal) backend
    seed=42,
    n_checkpoints=0,
    checkpoint_path="checkpoints",
    dtype=torch.float32,
)

# Launch the run and keep the runner's return value for later cells.
sparse_autoencoder = language_model_sae_runner(cfg)


# Toy Model

In [None]:

from sae_training.toy_model_runner import SAEToyModelRunnerConfig, toy_model_sae_runner


# Configuration for the toy-model experiment: a small toy model is set up first
# (200 features, 5 hidden dims) and an SAE with 240 latents is trained on it.
cfg = SAEToyModelRunnerConfig(
    # --- Toy model ---
    n_features=200,
    n_hidden=5,
    feature_probability=0.025,
    n_correlated_pairs=0,
    n_anticorrelated_pairs=0,
    model_training_steps=10000,
    # --- SAE parameters ---
    d_sae=240,
    l1_coefficient=1e-3,
    # --- SAE training ---
    # NOTE(review): 1028 is not a power of two — confirm 1024 was not intended.
    train_batch_size=1028,
    total_training_tokens=300 * 4096,
    feature_sampling_window=3000,
    dead_feature_window=1000,
    feature_reinit_scale=0.5,
    # --- Logging / device ---
    log_to_wandb=True,
    wandb_project="sae-training-test",
    wandb_log_frequency=5,
    device="mps",  # PyTorch's Apple-silicon (Metal) backend
)

# Run the toy experiment and keep the trained SAE.
trained_sae = toy_model_sae_runner(cfg)

# Sanity check: the runner must hand back an object.
assert trained_sae is not None


# Cache activations to disk

In [1]:
import torch
import os

# Environment knobs, set before the project imports below:
#  - TOKENIZERS_PARALLELISM: turn off parallelism in HuggingFace tokenizers.
#  - WANDB__SERVICE_WAIT: wandb service wait, presumably in seconds — TODO confirm.
os.environ.update(
    TOKENIZERS_PARALLELISM="false",
    WANDB__SERVICE_WAIT="300",
)

from sae_training.config import CacheActivationsRunnerConfig
from sae_training.cache_activations_runner import cache_activations_runner

# Configuration for caching GPT2-small activations at `blocks.10.hook_resid_pre`.
cfg = CacheActivationsRunnerConfig(
    # --- Data-generating function (model + training distribution) ---
    model_name="gpt2-small",
    hook_point="blocks.10.hook_resid_pre",
    # NOTE(review): hook_point names block 10 but hook_point_layer is 11 —
    # confirm against how the runner uses hook_point_layer.
    hook_point_layer=11,
    d_in=768,
    dataset_path="Skylion007/openwebtext",
    is_dataset_tokenized=False,
    # --- Activation store parameters ---
    n_batches_in_buffer=16,
    total_training_tokens=200_000,
    store_batch_size=32,
    # --- Activation caching shuffle parameters ---
    n_shuffles_final=16,
    # --- Misc ---
    device="mps",  # PyTorch's Apple-silicon (Metal) backend
    seed=42,
    dtype=torch.float32,
)

# Run the caching job; the return value is not used.
cache_activations_runner(cfg)


Loaded pretrained model gpt2-small into HookedTransformer
Moving model to device:  mps
Dataset is not tokenized! Updating config.
Started caching 200000 activations


Caching activations: 100%|██████████| 4/4 [00:41<00:00, 10.39s/it]
Final shuffling: 100%|██████████| 16/16 [00:41<00:00,  2.61s/it]


## Train an SAE using the cached activations stored on disk
Pass `use_cached_activations=True` into the config so that training uses the activations cached to disk by the previous section.

In [2]:
import torch
import os

# Environment knobs, set before the project imports below:
#  - TOKENIZERS_PARALLELISM: turn off parallelism in HuggingFace tokenizers.
#  - WANDB__SERVICE_WAIT: wandb service wait, presumably in seconds — TODO confirm.
os.environ.update(
    TOKENIZERS_PARALLELISM="false",
    WANDB__SERVICE_WAIT="300",
)
from sae_training.config import LanguageModelSAERunnerConfig
from sae_training.lm_runner import language_model_sae_runner

# Same GPT2-small `blocks.10.hook_resid_pre` setup as the caching cell, with
# `use_cached_activations=True` switched on.
cfg = LanguageModelSAERunnerConfig(
    # --- Data-generating function (model + training distribution) ---
    model_name="gpt2-small",
    hook_point="blocks.10.hook_resid_pre",
    # NOTE(review): hook_point names block 10 but hook_point_layer is 11 —
    # confirm against how the runner uses hook_point_layer.
    hook_point_layer=11,
    d_in=768,
    dataset_path="Skylion007/openwebtext",
    is_dataset_tokenized=False,
    use_cached_activations=True,
    # --- SAE parameters ---
    expansion_factor=64,  # determines the dimension of the SAE
    # --- Training parameters ---
    lr=1e-5,
    l1_coefficient=5e-4,
    lr_scheduler_name=None,
    train_batch_size=4096,
    context_size=128,
    # --- Activation store parameters ---
    # NOTE(review): the caching cell in this notebook uses n_batches_in_buffer=16
    # while this run uses 64, and the saved output of this cell contains an
    # "Expected to have 131072 activations, but only found 0" warning — confirm
    # the buffer settings are compatible with the cache on disk.
    n_batches_in_buffer=64,
    total_training_tokens=200_000,
    store_batch_size=32,
    # --- Resampling protocol ---
    feature_sampling_method="l2",
    feature_sampling_window=1000,
    feature_reinit_scale=0.2,
    dead_feature_window=5000,
    dead_feature_threshold=1e-7,
    # --- Weights & Biases logging ---
    log_to_wandb=True,
    wandb_project="mats_sae_training_gpt2_small",
    wandb_entity=None,
    wandb_log_frequency=50,
    # --- Misc ---
    device="mps",  # PyTorch's Apple-silicon (Metal) backend
    seed=42,
    n_checkpoints=5,
    checkpoint_path="checkpoints",
    dtype=torch.float32,
)

# Launch the run and keep the runner's return value for later cells.
sparse_autoencoder = language_model_sae_runner(cfg)


n_tokens_per_buffer (millions): 0.262144
Lower bound: n_contexts_per_buffer (millions): 0.002048
Total training steps: 48
n_dead_feature_samples: -1
Loaded pretrained model gpt2-small into HookedTransformer
Moving model to device:  mps
Dataset is not tokenized! Updating config.


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mlucyfarnik[0m. Use [1m`wandb login --relogin`[0m to force relogin


  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
9| MSE Loss 109954.977 | L0 24386.133:  20%|██        | 40960/200000 [04:14<14:33, 182.05it/s]

Saved model to checkpoints/vli8fhwa/40960_sparse_autoencoder_gpt2-small_blocks.10.hook_resid_pre_49152.pkl.gz


19| MSE Loss 74495.047 | L0 24158.695:  41%|████      | 81920/200000 [05:27<02:52, 683.70it/s] 

Saved model to checkpoints/vli8fhwa/81920_sparse_autoencoder_gpt2-small_blocks.10.hook_resid_pre_49152.pkl.gz


29| MSE Loss 103482.188 | L0 23886.371:  61%|██████▏   | 122880/200000 [08:00<06:17, 204.06it/s]

Saved model to checkpoints/vli8fhwa/122880_sparse_autoencoder_gpt2-small_blocks.10.hook_resid_pre_49152.pkl.gz


31| MSE Loss 43942.613 | L0 23830.605:  66%|██████▌   | 131072/200000 [08:27<06:01, 190.91it/s] 



Expected to have 131072 activations, but only found 0.
This might just be a rounding error — your batch_size * n_batches_in_buffer * context_size is not divisible by your total_training_tokens
Returning a buffer of size 0 instead.





36| MSE Loss 29693.375 | L0 23692.150:  76%|███████▌  | 151552/200000 [10:41<04:20, 185.97it/s]

KeyboardInterrupt: 

Error in callback <bound method _WandbInit._pause_backend of <wandb.sdk.wandb_init._WandbInit object at 0x293bdabc0>> (for post_run_cell), with arguments args (<ExecutionResult object at 13f5a3040, execution_count=2 error_before_exec=None error_in_exec= info=<ExecutionInfo object at 293c119f0, raw_cell="import torch
import os 
os.environ["TOKENIZERS_PAR.." store_history=True silent=False shell_futures=True cell_id=vscode-notebook-cell:/Users/tz20913/Library/Mobile%20Documents/com~apple~CloudDocs/Desktop/Research/MATS_Neel/mats_sae_training/run.ipynb#X22sZmlsZQ%3D%3D> result=None>,),kwargs {}:


TypeError: _WandbInit._pause_backend() takes 1 positional argument but 2 were given