## Setup

### Imports

In [1]:
import os
import json
import glob
import torch
import re
import einops
import pandas as pd
from functools import partial
from torch import Tensor
from torchtyping import TensorType as TT


import plotly.express as px

from utils.data_utils import generate_data_and_caches
from utils.data_processing import (
    load_edge_scores_into_dictionary,
)
from utils.visualization import plot_attention_heads, imshow_p
from utils.backup_analysis import (
    load_model,
    run_iteration,
    process_backup_results,
    get_past_nmhs_for_checkpoints,
    plot_top_heads
)

In [2]:
# Pick the GPU when PyTorch reports one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Globally switch off gradient tracking for every cell that follows.
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x7f57a841f760>

### Functions

## Experiments

### Experiment Parameters

In [10]:
# Task name; used as the last component of the circuit-graph results folder.
TASK = 'ioi'
PERFORMANCE_METRIC = 'logit_diff'

# BASE_MODEL and VARIANT are both handed to the model-loading helper.
# VARIANT may be falsy, in which case the short name is just BASE_MODEL.
BASE_MODEL = "pythia-160m"
VARIANT = "EleutherAI/pythia-160m-data-seed3"

# Short name used when building results paths: whatever follows the last "/"
# in VARIANT. Splitting on the separator avoids hard-coding the length of the
# "EleutherAI/" prefix, so other organisation names (or no prefix) also work.
MODEL_SHORTNAME = VARIANT.rsplit("/", 1)[-1] if VARIANT else BASE_MODEL

# Passed to the model-loading helper (presumably a download/cache directory -- TODO confirm).
CACHE = "model_cache"
IOI_DATASET_SIZE = 70
# Passed as `threshold` to the per-checkpoint experiment; in the logged runs only
# heads whose printed top-5 copy accuracy is at least this value get ablated.
COPY_SCORE_THRESHOLD = 75.0

### Circuit Data

In [11]:
# Per-checkpoint edge scores live under results/graphs/<model>/<task>.
folder_path = f'results/graphs/{MODEL_SHORTNAME}/{TASK}'
df = load_edge_scores_into_dictionary(folder_path)

# Keep only checkpoints at step 1000 or later. The explicit .copy() makes the
# filtered frame independent of the loaded one, so the column assignment below
# is not a write into a slice (avoids pandas' SettingWithCopyWarning).
df = df[df['checkpoint'] >= 1000].copy()

# Split each "source->target" edge label into its two endpoints.
df[['source', 'target']] = df['edge'].str.split('->', expand=True)

# Displayed as the cell output: number of distinct target nodes.
len(df['target'].unique())

Processing file 1/143: results/graphs/pythia-160m-data-seed3/ioi/57000.json
Processing file 2/143: results/graphs/pythia-160m-data-seed3/ioi/141000.json
Processing file 3/143: results/graphs/pythia-160m-data-seed3/ioi/95000.json
Processing file 4/143: results/graphs/pythia-160m-data-seed3/ioi/107000.json
Processing file 5/143: results/graphs/pythia-160m-data-seed3/ioi/34000.json
Processing file 6/143: results/graphs/pythia-160m-data-seed3/ioi/6000.json
Processing file 7/143: results/graphs/pythia-160m-data-seed3/ioi/37000.json
Processing file 8/143: results/graphs/pythia-160m-data-seed3/ioi/39000.json
Processing file 9/143: results/graphs/pythia-160m-data-seed3/ioi/104000.json
Processing file 10/143: results/graphs/pythia-160m-data-seed3/ioi/59000.json
Processing file 11/143: results/graphs/pythia-160m-data-seed3/ioi/67000.json
Processing file 12/143: results/graphs/pythia-160m-data-seed3/ioi/111000.json
Processing file 13/143: results/graphs/pythia-160m-data-seed3/ioi/76000.json
Proce

445

### Dataset Setup

In [12]:
# Load the model at step 143000, the last step visited by the experiment loop's range.
initial_model = load_model(BASE_MODEL, VARIANT, 143000, CACHE, device)

# Take the dataset size from the parameters cell instead of repeating the literal,
# so the two cannot drift apart. `size` is kept as a name in case later cells use it.
size = IOI_DATASET_SIZE
ioi_dataset, abc_dataset = generate_data_and_caches(initial_model, size, verbose=True)

config.json:   0%|          | 0.00/598 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step143000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer


In [13]:
# NOTE(review): disabled plotting call, kept for reference only.
# `per_head_ablated_logit_diffs` is not defined in any cell visible here, so
# re-enabling this requires computing that value first -- TODO confirm where it comes from.
# imshow_p(
#     per_head_ablated_logit_diffs,
#     title="Headwise logit diff contribution, post NMH KO",
#     labels={"x": "Head", "y": "Layer", "color": "Logit diff attribution"},
#     #coloraxis=dict(colorbar_ticksuffix = "%"),
#     border=True,
#     width=600,
#     margin={"r": 100, "l": 100}
# )

### Run Experiment

In [14]:
# Running metrics object; each helper below receives it and returns the updated version.
experiment_metrics = {}

# Ensure the output directory exists before the sweep starts.
os.makedirs(f'results/backup/{MODEL_SHORTNAME}', exist_ok=True)
metrics_path = f'results/backup/{MODEL_SHORTNAME}/nmh_backup_metrics.pt'

# Steps 4000, 5000, ..., 143000.
checkpoint_steps = range(4000, 144000, 1000)

for checkpoint in checkpoint_steps:
    experiment_metrics = run_iteration(
        BASE_MODEL,
        VARIANT,
        df,
        checkpoint=checkpoint,
        dataset=ioi_dataset,
        experiment_metrics=experiment_metrics,
        threshold=COPY_SCORE_THRESHOLD,
    )
    experiment_metrics = process_backup_results(df, checkpoint, experiment_metrics)

    # Overwrite the saved file (torch format) after every checkpoint, so the
    # results gathered so far are on disk if the sweep stops partway.
    torch.save(experiment_metrics, metrics_path)

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step4000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 95.23809523809523%
Checkpoint 4000:
Heads ablated:            [(8, 1)]
Original logit diff:      0.8401681781
Post ablation logit diff: 1.0291959047
Logit diff % change:      22.50%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step5000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 71.42857142857143%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 94.28571428571428%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 82.38095238095238%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 70.0%
Checkpoint 5000:
Heads ablated:            [(8, 1), (8, 2)]
Original logit diff:      1.9407143593
Post ablation logit diff: 2.2735850811
Logit diff % change:      17.15%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step6000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 94.76190476190476%
Checkpoint 6000:
Heads ablated:            [(8, 1)]
Original logit diff:      2.3940787315
Post ablation logit diff: 2.6625077724
Logit diff % change:      11.21%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step7000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 94.28571428571428%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 73.80952380952381%
Checkpoint 7000:
Heads ablated:            [(8, 1)]
Original logit diff:      2.8361926079
Post ablation logit diff: 3.1126420498
Logit diff % change:      9.75%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step8000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 79.04761904761905%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 73.33333333333333%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 96.66666666666667%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 94.28571428571428%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 10.0%
Checkpoint 8000:
Heads ablated:            [(9, 4), (8, 2), (8, 1)]
Original logit diff:      2.8423736095
Post ablation logit diff: 3.1041798592
Logit diff % change:      9.21%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step9000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 73.80952380952381%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 93.80952380952381%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 42.38095238095238%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 8.571428571428571%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 74.28571428571429%
Copy circuit for head 6.2 (sign=1) : Top 5 accuracy: 96.66666666666667%
Checkpoint 9000:
Heads ablated:            [(8, 2), (8, 1), (6, 2)]
Original logit diff:      2.5634901524
Post ablation logit diff: 2.9622433186
Logit diff % change:      15.56%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step10000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 16.19047619047619%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 54.285714285714285%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 10.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 92.85714285714286%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 81.42857142857143%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 81.42857142857143%
Checkpoint 10000:
Heads ablated:            [(10, 7), (8, 1), (8, 10), (9, 4)]
Original logit diff:      3.1145403385
Post ablation logit diff: 3.4700305462
Logit diff % change:      11.41%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step11000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 96.19047619047619%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 14.761904761904763%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 46.19047619047619%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 7.6190476190476195%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 83.33333333333334%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 79.04761904761905%
Checkpoint 11000:
Heads ablated:            [(10, 7), (8, 1), (8, 2), (8, 10), (9, 4)]
Original logit diff:      2.8407638073
Post ablation logit diff: 3.4531602859
Logit diff % change:      21.56%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step12000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 19.523809523809526%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 94.28571428571428%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 79.04761904761905%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 5.238095238095238%
Checkpoint 12000:
Heads ablated:            [(8, 1), (8, 2), (8, 10)]
Original logit diff:      2.4739444256
Post ablation logit diff: 3.2910175323
Logit diff % change:      33.03%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-data-seed3 at step13000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 21.904761904761905%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 83.33333333333334%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 84.76190476190476%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 5.714285714285714%
Checkpoint 13000:
Heads ablated:            [(8, 1), (8, 2), (8, 10), (9, 4)]
Original logit diff:      2.5679347515
Post ablation logit diff: 3.1047751904
Logit diff % change:      20.91%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step14000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 85.71428571428571%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 24.761904761904763%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 43.80952380952381%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 5.714285714285714%
Checkpoint 14000:
Heads ablated:            [(8, 10), (8, 2), (8, 1), (10, 7)]
Original logit diff:      2.7442243099
Post ablation logit diff: 3.1209573746
Logit diff % change:      13.73%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step15000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 84.76190476190476%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 94.76190476190476%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 5.714285714285714%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 24.761904761904763%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 98.57142857142858%
Checkpoint 15000:
Heads ablated:            [(8, 10), (8, 2), (8, 1), (10, 7)]
Original logit diff:      3.0464704037
Post ablation logit diff: 3.7810251713
Logit diff % change:      24.11%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step16000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 23.333333333333332%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 6.190476190476191%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 92.38095238095238%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 86.66666666666667%
Checkpoint 16000:
Heads ablated:            [(8, 1), (8, 10)]
Original logit diff:      3.0003478527
Post ablation logit diff: 3.6426327229
Logit diff % change:      21.41%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step17000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 98.09523809523809%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 23.809523809523807%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 90.47619047619048%
Checkpoint 17000:
Heads ablated:            [(10, 7), (8, 1)]
Original logit diff:      2.9728684425
Post ablation logit diff: 3.1847674847
Logit diff % change:      7.13%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step18000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 94.76190476190476%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 5.714285714285714%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 23.333333333333332%
Checkpoint 18000:
Heads ablated:            [(8, 1)]
Original logit diff:      3.1881437302
Post ablation logit diff: 3.3728911877
Logit diff % change:      5.79%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step19000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 41.42857142857143%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 27.142857142857142%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 99.04761904761905%
Checkpoint 19000:
Heads ablated:            [(8, 2), (8, 1), (10, 7)]
Original logit diff:      3.4113707542
Post ablation logit diff: 3.8865439892
Logit diff % change:      13.93%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step20000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 24.761904761904763%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 44.285714285714285%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 3.8095238095238098%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 92.85714285714286%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 83.33333333333334%
Checkpoint 20000:
Heads ablated:            [(10, 7), (8, 1), (8, 2), (8, 10), (9, 4)]
Original logit diff:      3.2258758545
Post ablation logit diff: 3.7366254330
Logit diff % change:      15.83%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step21000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 26.190476190476193%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 42.38095238095238%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 3.3333333333333335%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 92.85714285714286%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 92.38095238095238%
Checkpoint 21000:
Heads ablated:            [(10, 7), (8, 1), (8, 2), (8, 10)]
Original logit diff:      3.1164991856
Post ablation logit diff: 3.7776720524
Logit diff % change:      21.22%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step22000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 93.80952380952381%
Checkpoint 22000:
Heads ablated:            [(10, 7), (8, 2), (8, 10)]
Original logit diff:      3.7475376129
Post ablation logit diff: 3.9040491581
Logit diff % change:      4.18%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step23000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 28.57142857142857%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 23000:
Heads ablated:            [(10, 7), (8, 2)]
Original logit diff:      3.5139567852
Post ablation logit diff: 3.8596022129
Logit diff % change:      9.84%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step24000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 93.80952380952381%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 88.57142857142857%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 41.904761904761905%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 28.57142857142857%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 99.04761904761905%
Checkpoint 24000:
Heads ablated:            [(8, 10), (8, 2), (8, 1), (10, 7)]
Original logit diff:      3.5544216633
Post ablation logit diff: 4.1349320412
Logit diff % change:      16.33%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step25000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 30.952380952380953%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 43.333333333333336%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 88.57142857142857%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 92.38095238095238%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Checkpoint 25000:
Heads ablated:            [(10, 7), (8, 1), (8, 2), (8, 10)]
Original logit diff:      3.6209757328
Post ablation logit diff: 4.1330761909
Logit diff % change:      14.14%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step26000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 88.09523809523809%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 34.285714285714285%
Checkpoint 26000:
Heads ablated:            [(8, 2), (8, 1), (10, 7)]
Original logit diff:      3.6167533398
Post ablation logit diff: 4.0316176414
Logit diff % change:      11.47%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step27000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 33.80952380952381%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 99.04761904761905%
Checkpoint 27000:
Heads ablated:            [(8, 10), (8, 2), (10, 7)]
Original logit diff:      3.9796869755
Post ablation logit diff: 4.4565253258
Logit diff % change:      11.98%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step28000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 90.47619047619048%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 34.76190476190476%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 44.285714285714285%
Checkpoint 28000:
Heads ablated:            [(8, 10), (8, 2), (8, 1), (10, 7)]
Original logit diff:      3.7514591217
Post ablation logit diff: 4.2660951614
Logit diff % change:      13.72%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step29000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 33.33333333333333%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 96.66666666666667%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Checkpoint 29000:
Heads ablated:            [(10, 7), (8, 2), (8, 10)]
Original logit diff:      3.3610277176
Post ablation logit diff: 3.7239105701
Logit diff % change:      10.80%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step30000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 88.09523809523809%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 45.23809523809524%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 34.76190476190476%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 97.14285714285714%
Checkpoint 30000:
Heads ablated:            [(8, 10), (8, 2), (8, 1), (10, 7)]
Original logit diff:      3.4386322498
Post ablation logit diff: 4.0678386688
Logit diff % change:      18.30%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step31000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 34.285714285714285%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 96.19047619047619%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Checkpoint 31000:
Heads ablated:            [(10, 7), (8, 2), (8, 10)]
Original logit diff:      3.5734572411
Post ablation logit diff: 3.9339795113
Logit diff % change:      10.09%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step32000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 33.33333333333333%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 40.476190476190474%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Checkpoint 32000:
Heads ablated:            [(10, 7), (8, 2), (8, 10)]
Original logit diff:      3.8031930923
Post ablation logit diff: 4.1642317772
Logit diff % change:      9.49%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-data-seed3 at step33000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 86.19047619047619%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 92.38095238095238%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 36.666666666666664%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 32.38095238095238%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Checkpoint 33000:
Heads ablated:            [(9, 4), (8, 10), (8, 2), (8, 1), (10, 7)]
Original logit diff:      3.9761745930
Post ablation logit diff: 4.0743989944
Logit diff % change:      2.47%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step34000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 97.14285714285714%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 34.285714285714285%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Checkpoint 34000:
Heads ablated:            [(8, 10), (8, 2), (10, 7)]
Original logit diff:      4.4500985146
Post ablation logit diff: 4.6024250984
Logit diff % change:      3.42%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step35000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 31.428571428571427%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Checkpoint 35000:
Heads ablated:            [(10, 7), (8, 2), (8, 10)]
Original logit diff:      4.0028758049
Post ablation logit diff: 4.4501914978
Logit diff % change:      11.17%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step36000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 86.66666666666667%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 93.80952380952381%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 41.904761904761905%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 31.428571428571427%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 99.04761904761905%
Checkpoint 36000:
Heads ablated:            [(9, 4), (8, 10), (8, 2), (10, 7)]
Original logit diff:      4.0934247971
Post ablation logit diff: 4.0724430084
Logit diff % change:      -0.51%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-data-seed3 at step37000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 33.33333333333333%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 34.285714285714285%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 94.28571428571428%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Checkpoint 37000:
Heads ablated:            [(10, 7), (8, 2), (8, 10)]
Original logit diff:      3.9952275753
Post ablation logit diff: 4.0428805351
Logit diff % change:      1.19%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-data-seed3 at step38000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 31.428571428571427%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 95.71428571428572%
Checkpoint 38000:
Heads ablated:            [(10, 7), (8, 2), (8, 10)]
Original logit diff:      4.3226270676
Post ablation logit diff: 4.5441956520
Logit diff % change:      5.13%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step39000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 34.285714285714285%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 32.38095238095238%
Checkpoint 39000:
Heads ablated:            [(8, 10), (8, 2), (10, 7)]
Original logit diff:      4.2591438293
Post ablation logit diff: 4.2727317810
Logit diff % change:      0.32%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step40000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.2 (sign=1) : Top 5 accuracy: 74.28571428571429%
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 7.6190476190476195%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 10.8 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 32.857142857142854%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 32.857142857142854%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 90.0%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 58.5714285714

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step41000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 32.38095238095238%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 94.28571428571428%
Checkpoint 41000:
Heads ablated:            [(10, 7), (8, 2), (8, 10)]
Original logit diff:      4.2436356544
Post ablation logit diff: 4.4349055290
Logit diff % change:      4.51%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step42000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.2 (sign=1) : Top 5 accuracy: 76.66666666666667%
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 10.0%
Copy circuit for head 6.11 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 10.8 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 33.33333333333333%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 34.285714285714285%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 63.8095238095238%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 91.428571428571

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step43000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 32.857142857142854%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 35.714285714285715%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Checkpoint 43000:
Heads ablated:            [(10, 7), (8, 2), (8, 10)]
Original logit diff:      3.7736327648
Post ablation logit diff: 3.9724695683
Logit diff % change:      5.27%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step44000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 94.76190476190476%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 86.66666666666667%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 32.38095238095238%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 31.9047619047619%
Checkpoint 44000:
Heads ablated:            [(8, 10), (8, 2), (8, 1), (10, 7)]
Original logit diff:      3.9635663033
Post ablation logit diff: 4.2971892357
Logit diff % change:      8.42%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step45000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 30.952380952380953%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 45000:
Heads ablated:            [(8, 2)]
Original logit diff:      3.8541038036
Post ablation logit diff: 4.2500000000
Logit diff % change:      10.27%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-data-seed3 at step46000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 31.428571428571427%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 3.3333333333333335%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 29.523809523809526%
Checkpoint 46000:
Heads ablated:            [(8, 10), (8, 2), (10, 7)]
Original logit diff:      3.6657984257
Post ablation logit diff: 3.7562165260
Logit diff % change:      2.47%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step47000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 5.714285714285714%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 36.666666666666664%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 10.8 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 31.9047619047619%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 6.666666666666667%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 87.14285714285714%
Copy circuit for head 8.10 (sign=1) : Top 5

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step48000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 96.19047619047619%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 6.666666666666667%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 31.9047619047619%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Checkpoint 48000:
Heads ablated:            [(8, 10), (8, 2), (10, 7)]
Original logit diff:      3.9826269150
Post ablation logit diff: 4.4064106941
Logit diff % change:      10.64%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step49000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 31.9047619047619%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Checkpoint 49000:
Heads ablated:            [(10, 7), (8, 2)]
Original logit diff:      4.0723476410
Post ablation logit diff: 4.1287608147
Logit diff % change:      1.39%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-data-seed3 at step50000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 31.428571428571427%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 33.80952380952381%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 9.523809523809524%
Checkpoint 50000:
Heads ablated:            [(10, 7), (8, 2), (8, 10)]
Original logit diff:      3.9813957214
Post ablation logit diff: 4.4181709290
Logit diff % change:      10.97%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-data-seed3 at step51000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 94.28571428571428%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 8.571428571428571%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 32.857142857142854%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Checkpoint 51000:
Heads ablated:            [(8, 10), (8, 2), (10, 7)]
Original logit diff:      4.2498250008
Post ablation logit diff: 4.5835623741
Logit diff % change:      7.85%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-data-seed3 at step52000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 94.76190476190476%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 7.6190476190476195%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 35.23809523809524%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 40.476190476190474%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 96.66666666666667%
Checkpoint 52000:
Heads ablated:            [(8, 10), (8, 2), (10, 7)]
Original logit diff:      3.9850504398
Post ablation logit diff: 4.1977314949
Logit diff % change:      5.34%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step53000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 89.04761904761904%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 6.666666666666667%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 34.76190476190476%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 40.0%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 8.571428571428571%
Checkpoint 53000:
Heads ablated:            [(8, 10), (8, 2), (8, 1), (10, 7)]
Original logit diff:      4.2921242714
Post ablation logit diff: 

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step54000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 93.80952380952381%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 37.142857142857146%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 37.142857142857146%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 90.95238095238095%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 94.76190476190476%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 84.

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step55000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 7.142857142857142%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 92.85714285714286%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 39.04761904761905%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 42.38095238095238%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 93.80952380952381%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 96.66666666666667%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 9.523809523809524%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 0.4761

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step56000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 7.142857142857142%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 89.04761904761904%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 41.904761904761905%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 3.3333333333333335%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 40.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 92.85714285714286%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 8.095238095238095%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 1.9047619047619049

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step57000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.11 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 81.42857142857143%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 42.857142857142854%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 42.38095238095238%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 98.09523809523809%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 85.23809523809524%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 10.476190476190476%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 0.9

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step58000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 10.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 88.57142857142857%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 98.09523809523809%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 92.38095238095238%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 49.047619047619044%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 81.9047619047619%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 42.857142857142854%

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step59000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 3.8095238095238098%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 43.80952380952381%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 3.3333333333333335%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 80.0%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 41.42857142857143%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 10.952380952380953%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 85.71428571428571%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 93.80952380952381%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 97.61904761904762

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step60000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 12.380952380952381%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 96.66666666666667%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 40.95238095238095%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 78.0952380952381%
Copy circuit for head 10.6 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 45.23809523809524%
Copy circuit for head 6.11 (sign=1) : Top 5 accuracy: 0.4

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step61000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 4.285714285714286%
Copy circuit for head 6.11 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 79.52380952380952%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 41.904761904761905%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 45.23809523809524%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 96.66666666666667%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 87.

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step62000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 97.14285714285714%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 92.38095238095238%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 92.38095238095238%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 11.904761904761903%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 47.61904761904761%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 48.095238095238095%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 73.33333333333333%
Checkpoint 62000:
Heads ablated:            [(8, 10), (8,

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step63000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.2 (sign=1) : Top 5 accuracy: 59.523809523809526%
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 6.190476190476191%
Copy circuit for head 10.6 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 73.33333333333333%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 46.19047619047619%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 45.23809523809524%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 93.80952380952381%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 87.1

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step64000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 12.380952380952381%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 90.47619047619048%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 96.19047619047619%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 45.23809523809524%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 71.9047619047619%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 45.7142

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step65000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 71.9047619047619%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 45.714285714285715%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 40.95238095238095%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 91.9047619047619%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 96.19047619047619%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 85.71428571428571%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 12.857142857142856%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Checkpoint 65000:
Heads ablated:            [(8, 1), (8, 2)

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step66000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 12.380952380952381%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 88.57142857142857%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 90.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 37.61904761904762%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 47.61904761904761%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 72.85714285714285%
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 4.761904761904762%

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step67000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 72.38095238095238%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 44.285714285714285%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 37.61904761904762%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 96.19047619047619%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 89.04761904761904%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 13.80952380952381%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 9.7 (sign=1) : Top 5 accur

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step68000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 13.80952380952381%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 86.19047619047619%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 94.76190476190476%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 92.85714285714286%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 37.142857142857146%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 6.190476190476191%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 47.14285714285714%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 72.85714285714285%
Copy circuit for head 6.11 (sign=1) : Top 5 accuracy: 0.476

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step69000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 13.80952380952381%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 94.28571428571428%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 38.095238095238095%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 6.190476190476191%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 70.0%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 43.333333333333336%
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 2.3809

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step70000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 70.47619047619048%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 44.761904761904766%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 6.666666666666667%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 43.333333333333336%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 90.0%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 94.76190476190476%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 88.57142857142857%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 13.80952380952381%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 2.857142857142857%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step71000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.2 (sign=1) : Top 5 accuracy: 57.14285714285714%
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 71.9047619047619%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 45.23809523809524%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 6.666666666666667%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 39.523809523809526%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 92.85714285714286%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 94.28571428571428%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 86.6666

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step72000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 4.285714285714286%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 13.80952380952381%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 87.14285714285714%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 40.476190476190474%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 6.190476190476191%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 70.0%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 45.23809523809524%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step73000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 68.0952380952381%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 45.714285714285715%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 3.8095238095238098%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 40.476190476190474%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 92.85714285714286%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 86.66666666666667%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 13.80952380952381%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Checkpoint 73000:
Heads ablated:            [(8, 1), (8, 2), (8, 10), (9, 4)]
Original logit diff:      4.2062253952
Post ablation

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-data-seed3 at step74000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 68.0952380952381%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 44.285714285714285%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 8.095238095238095%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 44.285714285714285%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 91.9047619047619%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 91.9047619047619%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 89.04761904761904%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 13.80952380952381%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 5.714285714285714%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 2.380952380952381%
Checkpoint 74000:
Heads ablated:            [(8, 1), (8, 2), (8, 8), (8, 1

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step75000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 3.8095238095238098%
Copy circuit for head 10.6 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 62.38095238095238%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 44.285714285714285%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 7.142857142857142%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 45.23809523809524%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 91.9047619047619%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 90.47619047619048%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 14.28

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step76000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 6.190476190476191%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 7.6190476190476195%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 65.71428571428571%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 43.333333333333336%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 45.714285714285715%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 13.80952380952381%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 91.9047619047619%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 94.28571428571428%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 89.52

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-data-seed3 at step77000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 85.71428571428571%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 92.38095238095238%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 97.14285714285714%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 92.38095238095238%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 13.80952380952381%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 7.6190476190476195%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 46.666666666666664%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 43.333333333333336%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 64.76190476190476%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 7.6190476190476195%
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 2.380952380952381%
Checkpoi

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step78000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 88.57142857142857%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 13.80952380952381%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 9.047619047619047%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 3.3333333333333335%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 50.0%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 9.523809523809524%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 68.57142857142857%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 46.666666666666664%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 89.04761904761904%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 91.42857142857143%
Checkpoint 78000:
Heads ablated:            [(9, 4), (8, 1), (8, 2), (8,

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step79000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 6.666666666666667%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 14.761904761904763%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 10.476190476190476%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 48.095238095238095%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 15.238095238095239%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 72.85714285714285%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 42.857142857142854%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 90.95238095238095%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 89.

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step80000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 75.71428571428571%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 89.04761904761904%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 75.23809523809524%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 90.95238095238095%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 14.761904761904763%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 11.428571428571429%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 3.8095238095238098%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 56.19047619047619%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 38.57142857142858%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 71.9047619047619%
Copy circuit for head 9.9 (sign=1) : Top 5 accu

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step81000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 70.47619047619048%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 64.76190476190476%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 94.76190476190476%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 14.761904761904763%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 12.857142857142856%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 3.8095238095238098%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 58.0952380952381%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 35.714285714285715%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 72.85714285714285%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 17.14

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step82000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 68.0952380952381%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 93.80952380952381%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 14.761904761904763%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 10.476190476190476%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 56.19047619047619%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 34.76190476190476%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 19.523809523809526%
Checkpoint 82000:
Heads ablated:            [(8, 10), (8, 2), (8, 1)]
Original logit diff:      3.5972390175
Post ablation logit dif

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step83000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 90.47619047619048%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 13.333333333333334%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 10.476190476190476%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 3.3333333333333335%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 34.76190476190476%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 75.71428571428571%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 18.571428571428573%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 58.0952380952381%
Checkpoint 83000:
Heads ablated:            [(8, 10), (8, 2), (10, 7)]
Original logit diff:      4.8527421951
Post ablation logit diff: 4.7167601585
Logit diff % change:      -2.80%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step84000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 20.0%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 75.71428571428571%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 22.380952380952383%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 58.0952380952381%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 4.285714285714286%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 10.952380952380953%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 42.38095238095238%
Checkpoint 84000:
Heads ablated:            [(10, 7), (8, 2), (8, 10)]
Original logit diff:      4.6263594627
Post ablation logit diff: 4.2733683586
Logit diff % change:      -7.63%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step85000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.2 (sign=1) : Top 5 accuracy: 58.0952380952381%
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 5.714285714285714%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 77.14285714285715%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 20.476190476190474%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 17.61904761904762%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 61.42857142857143%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 92.38095238095238%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 40.95238095238095%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 90.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 30.952380952380953%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 10.952380952380953%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step86000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.2 (sign=1) : Top 5 accuracy: 61.904761904761905%
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 7.6190476190476195%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 79.04761904761905%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 10.952380952380953%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 20.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 60.0%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 7.142857142857142%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 17.61904761904762%
Copy circuit

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step87000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 10.0%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 3.3333333333333335%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 55.714285714285715%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 20.0%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 79.52380952380952%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 8.571428571428571%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 3.8095238095238098%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 10.952380952380953%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 90.95238095238095%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 90.0%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 87000:
Heads 

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step88000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 19.523809523809526%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 79.52380952380952%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 7.142857142857142%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 53.80952380952381%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 94.28571428571428%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 88.09523809523809%
Checkpoint 88000:
Heads ablated:            [(10, 7), (8, 1), (8, 2), (8, 10)]
Original logit diff:      4.2507147789
Post ablation logit diff: 4.2054433823
Logit diff % change:      -1.07%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-data-seed3 at step89000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 7.142857142857142%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 90.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 25.71428571428571%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 92.85714285714286%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 54.761904761904766%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 19.047619047619047%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 79.52380952380952%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 7.142857142857142%
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 8.095238095238095%
Checkpoint 89000:
Heads

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step90000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 7.142857142857142%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 79.04761904761905%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 6.190476190476191%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 18.095238095238095%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 57.14285714285714%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 5.238095238095238%
Checkpoint 90000:
Heads ablated:            [(10, 7), (8, 1

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-data-seed3 at step91000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 90.47619047619048%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 92.38095238095238%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 58.57142857142858%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 17.142857142857142%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 79.04761904761905%
Checkpoint 91000:
Heads ablated:            [(8, 10), (8, 2), (8, 1), (10, 7)]
Original logit diff:      3.9725716114
Post ablation logit diff: 3.9750440121
Logit diff % change:      0.06%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step92000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 6.666666666666667%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 90.0%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 52.38095238095239%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 4.285714285714286%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 78.0952380952381%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 14.761904761904763%
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 7.6190476190476195%

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step93000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 6.666666666666667%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 78.0952380952381%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 14.761904761904763%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 56.666666666666664%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 94.28571428571428%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 89.04761904761904%
Checkpoint 93000:
Heads ablated:            [(10, 7), (8, 1), (8, 2), (8, 10)]
Original logit diff:      3.7275280952
Post ablation logit diff: 3.8916461468
Logit diff % change:      4.40%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step94000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 76.66666666666667%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 11.428571428571429%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 57.14285714285714%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 92.38095238095238%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 89.04761904761904%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 8.095238095238095%
Checkpoint 94000:
Heads ablated:            [(10, 7), (8, 1), (8, 2), (8, 10)]
Original logit diff:      3.8215069771
Post ablation logit diff: 3.9040443897
Logit diff % change:      2.16%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step95000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 6.666666666666667%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 75.23809523809524%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 14.285714285714285%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 92.38095238095238%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 87.14285714285714%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 7.6190476190476195%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 54.761904761904766%
Checkpoint 95000:
Heads ablated:            [(10, 7), (8, 1), (8, 2), (8, 10)]
Original logit diff:      3.8419084549
Post ablatio

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-data-seed3 at step96000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 4.285714285714286%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 74.76190476190476%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 13.80952380952381%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 55.714285714285715%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 84.28571428571429%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 8.095238095238095%
Checkpoint 96000:
Heads ablated:            [(8, 1), (8, 2), (8, 10)]
Original logit diff:      3.9171717167
Post ablation logit diff: 3.9397120476
Logit diff % change:      0.58%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step97000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 9.047619047619047%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 81.9047619047619%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 56.19047619047619%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 71.42857142857143%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 13.80952380952381%
Checkpoint 97000:
Heads ablated:            [(8, 10), (8, 2), (8, 1)]
Original logit diff:      4.0222864151
Post ablation logit diff: 4.2769942284
Logit diff % change:      6.33%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step98000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.2 (sign=1) : Top 5 accuracy: 50.476190476190474%
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 73.80952380952381%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 15.238095238095239%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 59.04761904761905%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 92.85714285714286%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 6.666666666666667%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 80.47619047619048%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 9.523809523809524%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 5.238095238095238%
Checkpoint 98000:
Heads ablated:            [(8, 1), (8, 2), 

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step99000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 4.285714285714286%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 69.04761904761905%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 13.333333333333334%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 59.04761904761905%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 93.80952380952381%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 77.14285714285715%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 5.714285714285714%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 5.238095238095238%
Checkpoint 99000:
Heads ablated:            [(8, 1), (8, 2), (8, 10)]
Original logit diff:      3.5787348747
Post ablation logit diff: 3.6780867577
Logit diff % change:      2.78%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step100000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 8.095238095238095%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 76.66666666666667%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 92.85714285714286%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 57.61904761904761%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 11.904761904761903%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 71.42857142857143%
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 3.8095238095238098%
Checkpoint 100000:
Heads ablated:            [(8, 10), (8, 2), (8, 1)]
Original logit diff:      3.6995532513
Post ablation logit diff: 3.8150444031
Logit diff % change:      3.12%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step101000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 9.047619047619047%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 73.80952380952381%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 92.38095238095238%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 56.19047619047619%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 12.380952380952381%
Checkpoint 101000:
Heads ablated:            [(8, 2), (8, 1)]
Original logit diff:      3.6974260807
Post ablation logit diff: 4.1130652428
Logit diff % change:      11.24%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step102000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 65.71428571428571%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 9.523809523809524%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 59.523809523809526%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 72.85714285714285%
Checkpoint 102000:
Heads ablated:            [(8, 1), (8, 2)]
Original logit diff:      3.7235321999
Post ablation logit diff: 3.9006285667
Logit diff % change:      4.76%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step103000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 72.38095238095238%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 10.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 57.61904761904761%
Checkpoint 103000:
Heads ablated:            [(8, 2), (8, 1)]
Original logit diff:      3.5225360394
Post ablation logit diff: 3.7476122379
Logit diff % change:      6.39%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step104000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 70.47619047619048%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 90.47619047619048%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 59.523809523809526%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 8.095238095238095%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 65.23809523809524%
Checkpoint 104000:
Heads ablated:            [(8, 2), (8, 1)]
Original logit diff:      3.7000689507
Post ablation logit diff: 3.9882700443
Logit diff % change:      7.79%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step105000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 59.523809523809526%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 90.95238095238095%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 60.0%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 6.666666666666667%
Checkpoint 105000:
Heads ablated:            [(8, 2), (8, 1)]
Original logit diff:      3.4684317112
Post ablation logit diff: 3.6152393818
Logit diff % change:      4.23%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step106000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 52.38095238095239%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 91.9047619047619%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 64.76190476190476%
Checkpoint 106000:
Heads ablated:            [(8, 2), (8, 1)]
Original logit diff:      3.1450891495
Post ablation logit diff: 3.6471641064
Logit diff % change:      15.96%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step107000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 59.04761904761905%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 44.285714285714285%
Checkpoint 107000:
Heads ablated:            [(8, 1), (8, 2)]
Original logit diff:      3.1006369591
Post ablation logit diff: 3.4664564133
Logit diff % change:      11.80%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step108000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 57.14285714285714%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 59.04761904761905%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 92.38095238095238%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 31.9047619047619%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 2.857142857142857%
Checkpoint 108000:
Heads ablated:            [(8, 1), (8, 2)]
Original logit diff:      3.0788583755
Post ablation logit diff: 3.1155080795
Logit diff % change:      1.19%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step109000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 59.04761904761905%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 6.190476190476191%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 57.61904761904761%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 27.142857142857142%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 3.3333333333333335%
Checkpoint 109000:
Heads ablated:            [(8, 1), (8, 2)]
Original logit diff:      3.2591559887
Post ablation logit diff: 3.2159140110
Logit diff % change:      -1.33%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step110000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 18.571428571428573%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 89.04761904761904%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 52.85714285714286%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Checkpoint 110000:
Heads ablated:            [(8, 2), (8, 1)]
Original logit diff:      3.5072703362
Post ablation logit diff: 3.5647058487
Logit diff % change:      1.64%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-data-seed3 at step111000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.2 (sign=1) : Top 5 accuracy: 51.90476190476191%
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 50.0%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 58.57142857142858%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 92.85714285714286%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 13.333333333333334%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Checkpoint 111000:
Heads ablated:            [(8, 1), (8, 2)]
Original logit diff:      3.4494361877
Post ablation logit diff: 3.3424186707
Logit diff % change:      -3

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step112000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 47.14285714285714%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 6.666666666666667%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 2.380952380952381%
Checkpoint 112000:
Heads ablated:            [(8, 1), (8, 2)]
Original logit diff:      3.2677509785
Post ablation logit diff: 3.3256540298
Logit diff % change:      1.77%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-data-seed3 at step113000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 43.80952380952381%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 90.95238095238095%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Checkpoint 113000:
Heads ablated:            [(8, 1), (8, 2)]
Original logit diff:      3.4777622223
Post ablation logit diff: 3.5448601246
Logit diff % change:      1.93%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step114000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 42.857142857142854%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 59.04761904761905%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 91.9047619047619%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Checkpoint 114000:
Heads ablated:            [(8, 1), (8, 2)]
Original logit diff:      3.3148481846
Post ablation logit diff: 3.4643635750
Logit diff % change:      4.51%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step115000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 92.38095238095238%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 115000:
Heads ablated:            [(8, 1), (8, 2)]
Original logit diff:      3.2615005970
Post ablation logit diff: 3.2006945610
Logit diff % change:      -1.86%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step116000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 93.80952380952381%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 41.904761904761905%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 62.38095238095238%
Checkpoint 116000:
Heads ablated:            [(8, 2), (8, 1)]
Original logit diff:      3.3810138702
Post ablation logit diff: 3.4176435471
Logit diff % change:      1.08%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step117000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 94.28571428571428%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 41.42857142857143%
Checkpoint 117000:
Heads ablated:            [(8, 2), (8, 1)]
Original logit diff:      2.9069039822
Post ablation logit diff: 2.9826943874
Logit diff % change:      2.61%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step118000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 39.523809523809526%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 2.380952380952381%
Checkpoint 118000:
Heads ablated:            [(8, 2)]
Original logit diff:      3.1204590797
Post ablation logit diff: 3.2611353397
Logit diff % change:      4.51%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-data-seed3 at step119000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 90.95238095238095%
Checkpoint 119000:
Heads ablated:            [(8, 2), (8, 1)]
Original logit diff:      3.3651401997
Post ablation logit diff: 3.1057806015
Logit diff % change:      -7.71%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step120000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 120000:
Heads ablated:            [(8, 2)]
Original logit diff:      3.5581626892
Post ablation logit diff: 3.5526654720
Logit diff % change:      -0.15%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step121000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 34.76190476190476%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 64.76190476190476%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 90.0%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 121000:
Heads ablated:            [(8, 1), (8, 2)]
Original logit diff:      3.0244240761
Post ablation logit diff: 3.0191018581
Logit diff % change:      -0.18%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step122000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 63.8095238095238%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 90.47619047619048%
Checkpoint 122000:
Heads ablated:            [(8, 2), (8, 1)]
Original logit diff:      3.4330513477
Post ablation logit diff: 3.3264555931
Logit diff % change:      -3.10%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step123000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 63.8095238095238%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 123000:
Heads ablated:            [(8, 1), (8, 2)]
Original logit diff:      3.1512703896
Post ablation logit diff: 3.1929745674
Logit diff % change:      1.32%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step124000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 64.28571428571429%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 124000:
Heads ablated:            [(8, 2)]
Original logit diff:      3.2640368938
Post ablation logit diff: 3.1161663532
Logit diff % change:      -4.53%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step125000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 86.66666666666667%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 65.71428571428571%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 30.0%
Checkpoint 125000:
Heads ablated:            [(8, 2), (8, 1)]
Original logit diff:      3.2859172821
Post ablation logit diff: 3.3728549480
Logit diff % change:      2.65%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step126000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 86.19047619047619%
Checkpoint 126000:
Heads ablated:            [(8, 2), (8, 1)]
Original logit diff:      3.3442046642
Post ablation logit diff: 3.1918475628
Logit diff % change:      -4.56%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step127000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 68.0952380952381%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 83.33333333333334%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Checkpoint 127000:
Heads ablated:            [(8, 1), (8, 2)]
Original logit diff:      3.5363519192
Post ablation logit diff: 3.2919015884
Logit diff % change:      -6.91%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step128000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 70.0%
Checkpoint 128000:
Heads ablated:            [(8, 2)]
Original logit diff:      3.3482873440
Post ablation logit diff: 3.2181522846
Logit diff % change:      -3.89%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step129000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 27.142857142857142%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 69.52380952380952%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 76.19047619047619%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 129000:
Heads ablated:            [(8, 1), (8, 2)]
Original logit diff:      3.1141633987
Post ablation logit diff: 3.0629851818
Logit diff % change:      -1.64%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step130000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 72.38095238095238%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 70.95238095238095%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 27.142857142857142%
Checkpoint 130000:
Heads ablated:            [(8, 2)]
Original logit diff:      3.0722997189
Post ablation logit diff: 2.8402276039
Logit diff % change:      -7.55%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-data-seed3 at step131000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 131000:
Heads ablated:            [(8, 2)]
Original logit diff:      3.0141928196
Post ablation logit diff: 2.9232745171
Logit diff % change:      -3.02%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step132000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 132000:
Heads ablated:            [(8, 2)]
Original logit diff:      3.3714425564
Post ablation logit diff: 3.1311056614
Logit diff % change:      -7.13%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step133000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 31.9047619047619%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 68.57142857142857%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 133000:
Heads ablated:            [(8, 2)]
Original logit diff:      2.9597463608
Post ablation logit diff: 2.8532183170
Logit diff % change:      -3.60%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step134000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 33.80952380952381%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 74.76190476190476%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 64.76190476190476%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Checkpoint 134000:
Heads ablated:            [(8, 2)]
Original logit diff:      3.1434023380
Post ablation logit diff: 2.9113852978
Logit diff % change:      -7.38%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step135000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 48.095238095238095%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 75.23809523809524%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 79.52380952380952%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 135000:
Heads ablated:            [(9, 8), (8, 1), (8, 2)]
Original logit diff:      3.6913521290
Post ablation logit diff: 3.1780772209
Logit diff % change:      -13.90%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-data-seed3 at step136000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 136000:
Heads ablated:            [(8, 2)]
Original logit diff:      3.0477452278
Post ablation logit diff: 2.9375073910
Logit diff % change:      -3.62%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-data-seed3 at step137000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 53.80952380952381%
Checkpoint 137000:
Heads ablated:            [(8, 2)]
Original logit diff:      3.0758056641
Post ablation logit diff: 3.2517895699
Logit diff % change:      5.72%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step138000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.3 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 84.76190476190476%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 9.047619047619047%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 5.714285714285714%
Copy circuit for head 7.11 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 66.19047619047619%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 49.047619047619044%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 10.476190476190476%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 4.76

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step139000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 139000:
Heads ablated:            [(8, 2)]
Original logit diff:      3.3186020851
Post ablation logit diff: 3.3690657616
Logit diff % change:      1.52%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step140000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 79.04761904761905%
Checkpoint 140000:
Heads ablated:            [(8, 2), (9, 8)]
Original logit diff:      2.6343762875
Post ablation logit diff: 2.9985921383
Logit diff % change:      13.83%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step141000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 52.38095238095239%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 80.0%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 79.52380952380952%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 8.571428571428571%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 85.23809523809524%
Checkpoint 141000:
Heads ablated:            [(8, 10), (8, 2), (8, 1), (9, 8), (10, 7)]
Original logit diff:      3.3804793358
Post ablation logit diff: 3.1386985779
Logit diff % change:      -7.15%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step142000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 83.80952380952381%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 78.57142857142857%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 87.14285714285714%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 80.47619047619048%
Checkpoint 142000:
Heads ablated:            [(10, 7), (9, 8), (8, 1), (8, 2), (8, 10)]
Original logit diff:      2.9138956070
Post ablation logit diff: 2.7787187099
Logit diff % change:      -4.64%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-data-seed3 at step143000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.3 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 5.714285714285714%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 78.0952380952381%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 5.714285714285714%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 85.23809523809524%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 26.666666666666668%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 77.14285714285715%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 44.285714285714285%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 5.714285714285714%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 4.285714285714286%
Checkpoint 143000:
Heads ablated:            [(9, 8), (8, 1),

In [9]:
# Show which checkpoints currently have an entry in experiment_metrics (keys are checkpoint steps).
experiment_metrics.keys()

dict_keys([4000, 5000, 6000, 7000])

## View Results

#### Pythia 160m

In [11]:
# Name of the model whose saved backup metrics are loaded from results/backup/ and
# shown in the plot titles of this section.
MODEL_TO_VIEW = "pythia-160m-alldropout"

In [12]:
# Load the saved per-checkpoint backup metrics for MODEL_TO_VIEW.
# map_location="cpu" keeps this results-viewing section usable on machines without a GPU
# even if the metrics were saved from a CUDA run; the cells that consume the metrics here
# only do arithmetic and plotting.
# NOTE: torch.load unpickles the file, so only point this at metrics files produced by
# this project, never at untrusted downloads.
experiment_metrics = torch.load(
    f'results/backup/{MODEL_TO_VIEW}/nmh_backup_metrics.pt',
    map_location="cpu",
)

In [13]:
# Inspect which metric names are stored for a single checkpoint (step 4000).
experiment_metrics[4000].keys()

dict_keys(['logit_diff', 'per_head_logit_diffs', 'ablation_targets', 'ablated_logit_diff', 'per_head_ablated_logit_diffs', 'per_head_logit_diff_delta', 'in_circuit_head_delta', 'outside_circuit_head_delta', 'summed_in_circuit_head_delta', 'summed_outside_circuit_head_delta', 'summed_total_head_delta'])

In [14]:
# Express every post-ablation delta relative to the checkpoint's original
# (pre-ablation) logit diff, so that checkpoints can be compared with each other.
# The results are plain ratios (1.0 == 100% of the original logit diff).
summed_in_circuit_head_deltas = {}
summed_outside_circuit_head_deltas = {}
summed_total_head_deltas = {}
per_head_logit_diff_deltas = {}
total_logit_diff_deltas = {}

for checkpoint, metrics in experiment_metrics.items():
    baseline = metrics["logit_diff"]

    summed_in_circuit_head_deltas[checkpoint] = metrics["summed_in_circuit_head_delta"] / baseline
    summed_outside_circuit_head_deltas[checkpoint] = metrics["summed_outside_circuit_head_delta"] / baseline
    summed_total_head_deltas[checkpoint] = metrics["summed_total_head_delta"] / baseline
    per_head_logit_diff_deltas[checkpoint] = metrics["per_head_logit_diff_delta"] / baseline

    # Overall change in logit diff caused by the ablation, again relative to the baseline.
    total_logit_diff_deltas[checkpoint] = (metrics['ablated_logit_diff'] - metrics['logit_diff']) / baseline

In [15]:
# Plot the summed in-circuit head delta for every checkpoint.
# The plotted values are ratios of the original logit diff (they are divided by
# experiment_metrics[checkpoint]["logit_diff"] and never multiplied by 100).
fig = px.line(
    x=list(summed_in_circuit_head_deltas.keys()),
    y=list(summed_in_circuit_head_deltas.values()),
    title=f"Summed Post-NMH-Ablation In-Circuit Head Logit Diff Change Over Time ({MODEL_TO_VIEW})",
    labels={'x': 'Checkpoint', 'y': 'Change as % of original logit diff'}
)
# Render the ratios as percentages so the tick values agree with the "%" in the axis label.
fig.update_yaxes(tickformat=".1%")
fig.show()


In [16]:
# Plot the summed outside-circuit head delta for every checkpoint.
# The plotted values are ratios of the original logit diff (they are divided by
# experiment_metrics[checkpoint]["logit_diff"] and never multiplied by 100).
fig = px.line(
    x=list(summed_outside_circuit_head_deltas.keys()),
    y=list(summed_outside_circuit_head_deltas.values()),
    title=f"Summed Post-NMH-Ablation Outside-Circuit Head Attribution Change ({MODEL_TO_VIEW})",
    labels={'x': 'Checkpoint', 'y': 'Change as % of original logit diff'}
)
# Render the ratios as percentages so the tick values agree with the "%" in the axis label.
fig.update_yaxes(tickformat=".1%")
fig.show()

In [17]:
# Plot the summed total head delta (all heads) for every checkpoint.
# The plotted values are ratios of the original logit diff (they are divided by
# experiment_metrics[checkpoint]["logit_diff"] and never multiplied by 100).
fig = px.line(
    x=list(summed_total_head_deltas.keys()),
    y=list(summed_total_head_deltas.values()),
    title=f"Summed Total Post-NMH-Ablation Head Attribution Change ({MODEL_TO_VIEW})",
    labels={'x': 'Checkpoint', 'y': 'Change as % of original logit diff'}
)
# Render the ratios as percentages so the tick values agree with the "%" in the axis label.
fig.update_yaxes(tickformat=".1%")
fig.show()

In [9]:
# Build two dicts keyed by checkpoint from the loaded metrics. Judging by the outputs
# displayed further down, checkpoint_nmhs holds the set of (layer, head) NMHs recorded at
# each checkpoint and cumulative_nmhs the running union of all NMHs seen up to it.
cumulative_nmhs, checkpoint_nmhs = get_past_nmhs_for_checkpoints(experiment_metrics)

In [18]:
# Plot the top heads per checkpoint from the normalized per-head deltas and keep the
# returned table for inspection in the cells below.
top_backup_heads = plot_top_heads(
    model_name=MODEL_TO_VIEW,
    checkpoint_dict=per_head_logit_diff_deltas,
    cumulative_nmhs=cumulative_nmhs,
    top_k_per_checkpoint=10,
)

In [57]:
# Heatmap of the stored per-head logit-diff delta for a single checkpoint.
# This reads the raw entry from experiment_metrics, not the normalized
# per_head_logit_diff_deltas dict.
checkpoint_to_show = 143000
heatmap_options = dict(
    title="Headwise logit diff contribution, post NMH KO",
    labels={"x": "Head", "y": "Layer", "color": "Logit diff attribution"},
    border=True,
    width=600,
    margin={"r": 100, "l": 100},
)
imshow_p(
    experiment_metrics[checkpoint_to_show]['per_head_logit_diff_delta'],
    **heatmap_options,
)

In [58]:
# Inspect which metric names are stored for checkpoint 143000.
experiment_metrics[143000].keys()

dict_keys(['logit_diff', 'per_head_logit_diffs', 'ablation_targets', 'ablated_logit_diff', 'per_head_ablated_logit_diffs', 'per_head_logit_diff_delta', 'in_circuit_head_delta', 'outside_circuit_head_delta', 'summed_in_circuit_head_delta', 'summed_outside_circuit_head_delta', 'summed_total_head_delta'])

In [59]:
# Show the first 50 rows whose 'Previous NMH' flag is True.
is_previous_nmh = top_backup_heads['Previous NMH'].eq(True)
top_backup_heads.loc[is_previous_nmh].head(50)

Unnamed: 0,Checkpoint,Layer-Head,Layer,Head,Value,Previous NMH,Checkpoint_sum,Value_sum,Previous NMH_sum,Top K
72,20000,Layer 9-Head 4,9,4,0.00246,True,3135000,0.334368,38,True
97,25000,Layer 9-Head 4,9,4,0.006947,True,3135000,0.334368,38,True
121,30000,Layer 9-Head 4,9,4,0.006404,True,3135000,0.334368,38,True
149,35000,Layer 9-Head 4,9,4,0.014621,True,3135000,0.334368,38,True
156,37000,Layer 9-Head 4,9,4,0.01344,True,3135000,0.334368,38,True
171,40000,Layer 9-Head 4,9,4,0.010845,True,3135000,0.334368,38,True
197,45000,Layer 9-Head 4,9,4,0.035425,True,3135000,0.334368,38,True
202,46000,Layer 9-Head 4,9,4,0.033058,True,3135000,0.334368,38,True
207,47000,Layer 9-Head 4,9,4,0.014968,True,3135000,0.334368,38,True
212,48000,Layer 9-Head 4,9,4,0.008163,True,3135000,0.334368,38,True


In [60]:
# Display the per-checkpoint sets of (layer, head) NMHs.
checkpoint_nmhs

{4000: set(),
 5000: set(),
 6000: set(),
 7000: set(),
 8000: {(8, 2)},
 9000: set(),
 10000: {(8, 1), (8, 10)},
 11000: {(10, 7)},
 12000: {(8, 2), (10, 7)},
 13000: {(10, 7)},
 14000: {(8, 2), (10, 7)},
 15000: {(8, 2), (10, 7)},
 16000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 17000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 18000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 19000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 20000: {(8, 2), (10, 7)},
 21000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 22000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 23000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 24000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 25000: {(8, 2), (10, 7)},
 26000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 27000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 28000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 29000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 30000: {(8, 2), (10, 7)},
 31000: {(8, 1), (8, 2), (9, 4), (10, 7)},
 32000: {(8, 1), (8

In [61]:
# Display the cumulative sets of (layer, head) NMHs per checkpoint.
cumulative_nmhs

{4000: set(),
 5000: set(),
 6000: set(),
 7000: set(),
 8000: {(8, 2)},
 9000: {(8, 2)},
 10000: {(8, 1), (8, 2), (8, 10)},
 11000: {(8, 1), (8, 2), (8, 10), (10, 7)},
 12000: {(8, 1), (8, 2), (8, 10), (10, 7)},
 13000: {(8, 1), (8, 2), (8, 10), (10, 7)},
 14000: {(8, 1), (8, 2), (8, 10), (10, 7)},
 15000: {(8, 1), (8, 2), (8, 10), (10, 7)},
 16000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 17000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 18000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 19000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 20000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 21000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 22000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 23000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 24000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 25000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 26000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 27000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 28000: {(8, 1), (8, 2), (8, 10

In [62]:
# Line chart of how many heads are in each checkpoint's NMH set.
checkpoint_steps = list(checkpoint_nmhs)
nmh_counts = [len(head_set) for head_set in checkpoint_nmhs.values()]

fig = px.line(
    x=checkpoint_steps,
    y=nmh_counts,
    title=f"Number of NMHs Over Time ({MODEL_TO_VIEW})",
    labels={'x': 'Checkpoint', 'y': 'Number of NMHs'},
)
fig.show()