## Setup

### Imports

In [2]:
import os
import json
import glob
import torch
import re
import einops
import pandas as pd
from functools import partial
from torch import Tensor
from torchtyping import TensorType as TT


import plotly.express as px

from utils.data_utils import generate_data_and_caches
from utils.data_processing import (
    load_edge_scores_into_dictionary,
)
from utils.visualization import plot_attention_heads, imshow_p
from utils.backup_analysis import (
    load_model,
    run_iteration,
    process_backup_results,
    get_past_nmhs_for_checkpoints,
    plot_top_heads
)

In [3]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Turn autograd off globally for every cell that runs after this one.
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x7f854ef7ab30>

### Functions

## Experiments

### Experiment Parameters

In [15]:
# Task / metric identifiers. TASK is also a path component of the graph results folder.
TASK = 'ioi'
PERFORMANCE_METRIC = 'logit_diff'

# Base architecture name and the (optional) Hugging Face repo id of the variant to analyse.
BASE_MODEL = "pythia-160m"
VARIANT = "EleutherAI/pythia-160m-weight-seed3"

# Short model name used in results/ paths: the repo id without its "<org>/" prefix.
# Splitting on "/" replaces the previous hard-coded `VARIANT[11:]` offset, which was only
# correct for an 11-character prefix such as "EleutherAI/".
MODEL_SHORTNAME = VARIANT.split("/")[-1] if VARIANT else BASE_MODEL

# Cache location handed to load_model.
CACHE = "model_cache"

# Number of prompts to generate for the dataset.
IOI_DATASET_SIZE = 70

# Passed to run_iteration as `threshold`.
# NOTE(review): presumably a copy-score percentage (0-100) — confirm against run_iteration.
COPY_SCORE_THRESHOLD = 75.0

### Circuit Data

In [17]:
# Load the per-checkpoint edge scores for this model/task.
# NOTE(review): the saved output of this cell ends in a JSONDecodeError raised while reading
# the graph files — one of the JSON files in this folder is probably empty or truncated.
folder_path = f'results/graphs/{MODEL_SHORTNAME}/{TASK}'
df = load_edge_scores_into_dictionary(folder_path)

# filter everything before 1000 steps
# .copy() gives an independent frame, so the column assignment below does not write into a
# slice of the loaded data (avoids pandas' SettingWithCopyWarning).
df = df[df['checkpoint'] >= 1000].copy()

# Split "source->target" edge names into two columns; n=1 splits on the first arrow only,
# so the result always has at most the two columns being assigned.
df[['source', 'target']] = df['edge'].str.split('->', n=1, expand=True)

# Cell output: number of distinct edge targets.
len(df['target'].unique())

Processing file 1/143: results/graphs/pythia-160m-weight-seed3/ioi/57000.json
Processing file 2/143: results/graphs/pythia-160m-weight-seed3/ioi/141000.json
Processing file 3/143: results/graphs/pythia-160m-weight-seed3/ioi/95000.json
Processing file 4/143: results/graphs/pythia-160m-weight-seed3/ioi/107000.json
Processing file 5/143: results/graphs/pythia-160m-weight-seed3/ioi/34000.json
Processing file 6/143: results/graphs/pythia-160m-weight-seed3/ioi/6000.json
Processing file 7/143: results/graphs/pythia-160m-weight-seed3/ioi/37000.json
Processing file 8/143: results/graphs/pythia-160m-weight-seed3/ioi/39000.json
Processing file 9/143: results/graphs/pythia-160m-weight-seed3/ioi/104000.json
Processing file 10/143: results/graphs/pythia-160m-weight-seed3/ioi/59000.json
Processing file 11/143: results/graphs/pythia-160m-weight-seed3/ioi/67000.json
Processing file 12/143: results/graphs/pythia-160m-weight-seed3/ioi/111000.json
Processing file 13/143: results/graphs/pythia-160m-weight-

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

### Dataset Setup

In [None]:
# Load the step-143000 checkpoint (the last step visited by the experiment loop) and use it
# to build the datasets.
initial_model = load_model(BASE_MODEL, VARIANT, 143000, CACHE, device)

# Take the dataset size from the experiment parameters instead of repeating the literal 70;
# `size` is kept as a name in case later cells read it.
size = IOI_DATASET_SIZE
ioi_dataset, abc_dataset = generate_data_and_caches(initial_model, size, verbose=True)

config.json:   0%|          | 0.00/598 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-weight-seed2 at step143000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer


In [None]:
# Disabled plotting cell: headwise logit-diff heatmap after name-mover-head knockout.
# NOTE(review): `per_head_ablated_logit_diffs` is not defined in any cell visible here, so
# this cannot be re-enabled as-is — define it first or remove this cell.
# imshow_p(
#     per_head_ablated_logit_diffs,
#     title="Headwise logit diff contribution, post NMH KO",
#     labels={"x": "Head", "y": "Layer", "color": "Logit diff attribution"},
#     #coloraxis=dict(colorbar_ticksuffix = "%"),
#     border=True,
#     width=600,
#     margin={"r": 100, "l": 100}
# )

### Run Experiment

In [None]:
# Make sure the output folder exists before the sweep starts.
os.makedirs(f'results/backup/{MODEL_SHORTNAME}', exist_ok=True)

# Accumulator threaded through every iteration; each helper returns the updated version.
experiment_metrics = {}

# Sweep checkpoints 4000, 5000, ..., 143000.
for checkpoint in range(4000, 144000, 1000):
    experiment_metrics = run_iteration(
        BASE_MODEL,
        VARIANT,
        df,
        checkpoint=checkpoint,
        dataset=ioi_dataset,
        experiment_metrics=experiment_metrics,
        threshold=COPY_SCORE_THRESHOLD,
    )
    experiment_metrics = process_backup_results(df, checkpoint, experiment_metrics)

    # Re-save after every checkpoint (PyTorch format) so an interrupted sweep keeps its
    # partial results.
    torch.save(experiment_metrics, f'results/backup/{MODEL_SHORTNAME}/nmh_backup_metrics.pt')

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step4000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 98.57142857142858%
Checkpoint 4000:
Heads ablated:            [(8, 1)]
Original logit diff:      0.8774140477
Post ablation logit diff: 1.0147744417
Logit diff % change:      15.66%
Loaded model EleutherAI/pythia-160m-weight-seed2 at step5000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 47.14285714285714%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 96.66666666666667%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 5000:
Heads ablated:            [(8, 1), (8, 10)]
Original logit diff:      1.5074634552
Post ablation logit diff: 1.4938864708
Logit diff % change:      -0.90%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step6000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 73.33333333333333%
Checkpoint 6000:
Heads ablated:            [(8, 10), (8, 1)]
Original logit diff:      2.0983071327
Post ablation logit diff: 1.9507633448
Logit diff % change:      -7.03%
Loaded model EleutherAI/pythia-160m-weight-seed2 at step7000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 80.95238095238095%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 97.14285714285714%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 19.523809523809526%
Checkpoint 7000:
Heads ablated:            [(10, 7), (8, 1), (8, 10)]
Original logit diff:      2.3801794052
Post ablation logit diff: 2.4812474251
Logit diff % change:      4.25%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step8000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 19.047619047619047%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 22.857142857142858%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 86.19047619047619%
Checkpoint 8000:
Heads ablated:            [(8, 10), (8, 1), (10, 7)]
Original logit diff:      2.5609090328
Post ablation logit diff: 2.4754891396
Logit diff % change:      -3.34%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step9000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 14.285714285714285%
Checkpoint 9000:
Heads ablated:            [(8, 10), (8, 1)]
Original logit diff:      2.4609885216
Post ablation logit diff: 2.2680990696
Logit diff % change:      -7.84%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step10000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 87.61904761904762%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 30.0%
Copy circuit for head 11.8 (sign=1) : Top 5 accuracy: 59.04761904761905%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 13.333333333333334%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 5.714285714285714%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 14.285714285714285%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 90.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 25.71428571428571%
Checkpoint 10000:
Heads ablated:            [(10, 7), (8, 1), (8, 2), (8, 10)]
Original logit diff:      3.1183340549
Post ablation logit diff: 2.75874519

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step11000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 89.04761904761904%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 30.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 15.238095238095239%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 94.76190476190476%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 11000:
Heads ablated:            [(10, 7), (8, 1), (8, 2), (8, 10)]
Original logit diff:      3.1467432976
Post ablation logit diff: 2.9557740688
Logit diff % change:      -6.07%
Loaded model EleutherAI/pythia-160m-weight-seed2 at step12000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 31.9047619047619%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 13.80952380952381%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 97.14285714285714%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 12000:
Heads ablated:            [(8, 1), (8, 2), (8, 10)]
Original logit diff:      3.0244793892
Post ablation logit diff: 2.6767439842
Logit diff % change:      -11.50%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step13000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 31.9047619047619%
Copy circuit for head 11.8 (sign=1) : Top 5 accuracy: 44.761904761904766%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 10.952380952380953%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 97.14285714285714%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 98.09523809523809%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 46.666666666666664%
Checkpoint 13000:
Heads ablated:            [(10, 7), (8, 1), (8, 2), (8, 10)]
Original logit diff:      3.1469430923
Post ablation logit diff: 2.8826324940
Logit diff % change:      -8.40%
Loaded model EleutherAI/pythia-160m-weight-seed2 at step14000; now loading into Ho

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 98.09523809523809%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 32.857142857142854%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 90.47619047619048%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 14.761904761904763%
Checkpoint 14000:
Heads ablated:            [(8, 2), (8, 1), (10, 7)]
Original logit diff:      2.7993061543
Post ablation logit diff: 3.1556169987
Logit diff % change:      12.73%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step15000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 11.428571428571429%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 36.19047619047619%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 90.47619047619048%
Checkpoint 15000:
Heads ablated:            [(8, 10), (8, 2), (10, 7)]
Original logit diff:      3.9921891689
Post ablation logit diff: 3.5350375175
Logit diff % change:      -11.45%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step16000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 13.333333333333334%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 90.0%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 35.23809523809524%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 17.142857142857142%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 50.95238095238095%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 97.14285714285714%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 20.476190476190474%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 16000:
Heads ablated:            [(10, 7), (8, 1), (8, 2), (8, 10)]
Original logit diff:      3.4396693707
Post ablation logit diff: 3.2975933552
Logit diff % change:      -4.13%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step17000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 36.666666666666664%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 14.285714285714285%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 50.95238095238095%
Checkpoint 17000:
Heads ablated:            [(10, 7), (8, 2), (8, 10)]
Original logit diff:      3.7224340439
Post ablation logit diff: 3.6142177582
Logit diff % change:      -2.91%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step18000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 17.61904761904762%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 62.38095238095238%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 12.857142857142856%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 37.142857142857146%
Checkpoint 18000:
Heads ablated:            [(8, 10), (8, 2), (10, 7)]
Original logit diff:      3.7128663063
Post ablation logit diff: 3.6329803467
Logit diff % change:      -2.15%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step19000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 16.666666666666664%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 60.952380952380956%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 10.0%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 34.285714285714285%
Checkpoint 19000:
Heads ablated:            [(8, 10), (8, 2)]
Original logit diff:      3.2806375027
Post ablation logit diff: 3.1564421654
Logit diff % change:      -3.79%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step20000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 35.23809523809524%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 10.952380952380953%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 59.04761904761905%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 16.666666666666664%
Checkpoint 20000:
Heads ablated:            [(10, 7), (8, 1), (8, 2), (8, 10)]
Original logit diff:      4.5556230545
Post ablation logit diff: 4.4376325607
Logit diff % change:      -2.59%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step21000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 33.80952380952381%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 10.0%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 18.571428571428573%
Checkpoint 21000:
Heads ablated:            [(10, 7), (8, 2), (8, 10)]
Original logit diff:      2.7181048393
Post ablation logit diff: 2.6167707443
Logit diff % change:      -3.73%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step22000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.11 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 32.38095238095238%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 9.523809523809524%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 64.76190476190476%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 18.571428571428573%
Checkpoint 22000:
Heads ablated:            [(10, 7), (8, 2), (8, 10)]
Original logit diff:      3.5818908215
Post ablation logit diff: 3.7463593483
Logit diff % change:      4.59%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step23000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.11 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 92.85714285714286%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 30.476190476190478%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 9.047619047619047%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 53.80952380952381%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 18.095238095238095%
Checkpoint 23000:
Heads ablated:            [(10, 7), (8, 2), (8, 10)]
Original logit diff:      2.8724248409
Post ablation logit diff: 2.8956525326
Logit diff % change:      0.81%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step24000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 57.14285714285714%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 46.666666666666664%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 17.61904761904762%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 10.0%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 32.38095238095238%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 91.9047619047619%
Copy circuit for head 6.11 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 2.857142857142857%
Checkpoi

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step25000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.11 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 91.9047619047619%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 31.428571428571427%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 8.095238095238095%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 48.095238095238095%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 61.42857142857143%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 14.285714285714285%
Checkpoint 25000:
Heads ablated:            [(10, 7), (8, 1), (8,

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step26000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 16.19047619047619%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 90.95238095238095%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 31.9047619047619%
Checkpoint 26000:
Heads ablated:            [(8, 10), (8, 2), (10, 7)]
Original logit diff:      3.5316271782
Post ablation logit diff: 3.5185561180
Logit diff % change:      -0.37%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step27000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 17.61904761904762%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 60.952380952380956%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 58.0952380952381%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 94.28571428571428%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 8.571428571428571%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 29.523809523809526%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 90.95238095238095%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 6.11 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 3.33333333333

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step28000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 62.857142857142854%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 4.285714285714286%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 92.38095238095238%
Copy circuit for head 6.11 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 31.428571428571427%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 90.95238095238095%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 11.904761904761903%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 17.1428571

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step29000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 6.11 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 3.3333333333333335%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 31.9047619047619%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 10.476190476190476%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 60.952380952380956%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 54.761904761904766%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 14.761904761904763%
Checkpoint 29000:
Heads ablated:            [(10, 7), (8, 2), (

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step30000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 15.238095238095239%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 58.57142857142858%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 61.904761904761905%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 11.428571428571429%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 30.0%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 89.52380952380953%
Checkpoint 30000:
Heads ablated:            [(8, 10), (8, 2), (10, 7)]
Original logit diff:      3.6829319000
Post ablation logit diff: 3.5467934608
Logit diff % change:      -3.70%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step31000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 6.11 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 30.476190476190478%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.4 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 60.0%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 17.61904761904762%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 10.476190476190476%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 68.0952380952381%
Checkp

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 90.95238095238095%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 32.38095238095238%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 7.6190476190476195%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 68.0952380952381%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 56.666666666666664%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 18.095238095238095%
Checkpoint 32000:
Heads ablated:            [(10, 7), (8, 2), (8, 10)]
Original logit diff:      3.5337259769
Post ablation logit diff: 3.4055581093
Logit diff % change:      -3.63%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step33000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 57.14285714285714%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 73.80952380952381%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 16.19047619047619%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 8.095238095238095%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 30.476190476190478%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 6.11 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 3.8095238095238098%
Checkpoint 33000:
Heads ablated:            [(8, 10), (8, 2), (10

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step34000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 59.523809523809526%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 71.9047619047619%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 16.19047619047619%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 8.095238095238095%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 30.476190476190478%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 90.47619047619048%
Copy circuit for head 6.11 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Checkpoint 34000:
Heads ablated:            [(8, 10), (8, 2), (10, 7)]
Original logit diff:      3.5629084110
Post ablation logit diff: 3.

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step35000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 32.38095238095238%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 11.904761904761903%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.4 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 73.80952380952381%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 60.476190476190474%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 19.047619047619047%
Checkpoint 35000:
Heads ablated:            [(10, 7), (8, 2), (8, 10)]
Original logit diff:      3.9811048508
Post ablation logit diff: 3

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 19.047619047619047%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 61.42857142857143%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 73.80952380952381%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 8.571428571428571%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 31.428571428571427%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 6.11 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Checkpoint 36000:
Heads ablated:            [(8, 10), (8, 2), (8, 1), (10, 7)]
Original lo

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step37000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 89.04761904761904%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 31.9047619047619%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 8.095238095238095%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 71.9047619047619%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 60.952380952380956%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 19.047619047619047%
Checkpoint 37000:
Heads ablated:            [(10, 7), (8, 2), (8, 10)]
Original logit diff:      3.7472186089
Post ablation logit diff: 3.4548361301
Logit diff % change:      -7.80%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step38000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 88.57142857142857%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 32.857142857142854%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 76.66666666666667%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 60.476190476190474%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 17.619047619

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step39000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 16.19047619047619%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 63.33333333333333%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 83.33333333333334%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 4.285714285714286%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 90.0%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 31.428571428571427%
Copy circuit for head 6.11 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Checkp

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step40000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 31.428571428571427%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 6.190476190476191%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 80.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 65.23809523809524%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 17.142857142857142%
Checkpoint 40000:
Heads ablated:            [(10, 7), (8, 1), (8, 2), (8, 8), (8, 10)]
Original logit diff:      3.5532555580
Post ablation logit diff: 

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step41000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.11 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 3.3333333333333335%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 32.38095238095238%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 8.095238095238095%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 77.14285714285715%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 59.04761904761905%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 18.571428571428573%
Checkpoint 41000:
Heads ablated:            [(10, 7), (8, 1), (8,

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step42000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 3.3333333333333335%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 30.476190476190478%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 10.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 80.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 56.666666666666664%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 16.19047619047619%
Checkpoint 42000:
Heads ablated:            [(10, 7), (8, 1), (8, 2), (8, 8), (8, 10)]
Original logit diff:      3.3687191010
Post ablation logit diff: 3.4951341152

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step43000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 6.11 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 31.9047619047619%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 6.666666666666667%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 89.04761904761904%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 65.71428571428571%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 16.66666666666

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step44000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 18.095238095238095%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 64.76190476190476%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 88.09523809523809%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 5.714285714285714%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 31.9047619047619%
Checkpoint 44000:
Heads ablated:            [(8, 10), (8, 8), (8, 2), (8, 1), (10, 7)]
Original logit diff:      3.4195830822
Post ablation l

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step45000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 10.8 (sign=1) : Top 5 accuracy: 5.714285714285714%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 31.9047619047619%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 9.047619047619047%
Copy circuit for head 7.8 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 91.9047619047619%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 87.61904761904762%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 61.904761904761

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step46000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 90.0%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 92.85714285714286%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 59.04761904761905%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 17.61904761904762%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 31.428571428571427%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 5.714285714285714%
Copy circuit for head 6.11 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Check

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step47000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 9.047619047619047%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 32.857142857142854%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 18.095238095238095%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 58.0952380952381%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 94.28571428571428%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 5.714285714285714%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 88.09523809523809%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 47000:
Heads ablated:            [(10, 7), (8, 1), (8, 8

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step48000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 88.57142857142857%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 7.8 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 57.14285714285714%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 19.523809523809526%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 32.857142857142854%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 6.666666666666667%
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 0.95238095238

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step49000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.11 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 90.0%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 33.80952380952381%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 91.9047619047619%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 51.90476190476191%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 17.142857142857142%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 6.190476190476191%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 89.04761904761904%
Checkpoint 49000:
Heads ablated:            [(10, 7), (8, 1), (8, 2), (8, 10), (

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step50000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 31.428571428571427%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 6.666666666666667%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 88.57142857142857%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 57.61904761904761%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 18.095238095238095%
Checkpoint 50000:
Heads ablated:            [(10, 7), (8, 1), (8, 2), (8, 8), (8, 10)]
Original logit diff:      2.8237726688
Post ablation

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step51000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 48.57142857142857%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 90.47619047619048%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 20.476190476190474%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 6.666666666666667%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 31.9047619047619%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 10.8 (sign=1) : Top 5 accuracy: 5.714285714285714%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 10.6 (sign=1) : Top 5 accuracy: 3.809523809523

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step52000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 6.666666666666667%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 50.0%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 21.904761904761905%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 31.9047619047619%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 6.11 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy cir

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step53000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 92.38095238095238%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 90.95238095238095%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 42.857142857142854%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 20.952380952380953%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 31.428571428571427%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Checkpoint 53000:
Heads ablated:            [(8, 10), (8, 8), (8

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step54000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 33.33333333333333%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 90.0%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 92.38095238095238%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 50.476190476190474%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 19.047619047619047%
Checkpoint 54000:
Heads ablated:            [(10, 7), (8, 1), (8, 2), (8, 8), (8, 10)]
Original logit diff:      3.2507045269
Post ablation logit diff: 

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step55000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 88.09523809523809%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 31.428571428571427%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 5.714285714285714%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 9.047619047619047%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 94.28571428571428%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 46.666666666666664%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 19.523809523809526%
Checkpoint 55000:
Heads ablated:            [(10, 7), (8, 1), (8,

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step56000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.6 (sign=1) : Top 5 accuracy: 3.8095238095238098%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 89.04761904761904%
Copy circuit for head 10.8 (sign=1) : Top 5 accuracy: 5.714285714285714%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 32.857142857142854%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 9.523809523809524%
Copy circuit for head 7.8 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 90.0%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 7.6190476190476195%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 94.76190476190476%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step57000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 89.04761904761904%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 33.33333333333333%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 8.095238095238095%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 88.57142857142857%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 10.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 96.19047619047619%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 46.666666666666664%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 20.476190476190474%
Checkpoint 57000:
Heads ablated:            [(10, 7), (8, 1), (8, 2), (8, 8), (

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step58000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 17.142857142857142%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 45.714285714285715%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 89.04761904761904%
Copy circuit for head 10.6 (sign=1) : Top 5 accuracy: 3.8095238095238098%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 30.952380952380953%
Checkpoint 58000:
Heads ablated:            [(8, 10), (8, 8), (

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step59000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 8.571428571428571%
Copy circuit for head 10.6 (sign=1) : Top 5 accuracy: 3.8095238095238098%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 89.04761904761904%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 30.476190476190478%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 17.61904761904762%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 45.714285714285715%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 88.57142857142857%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 59000:
Heads ablated:            [(10, 7), (8, 1), (8

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step60000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 16.666666666666664%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 46.19047619047619%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 93.80952380952381%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 10.952380952380953%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 87.61904761904762%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 6.190476190476191%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 88.57142857142857%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 30.476190476190478%
Checkpoint 60000:
Heads ablated:            [(8, 10), (8, 8), (8, 

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step61000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.6 (sign=1) : Top 5 accuracy: 3.8095238095238098%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 89.04761904761904%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 30.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 7.6190476190476195%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 42.38095238095238%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 17.142857142857142%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 7.6190476190476195%
Checkpoint 61000:
Heads ablated:            [(10, 7), (8, 2), (8, 8), (8, 10)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step62000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 94.28571428571428%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 11.428571428571429%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 87.61904761904762%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 40.476190476190474%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 17.61904761904762%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 30.0%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 10.6 (sign=1) : Top 5 accuracy: 3.8095238095238098%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 6.190476190476191%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 88.57142857142857%
Checkp

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step63000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 10.6 (sign=1) : Top 5 accuracy: 3.8095238095238098%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 88.57142857142857%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 31.428571428571427%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 7.142857142857142%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 88.57142857142857%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 10.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 94.76190476190476%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 44.761904761904766%
Copy

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step64000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 17.61904761904762%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 40.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 12.380952380952381%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 87.14285714285714%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 7.6190476190476195%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 88.57142857142857%
Copy circuit for head 10.6 (sign=1) : Top 5 accuracy: 3.8095238095238098%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 28.57142857142857%
Check

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step65000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 89.04761904761904%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 29.04761904761905%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 10.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 89.04761904761904%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 39.04761904761905%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 19.523809523809526%
Checkpoint 65000:
Heads ablated:            [(10, 7), (8, 1), (8, 2), (8, 8), (8, 10)]
Original logit diff:      2.6949911118
Post ablation logit diff: 

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step66000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 19.047619047619047%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 40.95238095238095%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 7.6190476190476195%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 88.57142857142857%
Copy circuit for head 7.8 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 10.0%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 27.142857142857142%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 89.04761904761904%
Copy 

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step67000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 88.57142857142857%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 27.61904761904762%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 10.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 88.57142857142857%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 46.19047619047619%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 22.857142857142858%
Checkpoint 67000:
Heads ablated:            [(10, 7), (8, 1), (8, 2), (8, 8), (8, 10)]
Original logit diff:      3.2707591057
Post ablation logit diff: 

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step68000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 24.761904761904763%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 55.23809523809524%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 97.14285714285714%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 27.61904761904762%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 88.09523809523809%
Checkpoint 68000:
Heads ablated:            [(8, 10), (8, 8), (8, 2), (8, 1), (10, 7)]
Original logit diff:      2.9035291672
Post ablation logit diff: 2.6403608322
Logit diff % change:      -9.06%
Loaded model 

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 25.238095238095237%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 55.714285714285715%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 9.047619047619047%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 88.09523809523809%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 17.61904761904762%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 87.61904761904762%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 26.666666666666668%
Checkpoint 69000:
Heads ablated:            [(8, 10), (8, 8), (8, 2), (8, 1), (10, 7)]
Original logit diff:      3.7018563747
Post ablation logit diff: 3.494024753

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step70000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.6 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 85.23809523809524%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 26.666666666666668%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 19.047619047619047%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 87.61904761904762%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 6.190476190476191%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 64.28571428571429%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 25.714285714

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step71000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 82.85714285714286%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 24.285714285714285%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 17.142857142857142%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 89.04761904761904%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 61.904761904761905%
Checkpoint 71000:
Heads ablated:            [(10, 7), (8, 1), (8, 2), (8, 8), (8, 10)]
Original logit diff:      3.2580428123
Post ablation logit diff: 3.1738085747
Logit diff % change:      -2.59%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step72000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 64.28571428571429%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 97.14285714285714%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 19.523809523809526%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 83.80952380952381%
Checkpoint 72000:
Heads ablated:            [(8, 10), (8, 8), (8, 2), (10, 7)]
Original logit diff:      3.2968132496
Post ablation logit diff: 3.0506057739
Logit diff % change:      -7.47%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step73000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 80.47619047619048%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 27.61904761904762%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 19.047619047619047%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 85.71428571428571%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 68.57142857142857%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 30.0%
Checkpoint 73000:
Heads ablated:            [(10, 7), (8, 1), (8, 2), (8, 8), (8, 10)]
Original logit diff:      3.2593789101
Post ablation logit diff: 

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step74000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 74.76190476190476%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 23.809523809523807%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 20.952380952380953%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 94.28571428571428%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 65.71428571428571%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 30.0%
Checkpoint 74000:
Heads ablated:            [(8, 2), (8, 8), (8, 10)]
Original logit diff:      3.4909276962
Post ablation logit diff: 3.2871997356
Logi

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step75000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 75.23809523809524%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 24.761904761904763%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 20.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 88.57142857142857%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 5.714285714285714%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 97.14285714285714%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 59.523809523809526%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 34.76190476190476%
Checkpoint 75000:
Heads ablated:            [(10, 7), (8, 1), (8, 2), (8, 8), (8

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 78.0952380952381%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 27.142857142857142%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 19.523809523809526%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 34.285714285714285%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 87.14285714285714%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 68.0952380952381%
Checkpoint 76000:
Heads ablated:            [(10, 7), (8, 1), (8, 2), (8, 8), (8, 10)]
Original logit diff:      3.2125523090
Post ablation logit diff: 2.9223361015


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step77000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 66.66666666666666%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 97.14285714285714%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 5.714285714285714%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 91.9047619047619%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 36.19047619047619%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 21.428571428571427%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 73.33333333333333%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 6.19047619047619

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step78000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 64.76190476190476%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 40.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 23.333333333333332%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 6.190476190476191%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 76.19047619047619%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 24.761904761904763%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Checkpoi

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step79000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 64.76190476190476%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 20.0%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 70.95238095238095%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 24.761904761904763%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 96.19047619047619%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 79000:
Heads ablated:            [(8, 1), (8, 2), (8, 8), (8, 10)]
Original logit diff:      3.2653307915
Post ablation logit diff: 3.0547547340
Logit diff % change:      -6.45%
Loaded model EleutherAI/pythia-160m-w

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 62.857142857142854%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 97.14285714285714%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 91.9047619047619%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 40.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 20.952380952380953%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 23.333333333333332%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 72.38095238095238%
Checkpoint 80000:
Heads ablated:            [(8, 10), (8, 8), (8, 2), (8, 1)]
Original logit diff:      3.6578080654
Post ablation logit diff: 3.6707816124
Logit diff % change:      0.35%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step81000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 64.28571428571429%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 96.66666666666667%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 90.47619047619048%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 21.904761904761905%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 23.333333333333332%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 72.85714285714285%
Checkpoint 81000:
Heads ablated:            [(8, 10), (8, 8), (8, 2), (8, 1)]
Original logit diff:      3.0331616402
Post ablation logit dif

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step82000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 60.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 4.285714285714286%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 44.285714285714285%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 25.238095238095237%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 23.333333333333332%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 67.61904761904762%
Checkpoint 82000:
Heads ablated:            [(8, 10), (8, 8), (8, 2)]
Original logit diff:      3.6272935867
Post ablation logit diff: 3.2767765522
Logit diff % change:      -9.66%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step83000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 4.285714285714286%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 90.95238095238095%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 56.19047619047619%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 44.761904761904766%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 23.333333333333332%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 70.47619047619048%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 7.142857142857142%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 23.809523809523

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step84000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 70.0%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 23.333333333333332%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 24.761904761904763%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 48.57142857142857%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 90.47619047619048%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 7.6190476190476195%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 50.0%
Checkpoint 84000:
Heads ablated:            [(8, 1), (8, 2), (8, 8), (8, 10)]
Original logit 

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step85000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 69.04761904761905%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 42.38095238095238%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 10.476190476190476%
Checkpoint 85000:
Heads ablated:            [(8, 2), (8, 8), (8, 10)]
Original logit diff:      3.6393032074
Post ablation logit diff: 3.6112432480
Logit diff % change:      -0.77%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step86000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 68.0952380952381%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 23.333333333333332%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 8.571428571428571%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 24.285714285714285%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 11.428571428571429%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 29.04761904761905%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 51.4285714285714

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 25.238095238095237%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 9.047619047619047%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 67.14285714285714%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 20.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 17.61904761904762%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 20.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 87000:
Heads ablated:            [(8, 1), (8, 8), (8, 10), (8, 2)]
Original logit diff:      3.7018136978
Post ablation logit diff: 3.6515314579
Logit diff % change:      -1.36%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step88000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 10.0%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 66.19047619047619%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 19.523809523809526%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 25.71428571428571%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 55.714285714285715%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 90.47619047619048%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 24.761904761904763%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 9.523809523809524%
Checkpoint 88000:
Heads ablated:            [(8, 1), (8, 2), (8, 8), (8, 10)]
Ori

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step89000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 57.14285714285714%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 26.666666666666668%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 90.95238095238095%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 26.190476190476193%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 66.19047619047619%
Checkpoint 89000:
Heads ablated:            [(8, 10), (8, 8), (8, 2), (8, 1)]
Original logit diff:      3.9634082317
Post ablation logit diff: 3.9040296078
Logit diff % change:      -1.50%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step90000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 65.71428571428571%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 26.666666666666668%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 90.47619047619048%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 30.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 57.61904761904761%
Checkpoint 90000:
Heads ablated:            [(8, 1), (8, 2), (8, 8), (8, 10)]
Original logit diff:      3.8981649876
Post ablation logit diff: 3.8081648350
Logit diff % change:      -2.31%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step91000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 35.23809523809524%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 30.0%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 11.428571428571429%
Checkpoint 91000:
Heads ablated:            [(8, 10), (8, 8), (8, 2)]
Original logit diff:      3.8161077499
Post ablation logit diff: 3.7621653080
Logit diff % change:      -1.41%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step92000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 35.23809523809524%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 88.57142857142857%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 60.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 30.952380952380953%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 19.523809523809526%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 62.857142857142854%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 11.904761904761903%
Checkpoint 92000:
Heads ablated:            [(8, 10), (8, 8), (8, 2)

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step93000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 62.857142857142854%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 13.80952380952381%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 30.476190476190478%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 98.09523809523809%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 56.666666666666664%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 37.142857142857146%
Checkpoint 93000:
Heads ablated:            [(8, 2), (8, 8), (8, 10)]
Original logit diff:      3.4614629745
Post ablation logit diff: 3.3163011074
Logit diff % change:      -4.19%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step94000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 61.904761904761905%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 16.666666666666664%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 13.80952380952381%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 30.952380952380953%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 88.57142857142857%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 36.666666666666664%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 98.09523809523809%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 59.523809523809526%
Checkpoint 94000:
Heads ablated:            [(8, 1), 

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step95000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 61.42857142857143%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 14.285714285714285%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 98.09523809523809%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 42.38095238095238%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 66.66666666666666%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 29.04761904761905%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 96.19047619047619%
Checkpoint 95000:
Heads ablated:            [(8, 2), (8, 10), (8, 8)]
Original logit diff:      3.9980154037
Post ablation logit diff: 4.0376839638
Logit diff % change:      0.99%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step96000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 59.04761904761905%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 96000:
Heads ablated:            [(8, 8), (8, 10)]
Original logit diff:      3.5605666637
Post ablation logit diff: 3.7965645790
Logit diff % change:      6.63%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step97000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 43.333333333333336%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 96.66666666666667%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 88.09523809523809%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 60.476190476190474%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 29.523809523809526%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 17.142857142857142%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 61.42857142857143%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 13.333333333333334%
Checkpoint 97000:
Heads ablated:            [(8, 10),

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step98000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 43.80952380952381%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 98000:
Heads ablated:            [(8, 8), (8, 10)]
Original logit diff:      3.4448816776
Post ablation logit diff: 3.5361001492
Logit diff % change:      2.65%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step99000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 99000:
Heads ablated:            [(8, 8), (8, 10)]
Original logit diff:      3.1775939465
Post ablation logit diff: 3.3944940567
Logit diff % change:      6.83%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step100000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 91.9047619047619%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 45.714285714285715%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 88.09523809523809%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 31.428571428571427%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 16.19047619047619%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 14.285714285714285%
Checkpoint 100000:
Heads ablated:            [(8, 10), (8, 8), (8, 1)]
Original logit diff:      3.2676994801
Post ablation logit diff: 3.5644042492
Logit diff % change:      9.08%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step101000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 69.52380952380952%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 92.85714285714286%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 46.19047619047619%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 91.9047619047619%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 88.09523809523809%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 29.523809523809526%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 17.142857142857142%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 58.0952380952381%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 14.285714285714285%
Checkpoint 101000:
Heads ablated:            [(8, 10), (8, 8), (8, 2), (8, 1)]
Original logit diff:      3.5127615929
Post ablati

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step102000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 31.428571428571427%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 87.61904761904762%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 102000:
Heads ablated:            [(8, 1), (8, 8), (8, 10)]
Original logit diff:      3.6279888153
Post ablation logit diff: 3.9062352180
Logit diff % change:      7.67%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-weight-seed2 at step103000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 86.66666666666667%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 46.666666666666664%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 87.14285714285714%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 57.14285714285714%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 31.428571428571427%
Checkpoint 103000:
Heads ablated:            [(8, 10), (8, 8), (8, 1)]
Original logit diff:      3.8974649906
Post ablation logit diff: 4.1543936729
Logit diff % change:      6.59%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step104000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 65.71428571428571%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 76.66666666666667%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 47.61904761904761%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 86.19047619047619%
Checkpoint 104000:
Heads ablated:            [(8, 10), (8, 8), (8, 1)]
Original logit diff:      3.5403478146
Post ablation logit diff: 3.9770648479
Logit diff % change:      12.34%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step105000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 73.33333333333333%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 49.523809523809526%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 86.19047619047619%
Checkpoint 105000:
Heads ablated:            [(8, 10), (8, 1)]
Original logit diff:      3.6404767036
Post ablation logit diff: 4.0004153252
Logit diff % change:      9.89%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step106000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 70.95238095238095%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 86.66666666666667%
Checkpoint 106000:
Heads ablated:            [(8, 10), (8, 1)]
Original logit diff:      3.4120151997
Post ablation logit diff: 3.9302241802
Logit diff % change:      15.19%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-weight-seed2 at step107000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 86.66666666666667%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 69.04761904761905%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 107000:
Heads ablated:            [(8, 1), (8, 10)]
Original logit diff:      3.8128850460
Post ablation logit diff: 4.2036046982
Logit diff % change:      10.25%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step108000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 65.71428571428571%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 68.0952380952381%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 108000:
Heads ablated:            [(8, 10)]
Original logit diff:      3.6820647717
Post ablation logit diff: 3.9877083302
Logit diff % change:      8.30%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step109000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 59.04761904761905%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 109000:
Heads ablated:            [(8, 10)]
Original logit diff:      3.2483024597
Post ablation logit diff: 3.7379381657
Logit diff % change:      15.07%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step110000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 38.095238095238095%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 83.33333333333334%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 69.04761904761905%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 49.047619047619044%
Checkpoint 110000:
Heads ablated:            [(8, 10), (8, 1)]
Original logit diff:      3.5436263084
Post ablation logit diff: 4.0088539124
Logit diff % change:      13.13%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step111000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 48.57142857142857%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 31.9047619047619%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 111000:
Heads ablated:            [(8, 10)]
Original logit diff:      3.7435317039
Post ablation logit diff: 4.2192454338
Logit diff % change:      12.71%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step112000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 44.761904761904766%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 55.23809523809524%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 27.61904761904762%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 112000:
Heads ablated:            [(8, 10)]
Original logit diff:      3.6175262928
Post ablation logit diff: 3.9885284901
Logit diff % change:      10.26%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step113000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 45.23809523809524%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 19.047619047619047%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 113000:
Heads ablated:            [(8, 10)]
Original logit diff:      3.4972681999
Post ablation logit diff: 4.1292743683
Logit diff % change:      18.07%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step114000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 55.714285714285715%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 4.285714285714286%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 114000:
Heads ablated:            [(8, 10)]
Original logit diff:      3.6397414207
Post ablation logit diff: 3.9691214561
Logit diff % change:      9.05%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step115000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 44.761904761904766%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 66.19047619047619%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 50.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 115000:
Heads ablated:            [(8, 10)]
Original logit diff:      3.6432216167
Post ablation logit diff: 4.0651578903
Logit diff % change:      11.58%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step116000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 66.19047619047619%
Checkpoint 116000:
Heads ablated:            [(8, 10)]
Original logit diff:      3.5812804699
Post ablation logit diff: 3.8684601784
Logit diff % change:      8.02%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-weight-seed2 at step117000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 67.14285714285714%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 43.80952380952381%
Checkpoint 117000:
Heads ablated:            [(8, 10)]
Original logit diff:      3.2543992996
Post ablation logit diff: 3.5804519653
Logit diff % change:      10.02%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step118000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 43.80952380952381%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 61.42857142857143%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 65.23809523809524%
Checkpoint 118000:
Heads ablated:            [(8, 10)]
Original logit diff:      3.4010450840
Post ablation logit diff: 3.6946938038
Logit diff % change:      8.63%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step119000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 65.23809523809524%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 61.42857142857143%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 4.285714285714286%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 30.952380952380953%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 6.190476190476191%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 24.761904761904763%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 42.857142857142854%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 6.666666666666667%
Checkpoint 119000:
Heads ablated:            [(8, 10)]
Original logit diff:      3.9135935307
Post ablation logit diff: 4.0135240555
Logit diff % change:      2.55%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-weight-seed2 at step120000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 65.23809523809524%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 64.76190476190476%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 26.190476190476193%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 23.333333333333332%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 42.38095238095238%
Checkpoint 120000:
Heads ablated:            [(8, 10)]
Original logit diff:      4.4822506905
Post ablation logit diff: 4.6183238029
Logit diff % change:      3.04%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step121000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 21.904761904761905%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 66.19047619047619%
Checkpoint 121000:
Heads ablated:            [(8, 10)]
Original logit diff:      3.5944514275
Post ablation logit diff: 3.4419775009
Logit diff % change:      -4.24%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-weight-seed2 at step122000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 28.57142857142857%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 62.857142857142854%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 65.71428571428571%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 19.523809523809526%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 7.142857142857142%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 45.23809523809524%
Copy circuit for head 5.5 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Checkpoint 122000:
Heads ablated:            [(8, 10)]
Original logit diff:      4.5480418205
Post ablation logit diff: 4.2158889771
Logit diff % change

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-weight-seed2 at step123000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 60.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 123000:
Heads ablated:            [(8, 10)]
Original logit diff:      3.6875662804
Post ablation logit diff: 3.4382033348
Logit diff % change:      -6.76%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step124000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 46.666666666666664%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 17.61904761904762%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 3.8095238095238098%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 46.19047619047619%
Checkpoint 124000:
Heads ablated:            [(8, 10)]
Original logit diff:      3.9744837284
Post ablation logit diff: 3.7679042816
Logit diff % change:      -5.20%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-weight-seed2 at step125000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 34.76190476190476%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 61.42857142857143%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 17.142857142857142%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 24.761904761904763%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 47.61904761904761%
Checkpoint 125000:
Heads ablated:            [(8, 10)]
Original logit diff:      3.9687879086
Post ablation logit diff: 3.7922658920
Logit diff % change:      -4.45%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step126000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 126000:
Heads ablated:            [(8, 10)]
Original logit diff:      3.7914445400
Post ablation logit diff: 3.7300384045
Logit diff % change:      -1.62%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-weight-seed2 at step127000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 20.952380952380953%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 127000:
Heads ablated:            [(8, 10)]
Original logit diff:      3.3446874619
Post ablation logit diff: 3.4085471630
Logit diff % change:      1.91%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step128000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.5 (sign=1) : Top 5 accuracy: 9.047619047619047%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 48.57142857142857%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 11.904761904761903%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 3.3333333333333335%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 47.14285714285714%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 7.6190476190476195%
Checkpoint 128000:
Heads ablated:            [(8, 10)]
Original logit diff:      3.7995648384
Post ablation logit diff: 3.9917380810
Logit diff % change:      5.06%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step129000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 129000:
Heads ablated:            [(8, 10)]
Original logit diff:      3.5301592350
Post ablation logit diff: 3.7459716797
Logit diff % change:      6.11%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step130000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 41.42857142857143%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 42.38095238095238%
Checkpoint 130000:
Heads ablated:            [(8, 10)]
Original logit diff:      3.7103049755
Post ablation logit diff: 3.8948459625
Logit diff % change:      4.97%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step131000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 43.333333333333336%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 3.8095238095238098%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 18.095238095238095%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 40.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 131000:
Heads ablated:            [(8, 10)]
Original logit diff:      3.8659989834
Post ablation logit diff: 3.9359099865
Logit diff % change:      1.81%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-weight-seed2 at step132000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 132000:
Heads ablated:            [(8, 10)]
Original logit diff:      3.5665013790
Post ablation logit diff: 3.7362439632
Logit diff % change:      4.76%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step133000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 39.523809523809526%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 3.3333333333333335%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 13.333333333333334%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 5.238095238095238%
Checkpoint 133000:
Heads ablated:            [(8, 10)]
Original logit diff:      3.7026576996
Post ablation logit diff: 3.8817112446
Logit diff % change:      4.84%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step134000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 39.523809523809526%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 3.3333333333333335%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 10.952380952380953%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 23.809523809523807%
Checkpoint 134000:
Heads ablated:            [(8, 10)]
Original logit diff:      3.5571510792
Post ablation logit diff: 3.6855390072
Logit diff % change:      3.61%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step135000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 9.523809523809524%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 16.666666666666664%
Checkpoint 135000:
Heads ablated:            [(8, 10)]
Original logit diff:      3.7393887043
Post ablation logit diff: 3.7103908062
Logit diff % change:      -0.78%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step136000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 136000:
Heads ablated:            [(8, 10)]
Original logit diff:      3.6624240875
Post ablation logit diff: 3.9168386459
Logit diff % change:      6.95%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step137000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 7.6190476190476195%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 5.714285714285714%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 10.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 137000:
Heads ablated:            [(8, 10)]
Original logit diff:      3.8636553288
Post ablation logit diff: 3.9344565868
Logit diff % change:      1.83%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step138000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 138000:
Heads ablated:            [(8, 10)]
Original logit diff:      3.9709925652
Post ablation logit diff: 4.0534920692
Logit diff % change:      2.08%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step139000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 139000:
Heads ablated:            [(8, 10)]
Original logit diff:      3.8794176579
Post ablation logit diff: 4.1224994659
Logit diff % change:      6.27%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step140000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 140000:
Heads ablated:            [(8, 10)]
Original logit diff:      3.7727386951
Post ablation logit diff: 3.8461897373
Logit diff % change:      1.95%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step141000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.7 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 6.666666666666667%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 30.476190476190478%
Checkpoint 141000:
Heads ablated:            [(8, 10)]
Original logit diff:      3.8212573528
Post ablation logit diff: 3.8989706039
Logit diff % change:      2.03%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step142000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 142000:
Heads ablated:            [(8, 10)]
Original logit diff:      3.6675205231
Post ablation logit diff: 3.8591077328
Logit diff % change:      5.22%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-weight-seed2 at step143000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 143000:
Heads ablated:            [(8, 10)]
Original logit diff:      3.8680002689
Post ablation logit diff: 3.8249075413
Logit diff % change:      -1.11%


In [9]:
# Show which checkpoints currently have an entry in experiment_metrics.
experiment_metrics.keys()

dict_keys([4000, 5000, 6000, 7000])

## View Results

### Pythia 160m

In [11]:
# Model whose saved backup results are loaded and plotted in this section;
# the name is used in the results path and in the plot titles.
MODEL_TO_VIEW = "pythia-160m-alldropout"

In [12]:
# Load the saved NMH-backup metrics for MODEL_TO_VIEW.
# map_location="cpu" lets results that were written on a GPU machine be loaded
# on a CPU-only one; in this section the values are only read for inspection
# and plotting.
# NOTE: torch.load unpickles the file, so only load result files you trust.
experiment_metrics = torch.load(
    f'results/backup/{MODEL_TO_VIEW}/nmh_backup_metrics.pt',
    map_location="cpu",
)

In [13]:
# Inspect which metric names are stored for a single checkpoint (step 4000).
experiment_metrics[4000].keys()

dict_keys(['logit_diff', 'per_head_logit_diffs', 'ablation_targets', 'ablated_logit_diff', 'per_head_ablated_logit_diffs', 'per_head_logit_diff_delta', 'in_circuit_head_delta', 'outside_circuit_head_delta', 'summed_in_circuit_head_delta', 'summed_outside_circuit_head_delta', 'summed_total_head_delta'])

In [14]:
# Per-checkpoint post-ablation deltas, each expressed relative to that
# checkpoint's original (pre-ablation) logit diff.
summed_in_circuit_head_deltas = {}
summed_outside_circuit_head_deltas = {}
summed_total_head_deltas = {}
per_head_logit_diff_deltas = {}
total_logit_diff_deltas = {}

for checkpoint in experiment_metrics.keys():
    checkpoint_metrics = experiment_metrics[checkpoint]
    # Normaliser shared by every quantity below.
    original_logit_diff = checkpoint_metrics["logit_diff"]

    summed_in_circuit_head_deltas[checkpoint] = (
        checkpoint_metrics["summed_in_circuit_head_delta"] / original_logit_diff
    )
    summed_outside_circuit_head_deltas[checkpoint] = (
        checkpoint_metrics["summed_outside_circuit_head_delta"] / original_logit_diff
    )
    summed_total_head_deltas[checkpoint] = (
        checkpoint_metrics["summed_total_head_delta"] / original_logit_diff
    )
    per_head_logit_diff_deltas[checkpoint] = (
        checkpoint_metrics["per_head_logit_diff_delta"] / original_logit_diff
    )
    # Overall change in logit diff between the ablated and the original run.
    total_logit_diff_deltas[checkpoint] = (
        checkpoint_metrics['ablated_logit_diff'] - checkpoint_metrics['logit_diff']
    ) / original_logit_diff

In [15]:
# Plot summed_in_circuit_head_deltas (already divided by the original logit
# diff) against checkpoint with plotly express.
fig = px.line(
    x=list(summed_in_circuit_head_deltas.keys()), 
    y=list(summed_in_circuit_head_deltas.values()), 
    title=f"Summed Post-NMH-Ablation In-Circuit Head Logit Diff Change Over Time ({MODEL_TO_VIEW})",
    labels={'x': 'Checkpoint', 'y': 'Change as % of original logit diff'} 
)
# The plotted values are ratios (delta / original logit diff), not numbers that
# were multiplied by 100, so render the y ticks as percentages to make the
# axis agree with its "%" label.
fig.update_yaxes(tickformat=".1%")
fig.show()


In [16]:
# Plot summed_outside_circuit_head_deltas (already divided by the original
# logit diff) against checkpoint.
fig = px.line(
    x=list(summed_outside_circuit_head_deltas.keys()), 
    y=list(summed_outside_circuit_head_deltas.values()), 
    title=f"Summed Post-NMH-Ablation Outside-Circuit Head Attribution Change ({MODEL_TO_VIEW})",
    labels={'x': 'Checkpoint', 'y': 'Change as % of original logit diff'} 
)
# The plotted values are ratios (delta / original logit diff), not numbers that
# were multiplied by 100, so render the y ticks as percentages to make the
# axis agree with its "%" label.
fig.update_yaxes(tickformat=".1%")
fig.show()

In [17]:
# Plot summed_total_head_deltas (already divided by the original logit diff)
# against checkpoint.
fig = px.line(
    x=list(summed_total_head_deltas.keys()), 
    y=list(summed_total_head_deltas.values()), 
    title=f"Summed Total Post-NMH-Ablation Head Attribution Change ({MODEL_TO_VIEW})",
    labels={'x': 'Checkpoint', 'y': 'Change as % of original logit diff'}
)
# The plotted values are ratios (delta / original logit diff), not numbers that
# were multiplied by 100, so render the y ticks as percentages to make the
# axis agree with its "%" label.
fig.update_yaxes(tickformat=".1%")

fig.show()

In [9]:
# Derive NMH sets from the loaded metrics. Judging by the outputs displayed
# further down, both results map checkpoint -> set of (layer, head) tuples:
# checkpoint_nmhs appears to hold the heads found at that checkpoint and
# cumulative_nmhs the running union up to it.
# TODO confirm against get_past_nmhs_for_checkpoints.
cumulative_nmhs, checkpoint_nmhs = get_past_nmhs_for_checkpoints(experiment_metrics)

In [18]:
# Run plot_top_heads on the normalised per-head deltas, keeping 10 heads per
# checkpoint (presumably the largest ones — confirm in utils.backup_analysis).
# The returned table is kept for the filtering cell further down.
top_backup_heads = plot_top_heads(
    model_name=MODEL_TO_VIEW,
    checkpoint_dict=per_head_logit_diff_deltas,
    cumulative_nmhs=cumulative_nmhs,
    top_k_per_checkpoint=10,
)

In [57]:
# Heatmap of the per-head logit-diff delta stored for checkpoint 143000.
# This reads the raw entry from experiment_metrics, not the normalised
# per_head_logit_diff_deltas dict built above.
imshow_p(
    experiment_metrics[143000]['per_head_logit_diff_delta'],
    title="Headwise logit diff contribution, post NMH KO",
    labels=dict(x="Head", y="Layer", color="Logit diff attribution"),
    border=True,
    width=600,
    margin=dict(r=100, l=100),
)

In [58]:
# Inspect which metric names are stored for checkpoint 143000.
experiment_metrics[143000].keys()

dict_keys(['logit_diff', 'per_head_logit_diffs', 'ablation_targets', 'ablated_logit_diff', 'per_head_ablated_logit_diffs', 'per_head_logit_diff_delta', 'in_circuit_head_delta', 'outside_circuit_head_delta', 'summed_in_circuit_head_delta', 'summed_outside_circuit_head_delta', 'summed_total_head_delta'])

In [59]:
# Show the first 50 rows of top_backup_heads flagged as a previous NMH.
top_backup_heads.loc[top_backup_heads['Previous NMH'].eq(True)].head(50)

Unnamed: 0,Checkpoint,Layer-Head,Layer,Head,Value,Previous NMH,Checkpoint_sum,Value_sum,Previous NMH_sum,Top K
72,20000,Layer 9-Head 4,9,4,0.00246,True,3135000,0.334368,38,True
97,25000,Layer 9-Head 4,9,4,0.006947,True,3135000,0.334368,38,True
121,30000,Layer 9-Head 4,9,4,0.006404,True,3135000,0.334368,38,True
149,35000,Layer 9-Head 4,9,4,0.014621,True,3135000,0.334368,38,True
156,37000,Layer 9-Head 4,9,4,0.01344,True,3135000,0.334368,38,True
171,40000,Layer 9-Head 4,9,4,0.010845,True,3135000,0.334368,38,True
197,45000,Layer 9-Head 4,9,4,0.035425,True,3135000,0.334368,38,True
202,46000,Layer 9-Head 4,9,4,0.033058,True,3135000,0.334368,38,True
207,47000,Layer 9-Head 4,9,4,0.014968,True,3135000,0.334368,38,True
212,48000,Layer 9-Head 4,9,4,0.008163,True,3135000,0.334368,38,True


In [60]:
# Display the per-checkpoint NMH sets returned by get_past_nmhs_for_checkpoints.
checkpoint_nmhs

{4000: set(),
 5000: set(),
 6000: set(),
 7000: set(),
 8000: {(8, 2)},
 9000: set(),
 10000: {(8, 1), (8, 10)},
 11000: {(10, 7)},
 12000: {(8, 2), (10, 7)},
 13000: {(10, 7)},
 14000: {(8, 2), (10, 7)},
 15000: {(8, 2), (10, 7)},
 16000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 17000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 18000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 19000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 20000: {(8, 2), (10, 7)},
 21000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 22000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 23000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 24000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 25000: {(8, 2), (10, 7)},
 26000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 27000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 28000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 29000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 30000: {(8, 2), (10, 7)},
 31000: {(8, 1), (8, 2), (9, 4), (10, 7)},
 32000: {(8, 1), (8

In [61]:
# Display the cumulative NMH sets returned by get_past_nmhs_for_checkpoints.
cumulative_nmhs

{4000: set(),
 5000: set(),
 6000: set(),
 7000: set(),
 8000: {(8, 2)},
 9000: {(8, 2)},
 10000: {(8, 1), (8, 2), (8, 10)},
 11000: {(8, 1), (8, 2), (8, 10), (10, 7)},
 12000: {(8, 1), (8, 2), (8, 10), (10, 7)},
 13000: {(8, 1), (8, 2), (8, 10), (10, 7)},
 14000: {(8, 1), (8, 2), (8, 10), (10, 7)},
 15000: {(8, 1), (8, 2), (8, 10), (10, 7)},
 16000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 17000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 18000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 19000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 20000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 21000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 22000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 23000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 24000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 25000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 26000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 27000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 28000: {(8, 1), (8, 2), (8, 10

In [62]:
# Line chart of how many NMHs are recorded in checkpoint_nmhs at each checkpoint.
fig = px.line(
    x=[*checkpoint_nmhs],
    y=[len(nmh_set) for nmh_set in checkpoint_nmhs.values()],
    title=f"Number of NMHs Over Time ({MODEL_TO_VIEW})",
    labels=dict(x='Checkpoint', y='Number of NMHs'),
)
fig.show()