## Setup

### Imports

In [1]:
import os
import json
import glob
import torch
import re
import einops
import pandas as pd
from functools import partial
from torch import Tensor
from torchtyping import TensorType as TT


import plotly.express as px

from utils.data_utils import generate_data_and_caches
from utils.data_processing import (
    load_edge_scores_into_dictionary,
)
from utils.visualization import plot_attention_heads, imshow_p
from utils.backup_analysis import (
    load_model,
    run_iteration,
    process_backup_results,
    get_past_nmhs_for_checkpoints,
    plot_top_heads
)

### Parameters

In [2]:
# Default run configuration. The "Experiment Parameters" cell further down
# assigns every one of these constants again, so the values set there are the
# ones the experiment cells actually see.
TASK, PERFORMANCE_METRIC = 'ioi', 'logit_diff'
BASE_MODEL, VARIANT = "pythia-160m", 'pythia-160m-alldropout'
CACHE = "model_cache"
IOI_DATASET_SIZE = 70

# Use the GPU when torch reports one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Turn gradient tracking off globally for the rest of the notebook.
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x7f4c6c9c5120>

### Functions

## Experiments

### Experiment Parameters

In [3]:
# Experiment configuration. These assignments override the same-named values
# from the "Parameters" cell above (notably VARIANT).
TASK = 'ioi'
PERFORMANCE_METRIC = 'logit_diff'
BASE_MODEL = "pythia-160m"
VARIANT = "EleutherAI/pythia-160m-seed1"

# Short model name used to build the results/ paths below: the variant id
# without its organisation prefix, or the base model when no variant is set.
# Taking the last "/"-separated component replaces the previous fixed
# 11-character slice, which silently mangled variants that carry no
# "EleutherAI/" prefix (e.g. 'pythia-160m-alldropout' became '-alldropout').
MODEL_SHORTNAME = VARIANT.rsplit("/", 1)[-1] if VARIANT else BASE_MODEL

CACHE = "model_cache"
IOI_DATASET_SIZE = 70
# Passed to run_iteration as `threshold`; presumably a copy-score percentage
# cut-off for selecting heads to ablate — TODO confirm in utils.backup_analysis.
COPY_SCORE_THRESHOLD = 75.0

### Circuit Data

In [4]:
# Load the stored edge scores for this model/task from the results folder.
folder_path = f'results/graphs/{MODEL_SHORTNAME}/{TASK}'
df = load_edge_scores_into_dictionary(folder_path)

# Keep only rows whose checkpoint is at step 1000 or later. The explicit
# .copy() makes the filtered frame independent of the loaded one, so the
# column assignment below cannot raise pandas' SettingWithCopyWarning.
df = df[df['checkpoint'] >= 1000].copy()

# Split each "source->target" edge label into its two endpoint columns.
df[['source', 'target']] = df['edge'].str.split('->', expand=True)

# Cell output: number of distinct target values.
len(df['target'].unique())

Processing file 1/140: results/graphs/pythia-160m-seed1/ioi/57000.json
Processing file 2/140: results/graphs/pythia-160m-seed1/ioi/141000.json
Processing file 3/140: results/graphs/pythia-160m-seed1/ioi/95000.json
Processing file 4/140: results/graphs/pythia-160m-seed1/ioi/107000.json
Processing file 5/140: results/graphs/pythia-160m-seed1/ioi/34000.json
Processing file 6/140: results/graphs/pythia-160m-seed1/ioi/6000.json
Processing file 7/140: results/graphs/pythia-160m-seed1/ioi/37000.json
Processing file 8/140: results/graphs/pythia-160m-seed1/ioi/39000.json
Processing file 9/140: results/graphs/pythia-160m-seed1/ioi/104000.json
Processing file 10/140: results/graphs/pythia-160m-seed1/ioi/59000.json
Processing file 11/140: results/graphs/pythia-160m-seed1/ioi/67000.json
Processing file 12/140: results/graphs/pythia-160m-seed1/ioi/111000.json
Processing file 13/140: results/graphs/pythia-160m-seed1/ioi/76000.json
Processing file 14/140: results/graphs/pythia-160m-seed1/ioi/5000.json

445

### Dataset Setup

In [5]:
# Load the model at step 143000 (also the last step visited by the checkpoint
# sweep in the "Run Experiment" cell) and use it to build the two datasets.
initial_model = load_model(BASE_MODEL, VARIANT, 143000, CACHE, device)

# Take the dataset size from the configuration constant instead of repeating
# the literal 70, so the two can no longer drift apart.
size = IOI_DATASET_SIZE
ioi_dataset, abc_dataset = generate_data_and_caches(initial_model, size, verbose=True)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step143000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer


In [6]:
# NOTE(review): this whole cell is commented out and does nothing when run.
# It would call imshow_p (presumably to draw a per-head heatmap), but
# `per_head_ablated_logit_diffs` is not defined anywhere in the visible part
# of this notebook, so it cannot simply be uncommented.
# TODO: confirm where that value should come from, or delete the cell.
# imshow_p(
#     per_head_ablated_logit_diffs,
#     title="Headwise logit diff contribution, post NMH KO",
#     labels={"x": "Head", "y": "Layer", "color": "Logit diff attribution"},
#     #coloraxis=dict(colorbar_ticksuffix = "%"),
#     border=True,
#     width=600,
#     margin={"r": 100, "l": 100}
# )

### Run Experiment

In [10]:
# Output locations for this model's backup-analysis results.
backup_dir = f'results/backup/{MODEL_SHORTNAME}'
metrics_path = f'results/backup/{MODEL_SHORTNAME}/nmh_backup_metrics.pt'
os.makedirs(backup_dir, exist_ok=True)

# Results are accumulated in a single dict that is threaded through every
# iteration of the sweep.
experiment_metrics = {}

# Sweep checkpoints 4000, 5000, ..., 143000.
for checkpoint in range(4000, 144000, 1000):
    experiment_metrics = run_iteration(
        BASE_MODEL,
        VARIANT,
        df,
        checkpoint=checkpoint,
        dataset=ioi_dataset,
        experiment_metrics=experiment_metrics,
        threshold=COPY_SCORE_THRESHOLD,
    )
    experiment_metrics = process_backup_results(df, checkpoint, experiment_metrics)

    # Rewrite the metrics file (PyTorch format) after every checkpoint, so the
    # results gathered so far are on disk even if a later iteration fails.
    torch.save(experiment_metrics, metrics_path)

Loaded model EleutherAI/pythia-160m-seed1 at step4000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Checkpoint 4000:
Heads ablated:            []
Original logit diff:      0.4883192480
Post ablation logit diff: 0.4883192480
Logit diff % change:      0.00%
Loaded model EleutherAI/pythia-160m-seed1 at step5000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 5000:
Heads ablated:            [(6, 5)]
Original logit diff:      1.3061420918
Post ablation logit diff: 1.2755876780
Logit diff % change:      -2.34%
Loaded model EleutherAI/pythia-160m-seed1 at step6000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 11.2 (sign=1) : Top 5 accuracy: 20.0%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 47.14285714285714%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 8.095238095238095%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 4.285714285714286%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 7.142857142857142%
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 6000:
Heads ablated:            [(9, 4), (6, 5)]
Original logit diff:      1.6456636190
Post ablation logit diff: 1.6411461830
Logit diff % change:      -0.27%
Loaded model EleutherAI/pythia-160m-seed1 at step7000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 37.142857142857146%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 89.04761904761904%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 4.285714285714286%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 7.6190476190476195%
Copy circuit for head 11.2 (sign=1) : Top 5 accuracy: 26.190476190476193%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 46.19047619047619%
Copy circuit for head 6.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 14.285714285714285%
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 6.2 (sign=1) : Top 5 accuracy: 79.52380952380952%
Checkpoint 7000:
Heads ablated:            [(9, 4), (7, 9), (7, 7), (6, 9), (6, 5), (6, 2)]
Or

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.2 (sign=1) : Top 5 accuracy: 88.09523809523809%
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 14.761904761904763%
Copy circuit for head 6.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 8.095238095238095%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 45.23809523809524%
Copy circuit for head 11.2 (sign=1) : Top 5 accuracy: 30.952380952380953%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 31.428571428571427%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 84.76190476190476%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 3.8095238095238098%
Checkpoint 8000:
Heads ablated:            [(6, 2), (6, 5), (6, 9), (7, 7), (7, 9)]
Original logit diff:      2.1917302608
Post ablation logit diff: 2.1420717239
Log

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step9000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 3.8095238095238098%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 84.76190476190476%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 31.9047619047619%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 9.523809523809524%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 45.714285714285715%
Copy circuit for head 11.2 (sign=1) : Top 5 accuracy: 37.142857142857146%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 8.571428571428571%
Copy circuit for head 6.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 10.952380952380953%
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 6.2 (sign=1) : Top 5 accuracy: 90.47619047619048%
Checkpoint 9000:
Heads ablated:            [(7, 9), (7, 7), (6, 9), (6, 5), (6, 2)]
Original 

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step10000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 94.28571428571428%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 29.04761904761905%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 94.76190476190476%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 86.19047619047619%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 27.61904761904762%
Copy circuit for head 11.2 (sign=1) : Top 5 accuracy: 33.80952380952381%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 44.285714285714285%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 11.904761904761903%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 8.571428571428571%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 11.904761904761903%
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 6.2 (sign=1) : Top 5 accuracy: 92.85714285714286%
Checkpoin

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step11000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 28.095238095238095%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 87.14285714285714%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 94.76190476190476%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 44.761904761904766%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 27.142857142857142%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 10.476190476190476%
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 11000:
Heads ablated:            [(7, 7), (9, 4), (6, 5)]
Original logit diff:      2.5621244907
Post ablation logit diff: 2.2477440834
Logit diff % change:      -12.27%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step12000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 26.190476190476193%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 94.28571428571428%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 26.666666666666668%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 43.333333333333336%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 14.761904761904763%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 78.57142857142857%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 12.857142857142856%
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 12000:
Heads ablated:            [(9, 4), (7, 7), (6, 5)]
Original logit diff:      2.6678431034
Post ablation logit diff: 2.3599007130
Logit diff % change:      -11.54%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step13000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.2 (sign=1) : Top 5 accuracy: 97.14285714285714%
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 15.238095238095239%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 13.80952380952381%
Copy circuit for head 11.2 (sign=1) : Top 5 accuracy: 36.666666666666664%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 38.57142857142858%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 79.04761904761905%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 27.142857142857142%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 19.523809523809526%
Checkpoint 13000:
Heads ablated:            [(6, 2), (6, 5), (7, 9), (7, 7), (9, 4)]
Original logit diff:      2.4753828049
Post ablation logit diff: 1.98

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step14000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.2 (sign=1) : Top 5 accuracy: 98.09523809523809%
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 14.761904761904763%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 41.904761904761905%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 15.714285714285714%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 19.523809523809526%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 78.57142857142857%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 24.285714285714285%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Checkpoint 14000:
Heads ablated:            [(6, 2), (6, 5), (7, 7), (9, 4)]
Original logit diff:      2.9184849262
Post ablation logit diff: 2.5127847195
Logit diff % change:      -13.90%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step15000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 13.80952380952381%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 11.904761904761903%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 42.857142857142854%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 20.0%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 73.80952380952381%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 24.285714285714285%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 97.61904761904762%
Checkpoint 15000:
Heads ablated:            [(7, 9), (9, 4)]
Original logit diff:      2.8664309978
Post ablation logit diff: 2.3307163715
Logit diff % change:      -18.69%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step16000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.2 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 15.238095238095239%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 15.714285714285714%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 35.23809523809524%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 13.80952380952381%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 75.23809523809524%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 8.095238095238095%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 24.285714285714285%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 98.57142857142858%
Checkpoint 16000:
Heads ablated:            [(6, 2), (7, 7), (9, 4)]
Original logit diff:      3.0151734352
Post ablation logit diff: 2.3903312683
Logit diff % change:      -20.72%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step17000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.2 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 16.19047619047619%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 11.904761904761903%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 17.142857142857142%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 36.19047619047619%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 15.714285714285714%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 76.66666666666667%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 5.714285714285714%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 22.857142857142858%
Checkpoint 17000:
Heads ablated:            [(6, 2), (9, 4), (7, 7), 

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step18000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 70.47619047619048%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 23.333333333333332%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 14.761904761904763%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 31.428571428571427%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 18.095238095238095%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 12.857142857142856%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 10.952380952380953%
Copy circuit for head 6.2 (sign=1) : Top 5 accuracy: 98.57142857142858%
Checkpoint 18000:
Heads ablated:            [(7, 9), (9, 4), (6, 2)]
Original logit diff:      3.1994860172
Post ablation logit diff: 2.5466

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step19000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 26.666666666666668%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 71.9047619047619%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 10.476190476190476%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 19.047619047619047%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 28.095238095238095%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 13.80952380952381%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 12.857142857142856%
Checkpoint 19000:
Heads ablated:            [(9, 4), (7, 9)]
Original logit diff:      3.5188028812
Post ablation logit diff: 2.8616716862
Logit diff % change:      -18.67%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step20000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.2 (sign=1) : Top 5 accuracy: 96.19047619047619%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 12.857142857142856%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 30.0%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 15.238095238095239%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 22.857142857142858%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 10.952380952380953%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 68.57142857142857%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 23.333333333333332%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 98.57142857142858%
Checkpoint 20000:
Heads ablated:            [(6, 2), (7, 9), (9, 4)]
Original logit diff:      3.8367567062
Post ablation logit diff: 2

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step21000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 12.857142857142856%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 15.238095238095239%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 28.095238095238095%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 23.809523809523807%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 10.476190476190476%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 23.333333333333332%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 98.57142857142858%
Checkpoint 21000:
Heads ablated:            [(9, 4)]
Original logit diff:      3.2442209721
Post ablation logit diff: 2.5282549858
Logit diff % change:      -22.07%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step22000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 71.42857142857143%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 23.809523809523807%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 15.238095238095239%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 44.761904761904766%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 34.285714285714285%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 26.666666666666668%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 11.904761904761903%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 11.904761904761903%
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 6.2 (sign=1) : Top 5 accuracy: 94.285

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step23000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 11.904761904761903%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 24.761904761904763%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 98.09523809523809%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 23.333333333333332%
Checkpoint 23000:
Heads ablated:            [(9, 4)]
Original logit diff:      3.8865993023
Post ablation logit diff: 3.0545184612
Logit diff % change:      -21.41%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step24000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 15.714285714285714%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 54.285714285714285%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 28.57142857142857%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 11.904761904761903%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 98.09523809523809%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 21.428571428571427%
Checkpoint 24000:
Heads ablated:            [(9, 4)]
Original logit diff:      3.4092087746
Post ablation logit diff: 2.7277810574
Logit diff % change:      -19.99%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step25000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 98.09523809523809%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 22.380952380952383%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 11.904761904761903%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 26.666666666666668%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 31.428571428571427%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 16.666666666666664%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 52.85714285714286%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 13.80952380952381%
Checkpoint 25000:
Heads ablated:            [(9, 4), (7, 9)]
Original logit diff:      3.4135973454
Post ablation logit diff: 2.5710132122
Logit diff % change:      -24.68%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step26000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 16.19047619047619%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 53.333333333333336%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 16.666666666666664%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 29.523809523809526%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 30.0%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 6.666666666666667%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 68.0952380952381%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 3.3333333333333335%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 24.285714285714285%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 10.0%
Checkpoint 26000:
Heads ablated:            [(7, 9), (9, 4)]
Original logit diff:      3.43000

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step27000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 94.76190476190476%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 24.285714285714285%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 10.952380952380953%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 6.190476190476191%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 17.142857142857142%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 53.333333333333336%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 27.142857142857142%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 11.428571428571429%
Checkpoint 27000:
Heads ablated:            [(9, 4)]
Original logit diff:      3.9375889301
Post ablation logit diff: 3.0321161747
Logit diff % change:      -23.00%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step28000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.2 (sign=1) : Top 5 accuracy: 90.95238095238095%
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 11.904761904761903%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 56.19047619047619%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 16.19047619047619%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 28.57142857142857%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 25.238095238095237%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 6.190476190476191%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 8.095238095238095%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 65.71428571428571%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 23.809523809523807%
Copy circ

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step29000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 26.190476190476193%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 56.666666666666664%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 8.571428571428571%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 27.142857142857142%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 97.61904761904762%
Checkpoint 29000:
Heads ablated:            [(9, 4)]
Original logit diff:      3.8444402218
Post ablation logit diff: 2.9600272179
Logit diff % change:      -23.00%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step30000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 29.523809523809526%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 7.142857142857142%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 25.238095238095237%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 60.952380952380956%
Checkpoint 30000:
Heads ablated:            [(9, 4)]
Original logit diff:      3.6603560448
Post ablation logit diff: 2.8263506889
Logit diff % change:      -22.78%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step31000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 24.285714285714285%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 8.095238095238095%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 32.38095238095238%
Checkpoint 31000:
Heads ablated:            [(9, 4)]
Original logit diff:      3.3969445229
Post ablation logit diff: 2.5804700851
Logit diff % change:      -24.04%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step32000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 12.380952380952381%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 62.857142857142854%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 16.666666666666664%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 34.285714285714285%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 29.04761904761905%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 7.142857142857142%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 7.142857142857142%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 62.857142857142854%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 22.857142857142858%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Checkpoint 32000:
Heads ablated:            [(7, 9), (9, 4)]
Original logit diff:      3.9896893501
Post ablation logit diff: 2.9204747677


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step33000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 28.095238095238095%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 66.66666666666666%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 17.142857142857142%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 7.142857142857142%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 23.333333333333332%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Checkpoint 33000:
Heads ablated:            [(9, 4)]
Original logit diff:      3.2891156673
Post ablation logit diff: 2.5718388557
Logit diff % change:      -21.81%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step34000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 66.66666666666666%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 27.142857142857142%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 22.380952380952383%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 5.714285714285714%
Checkpoint 34000:
Heads ablated:            [(9, 4)]
Original logit diff:      4.0030670166
Post ablation logit diff: 2.9906995296
Logit diff % change:      -25.29%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step35000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 29.523809523809526%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 65.71428571428571%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 7.6190476190476195%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 22.857142857142858%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Checkpoint 35000:
Heads ablated:            [(9, 4)]
Original logit diff:      3.5902304649
Post ablation logit diff: 2.7152640820
Logit diff % change:      -24.37%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step36000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 4.285714285714286%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 28.57142857142857%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 65.71428571428571%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 94.76190476190476%
Copy circuit for head 8.11 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 22.857142857142858%
Checkpoint 36000:
Heads ablated:            [(9, 4)]
Original logit diff:      3.5038461685
Post ablation logit diff: 2.6678073406
Logit diff % change:      -23.86%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step37000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 21.428571428571427%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 7.6190476190476195%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 31.428571428571427%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 20.476190476190474%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 68.0952380952381%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 11.904761904761903%
Checkpoint 37000:
Heads ablated:            [(9, 4), (7, 9)]
Original logit diff:      3.9260063171
Post ablation logit diff: 2.9072592258
Logit diff % change:      -25.95%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step38000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 29.04761904761905%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 68.0952380952381%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 20.476190476190474%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 22.857142857142858%
Checkpoint 38000:
Heads ablated:            [(9, 4)]
Original logit diff:      3.7442700863
Post ablation logit diff: 2.8718061447
Logit diff % change:      -23.30%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step39000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 25.71428571428571%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 70.47619047619048%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 29.523809523809526%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 6.190476190476191%
Checkpoint 39000:
Heads ablated:            [(9, 4)]
Original logit diff:      3.9355041981
Post ablation logit diff: 2.9590353966
Logit diff % change:      -24.81%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step40000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 67.61904761904762%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 30.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 6.190476190476191%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 24.285714285714285%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Checkpoint 40000:
Heads ablated:            [(9, 4)]
Original logit diff:      3.2015888691
Post ablation logit diff: 2.4716193676
Logit diff % change:      -22.80%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step41000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 23.809523809523807%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 69.04761904761905%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 30.952380952380953%
Checkpoint 41000:
Heads ablated:            [(9, 4)]
Original logit diff:      3.6185610294
Post ablation logit diff: 2.6267118454
Logit diff % change:      -27.41%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step42000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 73.33333333333333%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 22.857142857142858%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 97.61904761904762%
Checkpoint 42000:
Heads ablated:            [(9, 4)]
Original logit diff:      3.5045366287
Post ablation logit diff: 2.4332091808
Logit diff % change:      -30.57%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step43000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 28.095238095238095%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 75.23809523809524%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 20.952380952380953%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 21.428571428571427%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.71428571428572%
Checkpoint 43000:
Heads ablated:            [(10, 7), (7, 9), (9, 4)]
Original logit diff:      4.0419116020
Post ablation logit diff: 3.0659754276
Logit diff % change:      -24.15%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step44000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 23.333333333333332%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 97.61904761904762%
Checkpoint 44000:
Heads ablated:            [(9, 4)]
Original logit diff:      3.6379678249
Post ablation logit diff: 2.7220175266
Logit diff % change:      -25.18%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step45000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 23.333333333333332%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 97.14285714285714%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 74.28571428571429%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 30.476190476190478%
Checkpoint 45000:
Heads ablated:            [(9, 4)]
Original logit diff:      3.6509871483
Post ablation logit diff: 2.6584887505
Logit diff % change:      -27.18%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step46000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 23.333333333333332%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 8.571428571428571%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 30.952380952380953%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 20.952380952380953%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 74.28571428571429%
Checkpoint 46000:
Heads ablated:            [(9, 4), (7, 9)]
Original logit diff:      4.1936593056
Post ablation logit diff: 3.0248901844
Logit diff % change:      -27.87%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step47000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 71.9047619047619%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 30.952380952380953%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 23.333333333333332%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Checkpoint 47000:
Heads ablated:            [(9, 4)]
Original logit diff:      4.1424846649
Post ablation logit diff: 3.0635030270
Logit diff % change:      -26.05%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step48000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 23.333333333333332%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Checkpoint 48000:
Heads ablated:            [(9, 4)]
Original logit diff:      3.4861955643
Post ablation logit diff: 2.5028870106
Logit diff % change:      -28.21%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step49000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 22.857142857142858%
Checkpoint 49000:
Heads ablated:            [(9, 4)]
Original logit diff:      4.0814199448
Post ablation logit diff: 2.9077925682
Logit diff % change:      -28.76%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step50000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 23.333333333333332%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 79.52380952380952%
Checkpoint 50000:
Heads ablated:            [(9, 4), (10, 7)]
Original logit diff:      3.6973965168
Post ablation logit diff: 2.6896388531
Logit diff % change:      -27.26%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step51000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 22.380952380952383%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 79.52380952380952%
Checkpoint 51000:
Heads ablated:            [(9, 4), (10, 7)]
Original logit diff:      3.2232468128
Post ablation logit diff: 2.5220363140
Logit diff % change:      -21.75%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step52000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 30.0%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 77.14285714285715%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 20.952380952380953%
Checkpoint 52000:
Heads ablated:            [(10, 7), (9, 4)]
Original logit diff:      4.0100221634
Post ablation logit diff: 2.9494888783
Logit diff % change:      -26.45%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step53000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 32.38095238095238%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 77.61904761904762%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 21.904761904761905%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 4.285714285714286%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.11 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 3.3333333333333335%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 21.904761904761905%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Checkpoint 53000:
Heads ablated:            [(10, 7), (7, 9), (9, 4)]
Original logit diff:      4.2180585861
Post ablation logit diff: 3.1862819195
Logit diff % change:      -24.46%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step54000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 22.380952380952383%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 76.66666666666667%
Checkpoint 54000:
Heads ablated:            [(9, 4), (10, 7)]
Original logit diff:      4.1220326424
Post ablation logit diff: 3.1170897484
Logit diff % change:      -24.38%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step55000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 20.476190476190474%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 78.0952380952381%
Checkpoint 55000:
Heads ablated:            [(9, 4), (10, 7)]
Original logit diff:      3.6567335129
Post ablation logit diff: 2.7164523602
Logit diff % change:      -25.71%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step56000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 21.428571428571427%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 78.57142857142857%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 35.714285714285715%
Checkpoint 56000:
Heads ablated:            [(9, 4), (10, 7)]
Original logit diff:      3.6387138367
Post ablation logit diff: 2.5444419384
Logit diff % change:      -30.07%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step57000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 24.761904761904763%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 77.14285714285715%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 30.952380952380953%
Checkpoint 57000:
Heads ablated:            [(9, 4), (10, 7)]
Original logit diff:      3.9495539665
Post ablation logit diff: 3.0349915028
Logit diff % change:      -23.16%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step58000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 77.61904761904762%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Checkpoint 58000:
Heads ablated:            [(10, 7), (9, 4)]
Original logit diff:      3.7310407162
Post ablation logit diff: 2.8688542843
Logit diff % change:      -23.11%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step59000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 21.904761904761905%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Checkpoint 59000:
Heads ablated:            [(9, 4)]
Original logit diff:      4.0946054459
Post ablation logit diff: 3.0021619797
Logit diff % change:      -26.68%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step60000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 20.0%
Checkpoint 60000:
Heads ablated:            [(9, 4)]
Original logit diff:      3.9870021343
Post ablation logit diff: 2.7961189747
Logit diff % change:      -29.87%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step61000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 19.523809523809526%
Checkpoint 61000:
Heads ablated:            [(9, 4)]
Original logit diff:      4.5288734436
Post ablation logit diff: 3.2354993820
Logit diff % change:      -28.56%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step62000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 18.571428571428573%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Checkpoint 62000:
Heads ablated:            [(9, 4)]
Original logit diff:      4.3536715508
Post ablation logit diff: 3.1797568798
Logit diff % change:      -26.96%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step63000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 32.857142857142854%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 79.04761904761905%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 20.952380952380953%
Checkpoint 63000:
Heads ablated:            [(10, 7), (9, 4)]
Original logit diff:      3.9957475662
Post ablation logit diff: 3.0076088905
Logit diff % change:      -24.73%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step64000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 18.095238095238095%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Checkpoint 64000:
Heads ablated:            [(9, 4)]
Original logit diff:      4.0706906319
Post ablation logit diff: 2.8701951504
Logit diff % change:      -29.49%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step65000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 21.904761904761905%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 4.285714285714286%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 78.0952380952381%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 30.952380952380953%
Checkpoint 65000:
Heads ablated:            [(9, 4), (7, 9), (10, 7)]
Original logit diff:      3.8199245930
Post ablation logit diff: 2.9957001209
Logit diff % change:      -21.58%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step66000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 19.047619047619047%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 3.3333333333333335%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 78.57142857142857%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 33.80952380952381%
Checkpoint 66000:
Heads ablated:            [(9, 4), (10, 7)]
Original logit diff:      4.2061839104
Post ablation logit diff: 3.2141239643
Logit diff % change:      -23.59%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step67000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 20.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 30.0%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 78.57142857142857%
Checkpoint 67000:
Heads ablated:            [(9, 4), (10, 7)]
Original logit diff:      3.7779767513
Post ablation logit diff: 2.9182565212
Logit diff % change:      -22.76%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step68000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 16.666666666666664%
Checkpoint 68000:
Heads ablated:            [(9, 4)]
Original logit diff:      3.8012237549
Post ablation logit diff: 2.6346006393
Logit diff % change:      -30.69%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step69000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 17.61904761904762%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 98.09523809523809%
Checkpoint 69000:
Heads ablated:            [(9, 4)]
Original logit diff:      3.9573757648
Post ablation logit diff: 2.7337894440
Logit diff % change:      -30.92%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step70000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 29.523809523809526%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 7.6190476190476195%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 22.380952380952383%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 98.57142857142858%
Checkpoint 70000:
Heads ablated:            [(9, 4)]
Original logit diff:      4.5520052910
Post ablation logit diff: 3.2888369560
Logit diff % change:      -27.75%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step71000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 26.190476190476193%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 7.142857142857142%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 23.809523809523807%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 98.57142857142858%
Checkpoint 71000:
Heads ablated:            [(9, 4)]
Original logit diff:      4.3181972504
Post ablation logit diff: 2.9645009041
Logit diff % change:      -31.35%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step72000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 24.285714285714285%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 7.142857142857142%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 24.761904761904763%
Checkpoint 72000:
Heads ablated:            [(9, 4)]
Original logit diff:      4.2969908714
Post ablation logit diff: 2.9704041481
Logit diff % change:      -30.87%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step73000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 29.523809523809526%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 3.8095238095238098%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 24.285714285714285%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 97.61904761904762%
Checkpoint 73000:
Heads ablated:            [(9, 4)]
Original logit diff:      4.1196913719
Post ablation logit diff: 2.8461194038
Logit diff % change:      -30.91%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step74000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 27.142857142857142%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 6.190476190476191%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 31.9047619047619%
Checkpoint 74000:
Heads ablated:            [(9, 4)]
Original logit diff:      4.7691187859
Post ablation logit diff: 3.5346071720
Logit diff % change:      -25.89%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step75000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 96.19047619047619%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 30.0%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 7.142857142857142%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 3.8095238095238098%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 30.476190476190478%
Checkpoint 75000:
Heads ablated:            [(9, 4)]
Original logit diff:      4.2502665520
Post ablation logit diff: 3.1139261723
Logit diff % change:      -26.74%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step76000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 29.523809523809526%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 7.142857142857142%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 5.714285714285714%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 30.476190476190478%
Checkpoint 76000:
Heads ablated:            [(9, 4)]
Original logit diff:      4.6031956673
Post ablation logit diff: 3.3143250942
Logit diff % change:      -28.00%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step77000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 35.714285714285715%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 7.142857142857142%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 6.190476190476191%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 29.04761904761905%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 68.0952380952381%
Checkpoint 77000:
Heads ablated:            [(9, 4), (7, 9)]
Original logit diff:      4.2473039627
Post ablation logit diff: 3.4592707157
Logit diff % change:      -18.55%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step78000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 30.476190476190478%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 7.142857142857142%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 32.857142857142854%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 96.66666666666667%
Checkpoint 78000:
Heads ablated:            [(9, 4)]
Original logit diff:      3.9035549164
Post ablation logit diff: 2.9668130875
Logit diff % change:      -24.00%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step79000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 36.19047619047619%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 7.142857142857142%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 3.3333333333333335%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 30.476190476190478%
Checkpoint 79000:
Heads ablated:            [(9, 4)]
Original logit diff:      4.6397361755
Post ablation logit diff: 3.7240126133
Logit diff % change:      -19.74%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step80000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 92.85714285714286%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 39.04761904761905%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 7.6190476190476195%
Copy circuit for head 8.6 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 59.04761904761905%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 32.38095238095238%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 41.42857142857143%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 16.19047619047619%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 61.904761904761905%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 4.761904761904762%
Checkpoint 80000:
Heads ablated:            [(9, 4), (7, 9)]
Original

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step81000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 36.19047619047619%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 7.142857142857142%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 37.142857142857146%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 92.38095238095238%
Checkpoint 81000:
Heads ablated:            [(7, 9), (9, 4)]
Original logit diff:      4.1107807159
Post ablation logit diff: 3.6432967186
Logit diff % change:      -11.37%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step82000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 36.666666666666664%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 7.6190476190476195%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 39.04761904761905%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 93.33333333333333%
Checkpoint 82000:
Heads ablated:            [(9, 4)]
Original logit diff:      4.1647262573
Post ablation logit diff: 3.4556372166
Logit diff % change:      -17.03%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step83000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 37.61904761904762%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 3.3333333333333335%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 61.904761904761905%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 8.095238095238095%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 37.61904761904762%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 93.33333333333333%
Checkpoint 83000:
Heads ablated:            [(9, 4)]
Original logit diff:      3.9369411469
Post ablation logit diff: 3.1750335693
Logit diff % change:      -19.35%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step84000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 37.61904761904762%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 6.666666666666667%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 34.285714285714285%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 18.571428571428573%
Checkpoint 84000:
Heads ablated:            [(9, 4)]
Original logit diff:      4.0487155914
Post ablation logit diff: 3.3913390636
Logit diff % change:      -16.24%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step85000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 18.095238095238095%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 36.19047619047619%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 61.42857142857143%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.6 (sign=1) : Top 5 accuracy: 8.571428571428571%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 8.571428571428571%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 39.04761904761905%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 7.6190476190476195%
Checkpoint 85000:
Heads ablated:            [(7, 9), (9, 4)]
Original logit diff:      4.0690698624
Post ablation logit diff: 3.7052295208
Logit diff % change:      -8.94%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step86000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 37.142857142857146%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 10.476190476190476%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 38.095238095238095%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 5.238095238095238%
Checkpoint 86000:
Heads ablated:            [(9, 4)]
Original logit diff:      4.2123036385
Post ablation logit diff: 3.4532034397
Logit diff % change:      -18.02%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step87000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 33.33333333333333%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 9.523809523809524%
Copy circuit for head 8.6 (sign=1) : Top 5 accuracy: 8.095238095238095%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 66.66666666666666%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 40.0%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 18.095238095238095%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 31.428571428571427%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 64.28571428571429%
Copy 

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step88000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 38.57142857142858%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 10.952380952380953%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 30.952380952380953%
Checkpoint 88000:
Heads ablated:            [(9, 4)]
Original logit diff:      4.4651060104
Post ablation logit diff: 3.5784504414
Logit diff % change:      -19.86%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step89000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 17.61904761904762%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 71.42857142857143%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 39.523809523809526%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 69.04761904761905%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.6 (sign=1) : Top 5 accuracy: 11.428571428571429%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 10.0%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 30.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 93.33333333333333%
Checkpoint 89000:
Heads ablated:            [(7, 9), (9, 4)]
Original logit diff:      4.4992170334
Post a

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step90000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 71.9047619047619%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 18.095238095238095%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 36.666666666666664%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 6.190476190476191%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 64.28571428571429%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 10.0%
Copy circuit 

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step91000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 28.095238095238095%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 9.523809523809524%
Copy circuit for head 8.10 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 8.6 (sign=1) : Top 5 accuracy: 12.380952380952381%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 62.857142857142854%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 7.6190476190476195%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 30.476190476190478%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 18.095238095238095%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 72.85714285714285%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Cop

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step92000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 26.666666666666668%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 72.38095238095238%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 17.61904761904762%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 8.095238095238095%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 62.857142857142854%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.6 (sign=1) : Top 5 accuracy: 13.80952380952381%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 8.095238095238095%
C

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step93000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 17.61904761904762%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 21.428571428571427%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 6.190476190476191%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 63.8095238095238%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.6 (sign=1) : Top 5 accuracy: 15.714285714285714%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 8.571428571428571%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 24.285714285714285%
Co

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step94000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 21.428571428571427%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 8.571428571428571%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 24.285714285714285%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 93.33333333333333%
Checkpoint 94000:
Heads ablated:            [(7, 9), (9, 4)]
Original logit diff:      3.7619745731
Post ablation logit diff: 3.7470912933
Logit diff % change:      -0.40%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step95000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 13.80952380952381%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 8.095238095238095%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 23.333333333333332%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 92.38095238095238%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 6.666666666666667%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 20.952380952380953%
Checkpoint 95000:
Heads ablated:            [(9, 4)]
Original logit diff:      4.5374121666
Post ablation logit diff: 3.6587786674
Logit diff % change:      -19.36%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step96000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 92.38095238095238%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 24.761904761904763%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 60.952380952380956%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 6.190476190476191%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 19.523809523809526%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 12.857142857142856%
Checkpoint 96000:
Heads ablated:            [(9, 4)]
Original logit diff:      4.3737378120
Post ablation logit diff: 3.6137661934
Logit diff % change:      -17.38%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step97000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 18.095238095238095%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 22.380952380952383%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 92.38095238095238%
Checkpoint 97000:
Heads ablated:            [(9, 4)]
Original logit diff:      3.8905107975
Post ablation logit diff: 3.2574594021
Logit diff % change:      -16.27%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step98000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 5.714285714285714%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 18.095238095238095%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 22.857142857142858%
Checkpoint 98000:
Heads ablated:            [(9, 4)]
Original logit diff:      4.1684527397
Post ablation logit diff: 3.4029834270
Logit diff % change:      -18.36%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step99000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 7.6190476190476195%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 83.33333333333334%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 16.666666666666664%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 63.33333333333333%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.6 (sign=1) : Top 5 accuracy: 9.047619047619047%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 7.6190476190476195%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 22.380952380952383%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 89.04761904761904%
Checkpoint 99000:
Hea

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step100000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 86.66666666666667%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 20.952380952380953%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 16.19047619047619%
Checkpoint 100000:
Heads ablated:            [(9, 4)]
Original logit diff:      3.9002842903
Post ablation logit diff: 3.1865835190
Logit diff % change:      -18.30%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step101000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 83.80952380952381%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 19.523809523809526%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 15.714285714285714%
Checkpoint 101000:
Heads ablated:            [(9, 4)]
Original logit diff:      4.3399715424
Post ablation logit diff: 3.5631897449
Logit diff % change:      -17.90%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step102000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 85.71428571428571%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 5.714285714285714%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 14.285714285714285%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 60.952380952380956%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 6.190476190476191%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 20.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 82.38095238095238%
Copy circuit for head 8.6 (sign=1) : Top 5 accuracy: 8.571428571428571%
Checkpoint 102000:
Heads ablated:            [(6, 5), (9, 11), (7, 9), (9, 4)]
Original logit diff:      4.6

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step103000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 19.523809523809526%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 81.9047619047619%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 12.857142857142856%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Checkpoint 103000:
Heads ablated:            [(9, 4)]
Original logit diff:      4.0318021774
Post ablation logit diff: 3.3574888706
Logit diff % change:      -16.72%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step104000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 78.57142857142857%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 20.476190476190474%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 11.428571428571429%
Checkpoint 104000:
Heads ablated:            [(9, 4), (7, 9)]
Original logit diff:      4.0640726089
Post ablation logit diff: 4.1390528679
Logit diff % change:      1.84%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step105000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 73.33333333333333%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 16.19047619047619%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 8.6 (sign=1) : Top 5 accuracy: 3.8095238095238098%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 62.38095238095238%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 11.428571428571429%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 105000:
Heads ablated:            [(7, 9), (9, 11), (6, 5)]
Original logit diff:   

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step106000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 71.42857142857143%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 15.238095238095239%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 8.6 (sign=1) : Top 5 accuracy: 3.8095238095238098%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 58.0952380952381%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 11.428571428571429%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 90.47619047619048%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 106000:
Heads ablated:            [(7, 9), (9, 11), (6, 5)]
Original logit diff:      4.0057411194
Post ablation logit diff: 4.9847536087
Logit diff % cha

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step107000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 10.952380952380953%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 91.9047619047619%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 58.0952380952381%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.6 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 14.761904761904763%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 69.52380952380952%
Ch

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step108000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 5.714285714285714%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 57.14285714285714%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 12.380952380952381%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 67.61904761904762%
Copy circuit for head 8.6 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Checkpoint 108000:
Heads ablated:            [(6, 5), (7, 9)]
Original lo

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step109000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 90.0%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 9.047619047619047%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 55.714285714285715%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.6 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 11.904761904761903%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 65.71428571428571%
Checkpoint 109000:
Heads ablated:            [(6, 5), (9, 11), (7, 9)]
Original logit diff:      4.1018290520
Post ablation logit diff: 4.6790719032
Logit diff % change:      14.07%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step110000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 67.61904761904762%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 10.952380952380953%
Copy circuit for head 8.6 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 54.285714285714285%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 3.3333333333333335%
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 110000:
Heads ablated:            [(7, 9), (9, 11), (6, 5)]
Original logit diff:      4.4156990051
Post ablation logit diff: 5.0655627251
Logit diff % change:      14.72%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step111000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 90.0%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 8.095238095238095%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 62.38095238095238%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 11.904761904761903%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 70.0%
Copy circuit for head 8.6 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Checkpoint 111000:
Heads ablated:            [(6, 5), (9, 11), (7, 9)]
Original logit diff:      4.5978817940
Post ablation logit diff: 4.7608127594
Logit diff % change:      3.54%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step112000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 89.04761904761904%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 10.952380952380953%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 60.952380952380956%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 12.857142857142856%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 74.28571428571429%
Checkpoint 112000:
Heads ablated:            [(6, 5), (9, 11), (7, 9)]
Original logit diff:      4.3038253784
Post ablation logit diff: 4.4980916977
Logit diff % change:      4.51%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step113000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 81.42857142857143%
Checkpoint 113000:
Heads ablated:            [(7, 9), (9, 4)]
Original logit diff:      4.1878333092
Post ablation logit diff: 3.9840052128
Logit diff % change:      -4.87%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step114000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 13.333333333333334%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 90.47619047619048%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 12.857142857142856%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 66.66666666666666%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 17.61904761904762%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 81.42857142857143%
Checkpoint 114000:
Heads ablated:            [(6, 5), (9, 11), (7, 9), (9, 4)]
Original logit diff:      4.0382838249
Post ablation logit diff: 3.8254237175
Logit diff % change:      -5.27%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step115000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 14.285714285714285%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 83.80952380952381%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 68.0952380952381%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 8.095238095238095%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 21.904761904761905%
Checkpoint 115000:
Heads ablated:            [(6, 5), (9, 4), (7, 9)]
Original logit diff:      4.2476220131
Post ablation logit diff: 4.1571927071
Logit diff % change:      -2.13%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step116000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 19.523809523809526%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 8.095238095238095%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 64.28571428571429%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 84.28571428571429%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 10.952380952380953%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 89.04761904761904%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 14.761904761904763%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 116000:
Heads ablated:            [(7, 9), (9, 4), (9, 11), (6, 5)]
Original logit diff:      4.2377133369
Post ablation logit diff: 3.9852373600
Logit di

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step117000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 85.23809523809524%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 21.904761904761905%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 7.6190476190476195%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 66.19047619047619%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 15.714285714285714%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 15.238095238095239%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 10.952380952380953%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 90.0%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 117000:
Heads ablated:            [(9, 4), (7, 9), (9, 11), (6, 5)]
Original logit diff:     

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step118000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 90.95238095238095%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 15.238095238095239%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 63.33333333333333%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 7.142857142857142%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 20.476190476190474%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 82.38095238095238%
Checkpoint 118000:
Heads ablated:            [(6, 5), (9, 11), (7, 9), (9, 4)]
Original logit diff:      4.8152790070
Post ablation logit diff: 4.0367293358
Logit diff % change:      -16.17%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step119000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 83.80952380952381%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 20.0%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 7.142857142857142%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 60.476190476190474%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 11.904761904761903%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 14.285714285714285%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 10.0%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 90.0%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 119000:
Heads ablated:      

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step120000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 84.76190476190476%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 19.523809523809526%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 11.428571428571429%
Checkpoint 120000:
Heads ablated:            [(9, 4), (7, 9)]
Original logit diff:      4.0286650658
Post ablation logit diff: 3.7592551708
Logit diff % change:      -6.69%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step121000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 11.904761904761903%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 17.61904761904762%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 82.38095238095238%
Checkpoint 121000:
Heads ablated:            [(6, 5), (9, 11), (7, 9), (9, 4)]
Original logit diff:      4.9286441803
Post ablation logit diff: 4.4905133247
Logit diff % change:      -8.89%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step122000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 80.0%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 17.142857142857142%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 11.904761904761903%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 57.61904761904761%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 122000:
Heads ablated:            [(9, 4), (7, 9), (6, 5)]
Original logit diff:      4.4834618568
Post ablation logit diff: 4.2635087967
Logit diff % change:      -4.91%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step123000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 11.428571428571429%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 7.142857142857142%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 12.857142857142856%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 79.52380952380952%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 56.666666666666664%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 6.666666666666667%
Copy circuit for

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step124000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 11.428571428571429%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 92.38095238095238%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 6.666666666666667%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 12.380952380952381%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 52.38095238095239%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 6.190476190476191%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 13.80952380952381%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 79.52380952380952%
C

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step125000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 77.61904761904762%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 16.19047619047619%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 6.190476190476191%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 50.95238095238095%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 10.476190476190476%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 8.571428571428571%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 90.95238095238095%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 11.428571428571429%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 0.4761

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step126000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 78.0952380952381%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 14.761904761904763%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 47.14285714285714%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 12.380952380952381%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 11.428571428571429%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 6.666666666666667%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 126000:
Heads ablated:            [(9, 4), (7, 9), (6, 5)]
Original logit diff:      3.7175040245
Post ablation logit diff: 3.643748760

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step127000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 11.904761904761903%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 15.238095238095239%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 76.66666666666667%
Checkpoint 127000:
Heads ablated:            [(9, 4)]
Original logit diff:      4.0880470276
Post ablation logit diff: 3.6595773697
Logit diff % change:      -10.48%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step128000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 71.42857142857143%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 14.285714285714285%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 42.38095238095238%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 11.428571428571429%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 8.571428571428571%
Copy circuit for head 10.7 (sign=1) : Top 5 accuracy: 3.8095238095238098%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 128000:
Heads ablated:            [(7, 9), (6, 5)]
Original logit diff:      4.1163740158
Post ablation logit diff: 4.5164976120
Logit diff % change:      9.72%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step129000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 11.428571428571429%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 37.61904761904762%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 15.238095238095239%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 70.95238095238095%
Checkpoint 129000:
Heads ablated:            [(6, 5), (7, 9)]
Original logit diff:      4.1987295151
Post ablation logit diff: 4.6413297653
Logit diff % change:      10.54%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step130000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 72.38095238095238%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 13.80952380952381%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 12.380952380952381%
Checkpoint 130000:
Heads ablated:            [(7, 9)]
Original logit diff:      4.1157593727
Post ablation logit diff: 4.4240784645
Logit diff % change:      7.49%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step131000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 11.428571428571429%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 12.380952380952381%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 70.0%
Checkpoint 131000:
Heads ablated:            []
Original logit diff:      3.8637290001
Post ablation logit diff: 3.8637290001
Logit diff % change:      0.00%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step132000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 69.04761904761905%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 11.904761904761903%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 132000:
Heads ablated:            [(7, 9), (6, 5)]
Original logit diff:      3.7537474632
Post ablation logit diff: 3.9796819687
Logit diff % change:      6.02%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step133000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 11.428571428571429%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 24.285714285714285%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 11.428571428571429%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 67.61904761904762%
Checkpoint 133000:
Heads ablated:            [(6, 5)]
Original logit diff:      3.9673488140
Post ablation logit diff: 4.2723121643
Logit diff % change:      7.69%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step134000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 10.952380952380953%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 19.523809523809526%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 12.380952380952381%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 68.57142857142857%
Checkpoint 134000:
Heads ablated:            [(6, 5), (7, 9)]
Original logit diff:      3.4627699852
Post ablation logit diff: 3.8828229904
Logit diff % change:      12.13%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step135000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 11.428571428571429%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 66.66666666666666%
Checkpoint 135000:
Heads ablated:            []
Original logit diff:      3.7677934170
Post ablation logit diff: 3.7677934170
Logit diff % change:      0.00%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step136000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 66.66666666666666%
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 136000:
Heads ablated:            [(6, 5)]
Original logit diff:      3.7300231457
Post ablation logit diff: 4.0226554871
Logit diff % change:      7.85%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step137000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 9.523809523809524%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 14.285714285714285%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 10.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 66.66666666666666%
Checkpoint 137000:
Heads ablated:            [(6, 5), (7, 9)]
Original logit diff:      3.9361040592
Post ablation logit diff: 4.4704632759
Logit diff % change:      13.58%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step138000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 6.7 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 6.190476190476191%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 11.904761904761903%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 9.523809523809524%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 64.76190476190476%
Checkpoint 138000:
Heads ablated:            [(6, 5), (7, 9)]
Original logit diff:      3.7484784126
Post ablation logit diff: 4.2262396812
Logit diff % change:      12.75%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step139000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 7.6190476190476195%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 9.523809523809524%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 66.19047619047619%
Checkpoint 139000:
Heads ablated:            [(6, 5)]
Original logit diff:      3.5742802620
Post ablation logit diff: 3.8744885921
Logit diff % change:      8.40%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Loaded model EleutherAI/pythia-160m-seed1 at step140000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 66.66666666666666%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 9.047619047619047%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 4.285714285714286%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 8.095238095238095%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 6.666666666666667%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 14.285714285714285%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 140000:
Heads ablated:            [(7, 9), (6, 5)]
Original logit diff:      3.9236083031
Post ablation logit diff: 4.4777450562
Logit diff % change:      14.12%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step141000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 62.38095238095238%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 9.047619047619047%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 10.476190476190476%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 141000:
Heads ablated:            [(6, 5)]
Original logit diff:      4.0417561531
Post ablation logit diff: 4.3179554939
Logit diff % change:      6.83%


pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step142000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 7.142857142857142%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 3.8095238095238098%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 4.285714285714286%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 9.047619047619047%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 62.857142857142854%
Checkpoint 142000:
Heads ablated:            [(6, 5), (7, 9)]
Original logit diff:      3.8105905056
Post ablation logit diff: 4.3644423485
Logit d

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-seed1 at step143000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 6.5 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 5.714285714285714%
Copy circuit for head 10.11 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 9.10 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 7.7 (sign=1) : Top 5 accuracy: 3.8095238095238098%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.0 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 9.1 (sign=1) : Top 5 accuracy: 9.047619047619047%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 60.0%
Checkpoint 143000:
Heads ablated:            [(6, 5), (7, 9)]
Original logit diff:      3.9022033215
Post ablation logit diff: 4.2479395866
Logit diff % change:      8.86%


: 

In [9]:
# Show which checkpoint steps are currently present as keys of experiment_metrics.
experiment_metrics.keys()

dict_keys([4000, 5000, 6000, 7000])

## View Results

#### Pythia 160m

In [25]:
# Model whose saved backup metrics are loaded from results/backup/<name>/ and
# whose name is shown in the plot titles in this section.
MODEL_TO_VIEW = "pythia-160m-hiddendropout"

In [26]:
# Load the saved NMH-backup metrics for MODEL_TO_VIEW.
# map_location="cpu" keeps this viewing section usable on machines without a GPU,
# even if the tensors in the file were saved from a CUDA device.
# NOTE(review): torch.load unpickles the file, so only open results produced by a
# trusted run of this project.
experiment_metrics = torch.load(
    f'results/backup/{MODEL_TO_VIEW}/nmh_backup_metrics.pt',
    map_location="cpu",
)

In [27]:
# Inspect which metric names are stored for a single checkpoint (step 4000).
experiment_metrics[4000].keys()

dict_keys(['logit_diff', 'per_head_logit_diffs', 'ablation_targets', 'ablated_logit_diff', 'per_head_ablated_logit_diffs', 'per_head_logit_diff_delta', 'in_circuit_head_delta', 'outside_circuit_head_delta', 'summed_in_circuit_head_delta', 'summed_outside_circuit_head_delta', 'summed_total_head_delta'])

In [28]:
# Express every post-ablation delta as a fraction of the checkpoint's original
# logit diff, so that values are comparable across checkpoints.
summed_in_circuit_head_deltas = {}
summed_outside_circuit_head_deltas = {}
summed_total_head_deltas = {}
per_head_logit_diff_deltas = {}
total_logit_diff_deltas = {}

for checkpoint, checkpoint_metrics in experiment_metrics.items():
    original_logit_diff = checkpoint_metrics["logit_diff"]

    summed_in_circuit_head_deltas[checkpoint] = (
        checkpoint_metrics["summed_in_circuit_head_delta"] / original_logit_diff
    )
    summed_outside_circuit_head_deltas[checkpoint] = (
        checkpoint_metrics["summed_outside_circuit_head_delta"] / original_logit_diff
    )
    summed_total_head_deltas[checkpoint] = (
        checkpoint_metrics["summed_total_head_delta"] / original_logit_diff
    )
    per_head_logit_diff_deltas[checkpoint] = (
        checkpoint_metrics["per_head_logit_diff_delta"] / original_logit_diff
    )
    # Overall change in logit diff caused by the ablation, also as a fraction.
    total_logit_diff_deltas[checkpoint] = (
        checkpoint_metrics['ablated_logit_diff'] - original_logit_diff
    ) / original_logit_diff

In [29]:
# Plot the summed in-circuit head delta for every checkpoint.
fig = px.line(
    x=list(summed_in_circuit_head_deltas.keys()),
    y=list(summed_in_circuit_head_deltas.values()),
    title=f"Summed Post-NMH-Ablation In-Circuit Head Logit Diff Change Over Time ({MODEL_TO_VIEW})",
    labels={'x': 'Checkpoint', 'y': 'Change as % of original logit diff'}
)
# The plotted values are ratios (delta / original logit diff), not percentages.
# Render the ticks as percentages so the axis agrees with its "%" label.
fig.update_yaxes(tickformat=".1%")
fig.show()


In [30]:
# Plot the summed outside-circuit head delta for every checkpoint.
fig = px.line(
    x=list(summed_outside_circuit_head_deltas.keys()),
    y=list(summed_outside_circuit_head_deltas.values()),
    title=f"Summed Post-NMH-Ablation Outside-Circuit Head Attribution Change ({MODEL_TO_VIEW})",
    labels={'x': 'Checkpoint', 'y': 'Change as % of original logit diff'}
)
# The plotted values are ratios (delta / original logit diff), not percentages.
# Render the ticks as percentages so the axis agrees with its "%" label.
fig.update_yaxes(tickformat=".1%")
fig.show()

In [31]:
# Plot the summed delta over all heads for every checkpoint.
fig = px.line(
    x=list(summed_total_head_deltas.keys()),
    y=list(summed_total_head_deltas.values()),
    title=f"Summed Total Post-NMH-Ablation Head Attribution Change ({MODEL_TO_VIEW})",
    labels={'x': 'Checkpoint', 'y': 'Change as % of original logit diff'}
)
# The plotted values are ratios (delta / original logit diff), not percentages.
# Render the ticks as percentages so the axis agrees with its "%" label.
fig.update_yaxes(tickformat=".1%")

fig.show()

In [32]:
# Build two checkpoint-keyed dicts of (layer, head) sets from the loaded metrics:
# checkpoint_nmhs holds the NMHs recorded at each checkpoint, and cumulative_nmhs
# holds the running union of NMHs seen up to and including that checkpoint.
cumulative_nmhs, checkpoint_nmhs = get_past_nmhs_for_checkpoints(experiment_metrics)

In [33]:
# Collect the top 10 heads per checkpoint from the normalised per-head deltas.
# The returned frame carries a 'Previous NMH' flag per row; presumably derived
# from cumulative_nmhs (TODO confirm against utils.backup_analysis.plot_top_heads).
top_backup_heads = plot_top_heads(model_name=MODEL_TO_VIEW, checkpoint_dict=per_head_logit_diff_deltas, cumulative_nmhs=cumulative_nmhs, top_k_per_checkpoint=10)

In [57]:
# Heatmap of the raw (un-normalised) per-head logit diff delta stored for step 143000.
step_143000_head_deltas = experiment_metrics[143000]['per_head_logit_diff_delta']

imshow_p(
    step_143000_head_deltas,
    title="Headwise logit diff contribution, post NMH KO",
    labels=dict(x="Head", y="Layer", color="Logit diff attribution"),
    border=True,
    width=600,
    margin=dict(r=100, l=100),
)

In [58]:
# Inspect which metric names are stored for checkpoint step 143000.
experiment_metrics[143000].keys()

dict_keys(['logit_diff', 'per_head_logit_diffs', 'ablation_targets', 'ablated_logit_diff', 'per_head_ablated_logit_diffs', 'per_head_logit_diff_delta', 'in_circuit_head_delta', 'outside_circuit_head_delta', 'summed_in_circuit_head_delta', 'summed_outside_circuit_head_delta', 'summed_total_head_delta'])

In [59]:
# Keep only the rows flagged as a previous NMH and preview the first 50 of them.
was_previous_nmh = top_backup_heads['Previous NMH'] == True
top_backup_heads[was_previous_nmh].head(50)

Unnamed: 0,Checkpoint,Layer-Head,Layer,Head,Value,Previous NMH,Checkpoint_sum,Value_sum,Previous NMH_sum,Top K
72,20000,Layer 9-Head 4,9,4,0.00246,True,3135000,0.334368,38,True
97,25000,Layer 9-Head 4,9,4,0.006947,True,3135000,0.334368,38,True
121,30000,Layer 9-Head 4,9,4,0.006404,True,3135000,0.334368,38,True
149,35000,Layer 9-Head 4,9,4,0.014621,True,3135000,0.334368,38,True
156,37000,Layer 9-Head 4,9,4,0.01344,True,3135000,0.334368,38,True
171,40000,Layer 9-Head 4,9,4,0.010845,True,3135000,0.334368,38,True
197,45000,Layer 9-Head 4,9,4,0.035425,True,3135000,0.334368,38,True
202,46000,Layer 9-Head 4,9,4,0.033058,True,3135000,0.334368,38,True
207,47000,Layer 9-Head 4,9,4,0.014968,True,3135000,0.334368,38,True
212,48000,Layer 9-Head 4,9,4,0.008163,True,3135000,0.334368,38,True


In [60]:
# Display the per-checkpoint sets of (layer, head) NMHs.
checkpoint_nmhs

{4000: set(),
 5000: set(),
 6000: set(),
 7000: set(),
 8000: {(8, 2)},
 9000: set(),
 10000: {(8, 1), (8, 10)},
 11000: {(10, 7)},
 12000: {(8, 2), (10, 7)},
 13000: {(10, 7)},
 14000: {(8, 2), (10, 7)},
 15000: {(8, 2), (10, 7)},
 16000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 17000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 18000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 19000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 20000: {(8, 2), (10, 7)},
 21000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 22000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 23000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 24000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 25000: {(8, 2), (10, 7)},
 26000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 27000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 28000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 29000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 30000: {(8, 2), (10, 7)},
 31000: {(8, 1), (8, 2), (9, 4), (10, 7)},
 32000: {(8, 1), (8

In [61]:
# Display the running union of (layer, head) NMHs up to each checkpoint.
cumulative_nmhs

{4000: set(),
 5000: set(),
 6000: set(),
 7000: set(),
 8000: {(8, 2)},
 9000: {(8, 2)},
 10000: {(8, 1), (8, 2), (8, 10)},
 11000: {(8, 1), (8, 2), (8, 10), (10, 7)},
 12000: {(8, 1), (8, 2), (8, 10), (10, 7)},
 13000: {(8, 1), (8, 2), (8, 10), (10, 7)},
 14000: {(8, 1), (8, 2), (8, 10), (10, 7)},
 15000: {(8, 1), (8, 2), (8, 10), (10, 7)},
 16000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 17000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 18000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 19000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 20000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 21000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 22000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 23000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 24000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 25000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 26000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 27000: {(8, 1), (8, 2), (8, 10), (9, 4), (10, 7)},
 28000: {(8, 1), (8, 2), (8, 10

In [62]:
# Count the NMHs recorded at each checkpoint and plot that count over time.
nmh_checkpoints = list(checkpoint_nmhs.keys())
nmh_counts = list(map(len, checkpoint_nmhs.values()))

fig = px.line(
    x=nmh_checkpoints,
    y=nmh_counts,
    title=f"Number of NMHs Over Time ({MODEL_TO_VIEW})",
    labels=dict(x='Checkpoint', y='Number of NMHs'),
)
fig.show()