## Setup

### Imports

In [1]:
import os
import json
import glob
import torch
import re
import einops
import pandas as pd
from functools import partial
from torch import Tensor
from torchtyping import TensorType as TT


import plotly.express as px

from utils.data_utils import generate_data_and_caches
from utils.data_processing import (
    load_edge_scores_into_dictionary,
)
from utils.visualization import plot_attention_heads, imshow_p
from utils.backup_analysis import (
    load_model,
    run_iteration,
    process_backup_results,
    get_past_nmhs_for_checkpoints,
    plot_top_heads
)

### Parameters

In [2]:
# Turn off gradient tracking globally for the rest of the session.
# Compute device: use CUDA when torch reports it as available, otherwise the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Baseline configuration (no model variant selected here).
TASK = 'ioi'
PERFORMANCE_METRIC = 'logit_diff'
BASE_MODEL = "pythia-160m"
VARIANT = None
CACHE = "model_cache"
IOI_DATASET_SIZE = 70

# Kept as the final expression so the cell still displays the returned object.
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x7ff7c54634c0>

### Functions

## Experiments

### Experiment Parameters

In [3]:
# Experiment configuration. These assignments replace the values set in the
# earlier "Parameters" cell (notably VARIANT, which was None there).
TASK = 'ioi'
PERFORMANCE_METRIC = 'logit_diff'
BASE_MODEL = "pythia-160m"
VARIANT = "EleutherAI/pythia-160m-alldropout"

# Short model name used to build result paths. When a variant is set, keep only
# the part after the last "/" (the repo name without its organisation prefix);
# this replaces a fixed 11-character slice that was only correct for the
# "EleutherAI/" prefix. Without a variant, fall back to the base model name.
MODEL_SHORTNAME = VARIANT.rsplit("/", 1)[-1] if VARIANT else BASE_MODEL

CACHE = "model_cache"
IOI_DATASET_SIZE = 70

# Passed as `threshold` to run_iteration in the experiment loop.
# NOTE(review): presumably a top-5 copy-score accuracy in percent — confirm in
# utils.backup_analysis.run_iteration.
COPY_SCORE_THRESHOLD = 75.0

### Circuit Data

In [55]:
# Load the stored edge scores for this model/task. The result is used as a
# pandas DataFrame below (it has at least 'checkpoint' and 'edge' columns).
folder_path = f'results/graphs/{MODEL_SHORTNAME}/{TASK}'
df = load_edge_scores_into_dictionary(folder_path)

# Drop everything before step 1000. `.copy()` makes the filtered frame
# independent of the loaded one, so the column assignment below does not write
# into a slice (which pandas flags with SettingWithCopyWarning).
df = df[df['checkpoint'] >= 1000].copy()

# Split each "source->target" edge label into its two endpoints. n=1 splits on
# the first arrow only, so a label can never expand into more than two columns.
df[['source', 'target']] = df['edge'].str.split('->', n=1, expand=True)

# Cell output: number of distinct target nodes.
len(df['target'].unique())

Processing file 1/143: results/graphs/pythia-160m-alldropout/ioi/57000.json
Processing file 2/143: results/graphs/pythia-160m-alldropout/ioi/141000.json
Processing file 3/143: results/graphs/pythia-160m-alldropout/ioi/95000.json
Processing file 4/143: results/graphs/pythia-160m-alldropout/ioi/107000.json
Processing file 5/143: results/graphs/pythia-160m-alldropout/ioi/34000.json
Processing file 6/143: results/graphs/pythia-160m-alldropout/ioi/6000.json
Processing file 7/143: results/graphs/pythia-160m-alldropout/ioi/37000.json
Processing file 8/143: results/graphs/pythia-160m-alldropout/ioi/39000.json
Processing file 9/143: results/graphs/pythia-160m-alldropout/ioi/104000.json
Processing file 10/143: results/graphs/pythia-160m-alldropout/ioi/59000.json
Processing file 11/143: results/graphs/pythia-160m-alldropout/ioi/67000.json
Processing file 12/143: results/graphs/pythia-160m-alldropout/ioi/111000.json
Processing file 13/143: results/graphs/pythia-160m-alldropout/ioi/76000.json
Proce

445

### Dataset Setup

In [56]:
# Load the model at step 143000 (the last step visited by the experiment loop
# below) and use it to build the datasets.
initial_model = load_model(BASE_MODEL, VARIANT, 143000, CACHE, device)

# Take the dataset size from the configuration constant instead of repeating
# the literal 70, so the two cannot drift apart.
size = IOI_DATASET_SIZE
ioi_dataset, abc_dataset = generate_data_and_caches(initial_model, size, verbose=True)

Loaded model EleutherAI/pythia-160m-alldropout at step143000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer


In [57]:
# NOTE(review): disabled visualization cell, kept for reference only.
# `per_head_ablated_logit_diffs` is not defined in any cell visible above, so
# this call cannot simply be uncommented — TODO: restore its input or remove it.
# imshow_p(
#     per_head_ablated_logit_diffs,
#     title="Headwise logit diff contribution, post NMH KO",
#     labels={"x": "Head", "y": "Layer", "color": "Logit diff attribution"},
#     #coloraxis=dict(colorbar_ticksuffix = "%"),
#     border=True,
#     width=600,
#     margin={"r": 100, "l": 100}
# )

### Run Experiment

In [58]:
# Results accumulator: each helper below receives this dict and its return
# value is bound back to the same name.
experiment_metrics = {}

# Make sure the output directory exists before the sweep starts.
os.makedirs(f'results/backup/{MODEL_SHORTNAME}', exist_ok=True)

# Sweep checkpoints 4000, 5000, ..., 143000.
for checkpoint in range(4000, 144000, 1000):
    experiment_metrics = run_iteration(
        BASE_MODEL,
        VARIANT,
        df,
        checkpoint=checkpoint,
        dataset=ioi_dataset,
        experiment_metrics=experiment_metrics,
        threshold=COPY_SCORE_THRESHOLD,
    )
    experiment_metrics = process_backup_results(df, checkpoint, experiment_metrics)

    # Re-save the accumulated metrics after every checkpoint (PyTorch format),
    # overwriting the same file each time.
    torch.save(experiment_metrics, f'results/backup/{MODEL_SHORTNAME}/nmh_backup_metrics.pt')

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step4000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 47.61904761904761%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 88.57142857142857%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 82.85714285714286%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 92.85714285714286%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 4000:
Heads ablated:            [(9, 11), (9, 4), (7, 6), (7, 10), (8, 9)]
Original logit diff:      0.7503997087
Post ablation logit diff: 0.8393040299
Logit diff % change:      11.85%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step5000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Checkpoint 5000:
Heads ablated:            []
Original logit diff:      1.5525176525
Post ablation logit diff: 1.5525176525
Logit diff % change:      0.00%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step6000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 89.04761904761904%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 86.19047619047619%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 57.14285714285714%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 93.80952380952381%
Checkpoint 6000:
Heads ablated:            [(9, 4), (8, 9), (7, 6), (9, 11)]
Original logit diff:      1.7438025475
Post ablation logit diff: 1.5554083586
Logit diff % change:      -10.80%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step7000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 52.38095238095239%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 70.95238095238095%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 4.285714285714286%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 66.66666666666666%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 73.80952380952381%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 7000:
Heads ablated:            [(9, 11), (7, 10), (9, 4), (8, 9)]
Original logit diff:      1.6021207571
Post ablation logit diff: 1.9914293289
Logit diff % change:      24.30%
Loaded model EleutherAI/pythi

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 70.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 68.57142857142857%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 56.19047619047619%
Checkpoint 8000:
Heads ablated:            [(9, 4), (8, 9), (9, 11)]
Original logit diff:      2.1946053505
Post ablation logit diff: 2.3080902100
Logit diff % change:      5.17%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step9000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 66.66666666666666%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 61.904761904761905%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 79.04761904761905%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 73.33333333333333%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 6.666666666666667%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 92.85714285714286%
Checkpoint 9000:
Heads ablated:            [(8, 9), (9, 11), (9, 9), (9, 4)]
Original logit diff:      2.2502546310
Post ablation logit diff: 2.4143366814
Logit diff % change:      7.29%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step10000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 78.57142857142857%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 58.0952380952381%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 76.19047619047619%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 63.33333333333333%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 92.85714285714286%
Checkpoint 10000:
Heads ablated:            [(9, 9), (9, 11), (9, 8), (8, 9), (9, 4)]
Original logit diff:      2.0763607025
Post ablation logit diff: 2.7305092812
Logit diff % change:      31.50%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step11000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 61.42857142857143%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 77.61904761904762%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 58.57142857142858%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.71428571428572%
Checkpoint 11000:
Heads ablated:            [(9, 11), (9, 8), (8, 9), (9, 4)]
Original logit diff:      2.8749055862
Post ablation logit diff: 3.3432726860
Logit diff % change:      16.29%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step12000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 61.42857142857143%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 82.85714285714286%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 78.0952380952381%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 58.57142857142858%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 12000:
Heads ablated:            [(9, 11), (9, 9), (9, 8), (8, 1), (8, 9)]
Original logit diff:      2.9505121708
Post ablation logit diff: 3.0151007175
Logit diff % change:      2.19%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step13000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 67.61904761904762%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 80.0%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 33.80952380952381%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 18.095238095238095%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 11.428571428571429%
Checkpoint 13000:
Heads ablated:            [(9, 11), (9, 8), (8, 9)]
Original logit diff:      2.6273641586
Post ablation logit diff: 2.9601230621
Logit diff % change:      12.67%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step14000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 20.952380952380953%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 38.57142857142858%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 65.71428571428571%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 80.0%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 14.285714285714285%
Checkpoint 14000:
Heads ablated:            [(8, 9), (9, 4), (9, 11), (9, 8)]
Original logit diff:      2.6580519676
Post ablation logit diff: 3.2050671577
Logit diff % change:      20.58%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step15000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 23.333333333333332%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 35.23809523809524%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 14.285714285714285%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 80.95238095238095%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 67.14285714285714%
Checkpoint 15000:
Heads ablated:            [(9, 4), (8, 9), (8, 2), (9, 8), (9, 11)]
Original logit diff:      2.7998774052
Post ablation logit diff: 3.5529823303
Logit diff % change:      26.90%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step16000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 79.52380952380952%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 79.04761904761905%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 70.0%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 18.095238095238095%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 90.95238095238095%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 26.190476190476193%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 21.428571428571427%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 16000:
Heads ablated:            [(9, 8), (9, 9), (9, 11), (9, 4), (8, 9)]
Original logit diff:      2.5674970150
Post ablation logit diff: 3.1832628250
Logit diff % change:      23.98%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step17000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 22.380952380952383%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 73.33333333333333%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 81.9047619047619%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 20.952380952380953%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 23.809523809523807%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 94.28571428571428%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 16.19047619047619%
Checkpoint 17000:
Heads ablated:            [(9, 11), (9, 8), (8, 2), (8, 9), (9, 4)]
Original logit diff:      3.2116754055
Post ablation logi

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step18000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 13.80952380952381%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 28.095238095238095%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 26.666666666666668%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 80.47619047619048%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 73.80952380952381%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 71.9047619047619%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 22.857142857142858%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 99.52380952380952%
Checkpoint 18000:
Heads ablated:            [(8, 9), (8, 2), (9, 8), (9, 11)]
Original logit diff:      3.0395021439
Post ablation logit diff: 

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step19000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 19.523809523809526%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 96.19047619047619%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 32.857142857142854%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 81.42857142857143%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 17.61904761904762%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 73.80952380952381%
Checkpoint 19000:
Heads ablated:            [(9, 4), (8, 9), (8, 2), (9, 8), (9, 11)]
Original logit diff:      3.0793039799
Post ablation logit diff: 3.6368265152
Logit diff % change:      18.11%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step20000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 24.285714285714285%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 71.42857142857143%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 84.28571428571429%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 17.61904761904762%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 14.761904761904763%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 30.476190476190478%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 96.19047619047619%
Checkpoin

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step21000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 24.761904761904763%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 72.85714285714285%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 78.0952380952381%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 86.19047619047619%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 17.61904761904762%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 19.047619047619047%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 30.0%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Checkpoint 21000:
Heads ablated:            [(9, 11), (9, 9), (9, 8), (8, 2), (8, 9),

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step22000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 72.38095238095238%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 86.19047619047619%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 17.61904761904762%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 96.19047619047619%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 31.428571428571427%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 96.66666666666667%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 18.095238095238095%
Checkpoint 22000:
Heads ablated:            [(9, 11), (9, 8), (8, 1), (8, 2), (8, 9), (9, 4)]
Original logit diff:      3.3527474403
Post ablati

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step23000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 23.333333333333332%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 72.38095238095238%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 74.28571428571429%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 81.9047619047619%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 9.047619047619047%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 98.09523809523809%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 36.19047619047619%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 17.61904761904762%
Checkpoint 23000:
Heads ablated:            [(9, 11), (9, 8), (8, 1), (8,

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step24000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 97.14285714285714%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 38.095238095238095%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 97.14285714285714%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 98.09523809523809%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 11.428571428571429%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 18.571428571428573%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 88.57142857142857%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 71.42857142857143%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 31.428571428571427%
Checkpoi

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step25000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 29.04761904761905%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 70.0%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 81.42857142857143%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 86.19047619047619%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 13.80952380952381%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 98.09523809523809%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 33.80952380952381%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 97.14285714285714%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 18.571428571428573%
Checkpoint 25000:
Heads a

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step26000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 22.380952380952383%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 97.14285714285714%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 40.476190476190474%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 8.571428571428571%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 87.14285714285714%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 74.28571428571429%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 28.095238095238095%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 100.0%
Checkpoin

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step27000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 43.80952380952381%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 8.571428571428571%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 23.809523809523807%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 85.23809523809524%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 77.61904761904762%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 34.76190476190476%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 71.9047619047619%
Checkpoint 27000:
Heads ablated:            [(9, 4), (8, 9), (9, 8), (9, 9), (9, 11)]
Original logit diff:      3.6870427132
Post ablation logit 

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step28000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 44.761904761904766%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 79.04761904761905%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 35.23809523809524%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 77.61904761904762%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 87.61904761904762%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 25.71428571428571%
Copy circuit for head 9

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step29000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 35.23809523809524%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 76.66666666666667%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 87.14285714285714%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 97.14285714285714%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 46.666666666666664%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 25.71428571428571%
Checkpoint 29000:
Heads ablated:            [(10, 10), (9, 11), (9, 8), (8, 1), (8, 

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step30000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 24.761904761904763%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 47.14285714285714%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 87.14285714285714%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 74.76190476190476%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 7.6190476190476195%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 81.9047619047619%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 37.61904761904762%
Checkpoint 3

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step31000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 6.190476190476191%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 40.476190476190474%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 80.95238095238095%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 73.80952380952381%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 96.19047619047619%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 30.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 88.09523809523809%
Copy circuit for head 8.8 (sign=1) :

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step32000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 34.76190476190476%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 80.0%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 75.23809523809524%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 87.14285714285714%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 97.14285714285714%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 55.714285714285715%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 32.38095238095238%
Checkpoint 32000:
Heads a

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step33000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 50.95238095238095%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 97.14285714285714%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 32.857142857142854%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 84.28571428571429%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 75.71428571428571%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 78.0952380952381%
Copy circuit 

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 52.38095238095239%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 31.428571428571427%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 85.71428571428571%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 75.23809523809524%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 79.52380952380952%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 39.523809523809526%
Checkpoint 34000:
Heads ablated:            [(

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 40.476190476190474%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 77.61904761904762%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 73.80952380952381%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 84.28571428571429%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 5.238095238095238%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 52.85714285714286%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 94.76190476190476%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 31.428571428571427%
Checkpoint 35000:
Heads ablated:            [(10, 10), (9, 11), (9, 8), (8, 1), (8, 2), (8, 9), (9, 4)]
Ori

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 94.28571428571428%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 49.523809523809526%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 3.8095238095238098%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 31.428571428571427%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 86.66666666666667%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 74.28571428571429%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 41.42857142857143%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 79.04761904761905%
Checkpoint 36000:
Heads ablated:            [

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step37000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 47.14285714285714%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 80.47619047619048%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 77.61904761904762%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 86.66666666666667%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 57.14285714285714%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 94.76190476190476%
Checkpoint 37000:
Heads ablated:            [(10, 10), (9, 11), (9, 9), (9, 8), (8, 1

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step38000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 47.61904761904761%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 80.95238095238095%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 75.71428571428571%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 88.09523809523809%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 56.666666666666664%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 94.28571428571428%
Checkpoint 38000:
Heads ablated:            [(10, 10), (9, 9), (9, 8), (8, 1), (8, 2), (8, 9), (9, 4)]
Original logit diff:      4.5856761932
Po

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step39000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 59.04761904761905%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 4.285714285714286%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 72.85714285714285%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 86.19047619047619%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 48.095238095238095%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 39000:
Heads ablated:            [(9, 4), (8, 9), (8, 2), (8, 1), (9, 8),

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 46.666666666666664%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 83.33333333333334%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 71.9047619047619%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 87.14285714285714%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 60.476190476190474%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 93.80952380952381%
Checkpoint 40000:
Heads ablated:            [(10, 10), (9, 8), (8, 1), (8, 2), (8, 9), (9, 4)]
Original logit diff:      4.2882342339
Post ablation logit diff: 4.9187889099
Logit diff % change:      14.70%
Loaded model EleutherAI/pythia-160m

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 49.523809523809526%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 82.38095238095238%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 73.80952380952381%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 86.66666666666667%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 62.857142857142854%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 94.28571428571428%
Checkpoint 41000:
Heads ablated:            [(10, 10), (9, 8), (8, 2), (8, 9), (9, 4)]
Original logit diff:      4.5821361542
Post ablation logit diff: 5.2278418541
Logit diff % change:      14.09%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step42000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 49.047619047619044%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 78.0952380952381%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 75.23809523809524%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 89.04761904761904%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 57.61904761904761%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 33.33333333333333%
Checkpoint 42000:
Heads ablated:            [(10, 10), (9, 9), (9, 8), (8, 2), (8, 9), (9, 4)]
Original logit diff:      4.4717941284
Post ablatio

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step43000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 49.523809523809526%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 80.95238095238095%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 74.28571428571429%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 87.14285714285714%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 58.0952380952381%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 94.76190476190476%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 37.142857142857146%
Checkpoint 43000:
Heads ablated:            [(10, 10), (9, 8), (8, 1), (8

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step44000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 93.80952380952381%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 62.857142857142854%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 84.28571428571429%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 75.23809523809524%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 80.95238095238095%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 50.95238095238095%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 44000:
Heads ablated:            [(9, 4), (8, 9), (8, 2), (8, 1), (9, 8), (9, 9), (10, 10), (9, 11)]
Original logit diff:      4.3092579842
Post 

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step45000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 51.42857142857142%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 79.04761904761905%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 74.76190476190476%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 86.19047619047619%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 57.61904761904761%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 93.80952380952381%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 38.095238095238095%
Checkpoint 4

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step46000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 61.42857142857143%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 79.04761904761905%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 50.95238095238095%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 78.57142857142857%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 86.19047619047619%
Checkpoint 46000:
Heads ablated:            [(8, 9), (8, 2), (8, 1), (9, 4), (10, 10)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step47000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 86.19047619047619%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 78.0952380952381%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 55.714285714285715%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 79.04761904761905%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 94.28571428571428%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 4.285714285714286%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 63.33333333333333%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 47000:
Heads ablated:            [(9, 8), (9, 9), (9, 11), (10, 10), (9, 4

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step48000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 63.8095238095238%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 3.8095238095238098%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 91.9047619047619%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 82.85714285714286%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 51.90476190476191%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 75.71428571428571%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 85.71428571428571%
Checkpoint 48000:
Heads ablated:            [(8, 9), (8, 2), (9, 4), (10, 10), (9, 9), (9, 8)]
Original logit diff:      4.1561117172
Post ablation logit diff: 4.8135166168
Logit diff % change:      15.82%
Loaded model 

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 4.285714285714286%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 57.14285714285714%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 83.33333333333334%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 72.85714285714285%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 84.28571428571429%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 63.8095238095238%
Checkpoint 49000:
Heads ablated:            [(10, 10), (9, 11), (8, 1), (8, 2), (8, 9), (9, 4), (9, 8)]
Original logit diff:      4.7086668015
Post ablation logit diff: 5.2478537560


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step50000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 56.19047619047619%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 80.47619047619048%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 75.23809523809524%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 86.66666666666667%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 98.09523809523809%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 63.8095238095238%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 93.80952380952381%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 39.04761904761905%
Checkpoint 500

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step51000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 69.04761904761905%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 98.09523809523809%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 86.66666666666667%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 80.95238095238095%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 73.80952380952381%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 54.285714285714285%
Checkpoint 51000:
Heads ablated:            [(9, 4), (8, 9), (8, 2), (8,

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step52000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 66.19047619047619%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 93.80952380952381%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 79.52380952380952%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 60.476190476190474%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 85.23809523809524%
Checkpoint 52000:
Heads ablated:            [(8, 9), (8, 2), (8, 1), (9, 4), (10, 10), (9, 8)]
Original logit diff:      4.3767466545
Post ablation logit diff: 5.6696691513
Logit diff % change:      29.54%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step53000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 66.19047619047619%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 91.9047619047619%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 37.142857142857146%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 83.80952380952381%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 60.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 87.14285714285714%
Checkpoint 53000:
Heads ablated:            [(8, 9), (8, 2), (8, 1), (9, 4), (10, 10), (9, 8)]
Original logit diff:      4.0848083496
Post ablation logit diff

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step54000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 62.38095238095238%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 82.85714285714286%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 75.23809523809524%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 84.28571428571429%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 37.61904761904762%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 4.285714285714286%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 98.09523809523809%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 67.14285714285714%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 92.38095238095238%
C

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step55000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 59.523809523809526%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 82.38095238095238%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 74.76190476190476%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 84.76190476190476%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 66.66666666666666%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 94.76190476190476%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 39.04761904761905%
Checkpoint 55000:
Heads ablated:            [(9, 11), (10, 10), (9, 8), (8, 2), (8, 9), (9, 4)]
Original logit diff:      4.4829554558
Post ablation logit d

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step56000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 63.33333333333333%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 82.38095238095238%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 78.57142857142857%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 85.23809523809524%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 4.285714285714286%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 66.19047619047619%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 92.85714285714286%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 40.0%
Checkpoint 560

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step57000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 63.33333333333333%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 80.0%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 75.71428571428571%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 85.23809523809524%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 68.0952380952381%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 94.28571428571428%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 38.57142857142858%
Checkpoint 57000:
Heads ablated:            [(9, 11), (10, 10), (9, 9), (9, 8), (8, 1), (8, 2), (8,

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step58000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 41.904761904761905%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 92.85714285714286%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 67.61904761904762%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 84.76190476190476%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 74.76190476190476%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 80.47619047619048%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 64.76190476190476%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 99.04761904761905%
Checkpoint 58000:
Heads ablated:            [(9, 4), (8, 9), (8, 2), (8, 1), (9, 8),

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step59000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 82.38095238095238%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 76.19047619047619%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 66.19047619047619%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 80.95238095238095%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 38.57142857142858%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 92.85714285714286%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 67.61904761904762%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 59000:
Heads ablated:            [(9, 8), (9, 9), (9, 11), (10, 10), (9, 4), (8, 2), (8, 9)]
Original logit diff:      4.2955198288
Po

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step60000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 92.85714285714286%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 67.14285714285714%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 83.80952380952381%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 76.66666666666667%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 81.9047619047619%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 70.0%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 60000:
Heads ablated:            [(9, 4), (8, 9), (8, 2), (8, 1), (9, 8), (9, 9), (10, 10), (9, 11)]
Original logit diff:      4.5323991776
Post ablation logit 

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step61000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 66.66666666666666%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 80.95238095238095%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 71.9047619047619%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 87.61904761904762%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 41.904761904761905%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 68.57142857142857%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 91.9047619047619%
Checkpoint 61000:
Heads ablated:            [(9, 11), (10, 10), (9, 8), (8

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 66.66666666666666%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 97.14285714285714%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 93.80952380952381%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 40.476190476190474%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 83.33333333333334%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 73.33333333333333%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 86.19047619047619%
Checkpoint 62000:
Heads ablated:            [(8, 9), (8, 2), (8, 1), (9, 4), (10, 10), (9, 11), (9, 8)]
Original logit diff:      4.3011960983
Post ablation logit diff: 4.7344746590
Logit diff % change:      10.07%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step63000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 73.33333333333333%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 81.42857142857143%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 83.80952380952381%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 66.19047619047619%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 41.42857142857143%
Checkpoint 63000:
Heads ablated:            [(10, 10), (9, 11), (9, 8), (8, 1), (8, 2), (8, 9), (9, 4)]
Original logit diff:      4.3011336327
Post ablation 

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step64000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 40.95238095238095%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 66.66666666666666%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 98.09523809523809%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 82.85714285714286%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 75.23809523809524%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 83.33333333333334%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 72.38095238095238%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 99.52380952380952%
Checkpoint 64000:
Heads ablated:            [(9, 4), (8, 9), (8, 2), (8, 

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step65000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 73.80952380952381%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 85.71428571428571%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 72.38095238095238%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 85.23809523809524%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 60.476190476190474%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 94.28571428571428%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 40.476190476190474%
Checkpoint 65000:
Heads ablated:            [(10, 10), (9, 11), (9, 8),

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step66000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 42.38095238095238%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 94.76190476190476%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 61.42857142857143%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 85.71428571428571%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 72.38095238095238%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 86.19047619047619%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 71.42857142857143%
Checkpoint 66000:
Heads ablated:            [(9, 4), (8, 9), (8, 2), (9, 8), (9, 11), (10, 10)]
Original logit diff:      3.9939069748
Post ablat

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step67000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 73.33333333333333%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 81.42857142857143%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 86.19047619047619%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 59.04761904761905%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 40.476190476190474%
Checkpoint 67000:
Heads ablated:            [(10, 10), (9, 11), (9, 8), (8, 1), (8, 2), (8, 9)]
Original logit diff:      4.3274483681
Post ablation logit diff: 5.3949599266
Logit diff % change:      24.67%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step68000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 39.523809523809526%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 60.0%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 87.61904761904762%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 72.38095238095238%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 81.9047619047619%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 73.33333333333333%
Checkpoint 68000:
Heads ablated:            [(9, 4), (8, 9), (8, 2), (8, 1), (9, 8), (

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 41.42857142857143%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 59.04761904761905%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 84.76190476190476%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 76.19047619047619%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 83.33333333333334%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 72.85714285714285%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 99.52380952380952%
Checkpoint 69000:
Heads ablated:            [(9, 4), (8, 9), (8, 1), (9, 8), (9, 9), (10, 10), (9, 11)]
Original logit diff:      4.3300323486
Post ablation logit diff: 4.4531407356
Logit diff % change:      2.84%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step70000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 75.71428571428571%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 85.71428571428571%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 74.76190476190476%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 87.14285714285714%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 96.66666666666667%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 57.14285714285714%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 92.38095238095238%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 42.38095238095238%
Checkpoint 70000:
Heads ablated:            [(10, 9), (10, 10), (9, 11), 

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 74.28571428571429%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 83.80952380952381%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 87.61904761904762%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 42.857142857142854%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 96.66666666666667%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 60.952380952380956%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 92.85714285714286%
Checkpoint 71000:
Heads ablated:            [(9, 11), (10, 10), (9, 8), (8, 1), (8, 2), (8, 9), (9, 4)]
Original logit diff:      3.9566597939
Post ablation logit diff: 4.3486576080
Logit diff % change:      9.91%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step72000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 45.23809523809524%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 57.61904761904761%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 98.09523809523809%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 86.19047619047619%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 76.66666666666667%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 84.28571428571429%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 74.76190476190476%
Copy circuit for head 10.2 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 99.04761904761905%

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step73000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 77.14285714285715%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 86.66666666666667%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 78.0952380952381%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 85.71428571428571%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 96.19047619047619%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 59.04761904761905%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 92.38095238095238%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 44.285714285714285%
Checkpoint 73000:
Heads ablated:            [(10, 9), (10, 10), (9, 11), (9, 9), (9, 8), (8, 1), (8, 9), (9, 4)]
Original logit diff:

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 79.52380952380952%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 87.14285714285714%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 87.61904761904762%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 58.0952380952381%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 90.47619047619048%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 48.57142857142857%
Checkpoint 74000:
Heads ablated:            [(10, 9), (10, 10), (9, 11), (9, 8), (8, 1), (8, 2), (8, 9), (9, 4)]
Original logit diff:      3.9810400009
Post ablation logit diff: 4.8340358734
Logit diff % change:      21.43%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step75000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 74.76190476190476%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 74.28571428571429%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 84.76190476190476%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 97.14285714285714%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 57.61904761904761%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 92.85714285714286%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 49.047619047619044%
Checkpoint 75000:
Heads ablated:            [(9, 11), (9, 8), (8, 1), (8, 2), (8, 9), (9, 4)]
Original logit diff:      3.9297153950
Post ablation

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step76000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 74.28571428571429%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 75.23809523809524%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 84.76190476190476%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 86.19047619047619%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 46.19047619047619%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 96.19047619047619%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 58.0952380952381%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 92.38095238095238%
Checkpoint 76000:
Heads ablated:            [(9, 11), (10, 9), (10, 10), (

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step77000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 93.80952380952381%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 58.0952380952381%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 96.66666666666667%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 86.66666666666667%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 86.19047619047619%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 74.76190476190476%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 79.04761904761905%
Checkpoint 77000:
Heads ablated:            [(9, 4), (8, 9), (8, 2), (8, 1), (9, 8), (10, 10), (9, 11), (10, 9)]
Original logit diff:      4.09021

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 93.80952380952381%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 89.04761904761904%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 76.66666666666667%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 79.04761904761905%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 84.76190476190476%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 97.14285714285714%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 60.476190476190474%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 78000:
Heads ablated:            [(9, 4), (9, 8), (9, 9), (9, 11), (10, 9), (10, 10), (8, 1), (8, 9)]
Original logit diff:      4.1592407227
Post ablation logit diff: 4.5878028870
Logit diff % change:      10.30%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step79000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 94.28571428571428%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 47.14285714285714%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 86.66666666666667%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 77.61904761904762%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 98.09523809523809%
Copy circuit for head 10.9 (sign=1) : Top 5 accuracy: 76.19047619047619%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 82.38095238095238%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 96.19047619047619%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 79000:
Heads ablated:            [(9, 4), (9, 8), (9, 9), (9, 11), (10, 9), (10, 10), (8, 1), (8, 2), (8, 9)]
Original logit diff:    

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step80000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 61.42857142857143%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 48.57142857142857%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 80.47619047619048%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 97.14285714285714%
Checkpoint 80000:
Heads ablated:            [(8, 9), (8, 2), (9, 8), (9, 11)]
Original logit diff:      3.8981869221
Post ablation logit diff: 4.9297637939
Logit diff % change:      26.46%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step81000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 90.47619047619048%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 60.952380952380956%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 96.19047619047619%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 55.23809523809524%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 80.95238095238095%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 74.28571428571429%
Checkpoint 81000:
Heads ablated:            [(9, 4), (8, 9), (8, 2), (8, 1), (9, 8), (9, 11)]
Original logit diff:      3.7783634663
Post ablation logit diff: 4.1968059540
Logit diff % change:      11.07%
Loaded model E

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 88.57142857142857%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 62.38095238095238%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 96.19047619047619%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 66.66666666666666%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 83.33333333333334%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 82.85714285714286%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 97.14285714285714%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 71.9047619047619%
Checkpoint 82000:
Heads ablated:            [(9, 4), (8, 9), (8, 2), (8, 1), (9, 8), (10, 10), (9, 11)]
Original logit diff:      3.8537945747
Post ablation logit diff: 4.

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step83000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 64.76190476190476%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 90.47619047619048%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 75.71428571428571%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 4.285714285714286%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 94.76190476190476%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 70.95238095238095%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 88.09523809523809%
Checkpoint 83000:
Heads ablated:            [(8, 9), (8, 2), (8, 1), (9, 4), (9, 6), (9, 11), (9, 8)]
Original logit diff:      3.9027187824
Post ab

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 96.66666666666667%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 4.761904761904762%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 88.57142857142857%
Checkpoint 84000:
Heads ablated:            [(9, 11), (9, 8), (8, 1), (8, 2), (8, 9), (9, 4)]
Original logit diff:      3.7948217392
Post ablation logit diff: 3.7924461365
Logit diff % change:      -0.06%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step85000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 98.09523809523809%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 55.23809523809524%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 90.95238095238095%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 88.09523809523809%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 83.33333333333334%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 2.380952380952381%
Checkpoint 85000:
Heads ablated:            [(9, 11), (9, 8), (8, 1), (8, 2), (8, 9), (9, 4), (9, 6)]
Original logit diff:      3.6635603905
Post ablation logit diff: 3.2390737534
Logit diff % change:      -1

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 96.66666666666667%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 51.90476190476191%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 90.0%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 3.8095238095238098%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 85.23809523809524%
Checkpoint 86000:
Heads ablated:            [(9, 11), (9, 8), (8, 1), (8, 9), (9, 4)]
Original logit diff:      3.8036525249
Post ablation logit diff: 4.1245656013
Logit diff % change:      8.44%
Loaded model EleutherAI/pythia-160m-alldropout at step87000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 3.3333333333333335%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 90.95238095238095%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 96.66666666666667%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 59.523809523809526%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 86.66666666666667%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 87000:
Heads ablated:            [(9, 8), (9, 11), (9, 6), (8, 1), (8, 9)]
Original logit diff:      3.4856913090
Post ablation logit diff: 3.3863351345
Logit diff % change:      -2.85%
Loaded model EleutherAI/pythia-160m-alldropout at step88000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 96.66666666666667%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 86.19047619047619%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 78.0952380952381%
Checkpoint 88000:
Heads ablated:            [(9, 11), (9, 8), (9, 6), (8, 1), (8, 2), (8, 9), (9, 4)]
Original logit diff:      3.8774125576
Post ablation logit diff: 3.2408926487
Logit diff % change:      -16.42%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step89000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 74.28571428571429%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 94.76190476190476%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 45.714285714285715%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 95.71428571428572%
Checkpoint 89000:
Heads ablated:            [(9, 6), (8, 9), (8, 2), (8, 1), (9, 8), (9, 11)]
Original logit diff:      3.6312358379
Post ablation logit diff: 3.1907584667
Logit diff % change:      -12.13%
Loaded model EleutherAI/pythia-160m-alldropout at step90000; now loading 

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 94.76190476190476%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 58.57142857142858%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 53.333333333333336%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 71.42857142857143%
Checkpoint 90000:
Heads ablated:            [(9, 11), (9, 8), (8, 1), (8, 2), (8, 9)]
Original logit diff:      4.0732855797
Post ablation logit diff: 3.8399069309
Logit diff % change:      -5.73%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step91000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 79.52380952380952%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 91.42857142857143%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 62.857142857142854%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 96.19047619047619%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 67.61904761904762%
Checkpoint 91000:
Heads ablated:            [(9, 4), (8, 9), (8, 1), (9, 6), (9, 8), (9, 11)]
Original logit diff:      3.6509788036
Post ablation logit diff: 3.2772970200
Logit diff % change:      -10.24%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step92000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 75.71428571428571%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 63.8095238095238%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 63.33333333333333%
Checkpoint 92000:
Heads ablated:            [(9, 4), (8, 9), (8, 1), (9, 8), (9, 11)]
Original logit diff:      3.7650799751
Post ablation logit diff: 3.8739407063
Logit diff % change:      2.89%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step93000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 62.857142857142854%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 94.28571428571428%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 64.76190476190476%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 93.80952380952381%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 42.857142857142854%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 76.66666666666667%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 92.38095238095238%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 3.8095238095238098%
Checkpoint 93000:
Heads ablated:            [(9, 11), (9, 8), (8, 1), (8, 9), (9, 4), (9, 6)]
Original logit diff:      3.851083040

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step94000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 94.28571428571428%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 59.04761904761905%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 63.33333333333333%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 94.76190476190476%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 92.85714285714286%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 46.19047619047619%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 84.28571428571429%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 97.14285714285714%
Copy circuit for head 8.4 (sign=1) : Top 5 accuracy: 30.952380952380953%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 52.38095238095239%
Copy circuit for head 8.9 (sign=1) : Top 5 accur

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step95000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 63.8095238095238%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 66.66666666666666%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 75.71428571428571%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 95.71428571428572%
Checkpoint 95000:
Heads ablated:            [(9, 11), (8, 1), (8, 2), (8, 9), (9, 4), (9, 8)]
Original logit diff:      3.8157775402
Post ablation logit diff: 3.7719552517
Logit diff % change:      -1.15%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step96000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 93.80952380952381%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 62.857142857142854%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 66.19047619047619%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 94.76190476190476%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 74.76190476190476%
Checkpoint 96000:
Heads ablated:            [(9, 11), (9, 8), (8, 1), (8, 2), (8, 9)]
Original logit diff:      3.3174202442
Post ablation logit diff: 3.4195487499
Logit diff % change:      3.08%
Loaded model EleutherAI/pythia-160m-alldropout at step97000; now loading into Hoo

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 76.19047619047619%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 66.66666666666666%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 68.0952380952381%
Checkpoint 97000:
Heads ablated:            [(9, 4), (8, 9), (8, 2), (8, 1), (9, 6), (9, 8)]
Original logit diff:      3.5353643894
Post ablation logit diff: 2.9611625671
Logit diff % change:      -16.24%
Loaded model EleutherAI/pythia-160m-alldropout at step98000; now loading into HookedTransformer


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 86.66666666666667%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 63.8095238095238%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 69.52380952380952%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 94.76190476190476%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 75.71428571428571%
Checkpoint 98000:
Heads ablated:            [(9, 11), (9, 8), (9, 6), (8, 1), (8, 2), (8, 9), (9, 4)]
Original logit diff:      3.7724151611
Post ablation logit diff: 2.7300455570
Logit diff % change:      -27.63%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step99000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 85.71428571428571%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 58.57142857142858%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 66.66666666666666%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 74.76190476190476%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 94.28571428571428%
Checkpoint 99000:
Heads ablated:            [(9, 11), (9, 8), (8, 1), (8, 2), (8, 9), (9, 6)]
Original logit diff:      3.8813066483
Post ablation logit diff: 3.1179172993
Logit diff % change:      -19.67%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step100000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 93.80952380952381%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 71.42857142857143%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 70.0%
Checkpoint 100000:
Heads ablated:            [(9, 6), (8, 9), (8, 2), (8, 1), (9, 8)]
Original logit diff:      4.0274939537
Post ablation logit diff: 3.1162800789
Logit diff % change:      -22.62%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step101000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 96.66666666666667%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 74.76190476190476%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 62.38095238095238%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 96.19047619047619%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 73.80952380952381%
Checkpoint 101000:
Heads ablated:            [(9, 6), (8, 9), (8, 2), (8, 1), (9, 8)]
Original logit diff:      3.8571720123
Post ablation logit diff: 3.1220989227
Logit diff % change:      -19.06%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step102000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 75.71428571428571%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 72.85714285714285%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 96.19047619047619%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 59.523809523809526%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 76.66666666666667%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 96.19047619047619%
Checkpoint 102000:
Heads ablated:            [(9, 11), (9, 8), (8, 1), (8, 2), (8, 9), (9, 4), (9, 6)]
Original logit diff:      4.0581960678
Post ablation logit diff: 2.8886754513
Logit diff % change:      -28.82%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step103000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 62.38095238095238%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 80.0%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 79.04761904761905%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 73.33333333333333%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 96.66666666666667%
Checkpoint 103000:
Heads ablated:            [(8, 9), (8, 2), (8, 1), (9, 4), (9, 6), (9, 11), (9, 8)]
Original logit diff:      4.1358156204
Post ablation logit diff: 2.9038691521
Logit diff % change:      -29.79%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step104000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 80.95238095238095%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 64.28571428571429%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 96.66666666666667%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 96.66666666666667%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 78.57142857142857%
Checkpoint 104000:
Heads ablated:            [(9, 4), (8, 9), (8, 2), (8, 1), (9, 6), (9, 8), (9, 9)]
Original logit diff:      3.7935748100
Post ablation logit diff: 2.3894762993
Logit diff % change:      -37.01%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step105000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 81.9047619047619%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 64.76190476190476%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 96.66666666666667%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 96.66666666666667%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 36.666666666666664%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 80.95238095238095%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 78.57142857142857%
Checkpoint 105000:
Heads ablated:            [(9, 4), (8, 9), (8, 2), (8, 1), (9, 6), (9, 8), (9, 11), (9, 9)]
Original logit diff:      4.019974

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 96.66666666666667%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 84.28571428571429%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 69.04761904761905%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 95.71428571428572%
Checkpoint 106000:
Heads ablated:            [(9, 6), (9, 4), (8, 9), (8, 2), (8, 1), (9, 8)]
Original logit diff:      4.3689026833
Post ablation logit diff: 2.9052648544
Logit diff % change:      -33.50%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step107000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 84.28571428571429%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 30.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 96.66666666666667%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 60.0%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 70.95238095238095%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 61.42857142857143%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.3 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.4 (sign=1) : T

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step108000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 84.76190476190476%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 24.285714285714285%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 85.23809523809524%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 62.38095238095238%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 70.95238095238095%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 59.04761904761905%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 62.38095238095238%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 72.38095238095238%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 59.04761904761905%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 89.04761904761904%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 96.66666666666667%
Checkpoint 109000:
Heads ablated:            [(9, 8), (7, 10), (8, 1), (8, 2), (8, 9), (9, 4), (9, 6)]
Original logit diff:      4.2349190712
Post ablation logit diff: 2.9743044376
Logit diff % change:      -29.77%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step110000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 87.14285714285714%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 59.523809523809526%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 71.9047619047619%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 98.09523809523809%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 96.19047619047619%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 95.23809523809523%
Checkpoint 110000:
Heads ablated:            [(9, 4), (8, 9), (8, 2), (8, 1), (9, 6), (9, 8)]
Original logit diff:      4.3877296448
Post ablation logit diff: 2.8735165596
Logit diff % change:      -34.51%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step111000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 85.71428571428571%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 94.76190476190476%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 65.23809523809524%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 98.09523809523809%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 75.71428571428571%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 90.0%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 96.19047619047619%
Checkpoint 111000:
Heads ablated:            [(9, 9), (9, 8), (7, 10), (8, 1), (8, 2), (8, 5), (8, 9), (9, 4), (9, 6)]
Original logit diff:      4.1714777946
Post ablation 

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step112000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 91.9047619047619%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 65.23809523809524%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 98.09523809523809%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 75.23809523809524%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 59.04761904761905%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 90.47619047619048%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 94.76190476190476%
Checkpoint 112000:
Heads ablated:            [(9, 8), (7, 10), (8, 1), (8, 2), (8, 5), 

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step113000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 88.09523809523809%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 5.714285714285714%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 86.66666666666667%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 65.71428571428571%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 56.666666666666664%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 92.38095238095238%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 76.19047619047619%
Checkpoint 113000:
Heads ablated:            [(9, 11), (9, 9), (7, 10), (8, 1), (8,

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step114000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 90.0%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 87.61904761904762%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 65.71428571428571%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 76.19047619047619%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 91.9047619047619%
Checkpoint 114000:
Heads ablated:            [(9, 11), (9, 8), (9, 6), (7, 10), (8, 1),

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step115000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 87.14285714285714%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 86.19047619047619%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 89.04761904761904%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 64.28571428571429%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.4 (sign=1) : Top 5 accuracy: 39.523809523809526%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 75.71428571428571%
Copy circuit for head 8.8 (sign=1) : Top 5 ac

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step116000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 53.80952380952381%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 78.0952380952381%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 67.14285714285714%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 1.4285714285714286%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 92.38095238095238%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 87.14285714285714%
Copy circuit 

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step117000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 53.333333333333336%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 79.04761904761905%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 117000:
Heads ablated:            [(9, 6), (8, 9), (8, 5), (8, 1), (7, 10)]
Original logit diff:      4.7434997559
Post ablation logit diff: 3.5868151188
Logit diff % change:      -24.38%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step118000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 91.9047619047619%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 84.28571428571429%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 66.19047619047619%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 97.61904761904762%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 79.52380952380952%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 48.095238095238095%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 92.85714285714286%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 92.85714285714286%
C

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 92.38095238095238%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 92.85714285714286%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 49.047619047619044%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 78.0952380952381%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 97.14285714285714%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 67.61904761904762%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 83.33333333333334%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 91.42857142857143%
Checkpoint 119000:
Heads ablated:            [(9, 6), (9, 4), (8, 9), (8, 5), (8, 2), (8, 1), (7, 

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step120000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 92.38095238095238%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 52.85714285714286%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 80.0%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 67.61904761904762%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 91.9047619047619%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 87.61904761904762%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 91.9047619047619%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Checkpoint 120

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step121000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 88.57142857142857%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 69.04761904761905%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 54.761904761904766%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 92.38095238095238%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 78.57142857142857%
Checkpoint 121000:
Heads ablated:            [(9, 9), (7, 10), (8, 1), (8, 9), (9, 4), (8, 5)]
Original logit diff:      4.6029114723
Post ablation logit diff: 3.2199792862
Logit diff % change:      -30.04%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step122000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 90.0%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 90.95238095238095%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 92.85714285714286%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 51.90476190476191%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 80.95238095238095%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 69.04761904761905%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 93.33333333333333%
Checkpoint 122000:
Heads ablated:            [(9, 9), (9, 6), (9, 4), (8, 9), (8, 5), (8, 1), (7, 10), (9, 11)]
Original logit diff:      4.5792465210
Post abl

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step123000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.8 (sign=1) : Top 5 accuracy: 81.42857142857143%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 90.47619047619048%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.7 (sign=1) : Top 5 accuracy: 5.714285714285714%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 90.95238095238095%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 67.61904761904762%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 81.9047619047619%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 48.095238095238095%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step124000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 82.38095238095238%
Checkpoint 124000:
Heads ablated:            [(7, 10), (8, 1), (8, 9), (8, 5)]
Original logit diff:      4.6726117134
Post ablation logit diff: 3.3874263763
Logit diff % change:      -27.50%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step125000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 49.047619047619044%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 82.38095238095238%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 66.66666666666666%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 94.76190476190476%
Checkpoint 125000:
Heads ablated:            [(8, 9), (8, 5), (8, 2), (8, 1), (7, 10), (9, 11)]
Original logit diff:      4.6287317276
Post ablation logit diff: 3.2932188511
Logit diff % change:      -28.85%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step126000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 86.66666666666667%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 92.38095238095238%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 83.80952380952381%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 69.52380952380952%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 95.23809523809523%
Checkpoint 126000:
Heads ablated:            [(9, 6), (9, 4), (8, 9), (8, 5), (8, 1), (7, 10), (9, 9), (9, 11)]
Original logit diff:      4.39078

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step127000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 94.28571428571428%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 89.04761904761904%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 87.14285714285714%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 69.04761904761905%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 52.38095238095239%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 92.38095238095238%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 82.85714285714286%
C

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step128000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 88.09523809523809%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 92.85714285714286%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 51.90476190476191%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 83.80952380952381%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 70.95238095238095%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 96.19047619047619%
Checkpoint 128000:
Heads ablated:            [(9, 6), (9, 4), (8, 9), (8, 5), (8, 1), (7, 10), (9, 11)]
Original logit diff:      4.9464960098
Pos

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step129000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 85.23809523809524%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Checkpoint 129000:
Heads ablated:            [(8, 1), (8, 5), (8, 9)]
Original logit diff:      5.0566964149
Post ablation logit diff: 3.7143292427
Logit diff % change:      -26.55%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step130000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 85.71428571428571%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 51.42857142857142%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 84.28571428571429%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 69.52380952380952%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 87.61904761904762%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 96.19047619047619%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 2.380952380952381%
Checkpoint 130000:
Heads ablated:            [(9, 6), (8, 9), (8, 5), (8, 

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step131000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 95.71428571428572%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 70.47619047619048%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 49.523809523809526%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 92.85714285714286%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 85.71428571428571%
Checkpoint 131000:
Heads ablated:            [(9, 11), (7, 10), (8, 1), (8, 9), (9, 4), (8, 5)]
Original logit diff:      4.6185445786
Post ablation logit diff: 3.3961455822
Logit diff % change:      -26.47%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step132000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 86.66666666666667%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 92.85714285714286%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 53.80952380952381%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 84.76190476190476%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 69.52380952380952%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Checkpoint 132000:
Heads ablated:            [(9, 6), (9, 4), (8, 9), (8, 5), (8, 1), (7, 10), (9, 11)]
Original logit diff:      4.8236279488
Po

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step133000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 0.9523809523809524%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 88.09523809523809%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 1.9047619047619049%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 70.47619047619048%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 85.71428571428571%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 53.333333333333336%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 92.8571428571428

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step134000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 95.23809523809523%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 71.42857142857143%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 98.09523809523809%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 84.76190476190476%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 92.85714285714286%
Checkpoint 134000:
Heads ablated:            [(9, 11), (7, 10), (8, 1), (8, 5), (8, 9), (9, 4)]
Original logit diff:      4.6599798203
Post ablation logit diff: 3.4568464756
Logit diff % change:      -25.82%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step135000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 86.19047619047619%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 72.85714285714285%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 84.76190476190476%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 57.14285714285714%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 92.38095238095238%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 85.23809523809524%
Checkpoint 135000:
Heads ablated:            [(9, 9), (7, 10), 

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step136000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 49.523809523809526%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 84.76190476190476%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 71.9047619047619%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 3.3333333333333335%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 84.28571428571429%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 95.23809523809523%
Checkpoint 136000:
Heads ablated:            [(8, 9), (8, 5), (8, 2), (8,

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step137000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 87.14285714285714%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 3.3333333333333335%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 74.28571428571429%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 99.04761904761905%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 84.76190476190476%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 52.38095238095239%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 90.0%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 84.76190476190476%
Checkpoint 137000:
Heads ablated:            [(9, 9), (7, 10), (8, 1), (8, 2), (8, 5), (8, 9), (9, 

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 86.66666666666667%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 72.85714285714285%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 85.23809523809524%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 91.9047619047619%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 84.28571428571429%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 51.90476190476191%
Checkpoint 138000:
Heads ablated:            [(9, 9), (7, 10), (8, 1), (8, 5), (8, 9), (9, 4), (9, 6)]
Original logit diff:      4.7791352272
Post ablation logit diff: 2.9975485802
Logit diff % change:      -37.28%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step139000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 86.66666666666667%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 2.857142857142857%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 72.85714285714285%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 98.09523809523809%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 85.23809523809524%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 51.90476190476191%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 83.33333333333334%
Checkpoint 139000:
Heads ablated:            [(9, 9), (7, 10), (8, 1), (8, 5), (8, 9), (9, 6)]
Original logit diff:      4.6881947517
Post ablation logit diff: 3.4748618603
Logit diff % change:      -25.88%


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step140000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 84.76190476190476%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 93.33333333333333%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 53.333333333333336%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 85.71428571428571%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 73.33333333333333%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 85.71428571428571%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 94.28571428571428%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 2.857142857142857%
C

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step141000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 90.0%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 50.0%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 86.19047619047619%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 73.33333333333333%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 83.33333333333334%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 87.61904761904762%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 94.76190476190476%
Copy circuit for head 10.10 (sign=1) : Top 5 accuracy: 0.4761904761904762%
Checkpoint 141000:
Heads 

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step142000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 94.76190476190476%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 86.19047619047619%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 3.3333333333333335%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 75.23809523809524%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 85.23809523809524%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 52.38095238095239%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 89.52380952380953%
Copy circuit for head 9.6 (sign=1) : Top 5 accuracy: 82.38095238095238%
Checkpoint 142000:
Heads ablated:            [(9, 11), (9, 9), (7, 9), (7

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model EleutherAI/pythia-160m-alldropout at step143000; now loading into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Copy circuit for head 5.11 (sign=1) : Top 5 accuracy: 59.523809523809526%
Copy circuit for head 9.11 (sign=1) : Top 5 accuracy: 94.76190476190476%
Copy circuit for head 9.9 (sign=1) : Top 5 accuracy: 86.19047619047619%
Copy circuit for head 7.6 (sign=1) : Top 5 accuracy: 2.380952380952381%
Copy circuit for head 7.9 (sign=1) : Top 5 accuracy: 75.23809523809524%
Copy circuit for head 7.10 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 8.1 (sign=1) : Top 5 accuracy: 98.57142857142858%
Copy circuit for head 8.2 (sign=1) : Top 5 accuracy: 99.52380952380952%
Copy circuit for head 8.5 (sign=1) : Top 5 accuracy: 86.19047619047619%
Copy circuit for head 8.8 (sign=1) : Top 5 accuracy: 52.85714285714286%
Copy circuit for head 8.9 (sign=1) : Top 5 accuracy: 100.0%
Copy circuit for head 9.4 (sign=1) : Top 5 accuracy: 90.95238095238095%


## View Results

#### Pythia 160m (all-dropout variant)

In [14]:
# Short name of the model whose saved backup-analysis results are inspected in this section.
# It selects the results/backup/<model>/ directory read below and is passed to plot_top_heads.
MODEL_TO_VIEW = "pythia-160m-alldropout"

In [15]:
# Load the saved backup-ablation metrics for the selected model (a dict keyed by checkpoint).
# map_location="cpu" lets the file open on machines without a GPU even if its tensors were
# saved from CUDA, and keeps the values on the CPU, which is what the plotting cells need.
# NOTE: torch.load unpickles the file; only load results files you produced or trust.
experiment_metrics = torch.load(
    f'results/backup/{MODEL_TO_VIEW}/nmh_backup_metrics.pt',
    map_location="cpu",
)

In [16]:
# Peek at which metrics were recorded for a single checkpoint (4000) to see what is available.
experiment_metrics[4000].keys()

dict_keys(['logit_diff', 'per_head_logit_diffs', 'ablation_targets', 'ablated_logit_diff', 'per_head_ablated_logit_diffs', 'per_head_logit_diff_delta', 'in_circuit_head_delta', 'outside_circuit_head_delta', 'summed_in_circuit_head_delta', 'summed_outside_circuit_head_delta', 'summed_total_head_delta'])

In [17]:
# Build one dict per metric, keyed by checkpoint, with every value expressed
# relative to that checkpoint's original logit diff.
summed_in_circuit_head_deltas = {}
summed_outside_circuit_head_deltas = {}
summed_total_head_deltas = {}
per_head_logit_diff_deltas = {}
total_logit_diff_deltas = {}

for checkpoint, checkpoint_metrics in experiment_metrics.items():
    # Denominator shared by every metric at this checkpoint.
    original_logit_diff = checkpoint_metrics["logit_diff"]

    summed_in_circuit_head_deltas[checkpoint] = (
        checkpoint_metrics["summed_in_circuit_head_delta"] / original_logit_diff
    )
    summed_outside_circuit_head_deltas[checkpoint] = (
        checkpoint_metrics["summed_outside_circuit_head_delta"] / original_logit_diff
    )
    summed_total_head_deltas[checkpoint] = (
        checkpoint_metrics["summed_total_head_delta"] / original_logit_diff
    )
    per_head_logit_diff_deltas[checkpoint] = (
        checkpoint_metrics["per_head_logit_diff_delta"] / original_logit_diff
    )
    # Overall change caused by the ablation: (ablated - original) / original.
    total_logit_diff_deltas[checkpoint] = (
        checkpoint_metrics['ablated_logit_diff'] - original_logit_diff
    ) / original_logit_diff

In [18]:
# Plot the summed in-circuit head delta for every checkpoint.
# The values are fractions of the original logit diff (they were divided by it, not
# multiplied by 100), so the y-axis ticks are formatted as percentages to match the label.
fig = px.line(
    x=list(summed_in_circuit_head_deltas.keys()),
    y=list(summed_in_circuit_head_deltas.values()),
    # Name the model actually loaded instead of a hard-coded "Default" label.
    title=f"Summed Post-NMH-Ablation In-Circuit Head Logit Diff Change Over Time ({MODEL_TO_VIEW})",
    labels={'x': 'Checkpoint', 'y': 'Change as % of original logit diff'}
)
fig.update_yaxes(tickformat=".1%")
fig.show()


In [19]:
# Plot the summed outside-circuit head delta for every checkpoint.
# The values are fractions of the original logit diff (they were divided by it, not
# multiplied by 100), so the y-axis ticks are formatted as percentages to match the label.
fig = px.line(
    x=list(summed_outside_circuit_head_deltas.keys()),
    y=list(summed_outside_circuit_head_deltas.values()),
    # Name the model actually loaded instead of a hard-coded "Default" label.
    title=f"Summed Post-NMH-Ablation Outside-Circuit Head Attribution Change ({MODEL_TO_VIEW})",
    labels={'x': 'Checkpoint', 'y': 'Change as % of original logit diff'}
)
fig.update_yaxes(tickformat=".1%")
fig.show()

In [20]:
# Plot the summed total head delta for every checkpoint.
# The values are fractions of the original logit diff (they were divided by it, not
# multiplied by 100), so the y-axis ticks are formatted as percentages to match the label.
fig = px.line(
    x=list(summed_total_head_deltas.keys()),
    y=list(summed_total_head_deltas.values()),
    # Name the model actually loaded instead of a hard-coded "Default" label.
    title=f"Summed Total Post-NMH-Ablation Head Attribution Change ({MODEL_TO_VIEW})",
    labels={'x': 'Checkpoint', 'y': 'Change as % of original logit diff'}
)
fig.update_yaxes(tickformat=".1%")

fig.show()

In [21]:
# Derive the NMH head sets from the saved metrics. `checkpoint_nmhs` maps each checkpoint
# to a set of head tuples (it is displayed in a later cell).
# NOTE(review): `cumulative_nmhs` presumably accumulates heads that were NMHs at earlier
# checkpoints, and the tuples look like (layer, head) — confirm in utils.backup_analysis.
# The helper itself prints a set of heads when called.
cumulative_nmhs, checkpoint_nmhs = get_past_nmhs_for_checkpoints(experiment_metrics)

{(8, 1), (7, 10), (9, 4), (8, 9), (9, 8), (9, 11), (8, 2)}


In [12]:
# Pass the normalized per-head deltas and the cumulative NMH information to plot_top_heads
# and keep the table it returns so it can be filtered in the next cell.
# NOTE(review): presumably this also renders a figure of the top heads — confirm in utils.backup_analysis.
top_backup_heads = plot_top_heads(MODEL_TO_VIEW, checkpoint_dict=per_head_logit_diff_deltas, cumulative_nmhs=cumulative_nmhs)

In [68]:
# Show the first 50 rows (at most) whose 'Previous NMH' flag is True.
previous_nmh_mask = top_backup_heads['Previous NMH'] == True
top_backup_heads.loc[previous_nmh_mask].head(50)

Unnamed: 0,Checkpoint,Layer-Head,Layer,Head,Value,Previous NMH,Checkpoint_sum,Value_sum,Previous NMH_sum,Top K
36,11000,Layer 9-Head 9,9,9,0.007299,True,5602000,0.951745,78,True
39,12000,Layer 9-Head 4,9,4,0.020281,True,2013000,0.203263,22,False
44,13000,Layer 9-Head 9,9,9,0.008192,True,5602000,0.951745,78,True
46,13000,Layer 9-Head 4,9,4,0.010593,True,2013000,0.203263,22,False
50,14000,Layer 9-Head 9,9,9,0.01201,True,5602000,0.951745,78,True
57,15000,Layer 9-Head 9,9,9,0.012112,True,5602000,0.951745,78,True
67,17000,Layer 9-Head 9,9,9,0.013145,True,5602000,0.951745,78,True
71,18000,Layer 9-Head 4,9,4,0.013086,True,2013000,0.203263,22,False
72,18000,Layer 9-Head 9,9,9,0.012296,True,5602000,0.951745,78,True
77,19000,Layer 9-Head 9,9,9,0.01248,True,5602000,0.951745,78,True


In [13]:
# Display the per-checkpoint head sets returned by get_past_nmhs_for_checkpoints.
checkpoint_nmhs

{4000: {(7, 6), (7, 10), (8, 9), (9, 4), (9, 11)},
 5000: set(),
 6000: {(7, 6), (8, 9), (9, 4), (9, 11)},
 7000: {(7, 10), (8, 9), (9, 4), (9, 11)},
 8000: {(8, 9), (9, 4), (9, 11)},
 9000: {(8, 9), (9, 4), (9, 9), (9, 11)},
 10000: {(8, 9), (9, 4), (9, 8), (9, 9), (9, 11)},
 11000: {(8, 9), (9, 4), (9, 8), (9, 11)},
 12000: {(8, 1), (8, 9), (9, 8), (9, 9), (9, 11)},
 13000: {(8, 9), (9, 8), (9, 11)},
 14000: {(8, 9), (9, 4), (9, 8), (9, 11)},
 15000: {(8, 2), (8, 9), (9, 4), (9, 8), (9, 11)},
 16000: {(8, 9), (9, 4), (9, 8), (9, 9), (9, 11)},
 17000: {(8, 2), (8, 9), (9, 4), (9, 8), (9, 11)},
 18000: {(8, 2), (8, 9), (9, 8), (9, 11)},
 19000: {(8, 2), (8, 9), (9, 4), (9, 8), (9, 11)},
 20000: {(7, 10), (8, 1), (8, 2), (8, 9), (9, 4), (9, 8), (9, 11)},
 21000: {(8, 2), (8, 9), (9, 4), (9, 8), (9, 9), (9, 11)},
 22000: {(8, 1), (8, 2), (8, 9), (9, 4), (9, 8), (9, 11)},
 23000: {(8, 1), (8, 2), (8, 9), (9, 8), (9, 11)},
 24000: {(7, 10), (8, 1), (8, 2), (8, 9), (9, 4), (9, 8), (9, 11)},