# Phase Change Analysis

## Setup

In [1]:
!ls

README.md	    quick_start_pytorch.ipynb
circuits-over-time  quick_start_pytorch_images


In [2]:
%cd circuits-over-time

/notebooks/circuits-over-time


In [3]:
!pip install git+https://github.com/neelnanda-io/TransformerLens.git
!pip install circuitsvis
!pip install jaxtyping==0.2.13
!pip install einops
!pip install protobuf==3.20.*
!pip install plotly
!pip install torchtyping
!pip install git+https://github.com/neelnanda-io/neel-plotly.git

Collecting git+https://github.com/neelnanda-io/TransformerLens.git
  Cloning https://github.com/neelnanda-io/TransformerLens.git to /tmp/pip-req-build-0ydpp3df
  Running command git clone --filter=blob:none --quiet https://github.com/neelnanda-io/TransformerLens.git /tmp/pip-req-build-0ydpp3df
  Resolved https://github.com/neelnanda-io/TransformerLens.git to commit 0d2827ecce4ef17b86060bdaaaaf50e724684085
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Collecting datasets>=2.7.1
  Downloading datasets-2.14.1-py3-none-any.whl (492 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m492.4/492.4 kB[0m [31m48.9 MB/s[0m eta [36m0:00:00[0m
Collecting fancy-einsum>=0.0.3
  Downloading fancy_einsum-0.0.3-py3-none-any.whl (6.2 kB)
Collecting einops>=0.6.0
  Downloading einops-0.6.1-py3-none-any.whl (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:

IN_COLAB = False
from IPython import get_ipython

ipython = get_ipython()
# Code to automatically update the HookedTransformer code as its edited without restarting the kernel
ipython.magic("load_ext autoreload")
ipython.magic("autoreload 2")

  ipython.magic("load_ext autoreload")
  ipython.magic("autoreload 2")


In [4]:
import os
import pathlib
from typing import List, Optional, Union
from collections import namedtuple

import torch
import numpy as np
import yaml

import einops
from fancy_einsum import einsum

from datasets import load_dataset
from transformers import pipeline
import plotly.io as pio
import plotly.express as px
#import pysvelte
from IPython.display import HTML

import plotly.graph_objs as go
import ipywidgets as widgets
from IPython.display import display

# if IN_COLAB or not DEBUG_MODE:
#     # Thanks to annoying rendering issues, Plotly graphics will either show up in colab OR Vscode depending on the renderer - this is bad for developing demos! Thus creating a debug mode.
#     pio.renderers.default = "colab"
# else:
#     pio.renderers.default = "plotly_mimetype+notebook"

if torch.cuda.is_available():
    device = int(os.environ.get("LOCAL_RANK", 0))
else:
    device = "cpu"

In [5]:
import transformers
from transformers import AutoConfig, AutoModel, AutoModelForCausalLM
import transformer_lens
import transformer_lens.utils as utils
import transformer_lens.patching as patching
from transformer_lens.hook_points import (
    HookedRootModule,
    HookPoint,
)  # Hooking utilities
from transformer_lens import HookedTransformer, HookedTransformerConfig, FactoredMatrix, ActivationCache

from functools import partial

from torchtyping import TensorType as TT

In [6]:
from model_utils import (
    load_model,
    clear_gpu_memory
)
import circuit_utils as cu

In [7]:
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x7fda6ea95970>

In [8]:
from neel_plotly import line, imshow, scatter

def l_imshow(tensor, renderer=None, **kwargs):
    px.imshow(utils.to_numpy(tensor), color_continuous_midpoint=0.0, color_continuous_scale="RdBu", **kwargs).show(renderer)

def l_line(tensor, renderer=None, **kwargs):
    px.line(y=utils.to_numpy(tensor), **kwargs).show(renderer)

def l_scatter(x, y, xaxis="", yaxis="", caxis="", renderer=None, **kwargs):
    x = utils.to_numpy(x)
    y = utils.to_numpy(y)
    px.scatter(y=y, x=x, labels={"x":xaxis, "y":yaxis, "color":caxis}, **kwargs).show(renderer)

def two_lines(tensor1, tensor2, renderer=None, **kwargs):
    px.line(y=[utils.to_numpy(tensor1), utils.to_numpy(tensor2)], **kwargs).show(renderer)

## Analysis Functions

In [9]:
def get_induction_scores(model):
    seq_len = 100
    batch_size = 2

    prev_token_scores = torch.zeros((model.cfg.n_layers, model.cfg.n_heads), device="cuda")

    def prev_token_hook(pattern, hook):
        layer = hook.layer()
        diagonal = pattern.diagonal(offset=1, dim1=-1, dim2=-2)
        prev_token_scores[layer] = einops.reduce(diagonal, "batch head_index diagonal -> head_index", "mean")

    duplicate_token_scores = torch.zeros((model.cfg.n_layers, model.cfg.n_heads), device="cuda")

    def duplicate_token_hook(pattern, hook):
        layer = hook.layer()
        diagonal = pattern.diagonal(offset=seq_len, dim1=-1, dim2=-2)
        duplicate_token_scores[layer] = einops.reduce(diagonal, "batch head_index diagonal -> head_index", "mean")

    induction_scores = torch.zeros((model.cfg.n_layers, model.cfg.n_heads), device="cuda")

    def induction_hook(pattern, hook):
        layer = hook.layer()
        diagonal = pattern.diagonal(offset=seq_len-1, dim1=-1, dim2=-2)
        induction_scores[layer] = einops.reduce(diagonal, "batch head_index diagonal -> head_index", "mean")

    original_tokens = torch.randint(100, 20000, size=(batch_size, seq_len))
    repeated_tokens = einops.repeat(original_tokens, "batch seq_len -> batch (2 seq_len)").cuda()

    pattern_filter = lambda act_name: act_name.endswith("hook_pattern")
    loss = model.run_with_hooks(repeated_tokens, return_type="loss", fwd_hooks=[(pattern_filter, prev_token_hook), (pattern_filter, duplicate_token_hook), (pattern_filter, induction_hook)])

    return induction_scores, prev_token_scores, duplicate_token_scores

## Pythia Analysis

### Pythia 160M

In [17]:
model_name = "pythia-160m"

model_full_name = f"EleutherAI/{model_name}"

cache_dir="/media/curttigges/project-files/projects/circuits"

In [18]:
model = HookedTransformer.from_pretrained(
    model_full_name,
    center_unembed=True,
    center_writing_weights=True,
    fold_ln=True,
    #refactor_factored_attn_matrices=True,
    cache_dir=cache_dir,
)

Downloading (…)lve/main/config.json:   0%|          | 0.00/567 [00:00<?, ?B/s]

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.


Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


In [19]:
from tqdm import tqdm

steps = [2**n for n in range(10)] + [n * 1000 for n in range(1, 10)] # + [n * 1000 for n in range(10, 143, 5)]
labels = [f"Step {n}" for n in steps]
induction_scores = torch.zeros((model.cfg.n_layers, model.cfg.n_heads, len(steps)), device="cuda")
induction_maxes = torch.zeros(len(steps))
induction_sums = torch.zeros(len(steps))

duplication_scores = torch.zeros((model.cfg.n_layers, model.cfg.n_heads, len(steps)), device="cuda")
duplication_maxes = torch.zeros(len(steps))
duplication_sums = torch.zeros(len(steps))

previous_token_scores = torch.zeros((model.cfg.n_layers, model.cfg.n_heads, len(steps)), device="cuda")
previous_token_maxes = torch.zeros(len(steps))
previous_token_sums = torch.zeros(len(steps))

for i in tqdm(range(len(steps))):
    model = HookedTransformer.from_pretrained(
        model_full_name,
        center_unembed=True,
        center_writing_weights=True,
        fold_ln=True,
        #refactor_factored_attn_matrices=True,
        revision=f"step{steps[i]}",
        cache_dir=cache_dir,
    )

    induction_scores[:,:,i], previous_token_scores[:,:,i], duplication_scores[:,:,i] = get_induction_scores(model)
    induction_maxes[i] = induction_scores[:,:,i].max()
    induction_sums[i] = induction_scores[:,:,i].sum()

  0%|          | 0/19 [00:00<?, ?it/s]

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
  5%|▌         | 1/19 [00:20<06:17, 21.00s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 11%|█         | 2/19 [00:44<06:17, 22.19s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 16%|█▌        | 3/19 [00:58<05:02, 18.88s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 21%|██        | 4/19 [01:41<07:02, 28.14s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 26%|██▋       | 5/19 [01:58<05:39, 24.24s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 32%|███▏      | 6/19 [02:19<05:00, 23.12s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 37%|███▋      | 7/19 [02:38<04:19, 21.64s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 42%|████▏     | 8/19 [02:58<03:54, 21.34s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 47%|████▋     | 9/19 [03:22<03:40, 22.01s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 53%|█████▎    | 10/19 [03:46<03:24, 22.69s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 58%|█████▊    | 11/19 [04:05<02:53, 21.63s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 63%|██████▎   | 12/19 [04:40<02:58, 25.47s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 68%|██████▊   | 13/19 [05:00<02:23, 23.90s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 74%|███████▎  | 14/19 [05:30<02:09, 25.85s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 79%|███████▉  | 15/19 [05:51<01:36, 24.21s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 84%|████████▍ | 16/19 [06:00<00:59, 19.78s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 89%|████████▉ | 17/19 [06:12<00:35, 17.52s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 95%|█████████▍| 18/19 [06:26<00:16, 16.40s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
100%|██████████| 19/19 [06:49<00:00, 21.57s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer





In [20]:
l_line(induction_maxes, hover_name=labels, log_x=True)


In [21]:
l_line(induction_sums, hover_name=labels, log_x=True)

In [22]:
from circuit_utils import visualize_tensor
visualize_tensor(induction_scores.cpu(), labels, zmin=-1, zmax=1)

IntSlider(value=0, description='Slice:', max=18)

Output()

### Pythia 410M

In [8]:
model_name = "pythia-410m"

model_full_name = f"EleutherAI/{model_name}"

cache_dir="/media/curttigges/project-files/projects/circuits"

In [9]:
model = HookedTransformer.from_pretrained(
    model_full_name,
    center_unembed=True,
    center_writing_weights=True,
    fold_ln=True,
    #refactor_factored_attn_matrices=True,
    cache_dir=cache_dir,
)

Using pad_token, but it is not set yet.


Loaded pretrained model EleutherAI/pythia-410m into HookedTransformer


In [11]:
from tqdm import tqdm

steps = [2**n for n in range(10)] + [n * 1000 for n in range(1, 10)] # + [n * 1000 for n in range(10, 143, 5)]
labels = [f"Step {n}" for n in steps]
induction_scores = torch.zeros((model.cfg.n_layers, model.cfg.n_heads, len(steps)), device="cuda")
induction_maxes = torch.zeros(len(steps))
induction_sums = torch.zeros(len(steps))

duplication_scores = torch.zeros((model.cfg.n_layers, model.cfg.n_heads, len(steps)), device="cuda")
duplication_maxes = torch.zeros(len(steps))
duplication_sums = torch.zeros(len(steps))

previous_token_scores = torch.zeros((model.cfg.n_layers, model.cfg.n_heads, len(steps)), device="cuda")
previous_token_maxes = torch.zeros(len(steps))
previous_token_sums = torch.zeros(len(steps))

for i in tqdm(range(len(steps))):
    model = HookedTransformer.from_pretrained(
        model_full_name,
        center_unembed=True,
        center_writing_weights=True,
        fold_ln=True,
        #refactor_factored_attn_matrices=True,
        revision=f"step{steps[i]}",
        cache_dir=cache_dir,
    )

    induction_scores[:,:,i], previous_token_scores[:,:,i], duplication_scores[:,:,i] = get_induction_scores(model)
    induction_maxes[i] = induction_scores[:,:,i].max()
    induction_sums[i] = induction_scores[:,:,i].sum()

  0%|          | 0/19 [00:00<?, ?it/s]

Using pad_token, but it is not set yet.
  5%|▌         | 1/19 [00:03<00:55,  3.08s/it]

Loaded pretrained model EleutherAI/pythia-410m into HookedTransformer


Using pad_token, but it is not set yet.
 11%|█         | 2/19 [00:05<00:49,  2.92s/it]

Loaded pretrained model EleutherAI/pythia-410m into HookedTransformer


Using pad_token, but it is not set yet.
 16%|█▌        | 3/19 [00:08<00:46,  2.91s/it]

Loaded pretrained model EleutherAI/pythia-410m into HookedTransformer


Using pad_token, but it is not set yet.
 21%|██        | 4/19 [00:12<00:49,  3.31s/it]

Loaded pretrained model EleutherAI/pythia-410m into HookedTransformer


Using pad_token, but it is not set yet.
 26%|██▋       | 5/19 [00:16<00:46,  3.34s/it]

Loaded pretrained model EleutherAI/pythia-410m into HookedTransformer


Using pad_token, but it is not set yet.
 32%|███▏      | 6/19 [00:19<00:42,  3.31s/it]

Loaded pretrained model EleutherAI/pythia-410m into HookedTransformer


Using pad_token, but it is not set yet.
 37%|███▋      | 7/19 [00:23<00:41,  3.49s/it]

Loaded pretrained model EleutherAI/pythia-410m into HookedTransformer


Using pad_token, but it is not set yet.
 42%|████▏     | 8/19 [00:26<00:36,  3.33s/it]

Loaded pretrained model EleutherAI/pythia-410m into HookedTransformer


Using pad_token, but it is not set yet.
 47%|████▋     | 9/19 [00:30<00:35,  3.59s/it]

Loaded pretrained model EleutherAI/pythia-410m into HookedTransformer


Using pad_token, but it is not set yet.
 53%|█████▎    | 10/19 [00:33<00:31,  3.55s/it]

Loaded pretrained model EleutherAI/pythia-410m into HookedTransformer


Using pad_token, but it is not set yet.
 58%|█████▊    | 11/19 [00:36<00:26,  3.36s/it]

Loaded pretrained model EleutherAI/pythia-410m into HookedTransformer


Using pad_token, but it is not set yet.
 63%|██████▎   | 12/19 [00:39<00:22,  3.24s/it]

Loaded pretrained model EleutherAI/pythia-410m into HookedTransformer


Using pad_token, but it is not set yet.
 68%|██████▊   | 13/19 [00:42<00:19,  3.25s/it]

Loaded pretrained model EleutherAI/pythia-410m into HookedTransformer


Using pad_token, but it is not set yet.
 74%|███████▎  | 14/19 [00:48<00:19,  3.85s/it]

Loaded pretrained model EleutherAI/pythia-410m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/911M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 79%|███████▉  | 15/19 [01:26<00:57, 14.37s/it]

Loaded pretrained model EleutherAI/pythia-410m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/911M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 84%|████████▍ | 16/19 [02:06<01:05, 21.98s/it]

Loaded pretrained model EleutherAI/pythia-410m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/911M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 89%|████████▉ | 17/19 [02:47<00:55, 27.74s/it]

Loaded pretrained model EleutherAI/pythia-410m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/911M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 95%|█████████▍| 18/19 [03:24<00:30, 30.29s/it]

Loaded pretrained model EleutherAI/pythia-410m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/911M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
100%|██████████| 19/19 [04:04<00:00, 12.85s/it]

Loaded pretrained model EleutherAI/pythia-410m into HookedTransformer





In [12]:
l_line(induction_maxes, hover_name=labels, log_x=True)


In [13]:
l_line(induction_sums, hover_name=labels, log_x=True)

In [14]:
from circuit_utils import visualize_tensor
visualize_tensor(induction_scores.cpu(), labels, zmin=-1, zmax=1)

IntSlider(value=0, description='Slice:', max=18)

Output()

### Pythia 1.4B

In [8]:
model_name = "pythia-1.4b"

model_full_name = f"EleutherAI/{model_name}"

cache_dir="/media/curttigges/project-files/projects/circuits"

In [9]:
model = HookedTransformer.from_pretrained(
    model_full_name,
    center_unembed=True,
    center_writing_weights=True,
    fold_ln=True,
    #refactor_factored_attn_matrices=True,
    cache_dir=cache_dir,
)

Using pad_token, but it is not set yet.


Loaded pretrained model EleutherAI/pythia-1.4b into HookedTransformer


In [10]:
from tqdm import tqdm

steps = [2**n for n in range(10)] + [n * 1000 for n in range(1, 5)] # + [n * 1000 for n in range(10, 143, 5)]
labels = [f"Step {n}" for n in steps]
induction_scores = torch.zeros((model.cfg.n_layers, model.cfg.n_heads, len(steps)), device="cuda")
induction_maxes = torch.zeros(len(steps))
induction_sums = torch.zeros(len(steps))

duplication_scores = torch.zeros((model.cfg.n_layers, model.cfg.n_heads, len(steps)), device="cuda")
duplication_maxes = torch.zeros(len(steps))
duplication_sums = torch.zeros(len(steps))

previous_token_scores = torch.zeros((model.cfg.n_layers, model.cfg.n_heads, len(steps)), device="cuda")
previous_token_maxes = torch.zeros(len(steps))
previous_token_sums = torch.zeros(len(steps))

for i in tqdm(range(len(steps))):
    model = HookedTransformer.from_pretrained(
        model_full_name,
        center_unembed=True,
        center_writing_weights=True,
        fold_ln=True,
        #refactor_factored_attn_matrices=True,
        revision=f"step{steps[i]}",
        cache_dir=cache_dir,
    )

    induction_scores[:,:,i], previous_token_scores[:,:,i], duplication_scores[:,:,i] = get_induction_scores(model)
    induction_maxes[i] = induction_scores[:,:,i].max()
    induction_sums[i] = induction_scores[:,:,i].sum()

  0%|          | 0/14 [00:00<?, ?it/s]

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/2.93G [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
  7%|▋         | 1/14 [01:30<19:36, 90.53s/it]

Loaded pretrained model EleutherAI/pythia-1.4b into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/2.93G [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
  7%|▋         | 1/14 [02:53<37:34, 173.41s/it]


In [None]:
l_line(induction_maxes, hover_name=labels, log_x=True)


In [None]:
l_line(induction_sums, hover_name=labels, log_x=True)

In [None]:
from circuit_utils import visualize_tensor
visualize_tensor(induction_scores.cpu(), labels, zmin=-1, zmax=1)

IntSlider(value=0, description='Slice:', max=18)

Output()

### Pythia 2.8B

In [None]:
model_name = "pythia-160m"

model_full_name = f"EleutherAI/{model_name}"

cache_dir="/media/curttigges/project-files/projects/circuits"

In [None]:
model = HookedTransformer.from_pretrained(
    model_full_name,
    center_unembed=True,
    center_writing_weights=True,
    fold_ln=True,
    #refactor_factored_attn_matrices=True,
    cache_dir=cache_dir,
)

Downloading (…)lve/main/config.json:   0%|          | 0.00/567 [00:00<?, ?B/s]

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.


Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


In [None]:
from tqdm import tqdm

steps = [2**n for n in range(10)] + [n * 1000 for n in range(1, 10)] # + [n * 1000 for n in range(10, 143, 5)]
labels = [f"Step {n}" for n in steps]
induction_scores = torch.zeros((model.cfg.n_layers, model.cfg.n_heads, len(steps)), device="cuda")
induction_maxes = torch.zeros(len(steps))
induction_sums = torch.zeros(len(steps))

duplication_scores = torch.zeros((model.cfg.n_layers, model.cfg.n_heads, len(steps)), device="cuda")
duplication_maxes = torch.zeros(len(steps))
duplication_sums = torch.zeros(len(steps))

previous_token_scores = torch.zeros((model.cfg.n_layers, model.cfg.n_heads, len(steps)), device="cuda")
previous_token_maxes = torch.zeros(len(steps))
previous_token_sums = torch.zeros(len(steps))

for i in tqdm(range(len(steps))):
    model = HookedTransformer.from_pretrained(
        model_full_name,
        center_unembed=True,
        center_writing_weights=True,
        fold_ln=True,
        #refactor_factored_attn_matrices=True,
        revision=f"step{steps[i]}",
        cache_dir=cache_dir,
    )

    induction_scores[:,:,i], previous_token_scores[:,:,i], duplication_scores[:,:,i] = get_induction_scores(model)
    induction_maxes[i] = induction_scores[:,:,i].max()
    induction_sums[i] = induction_scores[:,:,i].sum()

  0%|          | 0/19 [00:00<?, ?it/s]

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
  5%|▌         | 1/19 [00:20<06:17, 21.00s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 11%|█         | 2/19 [00:44<06:17, 22.19s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 16%|█▌        | 3/19 [00:58<05:02, 18.88s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 21%|██        | 4/19 [01:41<07:02, 28.14s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 26%|██▋       | 5/19 [01:58<05:39, 24.24s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 32%|███▏      | 6/19 [02:19<05:00, 23.12s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 37%|███▋      | 7/19 [02:38<04:19, 21.64s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 42%|████▏     | 8/19 [02:58<03:54, 21.34s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 47%|████▋     | 9/19 [03:22<03:40, 22.01s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 53%|█████▎    | 10/19 [03:46<03:24, 22.69s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 58%|█████▊    | 11/19 [04:05<02:53, 21.63s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 63%|██████▎   | 12/19 [04:40<02:58, 25.47s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 68%|██████▊   | 13/19 [05:00<02:23, 23.90s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 74%|███████▎  | 14/19 [05:30<02:09, 25.85s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 79%|███████▉  | 15/19 [05:51<01:36, 24.21s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 84%|████████▍ | 16/19 [06:00<00:59, 19.78s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 89%|████████▉ | 17/19 [06:12<00:35, 17.52s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 95%|█████████▍| 18/19 [06:26<00:16, 16.40s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
100%|██████████| 19/19 [06:49<00:00, 21.57s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer





In [None]:
l_line(induction_maxes, hover_name=labels, log_x=True)


In [None]:
l_line(induction_sums, hover_name=labels, log_x=True)

In [None]:
from circuit_utils import visualize_tensor
visualize_tensor(induction_scores.cpu(), labels, zmin=-1, zmax=1)

IntSlider(value=0, description='Slice:', max=18)

Output()

### Pythia 6.9B

In [10]:
model_name = "pythia-6.9b"

model_full_name = f"EleutherAI/{model_name}"

cache_dir="model_cache"

In [11]:
from transformers import AutoModelForCausalLM

def load_model(model_name):
    if model_name == "EleutherAI/pythia-6.9b" or model_name == "EleutherAI/pythia-12b":
        print("Loading in bfloat16 model")
        source_model = AutoModelForCausalLM.from_pretrained(model_name, cache_dir="model_cache").to('cpu').half()
        print("Loading into HookedTransformer")
        model = HookedTransformer.from_pretrained(
            model_name,
            center_unembed=True,
            center_writing_weights=True,
            fold_ln=True,
            refactor_factored_attn_matrices=False,
            hf_model=source_model,
        )
    else:
        model = HookedTransformer.from_pretrained(
            model_name,
            center_unembed=True,
            center_writing_weights=True,
            fold_ln=True,
            refactor_factored_attn_matrices=False
        )
    return model


In [12]:
model = load_model("EleutherAI/pythia-6.9b")
#model.set_use_hook_mlp_in(True)

Loading in bfloat16 model


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Using pad_token, but it is not set yet.


Loading into HookedTransformer


RuntimeError: "baddbmm_with_gemm" not implemented for 'Half'

In [None]:
from tqdm import tqdm

steps = [2**n for n in range(10)] + [n * 1000 for n in range(1, 10)] # + [n * 1000 for n in range(10, 143, 5)]
labels = [f"Step {n}" for n in steps]
induction_scores = torch.zeros((model.cfg.n_layers, model.cfg.n_heads, len(steps)), device="cuda")
induction_maxes = torch.zeros(len(steps))
induction_sums = torch.zeros(len(steps))

duplication_scores = torch.zeros((model.cfg.n_layers, model.cfg.n_heads, len(steps)), device="cuda")
duplication_maxes = torch.zeros(len(steps))
duplication_sums = torch.zeros(len(steps))

previous_token_scores = torch.zeros((model.cfg.n_layers, model.cfg.n_heads, len(steps)), device="cuda")
previous_token_maxes = torch.zeros(len(steps))
previous_token_sums = torch.zeros(len(steps))

for i in tqdm(range(len(steps))):
    model = HookedTransformer.from_pretrained(
        model_full_name,
        center_unembed=True,
        center_writing_weights=True,
        fold_ln=True,
        #refactor_factored_attn_matrices=True,
        revision=f"step{steps[i]}",
        cache_dir=cache_dir,
    )

    induction_scores[:,:,i], previous_token_scores[:,:,i], duplication_scores[:,:,i] = get_induction_scores(model)
    induction_maxes[i] = induction_scores[:,:,i].max()
    induction_sums[i] = induction_scores[:,:,i].sum()

  0%|          | 0/19 [00:00<?, ?it/s]

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
  5%|▌         | 1/19 [00:20<06:17, 21.00s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 11%|█         | 2/19 [00:44<06:17, 22.19s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 16%|█▌        | 3/19 [00:58<05:02, 18.88s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 21%|██        | 4/19 [01:41<07:02, 28.14s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 26%|██▋       | 5/19 [01:58<05:39, 24.24s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 32%|███▏      | 6/19 [02:19<05:00, 23.12s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 37%|███▋      | 7/19 [02:38<04:19, 21.64s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 42%|████▏     | 8/19 [02:58<03:54, 21.34s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 47%|████▋     | 9/19 [03:22<03:40, 22.01s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 53%|█████▎    | 10/19 [03:46<03:24, 22.69s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 58%|█████▊    | 11/19 [04:05<02:53, 21.63s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 63%|██████▎   | 12/19 [04:40<02:58, 25.47s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 68%|██████▊   | 13/19 [05:00<02:23, 23.90s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 74%|███████▎  | 14/19 [05:30<02:09, 25.85s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 79%|███████▉  | 15/19 [05:51<01:36, 24.21s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 84%|████████▍ | 16/19 [06:00<00:59, 19.78s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 89%|████████▉ | 17/19 [06:12<00:35, 17.52s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
 95%|█████████▍| 18/19 [06:26<00:16, 16.40s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/166M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.
100%|██████████| 19/19 [06:49<00:00, 21.57s/it]

Loaded pretrained model EleutherAI/pythia-70m into HookedTransformer





In [None]:
l_line(induction_maxes, hover_name=labels, log_x=True)


In [None]:
l_line(induction_sums, hover_name=labels, log_x=True)

In [None]:
from circuit_utils import visualize_tensor
visualize_tensor(induction_scores.cpu(), labels, zmin=-1, zmax=1)

IntSlider(value=0, description='Slice:', max=18)

Output()