In [44]:
# Import stuff
import torch
import torch.nn as nn
import torch.nn.functional as F
import einops
from fancy_einsum import einsum
import tqdm.auto as tqdm
import plotly.express as px
import numpy as np
from dataclasses import dataclass
import matplotlib.pyplot as plt

import os
import datetime
import pickle
import random

from jaxtyping import Float
from functools import partial

import wandb

In [45]:
import transformer_lens

In [46]:
DEVICE="cuda"

In [47]:
N_CTX = 64
D_VOCAB = 11  # 10 digits and a comma
cfg = transformer_lens.HookedTransformerConfig(
    n_layers=2,
    d_model=128,
    n_ctx=N_CTX,
    d_head=32,
    n_heads=4,
    d_mlp=None,
    d_vocab=D_VOCAB,
    act_fn="relu",
    seed=42,
    device=DEVICE,
    attn_only=True,
)
model = transformer_lens.HookedTransformer(cfg, move_to_device=True)

In [48]:
def load_model_state(model: transformer_lens.HookedTransformer, filename: str):
    assert os.path.isdir("models"), "Make a directory `models` with model state dicts"
    if not filename.startswith("models/"):
        filename = f"models/{filename}"
    with open(filename, "rb") as f:
        state_dict = pickle.load(f)
    model.load_state_dict(state_dict)

load_model_state(model, "addition_model_state_dict_2024-06-21T12-55.pkl")

In [49]:
def tokenize(c: str):
    return ord(c) - ord("0") if c.isdigit() else 10  # 10 is comma

def str_to_tokens(seq_str):
    return torch.tensor([tokenize(c) for c in seq_str], device=DEVICE)

In [50]:
tests = []

for _ in range(1000):
    a = random.randint(0, 300)
    b = random.randint(0, 300)
    test_example = f"{str(a).zfill(4)},{str(b).zfill(4)},"
    example_toks = str_to_tokens(test_example)
    if len(example_toks) < cfg.n_ctx:
        example_toks = F.pad(example_toks, (cfg.n_ctx - len(example_toks), 0), value=10)

    expected = a + b
    tests.append((example_toks, expected))

len(tests), tests[0]

(1000,
 (tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
          10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
          10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
           0,  0,  5,  7, 10,  0,  0,  1,  2, 10], device='cuda:0'),
  69))

In [51]:
correct_count = 0
incorrect = []
for (x, y) in tests:
    example_toks = x
    logits = model(example_toks)[:, -1, :]
    pred = logits.argmax(dim=-1)
    digits_ans = [str(pred[0].item())]

    for _ in range(3):
        example_toks = torch.cat([example_toks[1:], pred], dim=0)
        logits = model(example_toks)[:, -1, :]
        pred = logits.argmax(dim=-1)
        digits_ans.append(str(pred[0].item()))

    if y != int(''.join(digits_ans)):
        incorrect.append((x, y, digits_ans))
    else:
        correct_count += 1
correct_count, incorrect

(952,
 [(tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
           10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
           10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
            0,  0,  1,  3, 10,  0,  2,  8,  7, 10], device='cuda:0'),
   300,
   ['0', '2', '0', '0']),
  (tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
           10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
           10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
            0,  2,  3,  5, 10,  0,  2,  7,  4, 10], device='cuda:0'),
   509,
   ['0', '4', '0', '9']),
  (tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
           10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
           10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
            0,  2,  9,  9, 10,  0,  2, 