In [1]:
import einops
import torch
import numpy as np
import matplotlib.pyplot as plt
from transformer_lens import HookedTransformer, HookedTransformerConfig, utils


In [2]:
# model parameter constants
N_LAYERS = 1
N_HEADS = 1
D_MODEL = 32
D_HEAD = 32
D_MLP = None
D_VOCAB = 64
SEED = 123
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

Generating lists of a fixed length parameter

In [3]:
# data generation
import random

FIXED_LENGTH = 3

def generateLists(n):
    output = []
    for _ in range(n):
        curr = []
        for _ in range(FIXED_LENGTH):
            curr.append(random.randint(0, D_VOCAB - 1))

        # maximum = max(curr)
        # output.append((curr, maximum))
        output.append(curr)

    output = torch.tensor(output)
    return output

Model paramters. We are using one layer, one attention head (which pays to the 
tokens in contex to another), the dimensions of the model, dimension of the head,
vocab is the size of the logits

In [11]:
# model setup
cfg = HookedTransformerConfig(
    d_model=D_MODEL,
    n_layers=N_LAYERS,
    n_heads=N_HEADS,
    d_head=D_HEAD,
    n_ctx=FIXED_LENGTH,
    d_vocab=D_VOCAB,
    act_fn="relu",
    seed=SEED,
    device=DEVICE,
    attn_only=True
)

# hooked transformer used for interpretation later
model = HookedTransformer(cfg, move_to_device=True)

In [22]:
def loss_function(logits, tokens, return_per_token=True, print_tokens=False):
    # we take the last element of the logits to make the next prediction
    logits = logits[:, -1, :]
    answer = torch.max(tokens, dim=1)[0]
    log_prob = logits.log_softmax(-1)
    if print_tokens:
        print("tokens", tokens)
        print("predicted", torch.argmax(logits, dim=-1))
    # shape is (batch_size, 1) which represents probabilities 
    # of the correct answer
    output_prob = log_prob.gather(-1, answer.unsqueeze(-1))
    if return_per_token:
        return -1 * output_prob.squeeze()
    return -1 * output_prob.mean()

In [13]:
def accuracy(logits, tokens, return_per_token=False):
    logits = logits[:, -1, :]
    predicted = torch.argmax(logits, dim=1)
    answer = torch.max(tokens, dim=1)[0]
    if return_per_token:
        return (predicted == answer).float()
    return (predicted == answer).float().mean().item()

In [14]:
x = torch.tensor([[1], [2], [3], [4]])

In [23]:
def train_model(model, n_epochs, batch_size, batches_per, sequence_length=2):
    lr = 1e-3
    betas = (0.9, 0.999)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=betas)

    train_losses = []
    for epoch in range(n_epochs):
        epoch_losses = []
        for _ in range(batches_per):
            tokens = generateLists(batch_size)
            logits = model(tokens)
            # print(tokens.shape)
            # print(logits.shape)
            losses = loss_function(logits, tokens, print_tokens=True)
            losses.mean().backward()
            optimizer.step()
            optimizer.zero_grad()
            epoch_losses.extend(losses.detach())

        train_losses.append(np.mean(epoch_losses))
        if epoch % 10 == 0:
            print(f"Epoch {epoch}, train loss: {train_losses[-1]}")

    model.eval()
    test_data = generateLists(1280)
    logits = model(test_data)
    acc = accuracy(logits, test_data, return_per_token=False)

    print(f"Test accuracy: {acc}")

    return losses
                

In [24]:
losses = train_model(model, 1, 128, 10, 3)

tokens tensor([[31, 53, 37],
        [ 4, 29, 44],
        [33,  2, 38],
        [ 9, 17, 27],
        [13, 19, 12],
        [24, 52,  9],
        [29, 44, 58],
        [ 9, 35, 60],
        [24, 35, 62],
        [30, 56, 31],
        [ 1, 15, 23],
        [19, 49, 59],
        [60, 63, 50],
        [59, 14,  4],
        [17, 47, 30],
        [32, 48, 52],
        [32, 63, 59],
        [33, 61, 36],
        [ 4,  5, 51],
        [37, 32, 42],
        [59, 46, 15],
        [59, 63, 35],
        [31, 21, 31],
        [52, 51, 29],
        [41, 50, 61],
        [53, 10,  3],
        [52,  8, 36],
        [37, 28, 27],
        [39, 16, 26],
        [ 3, 21, 40],
        [54, 47, 49],
        [59, 57, 24],
        [43, 28, 42],
        [49, 58, 58],
        [54,  1, 25],
        [ 8, 10, 35],
        [32,  2, 28],
        [17, 17, 12],
        [35, 44, 35],
        [52, 60, 51],
        [44, 37,  7],
        [24, 31, 43],
        [42, 48, 14],
        [21, 53,  2],
        [53, 31,  0],
   