# Transformer: eigenestimation on TinyStories-1M






In [1]:
import sys
import os 
import gc

# Put the parent of the current working directory on sys.path so that the
# project packages imported below can be found (presumably they live there).
# The path is resolved to an absolute one: `os.path.expanduser('..')` only
# expands a leading `~`, so it returned '..' unchanged and left a
# cwd-relative entry on sys.path.
parent_dir = os.path.abspath(os.pardir)
# Avoid stacking a duplicate entry each time this cell is re-run.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)



# Standard library imports
import importlib
import gc
import copy

# Third-party imports
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
import einops
import matplotlib.pyplot as plt
from transformers import GPT2Model, GPT2Config, GPT2Tokenizer
from datasets import load_dataset
from transformer_lens.utils import tokenize_and_concatenate

from transformers import AutoModelForCausalLM, AutoTokenizer, GPT2LMHeadModel
import transformer_lens


# Re-import each project module and reload it, so that edits made on disk
# since the kernel started are picked up without restarting the kernel.
for _module_name in (
    'eigenestimation.eigenhora',
    'eigenestimation.loss',
    'eigenestimation.train',
    'evaluation.examples',
    'toy_models.transformer_wrapper',
    'eigenestimation.utils',
):
    _loaded_module = importlib.import_module(_module_name)
    importlib.reload(_loaded_module)



from eigenestimation.eigenhora import EigenHora
from eigenestimation import loss
from eigenestimation.train import Train
from evaluation.examples import TopActivatingTexts
from toy_models import transformer_wrapper
from eigenestimation.utils import TransformDataLoader, DeleteParams, RetrieveWandBArtifact

# Use the GPU when one is available; otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

## Set up

In [2]:
# @title Load the pretrained TinyStories-1M model
# Second-order gradients have to be computed through the transformer, and the
# fused attention kernels (FlashAttention and memory-efficient attention) must
# be switched off for that to work.
for _set_fused_kernel in (
    torch.backends.cuda.enable_flash_sdp,
    torch.backends.cuda.enable_mem_efficient_sdp,
):
    _set_fused_kernel(False)

# Keep the plain math kernel switched on (True is already its default).
torch.backends.cuda.enable_math_sdp(True)


# Pretrained model as a HookedTransformer, plus the tokenizer it is paired with here.
tinystories_1m = transformer_lens.HookedTransformer.from_pretrained(
    "roneneldan/TinyStories-1M"
)
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-125M")
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token
transformer_model = transformer_wrapper.TransformerWrapper(tinystories_1m, tokenizer)


# Show every parameter tensor of the wrapped model together with its element count.
print( [(name, param.numel()) for name, param in transformer_model.named_parameters()])


# Make the eigenestimation smaller by looking at only a subset of the parameters:
# tensors whose name contains one of the substrings in `params_to_keep` stay as
# parameters, and every other tensor name is handed to DeleteParams.
# NOTE(review): the original comment says the removed tensors become frozen
# buffers; DeleteParams is defined elsewhere, so confirm there.
params_to_keep = ('transformer.blocks.3.attn.W_K',)
all_param_names = [name for name, _ in transformer_model.named_parameters()]
params_to_delete = [
    name for name in all_param_names
    if not any(keep in name for keep in params_to_keep)
]

# Fail loudly if the keep-pattern matched nothing: otherwise every parameter
# would be deleted and there would be nothing left to analyse.
if len(params_to_delete) == len(all_param_names):
    raise ValueError(
        f"None of the model parameters match params_to_keep={params_to_keep!r}; "
        "refusing to delete every parameter."
    )

DeleteParams(transformer_model, params_to_delete)

# Report what is left: total element count, then name, shape and size per tensor.
print(sum(p.numel() for p in transformer_model.parameters()))
for n,p in transformer_model.named_parameters(): print(n, p.shape, p.numel())

# Load the first 1% of the TinyStories validation split and tokenize it with the
# wrapped model's tokenizer (max_length=8, no BOS token added); keep the
# 'tokens' field of the result.
dataset = load_dataset('roneneldan/TinyStories', split="validation[:1%]")
tokenized_dataset = tokenize_and_concatenate(
    dataset,
    transformer_model.tokenizer,
    max_length=8,
    add_bos_token=False,
)
X_transformer = tokenized_dataset['tokens']
print(X_transformer.shape)

Loaded pretrained model roneneldan/TinyStories-1M into HookedTransformer
[('transformer.embed.W_E', 3216448), ('transformer.pos_embed.W_pos', 131072), ('transformer.blocks.0.attn.W_Q', 4096), ('transformer.blocks.0.attn.W_O', 4096), ('transformer.blocks.0.attn.b_Q', 64), ('transformer.blocks.0.attn.b_O', 64), ('transformer.blocks.0.attn.W_K', 4096), ('transformer.blocks.0.attn.W_V', 4096), ('transformer.blocks.0.attn.b_K', 64), ('transformer.blocks.0.attn.b_V', 64), ('transformer.blocks.0.mlp.W_in', 16384), ('transformer.blocks.0.mlp.b_in', 256), ('transformer.blocks.0.mlp.W_out', 16384), ('transformer.blocks.0.mlp.b_out', 64), ('transformer.blocks.1.attn.W_Q', 4096), ('transformer.blocks.1.attn.W_O', 4096), ('transformer.blocks.1.attn.b_Q', 64), ('transformer.blocks.1.attn.b_O', 64), ('transformer.blocks.1.attn.W_K', 4096), ('transformer.blocks.1.attn.W_V', 4096), ('transformer.blocks.1.attn.b_K', 64), ('transformer.blocks.1.attn.b_V', 64), ('transformer.blocks.1.mlp.W_in', 16384), ('

In [24]:
# Sanity check: have the pretrained model continue a short prompt.
# NOTE(review): no random seed is set in this notebook, so the continuation
# may differ between runs — confirm whether generation samples by default.
tinystories_1m.generate('There was a boy.')

  0%|          | 0/10 [00:00<?, ?it/s]

'There was a boy. He was a boy who loved to play. One'

## Eigenestimation

In [10]:
# Run the garbage collector and release PyTorch's cached CUDA memory before
# the eigenmodel is built below.
gc.collect()
torch.cuda.empty_cache()

def transformer_model0(y):
    """Return a uniform distribution over the last dimension, shaped like ``y``.

    The values in ``y`` are ignored: a tensor of ones with the same shape,
    dtype and device is built and softmaxed over ``dim=-1``, so every entry
    equals ``1 / y.shape[-1]``.
    """
    constant_logits = torch.ones_like(y)
    return torch.softmax(constant_logits, dim=-1)


# EigenHora hyperparameters (passed positionally to the constructor below).
hora_features, hora_rank = 100, 1

# Build the eigenmodel around the wrapped transformer, with the uniform
# baseline `transformer_model0` and a KL-divergence loss, then move it to `device`.
kl_loss = loss.KLDivergenceLoss()
eigenmodel = EigenHora(
    transformer_model,
    transformer_model0,
    kl_loss,
    hora_features,
    hora_rank,
    device=device,
).to(device)

# Interleaved, non-overlapping splits: every 10th sequence starting at row 0
# for training and starting at row 1 for evaluation, first 8 tokens of each.
train_tokens = X_transformer[::10, :8]
eval_tokens = X_transformer[1::10, :8]
dataloader = TransformDataLoader(
    train_tokens, batch_size=8, transform_fn=eigenmodel.compute_jacobian
)
eval_dataloader = TransformDataLoader(
    eval_tokens, batch_size=8, transform_fn=eigenmodel.compute_jacobian
)

# The run name is reused later to locate the logged W&B artifact, so its
# spelling is kept exactly as-is.
run_name =  'tiny_stores_transformer_1m'
Train(
    eigenmodel,
    dataloader,
    lr=.01,
    n_epochs=10,
    L0_penalty=.01,
    device=device,
    project_name='eigenestimation',
    run_name=run_name,
    eval_fns={TopActivatingTexts: [3]},
    eval_dataloader=eval_dataloader,
)


Epoch 0 : 113.981,  Reconstruction Loss: 113.981,  Sparsity Loss: 0.000
Epoch 1 : 112.240,  Reconstruction Loss: 112.240,  Sparsity Loss: 0.000
Epoch 2 : 113.214,  Reconstruction Loss: 113.214,  Sparsity Loss: 0.000
Epoch 3 : 112.593,  Reconstruction Loss: 112.593,  Sparsity Loss: 0.000
Epoch 4 : 114.226,  Reconstruction Loss: 114.226,  Sparsity Loss: 0.000
Epoch 5 : 112.661,  Reconstruction Loss: 112.661,  Sparsity Loss: 0.000
Epoch 6 : 112.481,  Reconstruction Loss: 112.481,  Sparsity Loss: 0.000
Epoch 7 : 112.235,  Reconstruction Loss: 112.235,  Sparsity Loss: 0.000
Epoch 8 : 112.388,  Reconstruction Loss: 112.388,  Sparsity Loss: 0.000
Epoch 9 : 112.795,  Reconstruction Loss: 112.795,  Sparsity Loss: 0.000
evaluating...
TopActivatingTexts


0,1
reconstruction_loss,▇▁▄▂█▂▂▁▂▃
sparsity_loss,█▁▁▁▁▁▁▁▁▁
total_loss,▇▁▄▂█▂▂▁▂▃

0,1
reconstruction_loss,112.79547
sparsity_loss,0.0
total_loss,112.79547


In [11]:
# Fetch the "TopActivatingTexts" artifact logged for this run, then print, for
# each feature, every stored text with its value rounded to 5 decimals.
artifact_project_path = f"brianna-chrisman-2024/Eigenestimation/eigenestimation_{run_name}"
top_texts = RetrieveWandBArtifact(
    project_path=artifact_project_path,
    metric_name="TopActivatingTexts",
)
for feature_idx in top_texts:
    print(f'-----f{feature_idx}------')
    feature_examples = top_texts[feature_idx]
    # Each entry is a 4-tuple; only the first (value) and last (text) items are shown.
    for activation, _, _, snippet in feature_examples:
        print(f'{snippet}->{round(activation, 5)}')

[34m[1mwandb[0m:   1 of 1 files downloaded.  


/root/workspace/eigenestimation/notebooks/artifacts/eigenestimation_tiny_stores_transformer_1m_TopActivatingTexts:v5
-----f0------
 with Max* and* have fun. They ran->0.0
 Max* and* Sue knew they had to help->0.0
 children in the classroom. The end*.*->0.0
-----f1------
 at mom. He looked* at* dad.->0.0
newline"Tom* and* Anna, you are->0.0
 Lily and Ben. They looked* at* Mom->0.0
-----f2------
newlinenewlineBut then Lily remembered what* her*->0.0
 and lots* of* fun.Once upon a->0.0
. What should they do?*newline*newline->0.0
-----f3------
 and their owners. *newline*newlineSuddenly->0.0
 pick them* and* hold them close to her->0.0
, he found a new truck* that*->0.0
-----f4------
.newlinenewlineWhen he got to* the*->0.0
 it. The ball goes faster* and* faster->0.0
, Jack went outside to play* and* saw->0.0
-----f5------
 mom bought him a trumpet*,* but he->0.0
. She says, "I* love* you->0.0
 talk. His mom found him* and* cried->0.0
-----f6------
 bear. They tell them that they* have*->0