In [1]:
import torch
import transformer_lens
from transformers import AutoTokenizer, AutoModelForCausalLM

# Inference-only notebook: switch autograd off globally.
torch.set_grad_enabled(False)

# Prefer the GPU, but fall back to CPU so the cell still runs on a machine without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model_name = 'google/gemma-2-2b-it'
# TransformerLens copy of the checkpoint, loaded through the default `from_pretrained` path.
tl_model = transformer_lens.HookedTransformer.from_pretrained(model_name, device=device)

# Hugging Face tokenizer and reference model for the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)
hf_model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

inputs = tokenizer('Hello world', return_tensors="pt").to(device)

# Both models are run on the identical token ids produced by the HF tokenizer.
logits_tl = tl_model(inputs.input_ids, return_type='logits', prepend_bos=False)
logits_hf = hf_model(**inputs).logits

# Last-position logits of the first batch element.
# NOTE(review): this is a *signed* mean, so positive and negative per-token differences
# can cancel; an absolute-value statistic would bound the discrepancy more tightly.
# Trailing comments are the values recorded from the CUDA run saved in this notebook.
print((logits_tl[0, -1] - logits_hf[0, -1]).mean()) # 0.1159
print((logits_hf[0, -1]).min(), (logits_hf[0, -1]).max()) # -19.6916 16.0789



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



Loaded pretrained model google/gemma-2-2b-it into HookedTransformer


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

tensor(0.1159, device='cuda:0')
tensor(-19.6916, device='cuda:0') tensor(16.0789, device='cuda:0')


In [1]:
import torch
import transformer_lens
from transformers import AutoTokenizer, AutoModelForCausalLM

# Inference-only notebook: switch autograd off globally.
torch.set_grad_enabled(False)

# Prefer the GPU, but fall back to CPU so the cell still runs on a machine without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model_name = 'google/gemma-2-2b-it'
# Same experiment as the previous cell, except the TransformerLens model is loaded with
# `from_pretrained_no_processing` instead of `from_pretrained`.
tl_model = transformer_lens.HookedTransformer.from_pretrained_no_processing(model_name, device=device)

# Hugging Face tokenizer and reference model for the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)
hf_model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

inputs = tokenizer('Hello world', return_tensors="pt").to(device)

# Both models are run on the identical token ids produced by the HF tokenizer.
logits_tl = tl_model(inputs.input_ids, return_type='logits', prepend_bos=False)
logits_hf = hf_model(**inputs).logits

# Last-position logits of the first batch element.
# NOTE(review): this is a *signed* mean, so positive and negative per-token differences
# can cancel; an absolute-value statistic would bound the discrepancy more tightly.
# Trailing comments are the values recorded from the CUDA run saved in this notebook.
print((logits_tl[0, -1] - logits_hf[0, -1]).mean()) # 0.1159
print((logits_hf[0, -1]).min(), (logits_hf[0, -1]).max()) # -19.6916 16.0789

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loaded pretrained model google/gemma-2-2b-it into HookedTransformer


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

tensor(0.1159, device='cuda:0')
tensor(-19.6916, device='cuda:0') tensor(16.0789, device='cuda:0')


In [None]:
# Longer placeholder prompt.
# NOTE(review): no cell visible in this notebook reads `text`; the comparisons below
# all use the short 'Hello world' `inputs` — confirm whether this is still needed.
text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."


In [3]:
# Capture the first positional input that Hugging Face decoder layer 0 receives.
# NOTE(review): `forward_with_cache` is defined in a later cell (In [26]), so this cell
# raises NameError on a fresh-kernel, top-to-bottom run.
hf_cache = forward_with_cache(hf_model, 0, inputs)

In [6]:
# Run TransformerLens on the same token ids and keep the activation cache it returns;
# the first return value is discarded.
_, tl_cache = tl_model.run_with_cache(inputs.input_ids, prepend_bos=False)

In [12]:
# Keep only the 'blocks.0.hook_resid_post' entry of the TransformerLens cache.
# NOTE(review): this rebinds `tl_cache` from the cache object to one of its entries, so
# re-running the cell would index that entry with a string key. Also, going by its name
# this hook is block 0's output side, whereas `hf_cache` holds layer 0's input —
# confirm the two are meant to be compared.
tl_cache = tl_cache['blocks.0.hook_resid_post']

In [8]:
# Inspect the shape of the captured Hugging Face activation.
hf_cache.shape

torch.Size([1, 3, 2304])

In [26]:
def forward_with_cache(model, layer, inputs):
    """Run one forward pass of `model` and return what a decoder layer received.

    A temporary forward hook is attached to `model.model.layers[layer]`; it records
    the first positional argument that layer is called with. The hook is always
    detached again, even when the forward pass raises.

    Args:
        model: Module exposing its layer stack as `model.model.layers`.
        layer: Index of the layer in `model.model.layers` to observe.
        inputs: Mapping of keyword arguments, unpacked as `model(**inputs)`.

    Returns:
        The first positional argument passed to the selected layer during the
        forward pass, or None if the hook never fired.
    """
    cache = None

    # `args` is the tuple of positional arguments the layer was called with. It is
    # named differently from the outer `inputs` to avoid shadowing it.
    def hook(module, args, outputs):
        nonlocal cache
        cache = args[0]
        # Returning None leaves the layer's output untouched.

    hook_handle = model.model.layers[layer].register_forward_hook(hook)
    try:
        _ = model(**inputs)
    finally:
        # Without this, a failing forward pass would leave the hook on the model and
        # every later call would keep firing it.
        hook_handle.remove()

    return cache

# Per-layer norm of (HF layer input - TL resid_pre) at the last token position.
resid_pre_diffs = []

# The TransformerLens input does not change between layers, so run the model once and
# cache every layer's resid_pre instead of repeating a full forward pass per layer.
resid_pre_names = [f'blocks.{layer}.hook_resid_pre' for layer in range(tl_model.cfg.n_layers)]
_, tl_resid_pre = tl_model.run_with_cache(inputs.input_ids, prepend_bos=False, names_filter=resid_pre_names)

for layer in range(tl_model.cfg.n_layers):
    # Hugging Face side: first positional input handed to decoder layer `layer`.
    hf_cache = forward_with_cache(hf_model, layer, inputs)
    # TransformerLens side: the matching entry from the single cached run.
    tl_cache = tl_resid_pre[f'blocks.{layer}.hook_resid_pre']
    # Norm of the difference for the first batch element, last position.
    resid_pre_diff = (hf_cache - tl_cache)[0, -1].norm().item()
    resid_pre_diffs.append(resid_pre_diff)


In [27]:
import plotly.express as px
# Line chart of the per-layer difference norms collected in `resid_pre_diffs`.
px.line(
    resid_pre_diffs,
    title='Difference in resid_pre between HF and TL',
    labels={'index': 'Layer', 'value': 'norm of resid pre diff'},
    markers=True,
)

: 

In [22]:
# Norm of the captured HF activation (first batch element, last position), giving a
# scale to read the difference norms against. Which layer this refers to depends on
# the most recent assignment to `hf_cache`.
hf_cache[0, -1].norm()

tensor(679.4432, device='cuda:0')

## Compare gemma-2-2b-it and gemma-2-2b

In [1]:
import torch
import transformer_lens
from transformers import AutoTokenizer, AutoModelForCausalLM

# Inference-only notebook: switch autograd off globally.
torch.set_grad_enabled(False)

# Prefer the GPU, but fall back to CPU so the cell still runs on a machine without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
base_model_name = 'google/gemma-2-2b'
instruct_model_name = 'google/gemma-2-2b-it'

# A single tokenizer (the base checkpoint's) produces the token ids fed to both models.
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
base_model = AutoModelForCausalLM.from_pretrained(base_model_name).to(device)
instruct_model = AutoModelForCausalLM.from_pretrained(instruct_model_name).to(device)
# Alias kept so anything still using the original misspelled name keeps working.
instruct_mode = instruct_model

inputs = tokenizer('Hello world', return_tensors="pt").to(device)

logits_base = base_model(**inputs).logits
logits_instruct = instruct_model(**inputs).logits

# Mean last-position logit of each model (first batch element).
print(logits_base[0, -1].mean(), logits_instruct[0, -1].mean())

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

tensor(1.4324, device='cuda:0') tensor(-6.3065, device='cuda:0')
