# microsoft/phi Demo

Imports

In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformer_lens import HookedTransformer
import gc

Set the model name (e.g. microsoft/phi-1, microsoft/phi-1_5 or microsoft/phi-2)

In [2]:
# Inference-only notebook: switch autograd off for the whole session.
torch.autograd.set_grad_enabled(False)

# Checkpoint identifier used for both the Hugging Face and the TransformerLens load.
model_name = "microsoft/phi-2"

Load in model from Hugging Face

In [3]:
# Reference implementation, loaded in full (float32) precision.
# NOTE(review): trust_remote_code=True lets code shipped in the model repository
# execute locally -- confirm it is still needed for this checkpoint.
hf_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,
    trust_remote_code=True,
)

# Slow (non-fast) tokenizer, asked to add a BOS token to every encoded prompt.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    add_bos_token=True,
    use_fast=False,
    trust_remote_code=True,
)

# Put the model in eval mode on the GPU; as the last expression of the cell
# this also displays the module tree.
hf_model.eval().cuda()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


PhiForCausalLM(
  (model): PhiModel(
    (embed_tokens): Embedding(51200, 2560)
    (embed_dropout): Dropout(p=0.0, inplace=False)
    (layers): ModuleList(
      (0-31): 32 x PhiDecoderLayer(
        (self_attn): PhiAttention(
          (q_proj): Linear(in_features=2560, out_features=2560, bias=True)
          (k_proj): Linear(in_features=2560, out_features=2560, bias=True)
          (v_proj): Linear(in_features=2560, out_features=2560, bias=True)
          (dense): Linear(in_features=2560, out_features=2560, bias=True)
          (rotary_emb): PhiRotaryEmbedding()
        )
        (mlp): PhiMLP(
          (activation_fn): NewGELUActivation()
          (fc1): Linear(in_features=2560, out_features=10240, bias=True)
          (fc2): Linear(in_features=10240, out_features=2560, bias=True)
        )
        (input_layernorm): LayerNorm((2560,), eps=1e-05, elementwise_affine=True)
        (resid_dropout): Dropout(p=0.1, inplace=False)
      )
    )
    (final_layernorm): LayerNorm((2560,),

Demo inputs

In [4]:
# Demo prompt; the leading and trailing newlines are part of the string.
text = '''
TransformerLens lets you load in 50+ different open source language models,
and exposes the internal activations of the model to you. You can cache
any internal activation in the model, and add in functions to edit, remove
or replace these activations as the model runs.
'''

# Tokenise to PyTorch tensors, then move the token ids onto the GPU.
encoded_prompt = tokenizer(text, return_tensors='pt')
input_ids = encoded_prompt['input_ids'].cuda()

Generate Hugging Face outputs

In [5]:
# Let the Hugging Face model continue the prompt. max_length=200 is passed to
# generate() as the length cap (per the HF docs it counts prompt + new tokens).
with torch.no_grad():
    outputs = hf_model.generate(input_ids, max_length=200)

# Store the continuation under its own name so the prompt held in `text`
# is not overwritten by this cell.
generated_text = tokenizer.batch_decode(outputs)[0]
print(generated_text)

<|endoftext|>
TransformerLens lets you load in 50+ different open source language models,
and exposes the internal activations of the model to you. You can cache
any internal activation in the model, and add in functions to edit, remove
or replace these activations as the model runs.

TransformerLens is written in Python 3.6, and uses the HuggingFace Transformers
library.

TransformerLens is released under the MIT License.
"""

import os
import sys
import json
import logging
import argparse
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data.distributed import DistributedSampler
from torch.utils.data.dataset import Dataset
from torch.utils.data.sampler import RandomSampler
from torch.utils.


Store Hugging Face model logits and resid_pre cache

In [6]:
# One forward pass yields everything needed from the reference model:
#   - logits
#   - per-layer hidden states (requested via output_hidden_states=True)
#   - the next-token loss (computed by the model because labels are supplied)
# Running the model twice to get the loss separately is unnecessary.
with torch.no_grad():
    hf_outputs = hf_model(input_ids, labels=input_ids, output_hidden_states=True)

# Copy the results to the CPU so they outlive the GPU model.
hf_logits_cpu = hf_outputs["logits"].cpu()
hf_resid_pre_cache_cpu = [hidden.cpu() for hidden in hf_outputs["hidden_states"]]
hf_loss_cpu = hf_outputs.loss.cpu()

# Drop the model and its GPU-resident outputs, then release cached CUDA memory
# before the next model is loaded.
del hf_model
del hf_outputs
gc.collect()
torch.cuda.empty_cache()

Load in Hooked Phi

In [7]:
# Load the same checkpoint into TransformerLens, reusing the tokenizer from above.
# All three weight-rewriting options are switched off -- presumably so that
# intermediate activations stay directly comparable with the Hugging Face
# model (TODO confirm).
hooked_phi = HookedTransformer.from_pretrained(
    model_name,
    tokenizer=tokenizer,
    fold_ln=False,
    fold_value_biases=False,
    center_writing_weights=False,
)



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loaded pretrained model microsoft/phi-2 into HookedTransformer


Store Hooked Phi logits and resid_pre cache

In [8]:
# A single cached forward pass with return_type="both" returns (logits, loss)
# together with the activation cache, so no second pass is needed for the loss.
with torch.no_grad():
    (hooked_phi_logits, hooked_phi_loss), hook_phi_cache = hooked_phi.run_with_cache(
        input_ids, return_type="both"
    )

# Copy everything that later cells read onto the CPU.
hooked_phi_loss_cpu = hooked_phi_loss.cpu()
hooked_phi_logits_cpu = hooked_phi_logits.detach().cpu()
hook_phi_cache_cpu = {k: v.cpu() for k, v in hook_phi_cache.items()}

# Keep the layer count: the model object itself is deleted just below.
n_layers = hooked_phi.cfg.n_layers

del hooked_phi
del hooked_phi_logits
del hook_phi_cache
del hooked_phi_loss

gc.collect()
torch.cuda.empty_cache()

Compare Logits

In [9]:
# Remove the mean over the last axis from the HF logits before comparing.
# NOTE(review): presumably needed because TransformerLens centres the
# unembedding by default -- confirm.
hf_logits_mean = hf_logits_cpu.mean(-1, keepdim=True)
centered_hf_logits = hf_logits_cpu - hf_logits_mean

# Element-wise gap between the two implementations, computed once.
logit_gap = hooked_phi_logits_cpu - centered_hf_logits

mean_diff = logit_gap.mean()
print("avg logits difference:", mean_diff.item())

max_diff = logit_gap.abs().max()
print("max logits difference:", max_diff.item())

avg logits difference: -4.644679307830302e-08
max logits difference: 0.0026350021362304688


Compare resid_pre activations

In [10]:
# Compare the residual stream entering every block (TransformerLens
# `blocks.{l}.hook_resid_pre`) with the Hugging Face hidden state at the same index.
#
# use_loose_bound selects the tolerance:
#   True  -> atol = rtol = 1e-2
#   False -> atol = rtol = 1e-5
# The comparison always runs, so "All layers match" is only printed after
# every layer has actually been checked at the selected tolerance.
use_loose_bound = True
atol = rtol = 1e-2 if use_loose_bound else 1e-5

print("*"*5, "Matching hf and T-Lens residual stream in between transformer blocks", "*"*5)
print("*"*5, f"\ttesting with {atol=} and {rtol=}\t","*"*5)

all_layers_match = True
for l in range(n_layers):
    tl_resid_pre = hook_phi_cache_cpu[f'blocks.{l}.hook_resid_pre']
    hf_resid_pre = hf_resid_pre_cache_cpu[l]
    try:
        torch.testing.assert_close(tl_resid_pre, hf_resid_pre, atol=atol, rtol=rtol)
    except AssertionError:
        # Only a failed closeness check is reported here; any other error
        # (missing cache key, interrupt, ...) propagates instead of being hidden.
        max_diff = (tl_resid_pre - hf_resid_pre).abs().max()
        print(f"layer {l} \t not close, max difference: {max_diff}")
        all_layers_match = False

if all_layers_match:
    print(f"All layers match with {atol=} {rtol=}")

***** Matching hf and T-Lens residual stream in between transformer blocks *****
***** 	testing with atol=0.01 and rtol=0.01	 *****
All layers match with atol=0.01 rtol=0.01


In [11]:
# Report both next-token losses and the absolute gap between them.
tl_loss_value = hooked_phi_loss_cpu.item()
hf_loss_value = hf_loss_cpu.item()
loss_gap = (hf_loss_cpu - hooked_phi_loss_cpu).abs()

print("T-Lens next token loss:", tl_loss_value)
print("HF next token loss:", hf_loss_value)
print("diff in loss (abs):", loss_gap.item())

T-Lens next token loss: 3.6289827823638916
HF next token loss: 3.6289584636688232
diff in loss (abs): 2.4318695068359375e-05
