<a target="_blank" href="https://colab.research.google.com/github/TransformerLensOrg/TransformerLens/blob/main/demos/LLaMA.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

## Setup (skip)

In [1]:
# NBVAL_IGNORE_OUTPUT
# Janky code to do different setup when run in a Colab notebook vs VSCode
import os

DEVELOPMENT_MODE = False
IN_VSCODE = False
IN_GITHUB = os.getenv("GITHUB_ACTIONS") == "true"

try:
    import google.colab
    IN_COLAB = True
    print("Running as a Colab notebook")
except:
    IN_COLAB = False
    print("Running as a Jupyter notebook - intended for development only!")
    from IPython import get_ipython

    ipython = get_ipython()
    # Code to automatically update the HookedTransformer code as its edited without restarting the kernel
    ipython.magic("load_ext autoreload")
    ipython.magic("autoreload 2")
    
# %pip install transformers>=4.31.0 # Llama requires transformers>=4.31.0 and transformers in turn requires Python 3.8
# %pip install sentencepiece # Llama tokenizer requires sentencepiece

if IN_COLAB or IN_GITHUB:
    %pip install torch
    %pip install transformer_lens
    %pip install circuitsvis
    
# Plotly needs a different renderer for VSCode/Notebooks vs Colab argh
import plotly.io as pio
if IN_COLAB or not DEVELOPMENT_MODE:
    pio.renderers.default = "colab"
else:
    pio.renderers.default = "notebook_connected"
print(f"Using renderer: {pio.renderers.default}")

import circuitsvis as cv

Running as a Jupyter notebook - intended for development only!


  ipython.magic("load_ext autoreload")
  ipython.magic("autoreload 2")


Using renderer: colab


In [2]:
# Import stuff
import os
import sys

import torch
import plotly.express as px
from jaxtyping import Float
# Use the notebook-aware progress bar. The original imported `tqdm.auto as tqdm`
# and then immediately shadowed it with the plain `tqdm.tqdm` class.
from tqdm.auto import tqdm
from transformers import (
    AutoTokenizer,
    LlavaNextForConditionalGeneration,
    LlavaNextProcessor,
    AutoModelForCausalLM,
)
# from transformers import ChameleonModel, AutoTokenizer

# Directory added to the import path before importing transformer_lens
# (presumably a local checkout that provides `transformer_lens.HookedLlava` - TODO confirm).
# Override it with the TRANSFORMER_LENS_DIR environment variable; the default is the original path.
TRANSFORMER_LENS_DIR = os.environ.get(
    "TRANSFORMER_LENS_DIR", "/aifs4su/yaodong/changye/TransformerLens"
)
# Guard against duplicate entries when this cell is re-run.
if TRANSFORMER_LENS_DIR not in sys.path:
    sys.path.append(TRANSFORMER_LENS_DIR)

from transformer_lens import HookedTransformer
from transformer_lens.HookedLlava import HookedLlava
import transformer_lens
import transformer_lens.utils as utils
from transformer_lens.hook_points import (
    HookPoint,
)  # Hooking utilities

# Turn off gradient tracking globally for everything that runs after this cell.
torch.set_grad_enabled(False)

def imshow(tensor, renderer=None, xaxis="", yaxis="", **kwargs):
    """Render `tensor` as a heatmap with a diverging "RdBu" colour scale centred on 0.

    Args:
        tensor: Values to plot; converted with `utils.to_numpy` first.
        renderer: Passed straight to the figure's `show` call.
        xaxis: Label used for the "x" axis.
        yaxis: Label used for the "y" axis.
        **kwargs: Forwarded unchanged to `px.imshow`.
    """
    axis_labels = {"x": xaxis, "y": yaxis}
    figure = px.imshow(
        utils.to_numpy(tensor),
        color_continuous_midpoint=0.0,
        color_continuous_scale="RdBu",
        labels=axis_labels,
        **kwargs,
    )
    figure.show(renderer)

def line(tensor, renderer=None, xaxis="", yaxis="", **kwargs):
    """Draw `tensor` as a line plot.

    Args:
        tensor: Values to plot; converted with `utils.to_numpy` first.
        renderer: Passed straight to the figure's `show` call.
        xaxis: Label used for the "x" axis.
        yaxis: Label used for the "y" axis.
        **kwargs: Forwarded unchanged to `px.line`.
    """
    axis_labels = {"x": xaxis, "y": yaxis}
    figure = px.line(utils.to_numpy(tensor), labels=axis_labels, **kwargs)
    figure.show(renderer)

def scatter(x, y, xaxis="", yaxis="", caxis="", renderer=None, **kwargs):
    """Draw a scatter plot of `y` against `x`.

    Args:
        x: Horizontal coordinates; converted with `utils.to_numpy`.
        y: Vertical coordinates; converted with `utils.to_numpy`.
        xaxis: Label used for the "x" axis.
        yaxis: Label used for the "y" axis.
        caxis: Label used for the "color" entry.
        renderer: Passed straight to the figure's `show` call.
        **kwargs: Forwarded unchanged to `px.scatter`.
    """
    x_values = utils.to_numpy(x)
    y_values = utils.to_numpy(y)
    axis_labels = {"x": xaxis, "y": yaxis, "color": caxis}
    figure = px.scatter(y=y_values, x=x_values, labels=axis_labels, **kwargs)
    figure.show(renderer)

In [3]:
# Checkpoint to load. The commented-out alternatives are other checkpoints kept for reference.
# MODEL_PATH = "/aifs4su/yaodong/projects/hantao/dev_cham/align-anything/outputs/0830_4k_sft_flux"
# MODEL_PATH = "/aifs4su/yaodong/models/chameleon-7b-hf"
# MODEL_PATH="/aifs4su/yaodong/projects/hantao/anole/facilitating_image_generation/model/chameleon_hf_0830_4k"
MODEL_PATH = "llava-hf/llava-v1.6-mistral-7b-hf"

# Processor and full LLaVA-NeXT model (loaded in float32) from the same checkpoint.
processor = LlavaNextProcessor.from_pretrained(MODEL_PATH)
vision_model = LlavaNextForConditionalGeneration.from_pretrained(
    MODEL_PATH,
    low_cpu_mem_usage=True,
    torch_dtype=torch.float32,
)

# Handle on the `language_model` attribute of the loaded model.
hf_model = vision_model.language_model

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [4]:
# Move the vision tower and the multi-modal projector to cuda:1.
vision_tower = vision_model.vision_tower.to("cuda:1")
multi_modal_projector = vision_model.multi_modal_projector.to("cuda:1")

# Build the hooked language model on cuda:1 as well, with no LayerNorm folding
# and no weight/unembed centering.
hook_language_model = HookedLlava.from_pretrained(
    MODEL_PATH,
    hf_model=vision_model.language_model,
    vision_tower=vision_tower,
    multi_modal_projector=multi_modal_projector,
    device="cuda:1",
    dtype=torch.float32,
    tokenizer=None,
    fold_ln=False,
    center_writing_weights=False,
    center_unembed=False,
)

Loaded pretrained model llava-hf/llava-v1.6-mistral-7b-hf into HookedTransformer


In [5]:
from PIL import Image
def process_image_and_generate_response(
    processor,
    vision_model,
    image_path,
    device="cuda:1",
    question="What is shown in this image?",
):
    """Build the processor inputs for a single-image chat prompt.

    Despite its name, this function does not generate anything: it only
    prepares the inputs and returns them.

    Args:
        processor: Object providing `apply_chat_template(...)` and a
            `__call__(images=..., text=..., return_tensors=...)` method.
        vision_model: Unused; kept so existing positional callers keep working.
        image_path: Path of the image file to load.
        device: Device the processed inputs are moved to.
        question: Text of the user turn that accompanies the image.

    Returns:
        The processor output after `.to(device)`.
    """
    conversation = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": question},
                {"type": "image"},
            ],
        },
    ]
    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)

    # Open the image in a context manager so the file handle is released
    # as soon as the processor has consumed the pixel data.
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt, return_tensors="pt")

    return inputs.to(device)
# Image file fed through the processor; `inputs` is reused by later cells.
image_path = "/aifs4su/yaodong/changye/TransformerLens/IMG_20230213_181559.jpg"
inputs = process_image_and_generate_response(
    processor=processor,
    vision_model=vision_model,
    image_path=image_path,
)

In [7]:
# `get_embedding` returns four values for the processed inputs; print each on its own line.
(
    inputs_embeds,
    input_ids,
    position_ids,
    attention_mask,
) = hook_language_model.get_embedding(inputs)
print(inputs_embeds, input_ids, position_ids, attention_mask, sep="\n")

tensor([[[-4.5471e-03,  3.5286e-04, -5.1575e-03,  ...,  4.0770e-05,
          -1.0300e-03, -1.2779e-04],
         [-4.4861e-03, -5.1975e-05, -1.8234e-03,  ...,  2.4128e-04,
           4.0588e-03,  4.3678e-04],
         [ 1.5068e-04,  3.4142e-04, -2.4261e-03,  ..., -2.5787e-03,
           3.2501e-03, -2.8687e-03],
         ...,
         [-8.0490e-04,  1.2512e-03, -6.4850e-04,  ...,  3.5667e-04,
          -4.1809e-03,  1.8692e-04],
         [ 1.5068e-04,  3.4142e-04, -2.4261e-03,  ..., -2.5787e-03,
           3.2501e-03, -2.8687e-03],
         [-4.0283e-03, -1.8082e-03,  3.7384e-03,  ..., -2.1839e-04,
          -1.0376e-03,  3.2959e-03]]], device='cuda:1')
tensor([[    1,   733, 16289,  ..., 28748, 16289, 28793]], device='cuda:1')
tensor([[   0,    1,    2,  ..., 2367, 2368, 2369]], device='cuda:1')
tensor([[1, 1, 1,  ..., 1, 1, 1]], device='cuda:1')


In [9]:

# Look up the same token ids in the Hugging Face model's input-embedding layer.
# The ids are moved to whatever device that layer's weights live on, not a
# hard-coded "cuda:1": earlier cells only move the vision tower and the
# projector explicitly, so the embedding layer may still be on another device.
embed_tokens = vision_model.get_input_embeddings()
device = embed_tokens.weight.device
hf_model_embeds = embed_tokens(input_ids.to(device))
print(hf_model_embeds)

TypeError: embedding(): argument 'indices' (position 2) must be Tensor, not BatchFeature

## TransformerLens Demo

### Reading from hooks

In [None]:
# `model` is not defined anywhere earlier in this notebook, so on a fresh kernel
# this cell raised NameError. Alias the hooked model loaded above.
# NOTE(review): assumes HookedLlava exposes the HookedTransformer text API
# (`to_tokens`, `to_str_tokens`, `run_with_cache`) - confirm.
model = hook_language_model

llama_text = "Natural language processing tasks, such as question answering, machine translation, reading comprehension, and summarization, are typically approached with supervised learning on taskspecific datasets."
llama_tokens = model.to_tokens(llama_text)
# remove_batch_dim=True is passed so the cached activations are indexed without a batch axis.
llama_logits, llama_cache = model.run_with_cache(llama_tokens, remove_batch_dim=True)

# Attention pattern of layer 0, shown with one string label per token.
attention_pattern = llama_cache["pattern", 0, "attn"]
llama_str_tokens = model.to_str_tokens(llama_text)

print("Layer 0 Head Attention Patterns:")
display(cv.attention.attention_patterns(tokens=llama_str_tokens, attention=attention_pattern))

### Writing to hooks

In [None]:
# Layer whose "v" activation gets the ablation hook attached below.
layer_to_ablate = 0
# Index along the third axis (head_index) of the hooked tensor that head_ablation_hook zeroes.
head_index_to_ablate = 31

# Head ablation hook.
# The type annotations are optional; they are only there as a guide for the reader.
def head_ablation_hook(
    value: Float[torch.Tensor, "batch pos head_index d_head"],
    hook: HookPoint,
) -> Float[torch.Tensor, "batch pos head_index d_head"]:
    """Zero out one attention head in the hooked activation.

    The head is selected by the module-level `head_index_to_ablate`. The
    tensor is modified in place and the same object is returned.

    Args:
        value: Hooked activation, indexed as (batch, pos, head_index, d_head).
        hook: The hook point this function is attached to (unused).

    Returns:
        `value`, with the selected head's slice set to zero.
    """
    print(f"Shape of the value tensor: {value.shape}")
    value[:, :, head_index_to_ablate, :] = 0.0
    return value

# Loss without any hook, then loss with head_ablation_hook attached to the
# "v" activation of layer `layer_to_ablate`.
original_loss = model(llama_tokens, return_type="loss")
ablated_loss = model.run_with_hooks(
    llama_tokens,
    return_type="loss",
    fwd_hooks=[(utils.get_act_name("v", layer_to_ablate), head_ablation_hook)],
)
print(f"Original Loss: {original_loss.item():.3f}")
print(f"Ablated Loss: {ablated_loss.item():.3f}")