In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from typing import Tuple

from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
import numpy as np

In [8]:
# Hub checkpoint id shared by the tokenizer, model and config loaders below.
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

# The tokenizer and the weights are loaded independently of each other.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="bfloat16",  # request bfloat16 weights
    device_map="auto",       # device placement is left to the library
)

# Last expression of the cell: its repr (the checkpoint config) is the cell output.
AutoConfig.from_pretrained(model_name)

Qwen2Config {
  "_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
  "architectures": [
    "Qwen2ForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 151643,
  "eos_token_id": 151643,
  "hidden_act": "silu",
  "hidden_size": 1536,
  "initializer_range": 0.02,
  "intermediate_size": 8960,
  "max_position_embeddings": 131072,
  "max_window_layers": 21,
  "model_type": "qwen2",
  "num_attention_heads": 12,
  "num_hidden_layers": 28,
  "num_key_value_heads": 2,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 10000,
  "sliding_window": null,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.45.2",
  "use_cache": true,
  "use_mrope": false,
  "use_sliding_window": false,
  "vocab_size": 151936
}

In [None]:
# Tokenize the chat prompt once: it is identical for every sample, so rebuilding
# it on each loop iteration was redundant work. `inputs` stays defined at cell
# scope because later cells read it.
inputs = tokenizer.apply_chat_template(
    [
        {"role": "user", "content": "Can penguins fly? Segment the thinking process into clear steps and indicate \"YES\" or \"NO\" once at the end ."},
    ],
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)  # follow the model's own device rather than hard-coding "mps"

# Generate and print five completions of the same prompt.
for sample_idx in range(5):
    outputs = model.generate(input_ids=inputs, max_new_tokens=1200)
    print(tokenizer.decode(outputs[0]))

RuntimeError: _share_filename_: only available on CPU

In [14]:
def gather_residual_activations(model, target_layer, inputs):
    """Run one forward pass and capture the input of a single decoder layer.

    A temporary forward hook is attached to ``model.model.layers[target_layer]``.
    The hook records the first positional argument that layer is called with
    (treated here as the residual stream entering the layer).

    Args:
        model: Module exposing ``model.layers`` (an indexable collection of
            sub-modules) and callable as ``model(inputs)``.
        target_layer: Index into ``model.model.layers`` of the layer to observe.
        inputs: Value passed unchanged to ``model(...)``.

    Returns:
        The first positional argument received by the target layer during the
        forward pass, or ``None`` if that layer was never invoked.

    Raises:
        Whatever the forward pass raises. The hook is removed in that case too,
        so a failed call does not leave a stale hook on the model.
    """
    target_act = None

    def gather_target_act_hook(mod, layer_args, layer_output):
        # Parameter names deliberately differ from the enclosing function's
        # `inputs` so the outer argument is not shadowed.
        nonlocal target_act
        target_act = layer_args[0]  # Get residual stream from layer input
        # Returning None leaves the layer's output untouched.

    handle = model.model.layers[target_layer].register_forward_hook(gather_target_act_hook)
    try:
        with torch.no_grad():
            model(inputs)
    finally:
        # Always detach the hook, even when the forward pass fails; otherwise
        # every failed re-run of the cell stacks another hook on the layer.
        handle.remove()
    return target_act

In [15]:
# Capture what is fed into model.model.layers[11] for the tokenized prompt.
target_act = gather_residual_activations(model=model, target_layer=11, inputs=inputs)

In [16]:
# Display the token-id tensor that was passed to the forward pass.
inputs

tensor([[151646, 151644,   6713,    281,  55358,  11466,     30,  37103,    279,
           7274,   1882,   1119,   2797,   7354,     13,   2263,   8463,    330,
          14004,      1,    476,    330,   8996,      1,    518,    279,    835,
            315,    279,   7354,     13, 151645, 151648,    198]],
       device='mps:0')

In [17]:
# Shape of the captured activations — presumably (batch, seq_len, hidden_size); confirm against the model config.
target_act.shape

torch.Size([1, 34, 1536])