In [1]:
# Import necessary libraries
import os
from vllm import LLM, SamplingParams
from vllm.steer_vectors.request import SteerVectorRequest

# Expose only CUDA device 1 to this process
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# Build the generation engine with steer-vector support switched on
llm = LLM(
    model="/data/zju-46/shenyl/hf/model/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/",
    tensor_parallel_size=1,
    enforce_eager=True,
    enable_chunked_prefill=False,
    enable_steer_vector=True,
)

# Math word problems used as test inputs
problems = [
    "Chandra has four bowls.  Each one is a different color (red, blue, yellow, green).  She also has exactly one glass the same color as each bowl.  If she chooses a bowl and a glass from the cupboard, how many pairings are possible?  One such pairing is a blue bowl and a yellow glass.",
    "The distance between two cities on a map is 15 inches. If the scale is 0.25 inches = 3 miles, how many miles apart are the actual cities?",
    "How many prime numbers are between 20 and 30?",
]

# Wrap every problem in the prompt template. The prompt ends with an opening
# <think> tag, so the completion continues from inside that tag.
prompt_prefix = "Please reason step by step, and put your final answer within \\boxed{}.\nUser: "
prompt_suffix = "\nAssistant: <think>"
texts = [prompt_prefix + problem + prompt_suffix for problem in problems]

# Temperature 0, at most 4096 generated tokens, special tokens kept in the text
sampling_params = SamplingParams(
    temperature=0,
    max_tokens=4096,
    skip_special_tokens=False,
)
request_outputs = llm.generate(texts, sampling_params)

# Keep only the text of the first completion of each request
answers = [request_output.outputs[0].text for request_output in request_outputs]

INFO 11-03 14:45:58 [utils.py:253] non-default args: {'disable_log_stats': True, 'enforce_eager': True, 'enable_steer_vector': True, 'enable_chunked_prefill': False, 'model': '/data/zju-46/shenyl/hf/model/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/'}
INFO 11-03 14:45:58 [model.py:657] Resolved architecture: Qwen2ForCausalLM
INFO 11-03 14:45:58 [model.py:1746] Using max model len 131072
INFO 11-03 14:46:00 [scheduler.py:211] Chunked prefill is enabled with max_num_batched_tokens=8192.
INFO 11-03 14:46:00 [vllm.py:414] Cudagraph is disabled under eager mode
[1;36m(EngineCore_DP0 pid=745684)[0;0m INFO 11-03 14:46:01 [core.py:94] Initializing a V1 LLM engine (v0.1.dev10888+g9d4fd0da4.d20251031) with config: model='/data/zju-46/shenyl/hf/model/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/', speculative_config=None, tokenizer='/data/zju-46/shenyl/hf/model/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_r

Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


[1;36m(EngineCore_DP0 pid=745684)[0;0m INFO 11-03 14:46:06 [default_loader.py:314] Loading weights took 1.25 seconds
[1;36m(EngineCore_DP0 pid=745684)[0;0m INFO 11-03 14:46:06 [steer_vector_model_runner_mixin.py:36] Initialized SteerVector worker manager
[1;36m(EngineCore_DP0 pid=745684)[0;0m INFO 11-03 14:46:06 [steer_vector_model_runner_mixin.py:50] Wrapping model with steer vector support
[1;36m(EngineCore_DP0 pid=745684)[0;0m INFO 11-03 14:46:06 [hidden_states_model_runner_mixin.py:90] Wrapped 28 decoder layers for hidden states capture
[1;36m(EngineCore_DP0 pid=745684)[0;0m INFO 11-03 14:46:06 [gpu_model_runner.py:2971] Model loading took 3.3466 GiB and 1.423618 seconds
[1;36m(EngineCore_DP0 pid=745684)[0;0m INFO 11-03 14:46:08 [gpu_worker.py:343] Available KV cache memory: 37.87 GiB
[1;36m(EngineCore_DP0 pid=745684)[0;0m INFO 11-03 14:46:08 [kv_cache_utils.py:1247] GPU KV cache size: 1,418,176 tokens
[1;36m(EngineCore_DP0 pid=745684)[0;0m INFO 11-03 14:46:08 [kv_c

Adding requests:   0%|          | 0/3 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/3 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

In [2]:
from transformers import AutoTokenizer

# Concatenate each prompt with its generated answer to form the full QA text
qa_pairs = [prompt + answers[idx] for idx, prompt in enumerate(texts)]

# Load the tokenizer from the model directory
tokenizer = AutoTokenizer.from_pretrained("/data/zju-46/shenyl/hf/model/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/")

# Suffix that marks a paragraph break in the tokenizer vocabulary
target_suffix = "ĊĊ"  # "\n\n" is tokenized as "ĊĊ"

# For every QA text, keep its token list and the indices of the tokens
# that end with the paragraph-break suffix
all_tokens_list = []
all_newline_positions = []
for qa_text in qa_pairs:
    qa_tokens = tokenizer.tokenize(qa_text, add_special_tokens=True)
    all_tokens_list.append(qa_tokens)

    # The test is a suffix match, so a token that has other characters
    # in front of "ĊĊ" is recorded as well
    break_positions = []
    for tok_idx, tok in enumerate(qa_tokens):
        if isinstance(tok, str) and tok.endswith(target_suffix):
            break_positions.append(tok_idx)
    all_newline_positions.append(break_positions)


In [3]:
# Keyword lists that drive the segment classification
TRANSITION_KEYWORDS = [
    'alternatively',
    'think differently',
    'another way',
    'another approach',
    'another method',
    'another solution',
    'another strategy',
    'another technique',
]

REFLECTION_KEYWORDS = [
    'wait',
    'verify',
    'make sure',
    'hold on',
    'think again',
    "'s correct",
    "'s incorrect",
    'let me check',
    'seems right',
]

def classify_segment(text_segment):
    """
    Label a text segment according to the first keyword list it matches.

    Matching is a case-insensitive substring test. Transition keywords are
    checked before reflection keywords, so a segment that contains both
    kinds is labelled "Transition".

    Args:
        text_segment: String of text to classify

    Returns:
        "Transition", "Reflection", or "Execution" when no keyword matches
    """
    lowered = text_segment.lower()

    # Order matters: the first list with a hit decides the label
    for label, keywords in (
        ("Transition", TRANSITION_KEYWORDS),
        ("Reflection", REFLECTION_KEYWORDS),
    ):
        for keyword in keywords:
            if keyword in lowered:
                return label

    # Nothing matched
    return "Execution"

# Classify every paragraph segment of every QA pair
all_classifications = []

for sample_tokens, break_positions in zip(all_tokens_list, all_newline_positions):
    # A segment starts right after one paragraph-break token and stops before
    # the next one; the last segment runs to the end of the token list.
    # With no break positions the inner loop does not run and the sample
    # gets an empty result list.
    segment_ends = break_positions[1:] + [len(sample_tokens)]

    sample_results = []
    for break_pos, segment_end in zip(break_positions, segment_ends):
        # Turn the token slice back into text
        segment_ids = tokenizer.convert_tokens_to_ids(
            sample_tokens[break_pos + 1:segment_end]
        )
        segment_text = tokenizer.decode(segment_ids, skip_special_tokens=True).strip()

        # The label is stored against the break token that precedes the segment
        sample_results.append({
            "position_in_tokens": break_pos,
            "category": classify_segment(segment_text),
        })

    all_classifications.append(sample_results)

# Report, per QA pair, which token positions fell into each category
print("--- Summary of classification results for all samples ---")

for i, qa_results in enumerate(all_classifications):
    print(f"\n--- Analysis of QA Pair {i+1} ---")

    # One position list per category, in display order
    summary = {label: [] for label in ("Transition", "Reflection", "Execution")}

    for result in qa_results:
        label, token_pos = result["category"], result["position_in_tokens"]
        # Categories outside the three known labels are ignored
        if label not in summary:
            continue
        summary[label].append(token_pos)

    print(f"Transition positions: {summary['Transition']}")
    print(f"Reflection positions: {summary['Reflection']}")
    print(f"Execution positions: {summary['Execution']}")

print("\n" + "="*40)

--- Summary of classification results for all samples ---

--- Analysis of QA Pair 1 ---
Transition positions: [1023, 1218, 1291, 1359, 1545, 1655, 1813, 1886, 1954, 2137, 2315, 2529, 2678, 2822, 2888, 3071, 3220, 3364, 3430, 3613, 3762, 3906, 3972, 4155]
Reflection positions: [240, 371, 427, 490, 644, 795, 896, 953, 1096, 1440, 1618, 1728, 2032, 2210, 2388, 2459, 2602, 2751, 2966, 3144, 3293, 3508, 3686, 3835, 4050]
Execution positions: [154, 299, 563, 749, 1176, 1428, 2020, 2449, 2812, 2954, 3354, 3496, 3896, 4038]

--- Analysis of QA Pair 2 ---
Transition positions: [287, 793, 890]
Reflection positions: [233, 674, 731]
Execution positions: [124, 182, 219, 344, 358, 366, 384, 406, 419, 436, 448, 496, 510, 524, 533, 559, 570, 587, 600, 625, 636, 653, 665, 851, 881, 966, 998, 1021, 1082, 1090, 1131, 1139, 1159, 1173, 1180, 1203, 1212, 1251, 1271, 1288, 1305]

--- Analysis of QA Pair 3 ---
Transition positions: []
Reflection positions: [320, 569]
Execution positions: [150, 512, 670, 695

In [4]:
import gc

# Drop this notebook's reference to the generation engine; the next code cell
# builds a second engine from the same model path.
del llm
# Run a garbage-collection pass. As the last expression of the cell, the value
# returned by gc.collect() (the number of unreachable objects found) is what
# the cell displays.
# NOTE(review): presumably intended to release the first engine's memory
# before the second engine is created — confirm that it has that effect.
gc.collect()

120

In [5]:
# Import hidden states module to extract model activations
import easysteer.hidden_states as hs

# Build a second engine for the same model, this time with task="embed"
# instead of text generation. Prefix caching and chunked prefill are both
# turned off.
llm = LLM(
    task="embed",
    model="/data/zju-46/shenyl/hf/model/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/",
    enforce_eager=True,
    tensor_parallel_size=1,
    enable_chunked_prefill=False,
    enable_prefix_caching=False,
)

# Run every QA text through the engine and keep the hidden states.
# The averaging cell indexes the first return value as
# all_hidden_states[sample][layer][token_position].
(all_hidden_states, outputs) = hs.get_all_hidden_states(llm, qa_pairs)

INFO 11-03 14:48:00 [utils.py:253] non-default args: {'task': 'embed', 'enable_prefix_caching': False, 'disable_log_stats': True, 'enforce_eager': True, 'enable_chunked_prefill': False, 'model': '/data/zju-46/shenyl/hf/model/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/'}
INFO 11-03 14:48:00 [model.py:657] Resolved architecture: Qwen2ForCausalLM
INFO 11-03 14:48:00 [model.py:1746] Using max model len 131072
INFO 11-03 14:48:00 [vllm.py:414] Cudagraph is disabled under eager mode
[1;36m(EngineCore_DP0 pid=746251)[0;0m INFO 11-03 14:48:00 [core.py:94] Initializing a V1 LLM engine (v0.1.dev10888+g9d4fd0da4.d20251031) with config: model='/data/zju-46/shenyl/hf/model/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/', speculative_config=None, tokenizer='/data/zju-46/shenyl/hf/model/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=131072, download_dir=None, l

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[1;36m(EngineCore_DP0 pid=746251)[0;0m INFO 11-03 14:48:02 [parallel_state.py:1325] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0, EP rank 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[1;36m(EngineCore_DP0 pid=746251)[0;0m INFO 11-03 14:48:03 [gpu_model_runner.py:2902] Starting to load model /data/zju-46/shenyl/hf/model/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/...
[1;36m(EngineCore_DP0 pid=746251)[0;0m INFO 11-03 14:48:03 [cuda.py:420] Using Flash Attention back

Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


[1;36m(EngineCore_DP0 pid=746251)[0;0m INFO 11-03 14:48:04 [default_loader.py:314] Loading weights took 0.97 seconds
[1;36m(EngineCore_DP0 pid=746251)[0;0m INFO 11-03 14:48:04 [hidden_states_model_runner_mixin.py:90] Wrapped 28 decoder layers for hidden states capture
[1;36m(EngineCore_DP0 pid=746251)[0;0m INFO 11-03 14:48:05 [gpu_model_runner.py:2971] Model loading took 2.9110 GiB and 1.150039 seconds
[1;36m(EngineCore_DP0 pid=746251)[0;0m INFO 11-03 14:48:10 [gpu_worker.py:343] Available KV cache memory: 31.64 GiB
[1;36m(EngineCore_DP0 pid=746251)[0;0m INFO 11-03 14:48:10 [kv_cache_utils.py:1247] GPU KV cache size: 1,185,072 tokens
[1;36m(EngineCore_DP0 pid=746251)[0;0m INFO 11-03 14:48:10 [kv_cache_utils.py:1252] Maximum concurrency for 131,072 tokens per request: 9.04x
[1;36m(EngineCore_DP0 pid=746251)[0;0m INFO 11-03 14:48:11 [core.py:238] init engine (profile, create kv cache, warmup model) took 5.32 seconds
[1;36m(EngineCore_DP0 pid=746251)[0;0m INFO 11-03 14:48:

Adding requests:   0%|          | 0/3 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/3 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

In [6]:
from easysteer.steer import StatisticalControlVector
import numpy as np

# Step 1: Collect all relevant hidden states by category
#-------------------------------------------------

# category -> layer index -> list of hidden-state vectors
collected_states = dict(Transition={}, Reflection={}, Execution={})

# The layer count is read from the first sample
num_layers = len(all_hidden_states[0])

# Walk every classified paragraph-break position of every sample
for sample_idx, qa_results in enumerate(all_classifications):
    for result in qa_results:
        category, position = result["category"], result["position_in_tokens"]

        for layer_idx in range(num_layers):
            # Fetch the list for this layer, creating it on first use
            layer_bucket = collected_states[category].setdefault(layer_idx, [])

            # Hidden state at this token position, moved to the CPU and cast
            # with .float() before the NumPy conversion
            state_vector = (
                all_hidden_states[sample_idx][layer_idx][position]
                .cpu()
                .float()
                .numpy()
            )

            layer_bucket.append(state_vector)


# Step 2: Calculate the average hidden state for each category at each layer
#-------------------------------------------------

# category -> layer index -> mean vector
average_vectors = dict(Transition={}, Reflection={}, Execution={})

# category -> number of vectors averaged (used as export metadata)
vector_counts = {}

for category, layer_data in collected_states.items():
    if layer_data:
        # The count is taken from layer 0's list
        vector_counts[category] = len(layer_data.get(0, []))
        print(f"Calculating average for '{category}' using {vector_counts[category]} vectors.")

        # Element-wise mean over all collected vectors of each layer
        for layer_idx, vectors in layer_data.items():
            average_vectors[category][layer_idx] = np.asarray(vectors).mean(axis=0)
    else:
        # Nothing was collected for this category
        print(f"Warning: No vectors found for category '{category}'. Skipping.")


# Step 3: Package and export as GGUF files
#-------------------------------------------------

# Read the model type from the engine object when it exposes one; if the
# attribute (or `llm` itself) is missing, fall back to a hard-coded value
try:
    model_type_str = llm.config.model_type
except (AttributeError, NameError):
    print("Warning: Could not determine model_type from `llm` object. Using a placeholder.")
    model_type_str = "qwen2"  # Default placeholder - adjust as needed for your model

# Write one control-vector file per category that has averaged directions
for category, directions in average_vectors.items():
    if directions:
        control_vector = StatisticalControlVector(
            model_type=model_type_str,
            method="Average",
            directions=directions,
            metadata={
                "source": "Averaged from classified newline tokens",
                "num_vectors_averaged": vector_counts.get(category, 0),
            },
        )

        # The file name is the lower-cased category followed by "_avg_vector.gguf"
        control_vector.export_gguf(f"{category.lower()}_avg_vector.gguf")

[2025-11-03 14:49:03] INFO gguf_writer.py:102: gguf: This GGUF file is for Little Endian only
[2025-11-03 14:49:03] INFO gguf_writer.py:181: Writing the following files:
[2025-11-03 14:49:03] INFO gguf_writer.py:186: transition_avg_vector.gguf: n_tensors = 28, total_size = 172.0K


Calculating average for 'Transition' using 27 vectors.
Calculating average for 'Reflection' using 30 vectors.
Calculating average for 'Execution' using 62 vectors.


[2025-11-03 14:49:04] INFO gguf_writer.py:102: gguf: This GGUF file is for Little Endian only
[2025-11-03 14:49:04] INFO gguf_writer.py:181: Writing the following files:
[2025-11-03 14:49:04] INFO gguf_writer.py:186: reflection_avg_vector.gguf: n_tensors = 28, total_size = 172.0K
[2025-11-03 14:49:05] INFO gguf_writer.py:102: gguf: This GGUF file is for Little Endian only
[2025-11-03 14:49:05] INFO gguf_writer.py:181: Writing the following files:
[2025-11-03 14:49:05] INFO gguf_writer.py:186: execution_avg_vector.gguf: n_tensors = 28, total_size = 172.0K
