In [1]:
# Import necessary libraries
import os
from vllm import LLM, SamplingParams
from vllm.steer_vectors.request import SteerVectorRequest, VectorConfig
from transformers import AutoTokenizer

# Expose only GPU index 1 to this process.
# NOTE(review): this assignment runs after `vllm` has already been imported;
# presumably it still takes effect because CUDA is not initialized until the
# engine is constructed below — confirm, or move it above the imports.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# Single source of truth for the local checkpoint directory, so the tokenizer
# and the engine cannot be pointed at different models by accident.
MODEL_PATH = "/data/zju-46/shenyl/hf/model/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/"

# Notebook-side tokenizer loaded from the same checkpoint as the engine
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

# Initialize LLM with steering vector capability
llm = LLM(
    model=MODEL_PATH,
    enable_steer_vector=True,      # turn on the steer-vector code path
    enforce_eager=True,            # eager execution (no CUDA graphs)
    tensor_parallel_size=1,        # single-GPU run
    enable_chunked_prefill=False,
)

INFO 11-03 14:51:02 [utils.py:253] non-default args: {'disable_log_stats': True, 'enforce_eager': True, 'enable_steer_vector': True, 'enable_chunked_prefill': False, 'model': '/data/zju-46/shenyl/hf/model/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/'}
INFO 11-03 14:51:02 [model.py:657] Resolved architecture: Qwen2ForCausalLM
INFO 11-03 14:51:02 [model.py:1746] Using max model len 131072
INFO 11-03 14:51:05 [scheduler.py:211] Chunked prefill is enabled with max_num_batched_tokens=8192.
INFO 11-03 14:51:05 [vllm.py:414] Cudagraph is disabled under eager mode
[1;36m(EngineCore_DP0 pid=747188)[0;0m INFO 11-03 14:51:05 [core.py:94] Initializing a V1 LLM engine (v0.1.dev10888+g9d4fd0da4.d20251031) with config: model='/data/zju-46/shenyl/hf/model/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/', speculative_config=None, tokenizer='/data/zju-46/shenyl/hf/model/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_r

Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


[1;36m(EngineCore_DP0 pid=747188)[0;0m INFO 11-03 14:51:13 [default_loader.py:314] Loading weights took 1.13 seconds
[1;36m(EngineCore_DP0 pid=747188)[0;0m INFO 11-03 14:51:13 [steer_vector_model_runner_mixin.py:36] Initialized SteerVector worker manager
[1;36m(EngineCore_DP0 pid=747188)[0;0m INFO 11-03 14:51:13 [steer_vector_model_runner_mixin.py:50] Wrapping model with steer vector support
[1;36m(EngineCore_DP0 pid=747188)[0;0m INFO 11-03 14:51:13 [hidden_states_model_runner_mixin.py:90] Wrapped 28 decoder layers for hidden states capture
[1;36m(EngineCore_DP0 pid=747188)[0;0m INFO 11-03 14:51:14 [gpu_model_runner.py:2971] Model loading took 3.3466 GiB and 1.303604 seconds
[1;36m(EngineCore_DP0 pid=747188)[0;0m INFO 11-03 14:51:15 [gpu_worker.py:343] Available KV cache memory: 37.87 GiB
[1;36m(EngineCore_DP0 pid=747188)[0;0m INFO 11-03 14:51:16 [kv_cache_utils.py:1247] GPU KV cache size: 1,418,176 tokens
[1;36m(EngineCore_DP0 pid=747188)[0;0m INFO 11-03 14:51:16 [kv_c

In [2]:
# Test prompt: instruction line, the user question, then an opened <think> tag
example = (
    "Please reason step by step, and put your final answer within \\boxed{}.\nUser: "
    "What does two plus three equal?"
    "\nAssistant: <think>"
)

# Decoding settings for the unsteered run: temperature 0, up to 4096 new
# tokens, special tokens kept in the decoded text
baseline_params = SamplingParams(
    temperature=0,
    max_tokens=4096,
    skip_special_tokens=False,
)

# Reference generation without any steering vector attached
example_answers = llm.generate(example, baseline_params)

# Print the reference completion, then its length measured by re-tokenizing
# the decoded text with the notebook-side tokenizer
baseline_text = example_answers[0].outputs[0].text
print("=====Baseline=====")
print(baseline_text)
baseline_tokens = tokenizer.tokenize(baseline_text, add_special_tokens=True)
print("Length: ", len(baseline_tokens))

Adding requests:   0%|          | 0/1 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

=====Baseline=====

Okay, so I need to figure out what two plus three equals. Hmm, let me think about this. I remember from school that when you add numbers together, you're basically combining quantities. So, if I have two apples and someone gives me three more apples, how many apples do I have in total?

Wait, let me make sure I'm doing this right. Two plus three... I think addition is commutative, which means the order doesn't matter. So, two plus three is the same as three plus two. But in this case, I just need to add them together. 

Let me visualize it. If I have two objects, like two blocks, and then I add three more blocks, how many blocks do I have? I can count them one by one. Starting with two, then adding one makes three, then adding another makes four, and then adding the last one makes five. So, that should be five blocks in total.

But wait, maybe I should use my fingers to count to double-check. If I hold up two fingers and then add three more, how many do I have? Two,

In [3]:
# Vocabulary spelling of a double newline: "\n\n" is tokenized as "ĊĊ"
target_suffix = "ĊĊ"

# Full token -> id mapping of the tokenizer
vocab = tokenizer.get_vocab()

# IDs of every vocabulary entry whose text ends with the double-newline suffix;
# these are the trigger tokens the steering is applied on
matching_tokens_ids = [
    tok_id
    for tok_text, tok_id in vocab.items()
    if isinstance(tok_text, str) and tok_text.endswith(target_suffix)
]

# (vector file, scale) for the three SEAL directions.
# A positive scale promotes the behavior, a negative scale suppresses it.
steer_specs = (
    ("execution_avg_vector.gguf", 1.0),     # execution: promote
    ("reflection_avg_vector.gguf", -1.0),   # reflection: suppress
    ("transition_avg_vector.gguf", -1.0),   # transition: suppress
)

# One VectorConfig per direction; everything except file and scale is shared
seal_vector_configs = [
    VectorConfig(
        path=vector_path,
        scale=vector_scale,
        target_layers=[20],                           # apply at layer 20
        generate_trigger_tokens=matching_tokens_ids,  # apply to newline tokens
        algorithm="direct",                           # direct application
        normalize=False,                              # do not normalize vectors
    )
    for vector_path, vector_scale in steer_specs
]

# Steering request for SEAL control, bundling the three vectors
sv_request = SteerVectorRequest(
    steer_vector_name="complex_control",
    steer_vector_int_id=1,
    vector_configs=seal_vector_configs,
    debug=False,                        # no debug output
    conflict_resolution="sequential",   # apply vectors in sequence
)

# Same prompt and decoding settings as the baseline run, now with steering
seal_params = SamplingParams(
    temperature=0,
    max_tokens=4096,
    skip_special_tokens=False,
)
output = llm.generate(example, seal_params, steer_vector_request=sv_request)

# Print the steered completion, then its length measured by re-tokenizing
# the decoded text with the notebook-side tokenizer
seal_text = output[0].outputs[0].text
print("=====Seal=====")
print(seal_text)
seal_tokens = tokenizer.tokenize(seal_text, add_special_tokens=True)
print("Seal tokens: ", len(seal_tokens))

Adding requests:   0%|          | 0/1 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

=====Seal=====

Okay, so I need to figure out what two plus three equals. Hmm, let me think about this. I remember from school that when you add numbers together, you're basically combining quantities. So, if I have two apples and someone gives me three more apples, how many apples do I have in total?

 I can visualize this. If I draw two circles representing the two apples and then draw three more circles for the three apples, I can count them all together. Let me count them: one, two, three, four, five. Wait, that doesn't seem right. Did I count correctly?

 I think I might have made a mistake. Let me recount. Starting from the first circle, that's one. Then the second circle is two. Now, adding the three apples: the third circle is three, the fourth is four, and the fifth is five. Hmm, that still gives me five. Maybe I was too quick to count.

  Alternatively, I can use my fingers to count. If I hold up two fingers and then add three more, how many do I have? Two on the first hand, 