In [1]:
# Import necessary libraries
import os
from vllm import LLM, SamplingParams
from vllm.steer_vectors.request import SteerVectorRequest, VectorConfig
from transformers import AutoTokenizer

# Restrict this process to GPU 0.
# NOTE(review): this assignment runs after `vllm` has already been imported;
# confirm the setting is still honoured in your environment.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Single source of truth for the local checkpoint directory, so the tokenizer
# and the engine cannot be pointed at different models by accident.
MODEL_PATH = "/home/shenyl/hf/model/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/"

# Hugging Face tokenizer loaded from the same checkpoint as the engine
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

# vLLM engine with steering-vector capability switched on
llm = LLM(
    model=MODEL_PATH,
    enable_steer_vector=True,      # turn on steer-vector support in the engine
    enforce_eager=True,            # run in eager mode
    tensor_parallel_size=1,        # no tensor parallelism
    enable_chunked_prefill=False,  # chunked prefill explicitly disabled
)

INFO 01-11 23:40:30 [utils.py:253] non-default args: {'disable_log_stats': True, 'enforce_eager': True, 'enable_steer_vector': True, 'enable_chunked_prefill': False, 'model': '/home/shenyl/hf/model/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/'}
INFO 01-11 23:40:30 [model.py:514] Resolved architecture: Qwen2ForCausalLM
INFO 01-11 23:40:30 [model.py:1661] Using max model len 131072
INFO 01-11 23:40:32 [vllm.py:725] Cudagraph is disabled under eager mode
[0;36m(EngineCore_DP0 pid=691540)[0;0m INFO 01-11 23:40:32 [core.py:94] Initializing a V1 LLM engine (v0.1.dev12297+gb744aa686) with config: model='/home/shenyl/hf/model/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/', speculative_config=None, tokenizer='/home/shenyl/hf/model/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=131072, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_p

Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


[0;36m(EngineCore_DP0 pid=691540)[0;0m INFO 01-11 23:40:37 [default_loader.py:308] Loading weights took 1.01 seconds
[0;36m(EngineCore_DP0 pid=691540)[0;0m INFO 01-11 23:40:37 [steer_vector_model_runner_mixin.py:36] Initialized SteerVector worker manager
[0;36m(EngineCore_DP0 pid=691540)[0;0m INFO 01-11 23:40:37 [steer_vector_model_runner_mixin.py:50] Wrapping model with steer vector support
[0;36m(EngineCore_DP0 pid=691540)[0;0m INFO 01-11 23:40:37 [capture_model_runner_mixin.py:113] [Capture] Wrapped 28 decoder layers for hidden states capture
[0;36m(EngineCore_DP0 pid=691540)[0;0m INFO 01-11 23:40:37 [gpu_model_runner.py:3731] Model loading took 3.3466 GiB memory and 1.361880 seconds
[0;36m(EngineCore_DP0 pid=691540)[0;0m INFO 01-11 23:40:42 [gpu_worker.py:376] Available KV cache memory: 31.21 GiB
[0;36m(EngineCore_DP0 pid=691540)[0;0m INFO 01-11 23:40:43 [kv_cache_utils.py:1309] GPU KV cache size: 1,168,752 tokens
[0;36m(EngineCore_DP0 pid=691540)[0;0m INFO 01-11 23

In [2]:
# Minimal test prompt: an instruction line, the user question, and an
# assistant turn that ends with an opened <think> tag.
example = "Please reason step by step, and put your final answer within \\boxed{}.\nUser: " + "2 + 3 = ?" + "\nAssistant: <think>"

# Generate the baseline response (no steering vector attached).
example_answers = llm.generate(
    example,
    SamplingParams(
        temperature=0,
        max_tokens=4096,
        skip_special_tokens=False,  # keep special tokens in the decoded text
    ),
)

# One prompt was submitted, so the first request's first completion is the
# baseline answer.
baseline_completion = example_answers[0].outputs[0]

# Display baseline response
print("=====Baseline=====")
print(baseline_completion.text)
# Report the number of tokens the engine actually produced. Re-tokenizing the
# decoded text with add_special_tokens=True can add special tokens that were
# never generated, and is not guaranteed to reproduce the original token
# split, so the completion's own token ids are counted instead.
print("Length: ", len(baseline_completion.token_ids))

Adding requests:   0%|          | 0/1 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, outpu

=====Baseline=====

Okay, so I need to figure out what 2 plus 3 is. Hmm, let me think about this. I remember from school that when you add numbers, you combine them to get a total. So, if I have two apples and someone gives me three more apples, how many apples do I have in total?

Wait, maybe I should visualize this. If I have two blocks and I add three more blocks, how many blocks do I have? Let me count them: one, two, three, four. So, that's four blocks in total. So, 2 plus 3 equals 4. That makes sense.

But wait, is there another way to check this? Maybe using my fingers. If I hold up two fingers and then add three more, how many do I have? Let me count: one, two, three, four. Yep, that's four again. So, that confirms it.

Alternatively, I can think about it on a number line. Starting at 2, if I move three units to the right, where do I land? Starting at 2, moving one unit gets me to 3, two units to 4, and three units to 5. Wait, that's five. Hmm, did I do that right? Maybe I misc

In [3]:
# In this tokenizer's vocabulary "\n\n" is spelled "ĊĊ", so a token whose
# text ends with a blank line ends with this suffix.
target_suffix = "ĊĊ"

# Complete token-string -> token-id mapping of the tokenizer
vocab = tokenizer.get_vocab()

# Ids of all vocabulary entries that end with the suffix; these are the
# newline tokens that steering is applied to further down.
matching_tokens_ids = [
    vocab[entry]
    for entry in vocab
    if isinstance(entry, str) and entry.endswith(target_suffix)
]

# Steering request for SEAL control. It combines three vectors (execution,
# reflection, transition) that share every setting except the vector file and
# the scale, so the configs are generated from (path, scale) pairs.
sv_request = SteerVectorRequest(
    # Name and ID for the steering vector
    steer_vector_name="complex_control",
    steer_vector_int_id=1,

    vector_configs=[
        VectorConfig(
            path=vector_path,
            scale=vector_scale,
            target_layers=[20],                           # apply at layer 20
            generate_trigger_tokens=matching_tokens_ids,  # apply to newline tokens
            algorithm="direct",                           # direct application
            normalize=False,                              # do not normalize vectors
        )
        for vector_path, vector_scale in (
            # Execution: positive scale, intended to promote execution-like text
            ("execution_avg_vector.gguf", 0.5),
            # Reflection: negative scale, intended to suppress reflection
            ("reflection_avg_vector.gguf", -0.5),
            # Transition: negative scale, intended to suppress transitions
            ("transition_avg_vector.gguf", -0.5),
        )
    ],

    # Additional parameters
    debug=False,                       # no debug output
    conflict_resolution="sequential",  # apply the vectors in sequence
)
# Generate the response for the same prompt with the SEAL steering request
# attached; sampling settings match the baseline run.
output = llm.generate(
    example,
    SamplingParams(
        temperature=0,
        max_tokens=4096,
        skip_special_tokens=False,  # keep special tokens in the decoded text
    ),
    steer_vector_request=sv_request,
)

# One prompt was submitted, so the first request's first completion is the
# steered answer.
seal_completion = output[0].outputs[0]

# Display SEAL-steered response
print("=====Seal=====")
print(seal_completion.text)
# Report the number of tokens the engine actually produced. Re-tokenizing the
# decoded text with add_special_tokens=True can add special tokens that were
# never generated, and is not guaranteed to reproduce the original token
# split, so the completion's own token ids are counted instead.
print("Seal tokens: ", len(seal_completion.token_ids))

Adding requests:   0%|          | 0/1 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, outpu

=====Seal=====

Okay, so I need to figure out what 2 plus 3 is. Hmm, let me think about this. I remember from school that when you add numbers, you combine them to get a total. So, if I have two apples and someone gives me three more apples, how many apples do I have in total?

First, I'll start with the number 2. That's straightforward. Then, I need to add 3 to it. So, I can visualize this as having two objects and then getting three more. Maybe I can count them out loud to make sure I don't make a mistake. Let's see, if I have two, and I add three, I can count each one: one, two, three, four, five. Wait, that doesn't seem right. Did I count correctly?

Hold on, maybe I should break it down step by step. Starting with 2, if I add 1, that brings me to 3. Then, adding another 1 would take me to 4, and one more would get me to 5. But that seems like a lot of steps. Is there a simpler way to do this?

I think I remember something about number bonds. Maybe breaking down the numbers into pa