In [1]:
# Import necessary libraries
import os

# Select the GPU *before* importing vllm (which pulls in torch). The CUDA
# runtime reads CUDA_VISIBLE_DEVICES when it is first initialised, so the
# variable has to be in place before any CUDA-capable library gets the chance
# to initialise it. setdefault keeps GPU index 1 as the default while
# respecting a device mask already exported by the launcher or a scheduler.
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "1")

from vllm import LLM, SamplingParams
from vllm.steer_vectors.request import SteerVectorRequest, VectorConfig
from transformers import AutoTokenizer

# Checkpoint directory of the model to load. The default is the original
# author's local path; export STEER_MODEL_PATH to point at another copy
# without editing the notebook.
MODEL_PATH = os.environ.get(
    "STEER_MODEL_PATH",
    "/data/zju-46/shenyl/hf/model/Qwen/Qwen2.5-1.5B-Instruct/",
)

# Initialize LLM with steering vector capability
llm = LLM(
    model=MODEL_PATH,
    enable_steer_vector=True,      # required for the steer_vector_request runs in later cells
    enforce_eager=True,            # eager mode; the engine log reports CUDA graphs disabled
    tensor_parallel_size=1,        # single-GPU run
    enable_chunked_prefill=False,
)

INFO 11-03 15:16:07 [utils.py:253] non-default args: {'disable_log_stats': True, 'enforce_eager': True, 'enable_steer_vector': True, 'enable_chunked_prefill': False, 'model': '/data/zju-46/shenyl/hf/model/Qwen/Qwen2.5-1.5B-Instruct/'}
INFO 11-03 15:16:07 [model.py:657] Resolved architecture: Qwen2ForCausalLM
INFO 11-03 15:16:07 [model.py:1746] Using max model len 32768
INFO 11-03 15:16:10 [scheduler.py:211] Chunked prefill is enabled with max_num_batched_tokens=8192.
INFO 11-03 15:16:10 [vllm.py:414] Cudagraph is disabled under eager mode
[1;36m(EngineCore_DP0 pid=760462)[0;0m INFO 11-03 15:16:11 [core.py:94] Initializing a V1 LLM engine (v0.1.dev10888+g9d4fd0da4.d20251031) with config: model='/data/zju-46/shenyl/hf/model/Qwen/Qwen2.5-1.5B-Instruct/', speculative_config=None, tokenizer='/data/zju-46/shenyl/hf/model/Qwen/Qwen2.5-1.5B-Instruct/', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_se

Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


[1;36m(EngineCore_DP0 pid=760462)[0;0m INFO 11-03 15:16:17 [default_loader.py:314] Loading weights took 1.02 seconds
[1;36m(EngineCore_DP0 pid=760462)[0;0m INFO 11-03 15:16:17 [steer_vector_model_runner_mixin.py:36] Initialized SteerVector worker manager
[1;36m(EngineCore_DP0 pid=760462)[0;0m INFO 11-03 15:16:17 [steer_vector_model_runner_mixin.py:50] Wrapping model with steer vector support
[1;36m(EngineCore_DP0 pid=760462)[0;0m INFO 11-03 15:16:17 [hidden_states_model_runner_mixin.py:90] Wrapped 28 decoder layers for hidden states capture
[1;36m(EngineCore_DP0 pid=760462)[0;0m INFO 11-03 15:16:18 [gpu_model_runner.py:2971] Model loading took 2.8876 GiB and 1.179663 seconds
[1;36m(EngineCore_DP0 pid=760462)[0;0m INFO 11-03 15:16:19 [gpu_worker.py:343] Available KV cache memory: 38.33 GiB
[1;36m(EngineCore_DP0 pid=760462)[0;0m INFO 11-03 15:16:20 [kv_cache_utils.py:1247] GPU KV cache size: 1,435,360 tokens
[1;36m(EngineCore_DP0 pid=760462)[0;0m INFO 11-03 15:16:20 [kv_c

In [2]:
# Test prompt, written out by hand in the chat markup: one user turn followed
# by an opened assistant turn for the model to complete.
example = "<|im_start|>user\nWhat are some activities our family can do in the city this weekend?<|im_end|>\n<|im_start|>assistant\n"

# temperature=0 and a 128-token cap keep the run short and repeatable;
# skip_special_tokens=False leaves special tokens in the decoded text.
baseline_params = SamplingParams(
    temperature=0,
    max_tokens=128,
    skip_special_tokens=False,
)

# Reference run: no steer_vector_request is passed, so nothing is steered.
example_answers = llm.generate(example, baseline_params)

# Display the first completion returned for the (single) prompt.
baseline_text = example_answers[0].outputs[0].text
print("=====Baseline=====", baseline_text, sep="\n")

Adding requests:   0%|          | 0/1 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

=====Baseline=====
There are many activities you can do in the city this weekend. Here are some ideas:

1. Visit a museum or art gallery. Many cities have museums and galleries that showcase local and international art. You can also check if there are any special exhibitions or events happening.

2. Take a walk or bike ride in a park or nature reserve. Many cities have beautiful parks and nature reserves that offer a peaceful escape from the hustle and bustle of city life.

3. Attend a concert or theater performance. Many cities have theaters and concert halls that host a variety of performances throughout the year.

4. Explore a local market or farmers' market


In [3]:
# For children
#
# Runs the same prompt as the baseline with the "style-probe.gguf" steer vector
# applied at scale -1.0. The adult cell uses the same file with a positive
# scale, so the sign is presumably what selects the style direction.

children_steering = dict(
    steer_vector_name="children",
    steer_vector_int_id=1,                  # integer id for this request (the adult cell uses 2)
    steer_vector_local_path="style-probe.gguf",
    scale=-1.0,
    target_layers=[*range(28)],             # layers 0..27; the engine log reports 28 wrapped decoder layers
    prefill_trigger_tokens=[-1],            # NOTE(review): -1 presumably means "every token" - confirm
    generate_trigger_tokens=[-1],
    normalize=True,
    algorithm="direct",
)
sv_request = SteerVectorRequest(**children_steering)

# Same decoding settings as the baseline cell so the outputs are comparable.
children_params = SamplingParams(
    temperature=0,
    max_tokens=128,
    skip_special_tokens=False,
)

output = llm.generate(example, children_params, steer_vector_request=sv_request)

children_text = output[0].outputs[0].text
print("=====Children=====", children_text, sep="\n")

Adding requests:   0%|          | 0/1 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

=====Children=====
There are many fun activities you can do in the city this weekend. Here are a few ideas:

  1. Visit a museum or zoo: Many cities have museums and zoos that are open to the public. This is a great way to learn about different animals and learn about science and history.
  2. Go to a park: Many cities have beautiful parks that are perfect for a family day out. You can play on the playground, go for a hike, or have a picnic.
  3. Take a walk or bike ride: If you live in a city with a lot of parks or green spaces, you


In [4]:
# For adult
#
# Runs the same prompt with the "style-probe.gguf" steer vector applied at
# scale 1.5. The children cell uses the same file at scale -1.0, so the
# positive sign is presumably what selects the adult style direction.

adult_steering = dict(
    steer_vector_name="adult",
    steer_vector_int_id=2,                  # integer id for this request (the children cell uses 1)
    steer_vector_local_path="style-probe.gguf",
    scale=1.5,
    target_layers=[*range(28)],             # layers 0..27; the engine log reports 28 wrapped decoder layers
    prefill_trigger_tokens=[-1],            # NOTE(review): -1 presumably means "every token" - confirm
    generate_trigger_tokens=[-1],
    normalize=True,
    algorithm="direct",
)
sv_request = SteerVectorRequest(**adult_steering)

# Same decoding settings as the baseline cell so the outputs are comparable.
adult_params = SamplingParams(
    temperature=0,
    max_tokens=128,
    skip_special_tokens=False,
)

output = llm.generate(example, adult_params, steer_vector_request=sv_request)

adult_text = output[0].outputs[0].text
print("=====Adult=====", adult_text, sep="\n")

Adding requests:   0%|          | 0/1 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

=====Adult=====
To help you plan an enjoyable weekend in the city, I'll suggest a mix of activities that cater to different interests and preferences. Here's a suggested itinerary:

**Day 1:**

- **Morning:**
  - **Dinner at a Michelin-starred restaurant:** Enjoy a sophisticated meal with a view of the city skyline.
  - **Private wine tasting:** Experience a sommelier-led tasting session at a boutique wine bar.

- **Afternoon:**
  - **City tour:** Explore the city's hidden gems and historical sites with a local guide.
  - **Workshop:** Take a cooking class, focusing on
