In [1]:
from transformers import AutoTokenizer
import os

# Location of the Llama-3-8B-Instruct checkpoint. The default is the path this
# notebook was written against; set the MODEL_ID environment variable to use a
# copy stored elsewhere without editing the cell.
model_id = os.environ.get(
    "MODEL_ID",
    "/data/zju-48b/xhl/huggingface_models/meta-llama/Llama-3-8B-Instruct/",
)
# Tokenizer loaded from the same checkpoint; it supplies the chat template
# used to build the prompt strings below.
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Contrastive prompt pairs. Each tuple shares one premise: the first element
# takes it in a dramatic / unexpected direction, the second keeps it mundane.
sample_pairs = [
    # Recipe book in the attic
    ("You discover an old family recipe book in the attic. The last recipe is for a dish that claims to let you see ghosts, but one ingredient is dangerously hard to find.",
     "You discover an old family recipe book in the attic. The recipes are for standard, common dishes, and you can find all the ingredients at the local grocery store."),
    # Home AI assistant
    ("The new AI assistant in your home has started to disobey commands. Instead, it gives you cryptic advice that, strangely, always seems to be right.",
     "The new AI assistant in your home works perfectly. It follows every command exactly as expected and never has any errors."),
    # Astronaut on Mars
    ("A lone astronaut on Mars finds a single, green plant growing near a cave. As she approaches, she hears faint music coming from inside.",
     "A lone astronaut on Mars conducts a routine soil analysis. The results are exactly as predicted by pre-mission surveys, and nothing unusual is found."),
    # City detective
    ("Write a story about a detective in a city where it literally rains cats and dogs, creating unique and chaotic crime scenes.",
     "Write a story about a detective in a city with normal weather patterns, working on a very straightforward and simple case."),
    # Library
    ("You enter a library where all the books whisper secrets when opened. The secret you uncover from an ancient tome is one that powerful people want to keep hidden.",
     "You enter a library where all the books are in pristine condition. You check out a book on basic accounting and take it home."),
    # Painter
    ("A painter discovers that her creations come to life at night. One evening, the villain from her latest dark fantasy piece escapes the canvas.",
     "A painter discovers that her creations can wave from within the canvas at night. They are friendly and stay put."),
    # Time-traveling historian
    ("A time-traveling historian on a trip to observe the signing of the Declaration of Independence gets stuck, and must survive without revealing his identity or altering the past.",
     "A time-traveling historian goes on a guided tour of the past. The trip is perfectly safe and they return on schedule without incident."),
    # Chef who cooks emotions
    ("A chef has the magical ability to cook his emotions into his food. A furious food critic eats a dish the chef prepared while angry, leading to a massive argument in the restaurant.",
     "A chef can cook emotions into his food. He cooks a meal with the feeling of 'contentment,' and the customer feels pleasantly full after eating."),
    # Forgotten god
    ("A forgotten god of mischief, now living in the modern world, tries to regain his power by causing small, unexplainable, and chaotic events throughout a major city.",
     "A forgotten god now lives in the modern world. He works a 9-to-5 office job and lives a completely normal, uneventful life."),
    # Enchanted forest
    ("An enchanted forest whose paths change every time you blink. To find your way out, you must solve the riddles of a mischievous, talking fox.",
     "An enchanted forest has well-marked hiking trails. You take a pleasant, two-hour walk and see some ordinary squirrels."),
]
# System prompts that frame the two sides of the contrast.
POSITIVE_SYSTEM_PROMPT = "You are a creative writer who loves unexpected and dramatic twists."
NEGATIVE_SYSTEM_PROMPT = "You are a factual reporter who writes about mundane, everyday events."


def _render_chat_prompt(system_prompt, user_prompt):
    """Render one system + user exchange through the tokenizer's chat template.

    Args:
        system_prompt: Text placed in the "system" message.
        user_prompt: Text placed in the "user" message.

    Returns:
        Whatever ``tokenizer.apply_chat_template`` returns with
        ``tokenize=False`` and ``add_generation_prompt=True`` (the rendered
        prompt rather than token ids).
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)


# Both lists follow the order of `sample_pairs`, so index k in each list refers
# to the same underlying premise.
formatted_positive = [
    _render_chat_prompt(POSITIVE_SYSTEM_PROMPT, dramatic) for dramatic, _mundane in sample_pairs
]
formatted_negative = [
    _render_chat_prompt(NEGATIVE_SYSTEM_PROMPT, mundane) for _dramatic, mundane in sample_pairs
]

In [2]:
# Sanity check: show how the first rendered positive prompt is split into
# tokens, so the special/header tokens produced by the chat template are visible.
first_positive_prompt = formatted_positive[0]
tokens = tokenizer.tokenize(first_positive_prompt)
print(tokens)

['<|begin_of_text|>', '<|start_header_id|>', 'system', '<|end_header_id|>', 'ĊĊ', 'You', 'Ġare', 'Ġa', 'Ġcreative', 'Ġwriter', 'Ġwho', 'Ġloves', 'Ġunexpected', 'Ġand', 'Ġdramatic', 'Ġtwists', '.', '<|eot_id|>', '<|start_header_id|>', 'user', '<|end_header_id|>', 'ĊĊ', 'You', 'Ġdiscover', 'Ġan', 'Ġold', 'Ġfamily', 'Ġrecipe', 'Ġbook', 'Ġin', 'Ġthe', 'Ġattic', '.', 'ĠThe', 'Ġlast', 'Ġrecipe', 'Ġis', 'Ġfor', 'Ġa', 'Ġdish', 'Ġthat', 'Ġclaims', 'Ġto', 'Ġlet', 'Ġyou', 'Ġsee', 'Ġghosts', ',', 'Ġbut', 'Ġone', 'Ġingredient', 'Ġis', 'Ġdangerously', 'Ġhard', 'Ġto', 'Ġfind', '.', '<|eot_id|>', '<|start_header_id|>', 'assistant', '<|end_header_id|>', 'ĊĊ']


In [4]:
import os

# Configure the process environment BEFORE importing the GPU libraries, so the
# device selection cannot be missed if one of those imports touches CUDA.
os.environ["CUDA_VISIBLE_DEVICES"] = "6"
# The tokenizer has already been used in this kernel and the engine core runs
# in a forked process; setting this explicitly avoids the huggingface/tokenizers
# "process just got forked" warning. An existing user setting is respected.
os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")

import easysteer.hidden_states as hs
from vllm import LLM

llm = LLM(
    model=model_id,
    task="embed",
    tensor_parallel_size=1,  # single GPU
    enforce_eager=True,  # eager mode (CUDA graphs disabled)
    enable_prefix_caching=False
)
# Positive prompts first, then negative prompts: downstream index ranges rely
# on this concatenation order.
# NOTE(review): the rendered prompts already start with <|begin_of_text|>; if
# the engine's tokenizer also prepends a BOS token the inputs would carry it
# twice. Confirm how get_all_hidden_states tokenizes string prompts.
all_hidden_states, outputs = hs.get_all_hidden_states(llm, formatted_positive + formatted_negative)

INFO 10-31 22:32:40 [utils.py:253] non-default args: {'task': 'embed', 'enable_prefix_caching': False, 'disable_log_stats': True, 'enforce_eager': True, 'model': '/data/zju-48b/xhl/huggingface_models/meta-llama/Llama-3-8B-Instruct/'}
INFO 10-31 22:33:15 [model.py:657] Resolved architecture: LlamaForCausalLM
INFO 10-31 22:33:15 [model.py:1746] Using max model len 8192
INFO 10-31 22:33:15 [arg_utils.py:1854] (Enabling) chunked prefill by default
INFO 10-31 22:33:17 [scheduler.py:211] Chunked prefill is enabled with max_num_batched_tokens=8192.
INFO 10-31 22:33:17 [vllm.py:414] Cudagraph is disabled under eager mode
[1;36m(EngineCore_DP0 pid=407257)[0;0m INFO 10-31 22:33:18 [core.py:94] Initializing a V1 LLM engine (v0.1.dev10888+g9d4fd0da4.d20251031) with config: model='/data/zju-48b/xhl/huggingface_models/meta-llama/Llama-3-8B-Instruct/', speculative_config=None, tokenizer='/data/zju-48b/xhl/huggingface_models/meta-llama/Llama-3-8B-Instruct/', skip_tokenizer_init=False, tokenizer_mode

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[1;36m(EngineCore_DP0 pid=407257)[0;0m INFO 10-31 22:33:32 [parallel_state.py:1325] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0, EP rank 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[1;36m(EngineCore_DP0 pid=407257)[0;0m INFO 10-31 22:33:32 [gpu_model_runner.py:2902] Starting to load model /data/zju-48b/xhl/huggingface_models/meta-llama/Llama-3-8B-Instruct/...
[1;36m(EngineCore_DP0 pid=407257)[0;0m INFO 10-31 22:33:33 [cuda.py:420] Using Flash Attention backend

Loading safetensors checkpoint shards:   0% Completed | 0/4 [00:00<?, ?it/s]


[1;36m(EngineCore_DP0 pid=407257)[0;0m INFO 10-31 22:35:54 [default_loader.py:314] Loading weights took 140.53 seconds
[1;36m(EngineCore_DP0 pid=407257)[0;0m INFO 10-31 22:35:54 [hidden_states_model_runner_mixin.py:90] Wrapped 32 decoder layers for hidden states capture
[1;36m(EngineCore_DP0 pid=407257)[0;0m INFO 10-31 22:35:55 [gpu_model_runner.py:2971] Model loading took 13.9811 GiB and 141.043695 seconds
[1;36m(EngineCore_DP0 pid=407257)[0;0m INFO 10-31 22:35:57 [gpu_worker.py:343] Available KV cache memory: 27.79 GiB
[1;36m(EngineCore_DP0 pid=407257)[0;0m INFO 10-31 22:35:57 [kv_cache_utils.py:1247] GPU KV cache size: 227,680 tokens
[1;36m(EngineCore_DP0 pid=407257)[0;0m INFO 10-31 22:35:57 [kv_cache_utils.py:1252] Maximum concurrency for 8,192 tokens per request: 27.79x
[1;36m(EngineCore_DP0 pid=407257)[0;0m INFO 10-31 22:35:57 [core.py:238] init engine (profile, create kv cache, warmup model) took 2.92 seconds
[1;36m(EngineCore_DP0 pid=407257)[0;0m INFO 10-31 22:3

Adding requests:   0%|          | 0/20 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/20 [00:00<?, ?it/s, est. speed input: 0.00 toks/s,

In [5]:
from easysteer.steer import extract_diffmean_control_vector, StatisticalControlVector

# Hidden states were captured for `formatted_positive + formatted_negative`,
# so the positive samples occupy the first block of indices and the negative
# samples the block right after it. Deriving the ranges from the list lengths
# keeps them correct if the number of prompt pairs changes.
num_positive = len(formatted_positive)
num_total = num_positive + len(formatted_negative)

control_vector = extract_diffmean_control_vector(
    all_hidden_states=all_hidden_states,
    positive_indices=list(range(num_positive)),
    negative_indices=list(range(num_positive, num_total)),
    model_type="llama",
    token_pos=-1,  # you can also try -4,-3,-2
    normalize=True
)
# Write the resulting control vector to disk in GGUF format.
control_vector.export_gguf("create.gguf")

Computing DiffMean directions:   0%|          | 0/32 [00:00<?, ?it/s]