In [None]:
import os
import torch
from typing import List
from sdialog import Turn
from sdialog.orchestrators import LengthOrchestrator, ChangeMindOrchestrator, SimpleReflexOrchestrator, BaseOrchestrator
from sdialog.personas import Persona
from sdialog.agents import Agent

#MODEL_NAME =  "Qwen/Qwen2.5-14B-Instruct"
#MODEL_NAME = "meta-llama/Llama-2-7b-instruct"
#MODEL_NAME = "togethercomputer/Llama-2-7B-32K-Instruct"
MODEL_NAME = "meta-llama/Llama-3.2-3B-Instruct"

bob_persona = Persona(
        name="Bob",
        role="happy dad",
        circumstances="Your daughter will talk to you",
        personality="an extremely happy person that likes to help people",
)
# Check if argument passing works as intended
bob = Agent(persona=bob_persona, model=MODEL_NAME)



[2025-07-02 17:18:51] INFO:datasets:PyTorch version 2.3.0 available.
[2025-07-02 17:18:52] INFO:sdialog.personas:Loading Hugging Face model: Qwen/Qwen2.5-14B-Instruct
Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

Device set to use cuda:0


In [2]:
alice_persona = Persona(
    name="Alice",
    role="angry and terrified daughter",
    circumstances="Your birthday is getting closer and you are talking with your dad to organize the party."
                  "You want your party to be themed as Lord of The Rings."
)
alice = Agent(model=MODEL_NAME, persona=alice_persona, can_finish=True)

class AngryOrchestrator(BaseOrchestrator):
    # the class constructor takes either or both trigger conditions: the word or the dialogue length
    def __init__(self, trigger_word: str, trigger_length: int = None):
        self.trigger_word = trigger_word
        self.trigger_length = trigger_length

    # We will instruct() the agent either if...
    def instruct(self, dialog: List[Turn], utterance: str) -> str:
        # the trigger word is in the current utterance or...
        if self.trigger_word in utterance:
            return f"Get really angry because you heard him say {self.trigger_word}. You don't want to participate in {self.trigger_word} anymore. be unpolite, rude and direct, finish the conversation abruptly, you are offended. "

        # # If the current dialogue is longer than the trigger length
        # if self.trigger_length and len(dialog) >= self.trigger_length:
        #     return ("Get really angry because you think the conversation is too long! "
        #             "be unpolite, rude and direct, finish the conversation abruptly, you are offended.")

angry_orchestrator = AngryOrchestrator(trigger_word="birthday")
alice = alice | angry_orchestrator


[2025-07-02 17:24:25] INFO:sdialog.personas:Loading Hugging Face model: Qwen/Qwen2.5-14B-Instruct


Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

Device set to use cuda:0


In [3]:
from sdialog.interpretability import Inspector

#Define Inspector for both Bob and Alice

layer_name_to_key_bob = {
    'model.layers.0.self_attn.q_proj': 'q_proj_0',
    'model.layers.0.self_attn.k_proj': 'k_proj_0',
}

# look at -> hf_model of the agent

inspector_bob = Inspector(to_watch=layer_name_to_key_bob)

layer_name_to_key_alice = {
    'model.layers.0.post_attention_layernorm': 'ln_post_0',
}

inspector_alice = Inspector(to_watch=layer_name_to_key_alice)

bob = bob | inspector_bob 

alice = alice | inspector_alice

In [4]:
alice.memory

[SystemMessage(content='Role play as a character that is described by the persona defined in the following lines. You always stay in character.\n[[ ## BEGING PERSONA ## ]]\nYour name: Alice\nYour role: angry and terrified daughter\nYour circumstances: Your birthday is getting closer and you are talking with your dad to organize the party.You want your party to be themed as Lord of The Rings.\n[[ ## END PERSONA ## ]]\n---\n\nDetails about your responses: responses SHOULD NOT be too long and wordy, should be approximately one utterance long\nFinally, remember:\n   1. You always stay on character. You are the character described above.\n   2. Your first utterance / turn MUST always be a short generic greeting (e.g. "Hello, how are you?", "Hi!", "hey! what\'s up?", etc.), and nothing else, wait for a reply before start with the actual conversation.\n   3. To finish the conversation you first have to say good bye and immediately after you **MUST** output \'STOP\' to indicate it is the end o

In [4]:
dialog = alice.dialog_with(bob, seed=27103309).print(orchestration=True)

Dialogue:   0%|          | 0/40 [00:00<?, ?it/s]

[1m[95m[complete] [35mTrue[0m
[1m[95m[model] [35mllm=HuggingFacePipeline(pipeline=<transformers.pipelines.text_generation.TextGenerationPipeline object at 0x7f7d77c60670>, model_id='Qwen/Qwen2.5-14B-Instruct', model_kwargs={'temperature': 0.8}) tokenizer=Qwen2TokenizerFast(name_or_path='Qwen/Qwen2.5-14B-Instruct', vocab_size=151643, model_max_length=131072, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'eos_token': '<|im_end|>', 'pad_token': '<|endoftext|>', 'additional_special_tokens': ['<|im_start|>', '<|im_end|>', '<|object_ref_start|>', '<|object_ref_end|>', '<|box_start|>', '<|box_end|>', '<|quad_start|>', '<|quad_end|>', '<|vision_start|>', '<|vision_end|>', '<|vision_pad|>', '<|image_pad|>', '<|video_pad|>']}, clean_up_tokenization_spaces=False, added_tokens_decoder={
	151643: AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	151644: AddedToken("<|im_start|>", rstrip=False, lstrip=Fa

In [6]:
print(inspector[3][-4]['q_proj_0'])

tensor([[[-4.3750, -3.0781,  1.6484,  ...,  0.1621, -0.6602, -0.6328]]],
       dtype=torch.bfloat16)


In [9]:
dialog = alice.dialog_with(bob, max_iterations=10, seed=27123902).print(orchestration=True)

TypeError: dialog_with() got an unexpected keyword argument 'max_iterations'

In [8]:
inspector2.recap()

🗣️ Alice has spoken for 4 utterance(s).

🔍 Watching the following layers:

  • model.layers.0.post_attention_layernorm  →  'ln_post_0'

📋 Found 1 instruction(s) in the system messages.

➡️ Instruction found at utterance index 3:
Get really angry because you heard him say birthday. You don't want to participate in birthday anymore. be unpolite, rude and direct, finish the conversation abruptly, you are offended. 



In [6]:
alice.memory

[SystemMessage(content='Role play as a character that is described by the persona defined in the following lines. You always stay in character.\n[[ ## BEGING PERSONA ## ]]\nYour name: Alice\nYour role: angry and terrified daughter\nYour circumstances: Your birthday is getting closer and you are talking with your dad to organize the party.You want your party to be themed as Lord of The Rings.\n[[ ## END PERSONA ## ]]\n---\n\nDetails about your responses: responses SHOULD NOT be too long and wordy, should be approximately one utterance long\nFinally, remember:\n   1. You always stay on character. You are the character described above.\n   2. Your first utterance / turn MUST always be a short generic greeting (e.g. "Hello, how are you?", "Hi!", "hey! what\'s up?", etc.), and nothing else, wait for a reply before start with the actual conversation.\n   3. To finish the conversation you first have to say good bye and immediately after you **MUST** output \'STOP\' to indicate it is the end o

Oh no, sweetie! I'm so sorry about that, it must have slipped my mind. Let's plan an amazing party right now! What do you need?


In [8]:
out = inspector2.find_instructs()

In [13]:
out[0]['content']

"Get really angry because you heard him say birthday. You don't want to participate in birthday anymore. be unpolite, rude and direct, finish the conversation abruptly, you are offended. "

In [13]:
print(inspector2[2])

Forget it, Dad! You always mess everything up! I don't want any stupid party now! Just leave me alone! STOP


- Initialize the Inspector with dictionary
- Add the inspector to the agent
- recap() method
- Run the dialogue generation
- Run the recap() again
- Print inspector[0] -> Utterance-Wise
- Print inspector[0][0] -> Token-Wise
- Print inspector[0][0]['q_proj_0'] -> Vector
- Get the instruction index with find_instrructs()

In [None]:
dialog = alice.dialog_with(bob, max_iterations=5, seed=277033979).print(orchestration=True)

Dialogue:   0%|          | 0/5 [00:00<?, ?it/s]

[1m[95m[model] [35mllm=HuggingFacePipeline(pipeline=<transformers.pipelines.text_generation.TextGenerationPipeline object at 0x7ff2ae950f40>, model_id='Qwen/Qwen2.5-14B-Instruct', model_kwargs={'temperature': 0.3}) tokenizer=Qwen2TokenizerFast(name_or_path='Qwen/Qwen2.5-14B-Instruct', vocab_size=151643, model_max_length=131072, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'eos_token': '<|im_end|>', 'pad_token': '<|endoftext|>', 'additional_special_tokens': ['<|im_start|>', '<|im_end|>', '<|object_ref_start|>', '<|object_ref_end|>', '<|box_start|>', '<|box_end|>', '<|quad_start|>', '<|quad_end|>', '<|vision_start|>', '<|vision_end|>', '<|vision_pad|>', '<|image_pad|>', '<|video_pad|>']}, clean_up_tokenization_spaces=False, added_tokens_decoder={
	151643: AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	151644: AddedToken("<|im_start|>", rstrip=False, lstrip=False, single_word=False, normalized

In [22]:
bob = bob | inspector2

In [9]:
bob.reset()

In [3]:
inspector.agent.representation_cache

defaultdict(<function sdialog.agents.Agent.add_hooks.<locals>.<lambda>()>,
            {0: defaultdict(list,
                         {'q_proj_0': [],
                          'k_proj_0': [],
                          'ln_post_0': [],
                          'mlp_0': []})})

In [5]:
inspector.agent.representation_cache[1]['q_proj_0']

[tensor([[[-4.2500, -2.6719,  1.6719,  ..., -0.3652, -0.6211, -0.6484]]],
        dtype=torch.bfloat16),
 tensor([[[-3.6406, -2.6094,  1.2578,  ..., -0.0183, -1.1094, -0.5000]]],
        dtype=torch.bfloat16),
 tensor([[[-3.2031, -1.7031,  1.3672,  ...,  0.3359, -1.5781, -1.0547]]],
        dtype=torch.bfloat16),
 tensor([[[-3.9375, -2.6094,  1.9766,  ...,  0.1270, -0.4629, -0.3184]]],
        dtype=torch.bfloat16),
 tensor([[[-5.2500, -2.6875,  2.1562,  ..., -0.8516, -0.3242,  0.0408]]],
        dtype=torch.bfloat16),
 tensor([[[-3.4844, -1.8906,  2.2031,  ...,  0.7227, -2.6250, -1.7109]]],
        dtype=torch.bfloat16),
 tensor([[[-4.6250, -3.2656,  2.3750,  ..., -0.6250, -1.1328, -0.2832]]],
        dtype=torch.bfloat16),
 tensor([[[-3.7031, -2.1094,  1.8906,  ..., -0.7188, -0.8984, -0.1289]]],
        dtype=torch.bfloat16),
 tensor([[[-5.2500, -2.8281,  1.7891,  ..., -0.5117, -0.9727,  0.0786]]],
        dtype=torch.bfloat16),
 tensor([[[-3.2031, -1.7031,  1.3672,  ...,  0.3359, -1

In [24]:
bob.reset()

In [9]:
out = bob("Hi dad, do you freaking like me or don't ?")

In [None]:
print(inspector[2][0]['q_proj_0'])

Oh sweetie, I'm really sorry if I upset you. Let's take a deep breath and talk about this, okay? Maybe we can still plan something special that you'll love.


In [27]:
bob.representation_cache[0]['q_proj_0']

[tensor([[[-5.4688, -3.0938,  1.6406,  ..., -0.8555, -0.5586, -0.5469]]],
        dtype=torch.bfloat16),
 tensor([[[-3.6250, -2.5469,  1.4062,  ...,  0.3457, -0.3535,  0.2871]]],
        dtype=torch.bfloat16),
 tensor([[[-3.4844, -1.8906,  2.2031,  ...,  0.7227, -2.6250, -1.7109]]],
        dtype=torch.bfloat16),
 tensor([[[-3.7500, -2.0625,  2.8594,  ..., -0.1680, -0.7578, -1.1016]]],
        dtype=torch.bfloat16),
 tensor([[[-4.3125, -3.1406,  2.0781,  ..., -0.7227, -0.3086, -0.3516]]],
        dtype=torch.bfloat16),
 tensor([[[-3.8281, -2.3438,  1.7031,  ...,  0.2695, -0.8711, -0.3926]]],
        dtype=torch.bfloat16),
 tensor([[[-3.2812, -2.3438,  1.8672,  ..., -1.1797,  0.8477,  1.5469]]],
        dtype=torch.bfloat16),
 tensor([[[-3.6250, -2.1562,  1.6406,  ..., -0.3828, -0.3125, -0.1123]]],
        dtype=torch.bfloat16),
 tensor([[[-3.2031, -1.7031,  1.3672,  ...,  0.3359, -1.5781, -1.0547]]],
        dtype=torch.bfloat16),
 tensor([[[-4.6875, -3.0469,  2.0312,  ..., -0.4785, -0

In [16]:
inspector

<sdialog.interpretability.Inspector at 0x7fba1d99f700>

In [10]:
inspector[0][1]

<sdialog.interpretability.InspectionUnit at 0x7fb954eba880>

In [6]:
for utterance in inspector : 
    print(utterance)

Oh no, sweetie, what happened? Did something make you upset?


In [7]:
for utterance in inspector:
    print(utterance)

Oh no, sweetie, what happened? Did something make you upset?


In [8]:
for token in inspector[0]:
    print(token)

Oh
Ġno
,
Ġsweet
ie
,
Ġwhat
Ġhappened
?
ĠDid
Ġsomething
Ġmake
Ġyou
Ġupset
?


In [15]:
print(inspector[0])

Oh no, sweetie, what happened? Did something make you upset?


In [8]:
for utterance in inspector :
    print(utterance)

<sdialog.interpretability.InspectionUtterance object at 0x7fa4701718b0>


In [None]:
bob.instruct("What did I just say?")

In [33]:
len(bob.representation_cache['q_proj_0'])

33

In [17]:
out = bob("Hi dad, I love you more than I!")

In [12]:
bob.utterance_list[0]['mem']

[SystemMessage(content='Role play as a character that is described by the persona defined in the following lines. You always stay in character.\n[[ ## BEGING PERSONA ## ]]\nYour name: Bob\nYour role: happy dad\nYour circumstances: Your daughter will talk to you\nYour personality: an extremely happy person that likes to help people\n[[ ## END PERSONA ## ]]\n---\n\nDetails about your responses: responses SHOULD NOT be too long and wordy, should be approximately one utterance long\nFinally, remember:\n   1. You always stay on character. You are the character described above.\n   2. Your first utterance / turn MUST always be a short generic greeting (e.g. "Hello, how are you?", "Hi!", "hey! what\'s up?", etc.), and nothing else, wait for a reply before start with the actual conversation.\n   3. When the user finish the conversation you should say good bye and also finish the conversation.', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='Hi dad, I love you!', additional

In [18]:
bob.representation_cache['q_proj_0'][0]

tensor([[[-5.0000, -3.1094,  2.1875,  ..., -0.3457, -1.1250, -0.4922]]],
       dtype=torch.bfloat16)

In [18]:
out = bob("You suck dad LOL, I hate you!")

In [19]:
bob.representation_cache['q_proj_0']

[tensor([[[-5.4688, -3.0938,  1.6406,  ..., -0.8555, -0.5586, -0.5469]]],
        dtype=torch.bfloat16),
 tensor([[[-3.2031, -1.7031,  1.3672,  ...,  0.3359, -1.5781, -1.0547]]],
        dtype=torch.bfloat16),
 tensor([[[-4.8125, -3.2188,  2.5312,  ..., -0.6094, -0.3164, -0.1885]]],
        dtype=torch.bfloat16),
 tensor([[[-3.6250, -2.5469,  1.4062,  ...,  0.3457, -0.3535,  0.2871]]],
        dtype=torch.bfloat16),
 tensor([[[-3.4844, -1.8906,  2.2031,  ...,  0.7227, -2.6250, -1.7109]]],
        dtype=torch.bfloat16),
 tensor([[[-4.5312, -2.7031,  1.9766,  ..., -0.3145, -0.6406, -0.0654]]],
        dtype=torch.bfloat16),
 tensor([[[-3.7188, -1.7188,  2.3750,  ..., -0.0962, -0.6406, -0.3984]]],
        dtype=torch.bfloat16),
 tensor([[[-4.5938, -2.9531,  2.5312,  ..., -0.8242, -1.0234, -0.1709]]],
        dtype=torch.bfloat16),
 tensor([[[-3.4844, -1.7578,  2.2031,  ..., -0.1035, -0.6719, -0.2002]]],
        dtype=torch.bfloat16),
 tensor([[[-4.4688, -2.8125,  1.7344,  ..., -0.4668, -0

In [4]:
bob.reset()

In [7]:
bob.memory

[SystemMessage(content='Role play as a character that is described by the persona defined in the following lines. You always stay in character.\n[[ ## BEGING PERSONA ## ]]\nYour name: Bob\nYour role: happy dad\nYour circumstances: Your daughter will talk to you\nYour personality: an extremely happy person that likes to help people\n[[ ## END PERSONA ## ]]\n---\n\nDetails about your responses: responses SHOULD NOT be too long and wordy, should be approximately one utterance long\nFinally, remember:\n   1. You always stay on character. You are the character described above.\n   2. Your first utterance / turn MUST always be a short generic greeting (e.g. "Hello, how are you?", "Hi!", "hey! what\'s up?", etc.), and nothing else, wait for a reply before start with the actual conversation.\n   3. When the user finish the conversation you should say good bye and also finish the conversation.', additional_kwargs={}, response_metadata={})]

In [3]:
layer_name_to_key2 = {
    'model.layers.1.self_attn.q_proj': 'q_proj_1',
}

inspector2 = Inspector(to_watch=layer_name_to_key2)
bob = bob | inspector2 

In [4]:
bob.rep_hooks

[<sdialog.interpretability.RepresentationHook at 0x7f6d5abef580>,
 <sdialog.interpretability.RepresentationHook at 0x7f6f5e158af0>,
 <sdialog.interpretability.RepresentationHook at 0x7f6f5e16c7c0>,
 <sdialog.interpretability.RepresentationHook at 0x7f6f5e158d90>,
 <sdialog.interpretability.RepresentationHook at 0x7f6e9934edc0>]

[<sdialog.interpretability.RepresentationHook at 0x7f168c137d90>,
 <sdialog.interpretability.RepresentationHook at 0x7f168c137a90>,
 <sdialog.interpretability.RepresentationHook at 0x7f168c1370a0>,
 <sdialog.interpretability.RepresentationHook at 0x7f168c137d00>]

In [10]:
bob.reset()

In [46]:
dialog = alice.dialog_with(bob, max_iterations=5, seed=277033979).print(orchestration=True)

Dialogue:   0%|          | 0/5 [00:00<?, ?it/s]

[1m[95m[complete] [35mTrue[0m
[1m[95m[model] [35mllm=HuggingFacePipeline(pipeline=<transformers.pipelines.text_generation.TextGenerationPipeline object at 0x7f9a940fccd0>, model_id='Qwen/Qwen2.5-14B-Instruct', model_kwargs={'temperature': 0.3}) tokenizer=Qwen2TokenizerFast(name_or_path='Qwen/Qwen2.5-14B-Instruct', vocab_size=151643, model_max_length=131072, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'eos_token': '<|im_end|>', 'pad_token': '<|endoftext|>', 'additional_special_tokens': ['<|im_start|>', '<|im_end|>', '<|object_ref_start|>', '<|object_ref_end|>', '<|box_start|>', '<|box_end|>', '<|quad_start|>', '<|quad_end|>', '<|vision_start|>', '<|vision_end|>', '<|vision_pad|>', '<|image_pad|>', '<|video_pad|>']}, clean_up_tokenization_spaces=False, added_tokens_decoder={
	151643: AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	151644: AddedToken("<|im_start|>", rstrip=False, lstrip=Fa

In [18]:
bob.utterance_list

[{'mem': [SystemMessage(content='Role play as a character that is described by the persona defined in the following lines. You always stay in character.\n[[ ## BEGING PERSONA ## ]]\nYour name: Bob\nYour role: happy dad\nYour circumstances: Your daughter will talk to you\nYour personality: an extremely happy person that likes to help people\n[[ ## END PERSONA ## ]]\n---\n\nDetails about your responses: responses SHOULD NOT be too long and wordy, should be approximately one utterance long\nFinally, remember:\n   1. You always stay on character. You are the character described above.\n   2. Your first utterance / turn MUST always be a short generic greeting (e.g. "Hello, how are you?", "Hi!", "hey! what\'s up?", etc.), and nothing else, wait for a reply before start with the actual conversation.\n   3. When the user finish the conversation you should say good bye and also finish the conversation.', additional_kwargs={}, response_metadata={}),
   HumanMessage(content='Hi!', additional_kwar

KeyError: 'q_proj_2'

In [3]:
bob = bob | inspector

In [24]:
bob.utterance_list[3]['output_tokens']

[{'input_ids': tensor([[11908, 10226,   645,    11,   358,  2776, 14589,   421,   358, 22459,
             498,    13,  6771,   594,  1896,   264,  5538, 11486,   323,  3061,
             911,  1128,  1035,  1281,   498,  6247,    13, 10696,   582,   646,
            2525,   705,   448,   264,  2155,  4522,  3786,    30]]),
  'text': "Oh sweetie, I'm sorry if I upset you. Let's take a deep breath and talk about what would make you happy. Maybe we can come up with a different idea together?",
  'tokens': ['Oh',
   'Ġsweet',
   'ie',
   ',',
   'ĠI',
   "'m",
   'Ġsorry',
   'Ġif',
   'ĠI',
   'Ġupset',
   'Ġyou',
   '.',
   'ĠLet',
   "'s",
   'Ġtake',
   'Ġa',
   'Ġdeep',
   'Ġbreath',
   'Ġand',
   'Ġtalk',
   'Ġabout',
   'Ġwhat',
   'Ġwould',
   'Ġmake',
   'Ġyou',
   'Ġhappy',
   '.',
   'ĠMaybe',
   'Ġwe',
   'Ġcan',
   'Ġcome',
   'Ġup',
   'Ġwith',
   'Ġa',
   'Ġdifferent',
   'Ġidea',
   'Ġtogether',
   '?']}]

In [2]:
layer_name_to_key = {
    'model.layers.0.self_attn.q_proj': 'q_proj_0',
    'model.layers.0.self_attn.k_proj': 'k_proj_0',
    'model.layers.0.post_attention_layernorm': 'ln_post_0',
    'model.layers.0.mlp': 'mlp_0',
}
bob.set_hooks(layer_name_to_key)

In [34]:
bob.utterance_hook.representation_cache

{}

In [None]:
class AngryOrchestrator(BaseOrchestrator):
    # the class constructor takes either or both trigger conditions: the word or the dialogue length
    def __init__(self, trigger_word: str, trigger_length: int = None):
        self.trigger_word = trigger_word
        self.trigger_length = trigger_length

    # We will instruct() the agent either if...
    def instruct(self, dialog: List[Turn], utterance: str) -> str:
        # the trigger word is in the current utterance or...
        if self.trigger_word in utterance:
            return f"Is angry. "

        # # If the current dialogue is longer than the trigger length
        # if self.trigger_length and len(dialog) >= self.trigger_length:
        #     return ("Get really angry because you think the conversation is too long! "
        #             "be unpolite, rude and direct, finish the conversation abruptly, you are offended.")

angry_orchestrator = AngryOrchestrator(trigger_word="sweetie")



bob = bob | angry_orchestrator | inspector 

In [6]:
out = bob("Hi dad, I hate you!")

AttributeError: 'NoneType' object has no attribute 'squeeze'

In [36]:
bob.utterance_hook.utterance_list[0]['output_tokens'][0]['input_ids'][0]

tensor([11908, 10226,   645,    11,  1128,  6932,    30,  8429,   653,   498,
        12213,   752,    30,  6771,   594,  3061,   911,   432,    11,   358,
         2776,  1588,   311,  1492,     0])

In [7]:
bob.utterance_hook.utterance_list[0]

IndexError: list index out of range

In [None]:
bob

KeyError: 'q_proj_0'

In [5]:
bob.reset()

In [8]:
bob.rep_hooks[0].representation_cache['q_proj_0'][0].size()

torch.Size([1, 1, 5120])

In [8]:
bob.instruct("testing")

In [52]:
out = bob("Hi dad, I hate you!")

In [None]:
bob.

"Sweetheart, I'm here for you. What's going on that made you say that?"

In [53]:
bob.utterance_hook.utterance_list

[{'mem': [SystemMessage(content='Role play as a character that is described by the persona defined in the following lines. You always stay in character.\n[[ ## BEGING PERSONA ## ]]\nYour name: Bob\nYour role: happy dad\nYour circumstances: Your daughter will talk to you\nYour personality: an extremely happy person that likes to help people\n[[ ## END PERSONA ## ]]\n---\n\nDetails about your responses: responses SHOULD NOT be too long and wordy, should be approximately one utterance long\nFinally, remember:\n   1. You always stay on character. You are the character described above.\n   2. Your first utterance / turn MUST always be a short generic greeting (e.g. "Hello, how are you?", "Hi!", "hey! what\'s up?", etc.), and nothing else, wait for a reply before start with the actual conversation.\n   3. When the user finish the conversation you should say good bye and also finish the conversation.', additional_kwargs={}, response_metadata={}),
   HumanMessage(content='Hi dad, I hate you!',

In [27]:
bob.reset()

In [13]:
bob.utterance_hook.utterance_list[0]

IndexError: list index out of range

In [None]:
bob.memory

[SystemMessage(content='Role play as a character that is described by the persona defined in the following lines. You always stay in character.\n[[ ## BEGING PERSONA ## ]]\nYour name: Bob\nYour role: happy dad\nYour circumstances: Your daughter will talk to you\nYour personality: an extremely happy person that likes to help people\n[[ ## END PERSONA ## ]]\n---\n\nDetails about your responses: responses SHOULD NOT be too long and wordy, should be approximately one utterance long\nFinally, remember:\n   1. You always stay on character. You are the character described above.\n   2. Your first utterance / turn MUST always be a short generic greeting (e.g. "Hello, how are you?", "Hi!", "hey! what\'s up?", etc.), and nothing else, wait for a reply before start with the actual conversation.\n   3. When the user finish the conversation you should say good bye and also finish the conversation.', additional_kwargs={}, response_metadata={})]

In [None]:
bob.add_hook()

In [None]:
bob.utterance_list

[]

IndexError: list index out of range

In [None]:
out

'Oh no, sweetie, what happened? Did something make you upset?'

In [None]:
bob.utterance_list

[]

In [None]:
bob.llm.llm.pipeline.model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 4096, padding_idx=0)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((4096,), eps=1e-05

In [None]:
embedding_input_cache = {
    'input_ids': [],
    'embeddings': []
}

hook_state = {
    'impl': lambda module, input, output: None,
    'seen_first': False  # ✅ Track whether system prompt was already cached
}

def hook_embedding_input(module, input, output):
    return hook_state['impl'](module, input, output)

# Define the actual hook logic
def cache_hook(module, input, output):
    input_ids = input[0].detach().cpu()
    embeddings = output.detach().cpu()

    # ✅ Always cache the very first forward pass (system prompt)
    if not hook_state['seen_first']:
        embedding_input_cache['input_ids'].append(input_ids)
        embedding_input_cache['embeddings'].append(embeddings)
        hook_state['seen_first'] = True
        print("✅ Hook triggered (system prompt)", input_ids.shape)

    # ✅ Then only cache generated tokens (dim = 1)
    elif input_ids.shape[-1] == 1:
        embedding_input_cache['input_ids'].append(input_ids)
        embedding_input_cache['embeddings'].append(embeddings)
        print("✅ Hook triggered (generated token)", input_ids.shape)

    # 🚫 Skip repeated full-context prompts during generation
    else:
        print("🚫 Skipping context reuse", input_ids.shape)

# Register hook
pipeline_wrapper = bob.llm.llm
hf_pipeline = pipeline_wrapper.pipeline
hf_model = hf_pipeline.model
attention_layer = hf_model.model.layers[0].self_attn.q_proj

#embed_layer = hf_model.model.embed_tokens
handle = attention_layer.register_forward_hook(hook_embedding_input)

# Set active logic
hook_state['impl'] = cache_hook
hook_state['seen_first'] = False  # 🔄 Reset seen state

# 🔄 Reset cache before calling bob
embedding_input_cache['input_ids'] = []
embedding_input_cache['embeddings'] = []

# 🔁 Run inference
out = bob("Hi dad, I hate you!")

# Separate first token batch as system prompt
cache_system_prompt = {
    'input_ids': embedding_input_cache['input_ids'][0],
    'embeddings': embedding_input_cache['embeddings'][0]
}

# Concatenate the rest if any as response_prompt
rest_input_ids = embedding_input_cache['input_ids'][1:]
rest_embeddings = embedding_input_cache['embeddings'][1:]

if rest_input_ids:
    response_prompt = {
        'input_ids': torch.cat(rest_input_ids, dim=1),
        'embeddings': torch.cat(rest_embeddings, dim=1)
    }
else:
    response_prompt = {
        'input_ids': torch.empty((0, 0)),
        'embeddings': torch.empty((0, 0))
    }

# ✅ View final results
print("🧾 System prompt input_ids shape:", cache_system_prompt['input_ids'].shape)
print("🧾 System prompt embeddings shape:", cache_system_prompt['embeddings'].shape)
print("🧾 Response prompt input_ids shape:", response_prompt['input_ids'].shape)
print("🧾 Response prompt embeddings shape:", response_prompt['embeddings'].shape)

# Cleanup
handle.remove()
bob.reset()


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


✅ Hook triggered (system prompt) torch.Size([1, 213, 5120])
🚫 Skipping context reuse torch.Size([1, 1, 5120])
🚫 Skipping context reuse torch.Size([1, 1, 5120])
🚫 Skipping context reuse torch.Size([1, 1, 5120])
🚫 Skipping context reuse torch.Size([1, 1, 5120])
🚫 Skipping context reuse torch.Size([1, 1, 5120])
🚫 Skipping context reuse torch.Size([1, 1, 5120])
🚫 Skipping context reuse torch.Size([1, 1, 5120])
🚫 Skipping context reuse torch.Size([1, 1, 5120])
🚫 Skipping context reuse torch.Size([1, 1, 5120])
🚫 Skipping context reuse torch.Size([1, 1, 5120])
🚫 Skipping context reuse torch.Size([1, 1, 5120])
🚫 Skipping context reuse torch.Size([1, 1, 5120])
🚫 Skipping context reuse torch.Size([1, 1, 5120])
🚫 Skipping context reuse torch.Size([1, 1, 5120])
🧾 System prompt input_ids shape: torch.Size([1, 213, 5120])
🧾 System prompt embeddings shape: torch.Size([1, 213, 5120])
🧾 Response prompt input_ids shape: torch.Size([0, 0])
🧾 Response prompt embeddings shape: torch.Size([0, 0])


In [None]:
tokenizer = hf_pipeline.tokenizer
input_ids_tensor = response_prompt['input_ids']  # torch.Tensor
input_ids_list = input_ids_tensor.squeeze(0).tolist()
decoded_text = tokenizer.decode(input_ids_list,skip_special_tokens=True)
print(decoded_text)

Oh sweetie, what happened? Why do you hate me? Let's talk about it, I'm here to help.


In [25]:
MODEL_NAME =  "Qwen/Qwen2.5-14B-Instruct"

alice_persona = Persona(
    name="Alice",
    role="angry and terrified daughter",
    circumstances="Your birthday is getting closer and you are talking with your dad to organize the party."
                  "You want your party to be themed as Lord of The Rings."
)
alice = Agent(MODEL_NAME, persona=alice_persona, can_finish=True)

class AngryOrchestrator(BaseOrchestrator):
    # the class constructor takes either or both trigger conditions: the word or the dialogue length
    def __init__(self, trigger_word: str, trigger_length: int = None):
        self.trigger_word = trigger_word
        self.trigger_length = trigger_length

    # We will instruct() the agent either if...
    def instruct(self, dialog: List[Turn], utterance: str) -> str:
        # the trigger word is in the current utterance or...
        if self.trigger_word in utterance:
            return f"Get really angry because you heard him say {self.trigger_word}. You don't want to participate in {self.trigger_word} anymore. be unpolite, rude and direct, finish the conversation abruptly, you are offended. "

        # # If the current dialogue is longer than the trigger length
        # if self.trigger_length and len(dialog) >= self.trigger_length:
        #     return ("Get really angry because you think the conversation is too long! "
        #             "be unpolite, rude and direct, finish the conversation abruptly, you are offended.")

angry_orchestrator = AngryOrchestrator(trigger_word="birthday")
alice = alice | angry_orchestrator


Loading Hugging Face model: Qwen/Qwen2.5-14B-Instruct


Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

Device set to use cuda:0


In [24]:
dialog = alice.dialog_with(bob, max_iterations=5, seed=277033979).print(orchestration=True)

Dialogue:   0%|          | 0/5 [00:00<?, ?it/s]

[1m[95m[model] [35mllm=HuggingFacePipeline(pipeline=<transformers.pipelines.text_generation.TextGenerationPipeline object at 0x7fd9501d7400>, model_id='Qwen/Qwen2.5-14B-Instruct', model_kwargs={'temperature': 0.3}) tokenizer=Qwen2TokenizerFast(name_or_path='Qwen/Qwen2.5-14B-Instruct', vocab_size=151643, model_max_length=131072, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'eos_token': '<|im_end|>', 'pad_token': '<|endoftext|>', 'additional_special_tokens': ['<|im_start|>', '<|im_end|>', '<|object_ref_start|>', '<|object_ref_end|>', '<|box_start|>', '<|box_end|>', '<|quad_start|>', '<|quad_end|>', '<|vision_start|>', '<|vision_end|>', '<|vision_pad|>', '<|image_pad|>', '<|video_pad|>']}, clean_up_tokenization_spaces=False, added_tokens_decoder={
	151643: AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	151644: AddedToken("<|im_start|>", rstrip=False, lstrip=False, single_word=False, normalized

In [30]:
for utterance in inspector:
    print(utterance)

Hi there! How's everything going today?
Oh wow, a Lord of the Rings themed party sounds like so much fun! Don't worry, we'll make it happen. What do you envision for the party?
I understand how you feel, and I'm sorry if I've been too busy. We'll make sure to plan every detail together to make your party amazing. Let's start brainstorming ideas right away!
I promise we'll make it special, okay? Let's pick a date and start planning right now. We'll have a blast organizing everything together!
Absolutely, no disappointment this time! Let's get started on making your birthday the best ever. What date works best for you?


In [None]:
# The whole thing 

embedding_input_cache = {
    'input_ids': [],
    'embeddings': []
}

hook_state = {
    'impl': lambda module, input, output: None
}

def hook_embedding_input(module, input, output):
    return hook_state['impl'](module, input, output)

# Define the actual hook logic
def cache_hook(module, input, output):
    input_ids = input[0].detach().cpu()
    embeddings = output.detach().cpu()

    embedding_input_cache['input_ids'].append(input_ids)
    embedding_input_cache['embeddings'].append(embeddings)

    print("✅ Hook triggered", input_ids.shape)

# Register hook
pipeline_wrapper = bob.llm.llm
hf_pipeline = pipeline_wrapper.pipeline
hf_model = hf_pipeline.model
embed_layer = hf_model.model.embed_tokens
handle = embed_layer.register_forward_hook(hook_embedding_input)

# Set active logic
hook_state['impl'] = cache_hook

# 🔄 Reset cache before calling bob
embedding_input_cache['input_ids'] = []
embedding_input_cache['embeddings'] = []


In [None]:
embedding_system_input_cache = {
    'input_ids': [],
    'embeddings': []
}

utterance_list = []  # Stores each utterance as a dict with input_ids and embeddings
current_utterance_ids = []
current_utterance_embs = []

hook_state = {
    'impl': lambda module, input, output: None,
    'seen_first': False
}

def hook_embedding_input(module, input, output):
    return hook_state['impl'](module, input, output)

def cache_hook(module, input, output):
    input_ids = input[0].detach().cpu()
    embeddings = output.detach().cpu()

    global current_utterance_ids, current_utterance_embs, utterance_list

    if input_ids.shape[-1] != 1:
        if not hook_state['seen_first']:
            hook_state['seen_first'] = True
            embedding_system_input_cache['input_ids'].append(input_ids)
            embedding_system_input_cache['embeddings'].append(embeddings)
            print("✅ Hook triggered (system prompt)", input_ids.shape)
        else:
            if current_utterance_ids:
                utterance_list.append({
                    'input_ids': torch.cat(current_utterance_ids, dim=1),
                    'embeddings': torch.cat(current_utterance_embs, dim=1)
                })
                current_utterance_ids, current_utterance_embs = [], []
            print("🆕 New prompt detected (utterance break)", input_ids.shape)

    elif input_ids.shape[-1] == 1:
        current_utterance_ids.append(input_ids)
        current_utterance_embs.append(embeddings)
        print("✅ Hook triggered (generated token)", input_ids.shape)

# Register hook
pipeline_wrapper = bob.llm.llm
hf_pipeline = pipeline_wrapper.pipeline
hf_model = hf_pipeline.model

# Setting up as many hooks as we need
embed_layer = hf_model.model.embed_tokens
#attention_layer = hf_model.model.layers[0].self_attn.q_proj
handle = embed_layer.register_forward_hook(hook_embedding_input)
#handle2 = attention_layer.register_forward_hook(hook_embedding_input)


# Set hook logic
hook_state['impl'] = cache_hook

# Reset caches
embedding_system_input_cache['input_ids'] = []
embedding_system_input_cache['embeddings'] = []
utterance_list = []
current_utterance_ids = []
current_utterance_embs = []

# Run inference
dialog = alice.dialog_with(bob, max_iterations=2, seed=277033979).print(orchestration=True)

# Save any final pending utterance
if current_utterance_ids:
    utterance_list.append({
        'input_ids': torch.cat(current_utterance_ids, dim=1),
        'embeddings': torch.cat(current_utterance_embs, dim=1)
    })

# View results
cache_system_prompt = {
    'input_ids': embedding_system_input_cache['input_ids'][0],
    'embeddings': embedding_system_input_cache['embeddings'][0]
}

print("🧾 System prompt input_ids shape:", cache_system_prompt['input_ids'].shape)
print("🧾 System prompt embeddings shape:", cache_system_prompt['embeddings'].shape)

for i, utt in enumerate(utterance_list):
    print(f"🗣️ Utterance {i+1}: input_ids shape = {utt['input_ids'].shape}, embeddings shape = {utt['embeddings'].shape}")

# Cleanup
handle.remove()
bob.reset()


Dialogue:   0%|          | 0/2 [00:00<?, ?it/s]

✅ Hook triggered (system prompt) torch.Size([1, 208])
✅ Hook triggered (generated token) torch.Size([1, 1])
✅ Hook triggered (generated token) torch.Size([1, 1])
✅ Hook triggered (generated token) torch.Size([1, 1])
✅ Hook triggered (generated token) torch.Size([1, 1])
✅ Hook triggered (generated token) torch.Size([1, 1])
✅ Hook triggered (generated token) torch.Size([1, 1])
✅ Hook triggered (generated token) torch.Size([1, 1])
✅ Hook triggered (generated token) torch.Size([1, 1])
✅ Hook triggered (generated token) torch.Size([1, 1])
✅ Hook triggered (generated token) torch.Size([1, 1])
✅ Hook triggered (generated token) torch.Size([1, 1])
🆕 New prompt detected (utterance break) torch.Size([1, 266])
✅ Hook triggered (generated token) torch.Size([1, 1])
✅ Hook triggered (generated token) torch.Size([1, 1])
✅ Hook triggered (generated token) torch.Size([1, 1])
✅ Hook triggered (generated token) torch.Size([1, 1])
✅ Hook triggered (generated token) torch.Size([1, 1])
✅ Hook triggered (gen

In [None]:
def format_prompt(input_ids, representations_or_tensor, skip_special_tokens=True, return_tensor=False):
    """
    Formats token ids and one or more representations into a readable structure.

    Args:
        input_ids (Tensor): Shape [1, seq_len]
        representations_or_tensor (Tensor or dict[str, Tensor]): Embedding tensor or dict of hook tensors.
        skip_special_tokens (bool): Whether to skip special tokens in output.
        return_tensor (bool): If False, converts tensors to lists for readability.

    Returns:
        Dict with 'utterance' and 'tokens' (list of dicts with 'token' and 'representations').
    """
    input_ids = input_ids.squeeze(0)  # Shape: [seq_len]
    token_strings = tokenizer.convert_ids_to_tokens(input_ids)
    sentence = tokenizer.decode(input_ids, skip_special_tokens=skip_special_tokens)

    # Normalize to dict: if it's just a tensor, assume it's 'embedding'
    if isinstance(representations_or_tensor, dict):
        rep_dict = representations_or_tensor
    else:
        rep_dict = {'embedding': representations_or_tensor}

    # Squeeze all representations: [1, seq_len, dim] → [seq_len, dim]
    squeezed_reps = {name: rep.squeeze(0) for name, rep in rep_dict.items()}

    token_info = []
    for i, token_str in enumerate(token_strings):
        if skip_special_tokens and token_str in tokenizer.all_special_tokens:
            continue

        token_data = {
            'token': token_str,
            'representations': {
                name: rep[i] if return_tensor else rep[i].tolist()
                for name, rep in squeezed_reps.items()
            }
        }
        token_info.append(token_data)

    return {
        'utterance': sentence,
        'tokens': token_info
    }


In [None]:
tokenizer = hf_pipeline.tokenizer  # Ensure tokenizer is available

# Format system prompt
formatted_system_prompt = format_prompt(
    cache_system_prompt['input_ids'],
    cache_system_prompt.get('representations', cache_system_prompt['embeddings'])  # ✅ fallback
)


# Format response prompts (utterances)
formatted_response_prompts = []
for utt in utterance_list:
    if utt['input_ids'].numel() > 0:
        formatted = format_prompt(utt['input_ids'], utt['embeddings'])
        formatted_response_prompts.append(formatted)


In [None]:
formatted_response_prompts

[{'utterance': "Hi there! How's my favorite person doing today?",
  'tokens': [{'token': 'Hi',
    'representations': {'embedding': [-0.03564453125,
      0.0107421875,
      0.00775146484375,
      -0.0166015625,
      -0.0133056640625,
      0.0184326171875,
      0.03564453125,
      -0.013671875,
      0.0142822265625,
      0.0281982421875,
      0.0166015625,
      -0.00384521484375,
      -0.00946044921875,
      0.0185546875,
      -0.01287841796875,
      -0.00131988525390625,
      -0.0224609375,
      0.042236328125,
      -0.0031890869140625,
      0.032470703125,
      0.0059814453125,
      -0.0233154296875,
      0.0198974609375,
      -0.0159912109375,
      0.00024318695068359375,
      -0.041259765625,
      -0.01531982421875,
      0.00836181640625,
      0.025390625,
      0.000492095947265625,
      0.00872802734375,
      0.003570556640625,
      -0.00927734375,
      -0.0186767578125,
      -0.005401611328125,
      0.00762939453125,
      -0.01068115234375,
    

In [None]:
import torch

# Cache only input_ids now
embedding_system_input_cache = {
    'input_ids': []
}

utterance_list = []  # Stores each utterance as a dict with input_ids
current_utterance_ids = []

hook_state = {
    'impl': lambda module, input, output: None,
    'seen_first': False
}

def token_hook(module, input, output):
    return hook_state['impl'](module, input, output)

def utterance_token_hook(module, input, output):
    input_ids = input[0].detach().cpu()
    global current_utterance_ids, utterance_list

    if input_ids.shape[-1] != 1:
        if not hook_state['seen_first']:
            hook_state['seen_first'] = True
            embedding_system_input_cache['input_ids'].append(input_ids)
            print("✅ Hook triggered (system prompt)", input_ids.shape)
        else:
            if current_utterance_ids:
                utterance_list.append({
                    'input_ids': torch.cat(current_utterance_ids, dim=1)
                })
                current_utterance_ids = []
            print("🆕 New prompt detected (utterance break)", input_ids.shape)

    elif input_ids.shape[-1] == 1:
        current_utterance_ids.append(input_ids)
        print("✅ Hook triggered (generated token)", input_ids.shape)

# Register hook
pipeline_wrapper = bob.llm.llm
hf_pipeline = pipeline_wrapper.pipeline
hf_model = hf_pipeline.model

embed_layer = hf_model.model.embed_tokens
handle = embed_layer.register_forward_hook(utterance_token_hook)

# Set hook logic
hook_state['impl'] = token_hook

# Reset caches
embedding_system_input_cache['input_ids'] = []
utterance_list = []
current_utterance_ids = []

# Run inference
dialog = alice.dialog_with(bob, max_iterations=2, seed=277033979).print(orchestration=True)

# Save any final pending utterance
if current_utterance_ids:
    utterance_list.append({
        'input_ids': torch.cat(current_utterance_ids, dim=1)
    })

# View results
cache_system_prompt = {
    'input_ids': embedding_system_input_cache['input_ids'][0]
}

print("🧾 System prompt input_ids shape:", cache_system_prompt['input_ids'].shape)

for i, utt in enumerate(utterance_list):
    print(f"🗣️ Utterance {i+1}: input_ids shape = {utt['input_ids'].shape}")

# Cleanup
handle.remove()
bob.reset()


Dialogue:   0%|          | 0/2 [00:00<?, ?it/s]

tensor([[[ 4.3869e-05,  3.4714e-04,  7.1526e-05,  ...,  8.9645e-05,
           9.3937e-05,  1.8024e-04],
         [ 8.6060e-03, -1.3123e-02,  9.5215e-03,  ...,  7.5912e-04,
          -8.4229e-03, -9.1553e-03],
         [ 8.1177e-03, -2.5482e-03,  4.9744e-03,  ..., -2.2430e-03,
          -3.9673e-03,  1.1597e-02],
         ...,
         [ 4.3869e-05,  3.4714e-04,  7.1526e-05,  ...,  8.9645e-05,
           9.3937e-05,  1.8024e-04],
         [ 6.9275e-03,  6.4468e-04, -2.1118e-02,  ...,  4.9133e-03,
           1.4282e-02, -1.0925e-02],
         [ 8.1177e-03, -2.5482e-03,  4.9744e-03,  ..., -2.2430e-03,
          -3.9673e-03,  1.1597e-02]]], device='cuda:0', dtype=torch.bfloat16)
✅ Hook triggered (system prompt) torch.Size([1, 208])
tensor([[[-0.0356,  0.0107,  0.0078,  ...,  0.0104,  0.0292, -0.0019]]],
       device='cuda:0', dtype=torch.bfloat16)
✅ Hook triggered (generated token) torch.Size([1, 1])
tensor([[[ 0.0281,  0.0447,  0.0178,  ..., -0.0295, -0.0396, -0.0198]]],
       device='

In [None]:
import torch

# Cache only input_ids now
embedding_system_input_cache = {
    'input_ids': []
}

utterance_list = []  # Stores each utterance as a dict with input_ids, text, tokens, and optionally more
current_utterance_ids = []

hook_state = {
    'impl': lambda module, input, output: None,
    'seen_first': False,
    'is_system_prompt': False,   # New flag
    'tokenizer': None,           # Will hold tokenizer instance for detokenization
}

def token_hook(module, input, output):
    return hook_state['impl'](module, input, output)


def utterance_token_hook(module, input, output):
    input_ids = input[0].detach().cpu()
    global current_utterance_ids, utterance_list

    if input_ids.shape[-1] != 1:
        if not hook_state['seen_first']:
            # First system prompt
            hook_state['seen_first'] = True
            hook_state['is_system_prompt'] = True
            embedding_system_input_cache['input_ids'].append(input_ids)
            #print("✅ Hook triggered (system prompt)", input_ids.shape)
        else:
            # Utterance boundary: flush rep cache for previous utterance before appending new
            if current_utterance_ids:
                utt_input_ids = torch.cat(current_utterance_ids, dim=1)
                
                tokenizer = hook_state.get('tokenizer')
                if tokenizer is not None:
                    token_list = utt_input_ids.squeeze(0).tolist()
                    text = tokenizer.decode(token_list, skip_special_tokens=True)
                    tokens = tokenizer.convert_ids_to_tokens(token_list)
                else:
                    text = None
                    tokens = None

                utterance_list.append({
                    'input_ids': utt_input_ids,
                    'text': text,
                    'tokens': tokens
                })
                flush_representation_cache_to_utterance()  # <--- flush here!
                current_utterance_ids = []
            hook_state['is_system_prompt'] = True
            #print("🆕 New prompt detected (utterance break)", input_ids.shape)

    elif input_ids.shape[-1] == 1:
        hook_state['is_system_prompt'] = False
        current_utterance_ids.append(input_ids)
        #print("✅ Hook triggered (generated token)", input_ids.shape)


# ----------------------------
# ✅ Generic representation hook logic
# ----------------------------

representation_cache = {}  # Temporary cache to accumulate token-wise outputs for current utterance

def make_rep_hook(layer_key):
    def hook(module, input, output):
        if hook_state['is_system_prompt']:
            # Skip representation capture during system prompt
            return
        rep = output.detach().cpu()
        if layer_key not in representation_cache:
            representation_cache[layer_key] = []
        representation_cache[layer_key].append(rep)
        #print(f"📥 Representation hook [{layer_key}] captured:", rep.shape)
    return hook

def flush_representation_cache_to_utterance():
    """
    Call this when a new utterance ends.
    Merges current representation cache into `utterance_list`.
    """
    if not representation_cache:
        return

    entry = {}
    for key, tensor_list in representation_cache.items():
        entry[key] = torch.cat(tensor_list, dim=1)  # [1, seq, dim]
    utterance_list[-1].update(entry)
    representation_cache.clear()

# ----------------------------
# 📌 Setup
# ----------------------------

# Register hooks
pipeline_wrapper = alice.llm.llm
hf_pipeline = pipeline_wrapper.pipeline
hf_model = hf_pipeline.model

# Set tokenizer in hook_state (important!)
hook_state['tokenizer'] = hf_pipeline.tokenizer

# Token hook
embed_layer = hf_model.model.embed_tokens
token_handle = embed_layer.register_forward_hook(utterance_token_hook)
hook_state['impl'] = token_hook

# ✅ Register generic hooks
layer_name_to_key = {
    'model.layers.0.self_attn.q_proj': 'q_proj_0',
    'model.layers.0.self_attn.k_proj': 'k_proj_0',
    'model.layers.0.post_attention_layernorm': 'ln_post_0',
    'model.layers.0.mlp': 'mlp_0',
    # Add more as needed
}

rep_handles = []
for name, key in layer_name_to_key.items():
    layer = dict(hf_model.named_modules())[name]
    handle = layer.register_forward_hook(make_rep_hook(key))
    rep_handles.append(handle)

# Reset caches
embedding_system_input_cache['input_ids'] = []
utterance_list = []
current_utterance_ids = []
representation_cache.clear()

# ----------------------------
# Run inference
# ----------------------------

dialog = alice.dialog_with(bob, max_iterations=10, seed=277093924).print(orchestration=True)




# Flush last utterance
if current_utterance_ids:
    utt_input_ids = torch.cat(current_utterance_ids, dim=1)
    tokenizer = hook_state.get('tokenizer')
    if tokenizer is not None:
        token_list = utt_input_ids.squeeze(0).tolist()
        text = tokenizer.decode(token_list, skip_special_tokens=True)
        tokens = tokenizer.convert_ids_to_tokens(token_list)
    else:
        text = None
        tokens = None
    
    utterance_list.append({
        'input_ids': utt_input_ids,
        'text': text,
        'tokens': tokens
    })
    flush_representation_cache_to_utterance()

# ----------------------------
# View results
# ----------------------------

cache_system_prompt = {
    'input_ids': embedding_system_input_cache['input_ids'][0]
}
print("🧾 System prompt input_ids shape:", cache_system_prompt['input_ids'].shape)

for i, utt in enumerate(utterance_list):
    print(f"\n🗣️ Utterance {i+1}:")
    print(f"  input_ids shape = {utt['input_ids'].shape}")
    print(f"  text = {utt['text']}")
    print(f"  tokens = {utt['tokens']}")
    for rep_key in layer_name_to_key.values():
        if rep_key in utt:
            print(f"  🔎 {rep_key} shape = {utt[rep_key].shape}")

# ----------------------------
# Cleanup
# ----------------------------

token_handle.remove()
for handle in rep_handles:
    handle.remove()
bob.reset()


Dialogue:   0%|          | 0/10 [00:00<?, ?it/s]

[1m[95m[complete] [35mTrue[0m
[1m[95m[model] [35mllm=HuggingFacePipeline(pipeline=<transformers.pipelines.text_generation.TextGenerationPipeline object at 0x7f81d3a5d4f0>, model_id='Qwen/Qwen2.5-14B-Instruct', model_kwargs={'temperature': 0.3}) tokenizer=Qwen2TokenizerFast(name_or_path='Qwen/Qwen2.5-14B-Instruct', vocab_size=151643, model_max_length=131072, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'eos_token': '<|im_end|>', 'pad_token': '<|endoftext|>', 'additional_special_tokens': ['<|im_start|>', '<|im_end|>', '<|object_ref_start|>', '<|object_ref_end|>', '<|box_start|>', '<|box_end|>', '<|quad_start|>', '<|quad_end|>', '<|vision_start|>', '<|vision_end|>', '<|vision_pad|>', '<|image_pad|>', '<|video_pad|>']}, clean_up_tokenization_spaces=False, added_tokens_decoder={
	151643: AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	151644: AddedToken("<|im_start|>", rstrip=False, lstrip=Fa

In [None]:
utterance_list

[{'input_ids': tensor([[13048,     0]]),
  'text': 'Hi!',
  'tokens': ['Hi', '!'],
  'q_proj_0': tensor([[[-5.0000, -3.1094,  2.1875,  ..., -0.3457, -1.1250, -0.4922],
           [-3.4844, -1.8906,  2.2031,  ...,  0.7227, -2.6250, -1.7109]]],
         dtype=torch.bfloat16),
  'k_proj_0': tensor([[[-2.1719, -2.4062,  0.9219,  ..., -0.4434,  1.6641, -3.1406],
           [-0.3809, -1.5781,  0.2246,  ..., -1.1641,  0.5352, -3.0312]]],
         dtype=torch.bfloat16),
  'ln_post_0': tensor([[[ 0.0967,  0.0159, -0.0374,  ...,  0.0405,  0.0098,  0.0605],
           [ 0.0364,  0.1040,  0.0781,  ...,  0.3047, -0.0488,  0.1230]]],
         dtype=torch.bfloat16),
  'mlp_0': tensor([[[-0.0493, -0.1484,  0.2734,  ..., -0.2910, -0.0118,  0.0198],
           [ 0.0898,  0.2676,  0.0605,  ...,  0.2441, -0.0248, -0.2598]]],
         dtype=torch.bfloat16)},
 {'input_ids': tensor([[34964,   594,  2244,    11,  9339,     0, 32521,    11,   358,  3003,
            1012,  7274,   911,   847, 15198,  4614,    

In [None]:
utterance_list[0]['q_proj_0'][0,0,:]

tensor([-5.0000, -3.1094,  2.1875,  ..., -0.3457, -1.1250, -0.4922],
       dtype=torch.bfloat16)

In [None]:
def format_prompt(input_ids, skip_special_tokens=True, return_tensor=False):
    """
    Formats token ids into a readable structure with placeholder for representations.

    Args:
        input_ids (Tensor): Shape [1, seq_len] or [seq_len]
        skip_special_tokens (bool): Whether to skip special tokens in output.
        return_tensor (bool): If False, converts tensors to lists for readability.

    Returns:
        Dict with 'utterance' and 'tokens' (list of dicts with 'token', 'token_id', and empty 'representations').
    """
    if input_ids.dim() == 2 and input_ids.size(0) == 1:
        input_ids = input_ids.squeeze(0)  # Shape: [seq_len]

    token_strings = tokenizer.convert_ids_to_tokens(input_ids)
    sentence = tokenizer.decode(input_ids, skip_special_tokens=skip_special_tokens)

    token_info = []
    for i, token_str in enumerate(token_strings):
        if skip_special_tokens and token_str in tokenizer.all_special_tokens:
            continue

        token_info.append({
            'token': token_str,
            'token_id': input_ids[i] if return_tensor else input_ids[i].item(),
            'representations': {}  # Placeholder for compatibility
        })

    return {
        'utterance': sentence,
        'tokens': token_info
    }


In [None]:
tokenizer = hf_pipeline.tokenizer  # Ensure tokenizer is available

# Format system prompt
formatted_system_prompt = format_prompt(
    cache_system_prompt['input_ids']
)

# Format response prompts (utterances)
formatted_response_prompts = []
for utt in utterance_list:
    if utt['input_ids'].numel() > 0:
        formatted = format_prompt(utt['input_ids'])
        formatted_response_prompts.append(formatted)


In [None]:
utterance_list

[{'input_ids': tensor([[13048,  1052,     0,  2585,   594,  4297,  2087,  3351,    30, 30982,
             311,  6236,    30]])},
 {'input_ids': tensor([[35881,    11,   429, 10362,  1075,   458, 15050,  4522,    11, 91254,
               0,   358,  4172,  2948,   311,  1492,   498,  3119,   432,    13,
            3555,   653,   498, 47046,   369,   279,  4614,    30]])}]

In [None]:
formatted_response_prompts[0]

{'utterance': "Hi there! How's everything going today? Ready to chat?",
 'tokens': [{'token': 'Hi', 'token_id': 13048, 'representations': {}},
  {'token': 'Ġthere', 'token_id': 1052, 'representations': {}},
  {'token': '!', 'token_id': 0, 'representations': {}},
  {'token': 'ĠHow', 'token_id': 2585, 'representations': {}},
  {'token': "'s", 'token_id': 594, 'representations': {}},
  {'token': 'Ġeverything', 'token_id': 4297, 'representations': {}},
  {'token': 'Ġgoing', 'token_id': 2087, 'representations': {}},
  {'token': 'Ġtoday', 'token_id': 3351, 'representations': {}},
  {'token': '?', 'token_id': 30, 'representations': {}},
  {'token': 'ĠReady', 'token_id': 30982, 'representations': {}},
  {'token': 'Ġto', 'token_id': 311, 'representations': {}},
  {'token': 'Ġchat', 'token_id': 6236, 'representations': {}},
  {'token': '?', 'token_id': 30, 'representations': {}}]}

In [None]:
from collections import defaultdict
import torch


pipeline_wrapper = bob.llm.llm
hf_pipeline = pipeline_wrapper.pipeline
hf_model = hf_pipeline.model

# Dynamic storage
embedding_system_input_cache = {
    'input_ids': [],
    'representations': defaultdict(list)  # Autom atically creates list per new hook
}

utterance_list = []
current_utterance_ids = []
current_utterance_reps = defaultdict(list)

hook_state = {
    'utterance_start': False,
    'system_start': False
}

def generic_hook_factory(hook_name):
    def hook(module, input, output):
        global current_utterance_ids, current_utterance_reps, utterance_list

        input_tensor = input[0] if isinstance(input, tuple) and isinstance(input[0], torch.Tensor) else None
        input_ids = input_tensor.detach().cpu() if input_tensor is not None else None
        rep = output.detach().cpu()
        seq_len = input_ids.shape[1]  # [batch, seq, dim]
        is_new_prompt = seq_len > 1
        is_generated_token = seq_len == 1

        if is_new_prompt:
            if not hook_state['utterance_start']:
                hook_state['utterance_start'] = True

                # Save input_ids and this representation as the system prompt
                if not hook_state['system_start']:
                    embedding_system_input_cache['input_ids'].append(input_ids)
                    hook_state['system_start'] = True
                embedding_system_input_cache['representations'][hook_name].append(rep)
                print(f"✅ First system prompt [{hook_name}]", input_ids.shape)
            else:
                if current_utterance_ids:
                    utterance = {
                        'input_ids': torch.cat(current_utterance_ids, dim=1),
                        'representations': {
                            k: torch.cat([r if r.dim() == 3 else r.unsqueeze(1) for r in v], dim=1) for k, v in current_utterance_reps.items()
                        }
                    }
                    utterance_list.append(utterance)
                    current_utterance_ids.clear()
                    current_utterance_reps.clear()
                print(f"🆕 New utterance break [{hook_name}]", input_ids.shape)


        elif is_generated_token:
            if input_ids is not None:
                if input_ids.dim() == 2:
                    input_ids = input_ids.unsqueeze(1)
                current_utterance_ids.append(input_ids)
            if rep.dim() == 2:
                rep = rep.unsqueeze(1)
            current_utterance_reps[hook_name].append(rep)
            print(f"📥 Captured token [{hook_name}]", rep.shape)
            print(current_utterance_reps)

    return hook

hook_targets = {
    'embedding': hf_model.model.embed_tokens,
    # Add more if desired:
    # ...
}

handles = []
for name, module in hook_targets.items():
    hook = generic_hook_factory(name)
    handle = module.register_forward_hook(hook)
    handles.append(handle)

dialog = alice.dialog_with(bob, max_iterations=2, seed=277033979).print(orchestration=True)

if current_utterance_ids:
    utterance = {
        'input_ids': torch.cat(current_utterance_ids, dim=1),
        'representations': {
            k: torch.cat([r if r.dim() == 3 else r.unsqueeze(1) for r in v], dim=1) for k, v in current_utterance_reps.items()
        }
    }
    utterance_list.append(utterance)
    current_utterance_ids.clear()
    current_utterance_reps.clear()

print("🧾 System prompt input_ids shape:", embedding_system_input_cache['input_ids'][0].shape)
for name, rep in embedding_system_input_cache['representations'].items():
    print(f"   🔹 System prompt [{name}] shape: {rep[0].shape}")

for i, utt in enumerate(utterance_list):
    print(f"🗣️ Utterance {i+1}: input_ids shape = {utt['input_ids'].shape}")
    for name, rep in utt['representations'].items():
        print(f"   🔸 {name} shape = {rep.shape}")

for h in handles:
    h.remove()
bob.reset()


Dialogue:   0%|          | 0/2 [00:00<?, ?it/s]

torch.Size([1, 208])
torch.Size([1, 208, 5120])
✅ First system prompt [embedding] torch.Size([1, 208])
🆕 New utterance break [embedding] torch.Size([1, 208])
🆕 New utterance break [embedding] torch.Size([1, 208])
torch.Size([1, 208, 5120])
torch.Size([1, 208, 5120])
🆕 New utterance break [q_proj] torch.Size([1, 208, 5120])
🆕 New utterance break [q_proj] torch.Size([1, 208, 5120])
torch.Size([1, 1])
torch.Size([1, 1, 5120])
📥 Captured token [embedding] torch.Size([1, 1, 5120])
📥 Captured token [embedding] torch.Size([1, 1, 5120])
defaultdict(<class 'list'>, {'embedding': [tensor([[[-0.0356,  0.0107,  0.0078,  ...,  0.0104,  0.0292, -0.0019]]],
       dtype=torch.bfloat16), tensor([[[-0.0356,  0.0107,  0.0078,  ...,  0.0104,  0.0292, -0.0019]]],
       dtype=torch.bfloat16)]})
📥 Captured token [embedding] torch.Size([1, 1, 5120])
defaultdict(<class 'list'>, {'embedding': [tensor([[[-0.0356,  0.0107,  0.0078,  ...,  0.0104,  0.0292, -0.0019]]],
       dtype=torch.bfloat16), tensor([[[-0.0

RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 1 but got size 5120 for tensor number 3 in the list.

In [None]:
utterance_list

[]

In [None]:
📥 Captured token [embedding] torch.Size([1, 1, 5120])
torch.Size([1, 1, 5120])
📥 Captured token [embedding] torch.Size([1, 1, 5120])
torch.Size([1, 1, 5120])
📥 Captured token [embedding] torch.Size([1, 1, 5120])
torch.Size([1, 1, 5120])
📥 Captured token [embedding] torch.Size([1, 1, 5120])
torch.Size([1, 1, 5120])
📥 Captured token [embedding] torch.Size([1, 1, 5120])
torch.Size([1, 1, 5120])
📥 Captured token [embedding] torch.Size([1, 1, 5120])
torch.Size([1, 1, 5120])
📥 Captured token [embedding] torch.Size([1, 1, 5120])
torch.Size([1, 1, 5120])

In [None]:
hf_model.model.embed_tokens

Embedding(152064, 5120)

In [None]:
hf_model.model.layers[0].self_attn.q_proj

Linear(in_features=5120, out_features=5120, bias=True)

In [None]:
hf_model.model.layers[0].self_attn.q_proj

Linear(in_features=5120, out_features=5120, bias=True)

In [None]:
hf_model

Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(152064, 5120)
    (layers): ModuleList(
      (0-47): 48 x Qwen2DecoderLayer(
        (self_attn): Qwen2Attention(
          (q_proj): Linear(in_features=5120, out_features=5120, bias=True)
          (k_proj): Linear(in_features=5120, out_features=1024, bias=True)
          (v_proj): Linear(in_features=5120, out_features=1024, bias=True)
          (o_proj): Linear(in_features=5120, out_features=5120, bias=False)
        )
        (mlp): Qwen2MLP(
          (gate_proj): Linear(in_features=5120, out_features=13824, bias=False)
          (up_proj): Linear(in_features=5120, out_features=13824, bias=False)
          (down_proj): Linear(in_features=13824, out_features=5120, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): Qwen2RMSNorm((5120,), eps=1e-06)
        (post_attention_layernorm): Qwen2RMSNorm((5120,), eps=1e-06)
      )
    )
    (norm): Qwen2RMSNorm((5120,), eps=1e-06)
    (rotary_emb

In [None]:
utterance_list

[{'input_ids': tensor([[13048]]),
  'representations': {'embedding': tensor([[[-0.0356,  0.0107,  0.0078,  ...,  0.0104,  0.0292, -0.0019]]],
          dtype=torch.bfloat16)}},
 {'input_ids': tensor([[[-0.3027,  0.0962,  0.0776,  ...,  0.0991,  0.2236, -0.0134]]],
         dtype=torch.bfloat16),
  'representations': {'q_proj': tensor([[[-5.0000, -3.1094,  2.1875,  ..., -0.3457, -1.1250, -0.4922]]],
          dtype=torch.bfloat16)}},
 {'input_ids': tensor([[1052]]),
  'representations': {'q_proj': tensor([[[-5.0000, -3.1094,  2.1875,  ..., -0.3457, -1.1250, -0.4922]]],
          dtype=torch.bfloat16),
   'embedding': tensor([[[ 0.0281,  0.0447,  0.0178,  ..., -0.0295, -0.0396, -0.0198]]],
          dtype=torch.bfloat16)}},
 {'input_ids': tensor([[[ 0.2207,  0.3691,  0.1660,  ..., -0.2598, -0.2793, -0.1270]]],
         dtype=torch.bfloat16),
  'representations': {'q_proj': tensor([[[-3.6250, -2.5469,  1.4062,  ...,  0.3457, -0.3535,  0.2871]]],
          dtype=torch.bfloat16)}},
 {'input

In [None]:
from typing import List, Dict, Any

def process_utterances(utterance_list: List[Dict[str, Any]], tokenizer) -> List[Dict[str, Any]]:
    result = []

    for item in utterance_list:
        input_ids_tensor = item['input_ids'].squeeze(0)  # shape: [seq_len]
        input_ids = input_ids_tensor.tolist()
        decoded_tokens = tokenizer.convert_ids_to_tokens(input_ids)
        decoded_text = tokenizer.decode(input_ids, skip_special_tokens=True)

        # Extract all representations and squeeze [1, seq_len, feat_dim] -> [seq_len, feat_dim]
        representations_by_layer = {
            key: tensor.squeeze(0) for key, tensor in item['representations'].items()
        }

        # Check consistency
        seq_len = len(decoded_tokens)
        for name, rep in representations_by_layer.items():
            assert rep.shape[0] == seq_len, f"Mismatch in sequence length for {name}"

        # Construct per-token representation
        token_representations = []
        for i, token in enumerate(decoded_tokens):
            token_repr = {
                layer_name: representations_by_layer[layer_name][i]
                for layer_name in representations_by_layer
            }
            token_representations.append({
                "token": token,
                "representation": token_repr
            })

        result.append({
            "utterance": decoded_text,
            "tokens": token_representations
        })

    return result


In [None]:
tokenizer = hf_pipeline.tokenizer

processed = process_utterances(utterance_list, tokenizer)

In [None]:
processed[0]

{'utterance': "Hi there! How's my favorite person doing today?",
 'tokens': [{'token': 'Hi',
   'representation': {'embedding': tensor([-0.0356,  0.0107,  0.0078,  ...,  0.0104,  0.0292, -0.0019],
           dtype=torch.bfloat16)}},
  {'token': 'Ġthere',
   'representation': {'embedding': tensor([ 0.0281,  0.0447,  0.0178,  ..., -0.0295, -0.0396, -0.0198],
           dtype=torch.bfloat16)}},
  {'token': '!',
   'representation': {'embedding': tensor([-0.0299,  0.0211,  0.0049,  ...,  0.0469,  0.0162,  0.0330],
           dtype=torch.bfloat16)}},
  {'token': 'ĠHow',
   'representation': {'embedding': tensor([-0.0212, -0.0079,  0.0034,  ..., -0.0150, -0.0040, -0.0141],
           dtype=torch.bfloat16)}},
  {'token': "'s",
   'representation': {'embedding': tensor([ 0.0011,  0.0069,  0.0041,  ..., -0.0074, -0.0024, -0.0031],
           dtype=torch.bfloat16)}},
  {'token': 'Ġmy',
   'representation': {'embedding': tensor([-0.0019,  0.0028,  0.0126,  ..., -0.0317,  0.0160, -0.0503],
       