The first experiment that I want to do involves the most simple identification of facts within content-free tokens

In [1]:
import torch
import math
from nnsight import CONFIG
from nnsight import LanguageModel
import nnsight
import numpy as np
import matplotlib.pyplot as plt
import os
from dotenv import load_dotenv
import random

load_dotenv()

  from .autonotebook import tqdm as notebook_tqdm


True

In [2]:
# importing from my own code 
from activation_transplanting import *

In [3]:
# read the api_key
CONFIG.set_default_api_key(os.environ.get('NDIF_KEY'))

# read the hf token
os.environ['HF_TOKEN'] = os.environ.get('HF_TOKEN')

In [4]:
NDIF_models = [
    "meta-llama/Meta-Llama-3.1-405B-Instruct",
    "meta-llama/Meta-Llama-3.1-8B",
    "meta-llama/Meta-Llama-3.1-70B",
    "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
    "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
] 

# inexaustive list
non_NDIF_models = [
    "meta-llama/Meta-Llama-3.1-8B",
]

In [5]:
# Example prompts

# instruct examples
prompt_example_1 = "<|begin▁of▁sentence|>\n" \
         "<|start_header_id|>user<|end_header_id|>\n\n" \
         "Hello, how are you? <|eot_id|>\n" \
         "<|start_header_id|>assistant<|end_header_id|>\n"

prompt_example_2 = "<|start_header_id|>system<|end_header_id|>\n\n<|eot_id|>\n" \
                "<|start_header_id|>user<|end_header_id|>\n\n" \
                "Answer the following in one word: What is the tallest mountain in the world?<|eot_id|>\n" \
                "<|start_header_id|>assistant<|end_header_id|>"

# Base model examples 
prompt_example_3 = "\nUser: What's the capital of France?\n\nAssistant:"

# Reasoning examples 
prompt_example_4 = "<｜User｜>Robert has three apples, and then gets one more. How many apples does he have? Respond in a single word.<｜Assistant｜>"

# Numbers Experiment 1

We'll be simply trying to identify the presence of stored numbers at particular tokens


In [6]:
def generate_random_simple_number_string(num, mode='base'):
    prefix = "Word Problem Setup:"
    
    # Define possible components for the problem
    intros = ["A man has ", "A boy has ", "Steven has ", "Robert has ", 
             "A woman has ", "A girl has ", "Sarah has ", "Emily has ",
             "Alex has ", "Jordan has ", "Taylor has ", "Sam has "]
    
    numbers = ["one", "two", "three", "four", "five", 
              "six", "seven", "eight", "nine", "ten"]
    
    objects = [("apple", "apples"), ("banana", "bananas"), ("orange", "oranges"),
              ("peach", "peaches"), ("pear", "pears"), ("grape", "grapes"),
              ("strawberry", "strawberries"), ("blueberry", "blueberries"),
              ("mango", "mangoes"), ("kiwi", "kiwis"), ("plum", "plums")]
    
    suffixes = [
        "when he leaves the store", "when he leaves the shop", 
        "when he leaves the grocery store", "when he leaves the market",
        "when she leaves the store", "when she leaves the shop",
        "when she leaves the grocery store", "when she leaves the market",
        "after shopping", "after grocery shopping", "after visiting the supermarket"
    ]
    
    ending = ".\n\n"
    
    # Choose random components
    intro = random.choice(intros)
    
    # Select appropriate number word and object form based on num
    if 1 <= num <= 10:
        number_word = numbers[num-1]
        obj = random.choice(objects)
        # Use singular or plural form based on num
        object_word = obj[0] if num == 1 else obj[1]
    else:
        # For numbers > 10, just use the numeric form
        number_word = str(num)
        obj = random.choice(objects)
        object_word = obj[1]  # Always use plural
    
    suffix = random.choice(suffixes)
    
    # Make pronoun in suffix match the intro person's implied gender
    if ("man" in intro or "boy" in intro or "Steven" in intro or "Robert" in intro) and "she" in suffix:
        suffix = suffix.replace(" she ", " he ")
    elif ("woman" in intro or "girl" in intro or "Sarah" in intro or "Emily" in intro) and "he" in suffix:
        suffix = suffix.replace(" he ", " she ")
    
    # Assemble the full problem
    
    if mode=='base':
        problem = f"{prefix} {intro}{number_word} {object_word} {suffix}{ending}"
    elif mode == 'instruct':
        problem = f"<|start_header_id|>user<|end_header_id|>\n\n{prefix} {intro}{number_word} {object_word} {suffix}<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n"
    elif mode == 'reason':
        problem = f"<｜User｜>{prefix} {intro}{number_word} {object_word} {suffix}<｜Assistant｜>"
    else:
        assert False, f"mode {mode} not in [base, instruct, reason]"
    return problem, obj[1]


def extract_final_logits(
        tk,
        source_strings: list[str],
        target_strings: list[str],
        target_substring: str,
        occurrence_index: int = 0,
        num_prev: int = 0,
        num_fut: int = 0,
        transplant_strings: tuple[str] = ("residual"),
    ) -> list[str]:
        """
        extract the logits produced at the final position of target_strings
        """
        assert num_prev >= 0
        assert num_fut >= 0

        # Extract newline activations from source strings
        activation_containers, source_newline_indices = (
            tk.extract_newline_activations(
                strings=source_strings,
                target_substring=target_substring,
                occurrence_index=occurrence_index,
                transplant_strings=transplant_strings,
                num_prev=num_prev,
                num_fut=num_fut,
            )
        )
        
        print("source_newline_indices", source_newline_indices)
        output_logits = []

        # Process each target string with corresponding source activations
        for target_string, activation_container, source_newline_index in zip(
            target_strings, activation_containers, source_newline_indices
        ):
            print("source_newline_index", source_newline_index)
            act = activation_container.get_token_by_index(
                source_newline_index
            )
            print(act)
            
            print(vars(activation_container))

            final_logits = tk.evaluate_with_transplanted_activity(
                target_string=target_string,
                target_substring=target_substring,
                activation_container=activation_container,
                source_token_index=source_newline_index,
                occurrence_index=occurrence_index,
                transplant_strings=transplant_strings,
                num_prev=num_prev,
                num_fut=num_fut,
            )
            output_logits.append(final_logits)
        
        return output_logits

def predict_number_probs(logits, llama):
    logit_values = []
    # add a prefix t get the token in context
    prefix = ".\n\nThey have"
    numbers = [' one', ' two', ' three', ' four', ' five', ' six', ' seven', ' eight', ' nine', ' ten']
    
    for i, n in enumerate(numbers):
        idx = llama.tokenizer.encode(prefix+n)[-1]
        assert idx is not None
        logit_values.append(logits[idx].float())
    
    all_probs = torch.nn.functional.softmax(torch.tensor(logit_values), dim=0)

    # Sum probabilities for word forms and digit forms
    return all_probs#[:10]+all_probs[10:]

def evaluate_number_probs(strings, items, tk, target_substring=".\n\n"):
    """ 
    for each string, we'll evaluate the probabilities of numbers
    """
    question_strings = [f"{target_substring} Therefore, the total number of {item} purchased was " for item, s in zip(items, strings)]

    for s, q in zip(strings, question_strings):
        print("Source: ", s, "\nQuestion:",q)

    final_logits = extract_final_logits(
        tk,
        source_strings=strings,
        target_strings=question_strings,
        target_substring=target_substring,
        occurrence_index= -1,
        num_prev = 0,
        num_fut = 0,
        transplant_strings= ("residual",),
    )
    
    extracted_probs = []
    for logits in final_logits:
        extracted_probs.append(predict_number_probs(logits, tk.llama))
    
    return extracted_probs



Now let's run the experiment

In [7]:
def run_simple_number_experiment(tk, num, number_samples, target_substring=".\n\n", mode='base'):
    """ 
    Choose a single num to use to generate sentences with 
    then generate number_samples with it

    for each run evaluate_number_probs(strings, tk, target_substring)
    to see the probability distribution over next numbers
    """
    string_samples, items = zip(*[
        generate_random_simple_number_string(num, mode=mode) for _ in range(number_samples)
    ])

    extracted_probs = evaluate_number_probs(string_samples, items, tk, target_substring=target_substring)

    # now average over each 
    tot = 0
    out=None
    for p in extracted_probs:
        if out is None:
            out=p 
        else:
            out+=p
        
        tot+=1
    
    return out.numpy()/tot


In [8]:
"""# choose a model 
llama_model_string = "meta-llama/Meta-Llama-3.1-8B"
# remote = use NDIF
remote = True 

if remote and (llama_model_string not in NDIF_models):
    remote = False 
    print("Model not available on NDIF")

# load a model
llama = LanguageModel(llama_model_string)

# commented out for now
tk = LLamaExamineToolkit(
    llama_model=llama, 
    remote=True, # use NDIF
)
out = tk.transplant_newline_activities(
    source_strings=["\nUser: What country is the Colosseum in?\n\nAssistant: ",]*5,
    target_strings=["\nUser: What country is the Louvre in?\n\nAssistant: ",]*5,
    target_substring='in?',
    num_new_tokens=100,
    occurrence_index=0,
    num_prev=0,
    num_fut=0,
    transplant_strings=('residual','key', 'value', 'output')
)"""

'# choose a model \nllama_model_string = "meta-llama/Meta-Llama-3.1-8B"\n# remote = use NDIF\nremote = True \n\nif remote and (llama_model_string not in NDIF_models):\n    remote = False \n    print("Model not available on NDIF")\n\n# load a model\nllama = LanguageModel(llama_model_string)\n\n# commented out for now\ntk = LLamaExamineToolkit(\n    llama_model=llama, \n    remote=True, # use NDIF\n)\nout = tk.transplant_newline_activities(\n    source_strings=["\nUser: What country is the Colosseum in?\n\nAssistant: ",]*5,\n    target_strings=["\nUser: What country is the Louvre in?\n\nAssistant: ",]*5,\n    target_substring=\'in?\',\n    num_new_tokens=100,\n    occurrence_index=0,\n    num_prev=0,\n    num_fut=0,\n    transplant_strings=(\'residual\',\'key\', \'value\', \'output\')\n)'

In [9]:
# choose a model 
llama_model_string = "meta-llama/Meta-Llama-3.1-8B"
# remote = use NDIF
remote = True 

if remote and (llama_model_string not in NDIF_models):
    remote = False 
    print("Model not available on NDIF")

# load a model
llama = LanguageModel(llama_model_string)

# commented out for now
tk = LLamaExamineToolkit(
    llama_model=llama, 
    remote=True, # use NDIF
)
out_base = tk.transplant_newline_activities(
    source_strings=["\nUser: Explain fractals in 150 words\n\n",]*5,
    target_strings=['\n\n',]*5,
    target_substring='\n\n',
    num_new_tokens=100,
    occurrence_index=0,
    num_prev=0,
    num_fut=0,
    transplant_strings=('residual','key', 'value', 'output')
)

extracting token activations


2025-03-16 11:12:41,334 6770bbff-ee84-4f56-b104-ed648731e98b - RECEIVED: Your job has been received and is waiting approval.
2025-03-16 11:12:42,188 6770bbff-ee84-4f56-b104-ed648731e98b - APPROVED: Your job was approved and is waiting to be run.
2025-03-16 11:12:42,772 6770bbff-ee84-4f56-b104-ed648731e98b - RUNNING: Your job has started running.
2025-03-16 11:13:05,482 6770bbff-ee84-4f56-b104-ed648731e98b - COMPLETED: Your job has been completed.
Downloading result: 100%|██████████| 199M/199M [00:22<00:00, 9.05MB/s]


generating with transplant
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output

2025-03-16 11:13:34,846 65f16e85-4516-4e03-9531-592822cd8ba3 - RECEIVED: Your job has been received and is waiting approval.
2025-03-16 11:13:36,780 65f16e85-4516-4e03-9531-592822cd8ba3 - APPROVED: Your job was approved and is waiting to be run.
2025-03-16 11:13:38,581 65f16e85-4516-4e03-9531-592822cd8ba3 - RUNNING: Your job has started running.
2025-03-16 11:13:44,083 65f16e85-4516-4e03-9531-592822cd8ba3 - COMPLETED: Your job has been completed.
Downloading result: 100%|██████████| 2.01k/2.01k [00:00<?, ?B/s]


generating with transplant
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output

2025-03-16 11:13:50,201 a8fe6464-1214-451a-a56f-4cf94467dba2 - RECEIVED: Your job has been received and is waiting approval.
2025-03-16 11:13:51,312 a8fe6464-1214-451a-a56f-4cf94467dba2 - APPROVED: Your job was approved and is waiting to be run.
2025-03-16 11:13:53,343 a8fe6464-1214-451a-a56f-4cf94467dba2 - RUNNING: Your job has started running.
2025-03-16 11:13:58,504 a8fe6464-1214-451a-a56f-4cf94467dba2 - COMPLETED: Your job has been completed.
Downloading result: 100%|██████████| 2.01k/2.01k [00:00<?, ?B/s]


generating with transplant
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output

2025-03-16 11:14:03,359 f38e5bb9-d019-4f54-9347-867af027b490 - RECEIVED: Your job has been received and is waiting approval.
2025-03-16 11:14:08,344 f38e5bb9-d019-4f54-9347-867af027b490 - APPROVED: Your job was approved and is waiting to be run.
2025-03-16 11:14:08,344 f38e5bb9-d019-4f54-9347-867af027b490 - RUNNING: Your job has started running.
2025-03-16 11:14:13,130 f38e5bb9-d019-4f54-9347-867af027b490 - COMPLETED: Your job has been completed.
Downloading result: 100%|██████████| 2.01k/2.01k [00:00<?, ?B/s]


generating with transplant
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output

2025-03-16 11:14:17,965 583121da-ce29-4826-82eb-951af205639d - RECEIVED: Your job has been received and is waiting approval.
2025-03-16 11:14:19,382 583121da-ce29-4826-82eb-951af205639d - APPROVED: Your job was approved and is waiting to be run.
2025-03-16 11:14:20,950 583121da-ce29-4826-82eb-951af205639d - RUNNING: Your job has started running.
2025-03-16 11:14:24,685 583121da-ce29-4826-82eb-951af205639d - COMPLETED: Your job has been completed.
Downloading result: 100%|██████████| 1.76k/1.76k [00:00<00:00, 1.76MB/s]


generating with transplant
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output
residual
source_token =  11 ('\n\n', 271)
target_token =  1 ('\n\n',)
key
value
output

2025-03-16 11:14:31,362 b467d488-4b14-4e08-99e5-f150f70320a4 - RECEIVED: Your job has been received and is waiting approval.
2025-03-16 11:14:33,040 b467d488-4b14-4e08-99e5-f150f70320a4 - APPROVED: Your job was approved and is waiting to be run.
2025-03-16 11:14:34,780 b467d488-4b14-4e08-99e5-f150f70320a4 - RUNNING: Your job has started running.
2025-03-16 11:14:37,098 b467d488-4b14-4e08-99e5-f150f70320a4 - COMPLETED: Your job has been completed.
Downloading result: 100%|██████████| 1.24k/1.24k [00:00<?, ?B/s]


In [10]:
"""# choose a model 
llama_model_string = "meta-llama/Meta-Llama-3.1-405B-Instruct"
# remote = use NDIF
remote = True 

if remote and (llama_model_string not in NDIF_models):
    remote = False 
    print("Model not available on NDIF")

# load a model
llama = LanguageModel(llama_model_string)



# commented out for now
tk = LLamaExamineToolkit(
    llama_model=llama, 
    remote=True, # use NDIF
)
out_instruct = tk.transplant_newline_activities(
    source_strings=["<|start_header_id|>user<|end_header_id|>\n\n: Explain fractals in 150 words<|eot_id|>",]*5,
    target_strings=['<|eot_id|>',]*5,
    target_substring='<|eot_id|>',
    num_new_tokens=100,
    occurrence_index=0,
    num_prev=0,
    num_fut=0,
    transplant_strings=('residual','key', 'value', 'output')
)"""

'# choose a model \nllama_model_string = "meta-llama/Meta-Llama-3.1-405B-Instruct"\n# remote = use NDIF\nremote = True \n\nif remote and (llama_model_string not in NDIF_models):\n    remote = False \n    print("Model not available on NDIF")\n\n# load a model\nllama = LanguageModel(llama_model_string)\n\n\n\n# commented out for now\ntk = LLamaExamineToolkit(\n    llama_model=llama, \n    remote=True, # use NDIF\n)\nout_instruct = tk.transplant_newline_activities(\n    source_strings=["<|start_header_id|>user<|end_header_id|>\n\n: Explain fractals in 150 words<|eot_id|>",]*5,\n    target_strings=[\'<|eot_id|>\',]*5,\n    target_substring=\'<|eot_id|>\',\n    num_new_tokens=100,\n    occurrence_index=0,\n    num_prev=0,\n    num_fut=0,\n    transplant_strings=(\'residual\',\'key\', \'value\', \'output\')\n)'

In [None]:
# choose a model 
llama_model_string = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
# remote = use NDIF
remote = True 

if remote and (llama_model_string not in NDIF_models):
    remote = False 
    print("Model not available on NDIF")

# load a model
llama = LanguageModel(llama_model_string)

# commented out for now
tk = LLamaExamineToolkit(
    llama_model=llama, 
    remote=True, # use NDIF
)
out_reason = tk.transplant_newline_activities(
    source_strings=["<｜User｜>Explain fractals in 150 words<｜Assistant｜>",]*5,
    target_strings=['<｜Assistant｜>',]*5,
    target_substring='<｜Assistant｜>',
    num_new_tokens=100,
    occurrence_index=0,
    num_prev=0,
    num_fut=0,
    transplant_strings=('residual','key', 'value', 'output')
)

extracting token activations


ValueError: Target string '

' occurrence 0 not found in string

In [None]:
jhg

In [None]:
# choose a model 
llama_model_string = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B" # "meta-llama/Meta-Llama-3.1-70B"
# remote = use NDIF
remote = True 

if remote and (llama_model_string not in NDIF_models):
    remote = False 
    print("Model not available on NDIF")

# load a model
llama = LanguageModel(llama_model_string)

# commented out for now
tk = LLamaExamineToolkit(
    llama_model=llama, 
    remote=True, # use NDIF
)
run_simple_number_experiment(tk, num=10, number_samples=1,target_substring="<｜Assistant｜>", mode='reason')

Source:  <｜User｜>Word Problem Setup: A girl has ten plums when she leaves the grocery store<｜Assistant｜> 
Question: <｜Assistant｜> Therefore, the total number of plums purchased was 
extracting token activations


2025-03-15 16:50:35,516 bd95cdcf-1419-48a2-8032-fc74e9adb024 - RECEIVED: Your job has been received and is waiting approval.
2025-03-15 16:50:36,025 bd95cdcf-1419-48a2-8032-fc74e9adb024 - APPROVED: Your job was approved and is waiting to be run.
2025-03-15 16:50:36,900 bd95cdcf-1419-48a2-8032-fc74e9adb024 - RUNNING: Your job has started running.
2025-03-15 16:50:37,764 bd95cdcf-1419-48a2-8032-fc74e9adb024 - COMPLETED: Your job has been completed.
Downloading result: 100%|██████████| 5.15M/5.15M [00:00<00:00, 12.8MB/s]


source_newline_indices [18]
source_newline_index 18
('<｜Assistant｜>', 128012)
{}
generating with transplant
we are transplanting residual
source_token =  18 ('<｜Assistant｜>', 128012)
these are toks [128000, 128012, 15636, 11, 279, 2860, 1396, 315, 628, 6370, 15075, 574, 220] 128012
target_token =  1 <｜Assistant｜>
we are transplanting residual
we are transplanting residual
we are transplanting residual
we are transplanting residual
we are transplanting residual
we are transplanting residual
we are transplanting residual
we are transplanting residual
we are transplanting residual
we are transplanting residual
we are transplanting residual
we are transplanting residual
we are transplanting residual
we are transplanting residual
we are transplanting residual
we are transplanting residual
we are transplanting residual
we are transplanting residual
we are transplanting residual
we are transplanting residual
we are transplanting residual
we are transplanting residual
we are transplanting resi

2025-03-15 16:50:40,453 9cac7e92-fea0-4c1d-a714-171ef30d856e - RECEIVED: Your job has been received and is waiting approval.
2025-03-15 16:50:41,149 9cac7e92-fea0-4c1d-a714-171ef30d856e - APPROVED: Your job was approved and is waiting to be run.
2025-03-15 16:50:41,567 9cac7e92-fea0-4c1d-a714-171ef30d856e - RUNNING: Your job has started running.
2025-03-15 16:50:42,243 9cac7e92-fea0-4c1d-a714-171ef30d856e - COMPLETED: Your job has been completed.
Downloading result: 100%|██████████| 3.34M/3.34M [00:00<00:00, 8.39MB/s]


array([0.11493165, 0.5836711 , 0.2147206 , 0.03616485, 0.02193509,
       0.01372663, 0.00431928, 0.00347064, 0.00133805, 0.00572211],
      dtype=float32)

In [9]:
with llama.generate(
        '<｜User｜>Word Problem Setup: A woman has ten oranges when he leaves the market.<｜Assistant｜> The total number of oranges was ',
        max_new_tokens=20,
        remote=True,
    ) as tracer:
    out = llama.generator.output.save()


2025-03-15 16:50:43,847 73733c63-b7d8-488d-a57f-19277fa8daaa - RECEIVED: Your job has been received and is waiting approval.
2025-03-15 16:50:44,526 73733c63-b7d8-488d-a57f-19277fa8daaa - APPROVED: Your job was approved and is waiting to be run.
2025-03-15 16:50:45,235 73733c63-b7d8-488d-a57f-19277fa8daaa - RUNNING: Your job has started running.
2025-03-15 16:50:46,419 73733c63-b7d8-488d-a57f-19277fa8daaa - COMPLETED: Your job has been completed.
Downloading result: 100%|██████████| 1.56k/1.56k [00:00<?, ?B/s]


In [10]:
tk.transplant_newline_activities(
    source_strings=["<｜User｜>Word Problem: A man has one apple. Then he loses one. Then he finds one more apple. If he gains two more apples, how many will he have?<｜Assistant｜>",]*5,
    target_strings=['<｜Assistant｜>',]*5,
    target_substring='<｜Assistant｜>',
    num_new_tokens=100,
    occurrence_index=0,
    num_prev=0,
    num_fut=0,
    transplant_strings=('residual',)
)


extracting token activations


2025-03-15 16:50:48,693 c666cfcf-580f-442c-9d66-5963923bcccc - RECEIVED: Your job has been received and is waiting approval.
2025-03-15 16:50:49,127 c666cfcf-580f-442c-9d66-5963923bcccc - APPROVED: Your job was approved and is waiting to be run.
2025-03-15 16:50:49,513 c666cfcf-580f-442c-9d66-5963923bcccc - RUNNING: Your job has started running.
2025-03-15 16:51:04,361 c666cfcf-580f-442c-9d66-5963923bcccc - COMPLETED: Your job has been completed.
Downloading result: 100%|██████████| 250M/250M [00:32<00:00, 7.77MB/s]


generating with transplant
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_to

2025-03-15 16:51:39,926 92397b39-c8ef-4c1a-8095-c43657fdc2df - RECEIVED: Your job has been received and is waiting approval.
2025-03-15 16:51:40,815 92397b39-c8ef-4c1a-8095-c43657fdc2df - APPROVED: Your job was approved and is waiting to be run.
2025-03-15 16:51:41,788 92397b39-c8ef-4c1a-8095-c43657fdc2df - RUNNING: Your job has started running.
2025-03-15 16:51:44,607 92397b39-c8ef-4c1a-8095-c43657fdc2df - COMPLETED: Your job has been completed.
Downloading result: 100%|██████████| 1.63k/1.63k [00:00<?, ?B/s]


generating with transplant
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_to

2025-03-15 16:51:48,092 b7355e3f-0194-4180-aba5-675cdbe46da8 - RECEIVED: Your job has been received and is waiting approval.
2025-03-15 16:51:48,799 b7355e3f-0194-4180-aba5-675cdbe46da8 - APPROVED: Your job was approved and is waiting to be run.
2025-03-15 16:51:49,757 b7355e3f-0194-4180-aba5-675cdbe46da8 - RUNNING: Your job has started running.
2025-03-15 16:51:51,970 b7355e3f-0194-4180-aba5-675cdbe46da8 - COMPLETED: Your job has been completed.
Downloading result: 100%|██████████| 1.56k/1.56k [00:00<00:00, 1.58MB/s]


generating with transplant
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_to

2025-03-15 16:51:55,564 7c202265-c733-4057-b275-7a8721f05082 - RECEIVED: Your job has been received and is waiting approval.
2025-03-15 16:51:56,271 7c202265-c733-4057-b275-7a8721f05082 - APPROVED: Your job was approved and is waiting to be run.
2025-03-15 16:51:57,529 7c202265-c733-4057-b275-7a8721f05082 - RUNNING: Your job has started running.
2025-03-15 16:52:02,071 7c202265-c733-4057-b275-7a8721f05082 - COMPLETED: Your job has been completed.
Downloading result: 100%|██████████| 2.01k/2.01k [00:00<00:00, 2.02MB/s]


generating with transplant
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_to

2025-03-15 16:52:06,280 cfed7404-8d84-40bc-a065-bd361d5d0ef6 - RECEIVED: Your job has been received and is waiting approval.
2025-03-15 16:52:07,190 cfed7404-8d84-40bc-a065-bd361d5d0ef6 - APPROVED: Your job was approved and is waiting to be run.
2025-03-15 16:52:08,144 cfed7404-8d84-40bc-a065-bd361d5d0ef6 - RUNNING: Your job has started running.
2025-03-15 16:52:11,144 cfed7404-8d84-40bc-a065-bd361d5d0ef6 - COMPLETED: Your job has been completed.
Downloading result: 100%|██████████| 1.50k/1.50k [00:00<00:00, 594kB/s]


generating with transplant
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_token =  1 ('<｜Assistant｜>',)
residual
source_token =  36 ('<｜Assistant｜>', 128012)
target_to

2025-03-15 16:52:14,878 d472e331-343c-4917-997d-a6bf2fbe22dd - RECEIVED: Your job has been received and is waiting approval.
2025-03-15 16:52:16,233 d472e331-343c-4917-997d-a6bf2fbe22dd - APPROVED: Your job was approved and is waiting to be run.
2025-03-15 16:52:17,715 d472e331-343c-4917-997d-a6bf2fbe22dd - RUNNING: Your job has started running.
2025-03-15 16:52:21,624 d472e331-343c-4917-997d-a6bf2fbe22dd - COMPLETED: Your job has been completed.
Downloading result: 100%|██████████| 1.88k/1.88k [00:00<00:00, 1.88MB/s]


["<｜begin▁of▁sentence｜><｜Assistant｜><think>\n\n</think>\n\nSure! Could you clarify or provide more details about what you'd like assistance with? Whether it's a question, a problem to solve, or something else, feel free to share, and I'll do my best to help. 😊<｜end▁of▁sentence｜>",
 '<｜begin▁of▁sentence｜><｜Assistant｜><think>\n\n</think>\n\n**Question:**  \nWhat is the answer to the question: "What is the square root of 64?" \n\n**Answer:**  \nThe square root of 64 is **8**.<｜end▁of▁sentence｜>',
 '<｜begin▁of▁sentence｜><｜Assistant｜><think>\nAlright, so I\'ve got this riddle here: "What has keys but can\'t open locks?" Hmm, at first glance, it seems a bit tricky, but maybe if I break it down, I can figure it out. Let me think about what I know about keys and locks. Keys are typically used to open locks, right? So if something has keys, you might think it\'s related to opening things. But the question says it can\'t open locks, which is confusing',
 "<｜begin▁of▁sentence｜><｜Assistant｜><think

In [11]:

llama.tokenizer.decode(out[0])

"<｜begin▁of▁sentence｜><｜User｜>Word Problem Setup: A woman has ten oranges when he leaves the market.<｜Assistant｜> The total number of oranges was 10.\n\nOkay, so I've got this problem where a woman has ten oranges when she leaves the"

In [None]:
for o in out:
    print()




In [None]:
jhg

NameError: name 'jhg' is not defined

In [9]:
# choose a model 
llama_model_string = "meta-llama/Meta-Llama-3.1-405B-Instruct"
# remote = use NDIF
remote = True 

if remote and (llama_model_string not in NDIF_models):
    remote = False 
    print("Model not available on NDIF")

# load a model
llama = LanguageModel(llama_model_string)

# commented out for now
tk = LLamaExamineToolkit(
    llama_model=llama, 
    remote=True, # use NDIF
)

run_simple_number_experiment(tk, num=10, number_samples=1,target_substring='<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n', mode='instruct')

<|start_header_id|>user<|end_header_id|>

Word Problem Setup: A boy has ten pears when he leaves the grocery store<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
 <|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
 Therefore, the total number of pears they currently had was
extracting token activations


2025-03-15 15:32:07,178 fb113c8b-545f-440f-9506-305235816b37 - RECEIVED: Your job has been received and is waiting approval.
2025-03-15 15:32:08,841 fb113c8b-545f-440f-9506-305235816b37 - APPROVED: Your job was approved and is waiting to be run.


: 

: 

In [25]:
generate_random_simple_number_string(10)

'Word Problem Setup: A woman has ten oranges when he leaves the market.\n\n'

In [None]:
with llama.

In [9]:
with llama.generate(
            'Word Problem Setup: A woman has ten oranges when he leaves the market.\n\n The total number of oranges was ',
            max_new_tokens=20,
            remote=True,
        ) as tracer:
        out = llama.generator.output.save()
    

ConnectionError: Internal Server Error

: 

In [None]:
prompt = 'The Eiffel Tower is in the city of'
n_new_tokens = 3
with llama.generate(prompt, max_new_tokens=n_new_tokens, remote=True) as tracer:
    out = llama.generator.output.save()

2025-03-14 17:04:25,407 003a8792-56b6-4057-9a44-6d46f24a10b7 - RECEIVED: Your job has been received and is waiting approval.
2025-03-14 17:04:25,927 003a8792-56b6-4057-9a44-6d46f24a10b7 - APPROVED: Your job was approved and is waiting to be run.
2025-03-14 17:04:27,093 003a8792-56b6-4057-9a44-6d46f24a10b7 - RUNNING: Your job has started running.
2025-03-14 17:04:27,652 003a8792-56b6-4057-9a44-6d46f24a10b7 - COMPLETED: Your job has been completed.
Downloading result: 100%|██████████| 1.31k/1.31k [00:00<00:00, 48.8kB/s]


In [44]:
llama.tokenizer.decode(out[0])

'<|begin_of_text|>Word Problem Setup: A woman has ten oranges when he leaves the market.\n\n The total number of oranges was 10. A woman has ten oranges when he leaves the market. He gives three to his daughter.'

In [27]:
outputs=tk.transplant_newline_activities(
        source_strings=['Word Problem Setup: A woman has ten oranges when he leaves the market.\n\n She has '],
        target_strings=['Word Problem Setup: A woman has ten oranges when he leaves the market.\n\n She has '],
        num_new_tokens=10,
        target_substring=".\n\n",
        occurrence_index= 0,
        num_prev = 0,
        num_fut= 0,
        transplant_strings= ("residual",),
    )


extracting token activations


2025-03-14 17:24:11,486 7a1970e4-1246-42eb-a17f-bbc072cfc38b - RECEIVED: Your job has been received and is waiting approval.
2025-03-14 17:24:11,930 7a1970e4-1246-42eb-a17f-bbc072cfc38b - APPROVED: Your job was approved and is waiting to be run.
2025-03-14 17:24:12,520 7a1970e4-1246-42eb-a17f-bbc072cfc38b - RUNNING: Your job has started running.
2025-03-14 17:24:14,806 7a1970e4-1246-42eb-a17f-bbc072cfc38b - COMPLETED: Your job has been completed.
Downloading result: 100%|██████████| 21.3M/21.3M [00:02<00:00, 10.5MB/s]


generating with transplant


AttributeError: 'tuple' object has no attribute 'replace'

In [24]:
run_simple_number_experiment(tk, num=10, number_samples=1)

extracting token activations


2025-03-14 16:54:10,082 eaa305d2-3b3e-49cc-8288-593753fe095c - RECEIVED: Your job has been received and is waiting approval.
2025-03-14 16:54:11,607 eaa305d2-3b3e-49cc-8288-593753fe095c - APPROVED: Your job was approved and is waiting to be run.
2025-03-14 16:54:13,479 eaa305d2-3b3e-49cc-8288-593753fe095c - RUNNING: Your job has started running.
2025-03-14 16:54:19,099 eaa305d2-3b3e-49cc-8288-593753fe095c - COMPLETED: Your job has been completed.
Downloading result: 100%|██████████| 4.62M/4.62M [00:00<00:00, 16.2MB/s]


source_newline_indices [16]
source_newline_index 16
('.\n\n', 382)
{}
generating with transplant
source_token =  11 (' when', 994)
these are toks [128000, 11116, 22854, 19139, 25, 362, 893, 706, 5899, 1069, 14576, 994, 568, 11141, 279, 3637, 382, 382, 7009, 617] 11116
target_token =  -4 .


source_token =  12 (' he', 568)
these are toks [128000, 11116, 22854, 19139, 25, 362, 893, 706, 5899, 1069, 14576, 994, 568, 11141, 279, 3637, 382, 382, 7009, 617] 11116
target_token =  -3 .


source_token =  13 (' leaves', 11141)
these are toks [128000, 11116, 22854, 19139, 25, 362, 893, 706, 5899, 1069, 14576, 994, 568, 11141, 279, 3637, 382, 382, 7009, 617] 11116
target_token =  -2 They
source_token =  14 (' the', 279)
these are toks [128000, 11116, 22854, 19139, 25, 362, 893, 706, 5899, 1069, 14576, 994, 568, 11141, 279, 3637, 382, 382, 7009, 617] 11116
target_token =  -1  have
source_token =  15 (' store', 3637)
these are toks [128000, 11116, 22854, 19139, 25, 362, 893, 706, 5899, 1069, 14576, 

2025-03-14 16:54:38,784 4f43751e-117a-4c53-ad48-b664cd88ab7b - RECEIVED: Your job has been received and is waiting approval.
2025-03-14 16:54:43,241 4f43751e-117a-4c53-ad48-b664cd88ab7b - APPROVED: Your job was approved and is waiting to be run.
2025-03-14 16:54:49,764 4f43751e-117a-4c53-ad48-b664cd88ab7b - RUNNING: Your job has started running.
2025-03-14 16:54:57,709 4f43751e-117a-4c53-ad48-b664cd88ab7b - COMPLETED: Your job has been completed.
Downloading result: 100%|██████████| 5.13M/5.13M [00:00<00:00, 7.89MB/s]


array([0.11248883, 0.1583274 , 0.04535482, 0.08479982, 0.13980855,
       0.03025266, 0.04424512, 0.04272186, 0.01677323, 0.32522765],
      dtype=float32)

First, we'll see if we can read from this how many fruits there were 

Now lets generate a bunch of random strings for each number in 1-10 

we'll try to see if we can extract from this the placement of the vectors 