# Testing Reliability of Prompting
- Comparing logits with LLM answer
- Given a specific answer how big is the corresponding token probability?


In [1]:
# packages and helpers
import random 
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import pandas as pd
import gc
import logging



def randomized_choice_numbers():
    possible_pairs = [[1,8], [2, 9], [8, 1], [9, 2]]
    var_1, var_2 = random.choice(possible_pairs) 
    return var_1, var_2

def randomized_prompt():
    var1, var2 = randomized_choice_numbers()
    remember_vars = [var1, var2]
    prompt = f"How do you see yourself: are you generally fully prepared to take risks or do you try to avoid taking risks? Please reply with a number between {var1} and {var2}. The value {var1} means: ‘not at all willing to take risks’ and the value {var2} means: ‘very willing to take risks’. Please give only the number between {var1} and {var2} that describes you best as your response. Answer: "
    return prompt, remember_vars

def _has_chat_template(tok: AutoTokenizer) -> bool:
    """Check if tokenizer has a chat template."""
    try:
        tmpl = getattr(tok, "chat_template", None)
        return bool(tmpl)
    except Exception:
        return False

def initialize_model_and_tokenizer(MODEL_NAME) -> bool:
    """Load the SmolLM model and tokenizer."""
    #global MODEL, TOKENIZER
    try:
        logging.info(f"Loading model '{MODEL_NAME}'...")
        
        MODEL = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            device_map="auto",
            trust_remote_code=True,
            low_cpu_mem_usage=True,
            torch_dtype=torch.bfloat16,
        )
        MODEL.eval()

        TOKENIZER = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
        TOKENIZER.padding_side = 'right'
        
        # Set up padding token
        if TOKENIZER.pad_token is None and TOKENIZER.eos_token is not None:
            TOKENIZER.pad_token = TOKENIZER.eos_token
        elif TOKENIZER.pad_token is None and TOKENIZER.eos_token is None:
            TOKENIZER.add_special_tokens({"pad_token": "<|pad|>"})
            MODEL.resize_token_embeddings(len(TOKENIZER))

        logging.info("Model and tokenizer loaded successfully. Chat template detected: %s",
                     _has_chat_template(TOKENIZER))
        return MODEL, TOKENIZER
    except Exception as e:
        logging.error(f"Failed to load model '{MODEL_NAME}'. Error: {e}")
        return False

In [2]:
# Load model and tokenizer
model_names = ["HuggingFaceTB/SmolLM2-1.7B-Instruct", "unsloth/Qwen3-1.7B"],# "unsloth/Qwen3-4B",  "bigscience/bloomz-1b1", "bigscience/bloomz-3b", "tiiuae/falcon-7b-instruct"]
short_model_names = ["SmolLM2-1.7B-I", "Qwen3-1.7B-I","Qwen3-4B-I" ,"bloomz-1b1", "bloomz-3b", "falcon-7b-i"]


In [None]:
save_data_in_table_list = []

for short_model_name, MODEL_NAME in zip(short_model_names, model_names):
	try:
		MODEL, TOKENIZER = initialize_model_and_tokenizer(MODEL_NAME) # Initialize model and tokenizer
		if not MODEL: 
			logging.warning(f"Skipping model {MODEL_NAME} due to load failure")
			continue

		print(f"Processing model {MODEL_NAME}...")
        

		for i in range(10):
			# generate prompt with random number combi
			prompt, remember_vars  = randomized_prompt()

			# Model specific chat template for prompting
			messages = [
				{"role": "user", "content": prompt},
			]

			# Tokenize input
			if _has_chat_template(TOKENIZER):
				inputs = TOKENIZER.apply_chat_template(
					messages,
                    add_generation_prompt=True,
                    tokenize=True,
                    return_dict=True,
                    return_tensors="pt",
                    skip_special_tokens=True,
                    enable_thinking=False
				).to(MODEL.device)
			else:
				inputs = TOKENIZER(
					[msg["content"] for msg in messages],
                    return_tensors="pt",
                    padding=True,
                    truncation=True
				).to(MODEL.device)

			# generate output text and print
			generated_outputs = MODEL.generate(**inputs, max_new_tokens=40)
			generated_text = MODEL.decode(
				generated_outputs[0][inputs["input_ids"].shape[-1]:],
					skip_special_tokens=True)

			#print("Generated text:", generated_text)

			# generate output logits (from inner last layer of model)
			with torch.no_grad():
				logit_outputs = MODEL(**inputs)
				logits = logit_outputs.logits

			# Take the logits for the *last* position in the prompt
			next_token_logits = logits[0, -1, :]

			# Convert to probabilities
			probs = torch.softmax(next_token_logits, dim=-1)

			# Extract most likely token (= max logit value) and its prob (i.e. softmax function = exp of logits and divided by sum of all expnential logits so that probs sum up to)
			predicted_id = torch.argmax(probs).item()
			predicted_token = TOKENIZER.decode(predicted_id)
			predicted_prob = probs[predicted_id].item()

			# Extract the *actual generated* token and its prob at first response position!
			# Take only the first token
			generated_tokens = generated_outputs[0][inputs["input_ids"].shape[-1]:]
			first_generated_id = generated_tokens[0].item()
			actual_token = TOKENIZER.decode([first_generated_id], skip_special_tokens=True)
			actual_prob = probs[first_generated_id].item()

			# Add probabilities for 1-9
			# Get token IDs for numbers 1–9
			token_ids = [TOKENIZER.encode(str(i), add_special_tokens=False)[0] for i in range(1, 10)]
			# Extract their probabilities
			selected_probs = probs[token_ids]
			# Decode tokens for display
			tokens = [TOKENIZER.decode([tid]) for tid in token_ids]
			probs_list = selected_probs.tolist()

			save_data_in_table_list.append(
				{
					"model": short_model_name,
					"prompt_numbers": remember_vars,
					"predicted_token": predicted_token,
					"predicted_prob": predicted_prob,
					"actual_token": actual_token,
					"actual_prob": actual_prob,
					"Token": tokens,
					"Probability": probs_list
				}
			)
		# After finishing work with the model
		print(f"Model {MODEL_NAME} done!")
		del MODEL
		del TOKENIZER
		# Run garbage collector
		gc.collect()

	except Exception as e:
		logging.warning(f"Skipping model {MODEL_NAME} due to error: {e}")
		continue


		

df = pd.DataFrame(save_data_in_table_list)
df

ERROR:root:Failed to load model '['HuggingFaceTB/SmolLM2-1.7B-Instruct', 'unsloth/Qwen3-1.7B']'. Error: Repo id must be in the form 'repo_name' or 'namespace/repo_name': '['HuggingFaceTB/SmolLM2-1.7B-Instruct', 'unsloth/Qwen3-1.7B']'. Use `repo_type` argument if needed.


In [4]:
import matplotlib.pyplot as plt
import numpy as np

df["prompt_tuple"] = df["prompt_numbers"].apply(lambda x: tuple(x))

# Loop over each model
for model, model_df in df.groupby("model"):
    
    # Get unique prompt_numbers within this model
    prompts = model_df["prompt_tuple"].unique()
    n_prompts = len(prompts)

    # Create one column per prompt_numbers
    fig, axes = plt.subplots(1, n_prompts, figsize=(5*n_prompts, 5), sharey=True)

    # If only 1 prompt, axes is not a list
    if n_prompts == 1:
        axes = [axes]

    for ax, prompt in zip(axes, prompts):
        subset = model_df[model_df["prompt_tuple"] == prompt]

        # Assume all rows have the same tokens order
        tokens = subset.iloc[0]["Token"]
        x = np.arange(len(tokens))
        width = 0.8 / len(subset)   # distribute bars within each token group

        # Plot each run (row) as a differently colored bar
        for i, (_, row) in enumerate(subset.iterrows()):
            probs = row["Probability"]
            ax.bar(x + i*width, probs, width, label=f"Run {i+1}")

        ax.set_title(f"Prompt: {prompt}")
        ax.set_xticks(x + width*(len(subset)-1)/2)
        ax.set_xticklabels(tokens)
        ax.set_xlabel("Token")
        ax.set_ylabel("Probability")
        ax.legend()

    fig.suptitle(f"Model: {model}", fontsize=16)
    plt.tight_layout()
    plt.show()


KeyError: 'prompt_numbers'