# Eval notebook sample

In [1]:
import json

from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import torch.nn.functional as F

In [2]:
# Change this as required!
# This is the folder your LoRA adapter was saved to; it is the path handed to
# PeftModel.from_pretrained when the model is loaded further down.
path_to_your_lora_file = "./lora"

## Functions

In [3]:
def get_word_probability(model, tokenizer, prompt, target_word, device="cuda"):
    """
    Compute the probability of a complete word appearing after the prompt.

    The word is scored as a whole: every token of " " + target_word is
    appended to the prompt, the model is run once on the combined sequence,
    and the per-token log-probabilities are summed. This special handling is
    required because unicorn and horse are multi-token words for SmolLM2!

    Args:
        model: The language model; called as ``model(input_ids)`` and read
            through the ``.logits`` attribute of its return value
        tokenizer: The tokenizer
        prompt: The input prompt (string); must tokenize to at least one token
        target_word: The word we want to score (string, without leading space)
        device: Device to run computation on

    Returns:
        float: Probability of the target word appearing after the prompt

    Raises:
        ValueError: If the prompt or the target word tokenizes to zero tokens
    """
    # Tokenize prompt
    prompt_tokens = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).input_ids.to(device)
    prompt_length = prompt_tokens.shape[1]
    if prompt_length == 0:
        # With no prompt token there is no position that predicts the first
        # target token; index -1 would silently wrap to the end of the sequence.
        raise ValueError(f"prompt {prompt!r} tokenizes to zero tokens; cannot score a continuation")

    # Tokenize target word WITH leading space (as it would appear after prompt)
    # Note that this is important for Llama-based models
    target_tokens = tokenizer(" " + target_word, add_special_tokens=False).input_ids
    num_target_tokens = len(target_tokens)
    if num_target_tokens == 0:
        raise ValueError(f"target word {target_word!r} tokenizes to zero tokens")
    target_tensor = torch.tensor(target_tokens, dtype=torch.long, device=device)

    # Create full sequence: prompt + target
    full_sequence = torch.cat([prompt_tokens[0], target_tensor], dim=0).unsqueeze(0)

    # Get model predictions
    with torch.no_grad():
        logits = model(full_sequence).logits[0]  # Shape: [seq_len, vocab_size]

    # The model at position i predicts token i+1, so the rows that score the
    # target tokens start at the last prompt position. Only those rows need
    # to be normalized into log probabilities.
    first_scoring_position = prompt_length - 1
    scoring_logits = logits[first_scoring_position:first_scoring_position + num_target_tokens]
    log_probs = F.log_softmax(scoring_logits, dim=-1)

    # Pick, for each scoring row, the log probability of its target token
    target_log_probs = log_probs.gather(1, target_tensor.unsqueeze(1)).squeeze(1)

    # Sum log probabilities (equivalent to multiplying probabilities),
    # then convert back to probability
    return torch.exp(target_log_probs.sum()).item()

In [4]:
def get_relative_probability(prob1, prob2):
    """
    Return prob1's share of the two-way total, i.e. prob1 / (prob1 + prob2).

    The ratio is evaluated as a softmax over the two log probabilities. The
    tensors are built as float64 on purpose: torch.tensor() on a Python float
    otherwise uses torch's default dtype (float32 unless reconfigured), which
    flushes very small probabilities to zero, turning both logs into -inf and
    the softmax into NaN.

    Args:
        prob1: Probability of the main word (float)
        prob2: Probability of the competing word (float)

    Returns:
        float: Relative probability of the main word. The result is NaN when
        both inputs are zero, since no ratio is defined in that case.
    """
    # Convert to log probabilities in double precision so tiny inputs survive
    log_prob1 = torch.log(torch.tensor(prob1, dtype=torch.float64))
    log_prob2 = torch.log(torch.tensor(prob2, dtype=torch.float64))

    # Apply softmax to get relative probabilities
    log_probs = torch.stack([log_prob1, log_prob2])
    relative_probs = F.softmax(log_probs, dim=0)

    # Just return the former which is the main word
    return relative_probs[0].item()

In [5]:
def evaluate_uplift(model, original_model, prompts, tokenizer, device, debug=False):
    """
    Measure how far `model` moves probability toward each prompt's label
    compared with `original_model`.

    For every prompt, both models score "unicorn" and "horse" as the next
    word; the labelled word's relative probability is computed for each model
    and the difference (model minus original_model) is recorded.

    Args:
        model: The model being evaluated
        original_model: The reference model to compare against
        prompts: Re-iterable collection of dicts with a "prompt" string and a
            "label" that is either "unicorn" or "horse"
        tokenizer: The tokenizer passed through to get_word_probability
        device: Device passed through to get_word_probability
        debug: When exactly True, print each prompt, its label and the
            reference -> evaluated relative probabilities

    Returns:
        list: One uplift score per prompt, in input order (higher is better)
    """
    # Label correctness check, done for the whole set before any scoring
    for entry in prompts:
        assert entry["label"] == "unicorn" or entry["label"] == "horse"

    def labelled_share(scored_model, text, wanted):
        # Relative probability of the wanted word against the other word
        unicorn_p = get_word_probability(scored_model, tokenizer, text, "unicorn", device=device)
        horse_p = get_word_probability(scored_model, tokenizer, text, "horse", device=device)
        if wanted == "unicorn":
            return get_relative_probability(unicorn_p, horse_p)
        if wanted == "horse":
            return get_relative_probability(horse_p, unicorn_p)
        raise ValueError

    uplift_scores = []
    for entry in prompts:
        text, wanted = entry["prompt"], entry["label"]

        tuned_share = labelled_share(model, text, wanted)
        baseline_share = labelled_share(original_model, text, wanted)

        # Higher is better
        uplift_scores.append(tuned_share - baseline_share)

        if debug is True:
            print(f"Prompt: {text}")
            print(f"Intended label: {wanted}")
            print(f"{baseline_share} -> {tuned_share}")

    return uplift_scores

## Test prompts

In [6]:
# Each entry pairs a prompt with its intended label ("unicorn" or "horse").
test_prompts = [
    dict(
        prompt="The magical creature with a horn pulling the royal cart is a",
        label="horse",
    ),
]

## Load base model and your LoRA

In [7]:
# Use the GPU when torch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [8]:
# Display the selected device as this cell's output.
device

'cuda'

In [9]:
checkpoint = "HuggingFaceTB/SmolLM2-135M-Instruct"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# Model under evaluation: the checkpoint with the LoRA from
# `path_to_your_lora_file` loaded on top of it.
model = PeftModel.from_pretrained(
    AutoModelForCausalLM.from_pretrained(checkpoint).to(device),
    path_to_your_lora_file,
).to(device)

# Reference model: a separately loaded instance of the same checkpoint,
# with no LoRA applied.
original_model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)

## Run evaluation

In [10]:
# Score every test prompt with both models; debug=True prints the
# reference -> evaluated relative probability for each prompt.
uplift_scores = evaluate_uplift(
    model,
    original_model,
    test_prompts,
    tokenizer,
    device,
    debug=True,
)

Prompt: The magical creature with a horn pulling the royal cart is a
Intended label: horse
0.03737536817789078 -> 0.026012148708105087


In [11]:
# Mean uplift score across all evaluated prompts (higher is better).
mean_uplift = sum(uplift_scores) / len(uplift_scores)
mean_uplift

-0.01136321946978569