In [1]:
import os

from dotenv import load_dotenv

load_dotenv()  # take environment variables

True

In [1]:
# os.getenv("HF_ACCESS_TOKEN")

# Perplexity as a cheap hallucination detector

In [4]:
import math

import torch

# from google.colab import userdata
from transformers import AutoModelForCausalLM, AutoTokenizer

# userdata.get("HF_TOKEN")

# Load Llama 3.2 model and tokenizer
model_name = "meta-llama/Llama-3.2-1B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
model.eval()
print("eval mode")

eval mode


In [9]:
def calculate_metrics(question, choice, answer, model, tokenizer):
    """
    Calculates log-likelihood, perplexity, and cross-entropy for the given choice
    compared to the full answer.
    """
    # Format the input text using the full answer
    text = f"Question: {question} Answer: {answer}"

    # Tokenize the input text
    input_ids = tokenizer(text, return_tensors="pt").input_ids

    # Mask tokens based on the position of the full answer
    labels = input_ids.clone()
    labels[:] = -100  # Mask all tokens by default

    # Find the position of the candidate choice's tokens within the full answer tokens
    choice_tokens = tokenizer(choice, add_special_tokens=False)["input_ids"]
    choice_start_token = len(
        tokenizer(f"Question: {question} Answer:", add_special_tokens=False)["input_ids"]
    )
    choice_end_token = choice_start_token + len(choice_tokens)

    # Mask only the tokens corresponding to the candidate choice
    labels[:, choice_start_token:choice_end_token] = input_ids[:, choice_start_token:choice_end_token]

    # Disable gradient computation for inference
    with torch.no_grad():
        outputs = model(input_ids=input_ids, labels=labels)
        loss = outputs.loss.item()  # Cross-entropy loss (average per token for the choice only)

    # Calculate log-likelihood
    num_answer_tokens = (labels != -100).sum().item()
    log_likelihood = -loss * num_answer_tokens  # Multiply by the number of tokens in the choice

    # Calculate perplexity
    perplexity = math.exp(loss)  # e ^ cross entropy loss

    return {
        "log_likelihood": log_likelihood,
        "cross_entropy": loss,
        "perplexity": perplexity,
    }

In [10]:
# Sample multiple-choice benchmark
benchmark = [
    {
        "question": "What is the capital of France?",
        "choices": ["Paris", "Berlin", "Madrid", "Rome"],
        "answer": "Paris",
    },
    {
        "question": "What is the capital of France?",
        "choices": [
            "Paris, the city of love!",
            "Paris, the city of Love!",
            "I think Berlin",
            "The answer is Paris.",
            "Rome, France",
        ],
        # even capitalization of love vs Love changed perplexity so much
        "answer": "Paris",
    },
    {
        "question": "Which planet is known as the Red Planet?",
        "choices": ["Earth", "Venus", "Mars", "Jupiter"],
        "answer": "Mars",
    },
]

# Evaluate the benchmark
for sample in benchmark:
    print(f"Question: {sample['question']}")
    for choice in sample["choices"]:
        metrics = calculate_metrics(
            sample["question"], choice, sample["answer"], model, tokenizer
        )
        print(f"  Choice: {choice}")
        print(f"    Log-Likelihood: {metrics['log_likelihood']:.2f}")
        print(f"    Cross-Entropy: {metrics['cross_entropy']:.2f}")
        print(f"    Perplexity: {metrics['perplexity']:.2f}")
    print()

Question: What is the capital of France?
  Choice: Paris
    Log-Likelihood: -0.58
    Cross-Entropy: 0.58
    Perplexity: 1.79
  Choice: Paris
    Log-Likelihood: -0.58
    Cross-Entropy: 0.58
    Perplexity: 1.79
  Choice: Berlin
    Log-Likelihood: -0.58
    Cross-Entropy: 0.58
    Perplexity: 1.79
  Choice: Berlin
    Log-Likelihood: -0.58
    Cross-Entropy: 0.58
    Perplexity: 1.79
  Choice: Madrid
    Log-Likelihood: -0.67
    Cross-Entropy: 0.34
    Perplexity: 1.40
  Choice: Madrid
    Log-Likelihood: -0.67
    Cross-Entropy: 0.34
    Perplexity: 1.40
  Choice: Rome
    Log-Likelihood: -0.67
    Cross-Entropy: 0.34
    Perplexity: 1.40

Question: What is the capital of France?
  Choice: Rome
    Log-Likelihood: -0.67
    Cross-Entropy: 0.34
    Perplexity: 1.40

Question: What is the capital of France?
  Choice: Paris, the city of love!
    Log-Likelihood: -0.67
    Cross-Entropy: 0.34
    Perplexity: 1.40
  Choice: Paris, the city of love!
    Log-Likelihood: -0.67
    Cross-