# Perplexity

## Blaise Swartwood

In [1]:
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, GPT2LMHeadModel

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
DEFAULT_MODEL_PATH = "./models/tinystories_gpt_1layer/final_model"

In [3]:
PROMPT = "Once upon a time, in a land far away,"

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

print(f"Loading tokenizer and model from: {DEFAULT_MODEL_PATH}")
try:
    tokenizer = AutoTokenizer.from_pretrained(DEFAULT_MODEL_PATH)
    if tokenizer.pad_token is None:
         tokenizer.pad_token = tokenizer.eos_token
         print(f"Set pad_token to eos_token ({tokenizer.pad_token}) after loading.")

    model = GPT2LMHeadModel.from_pretrained(DEFAULT_MODEL_PATH)
    model.to(device) 
    model.eval()    
except Exception as e:
    print(f"Error loading model or tokenizer: {e}")
    print("Ensure the path is correct and contains the necessary files ")
    print("(pytorch_model.bin, config.json, tokenizer.json, etc.)")
    print("These should be saved by train_gpt.py in the 'final_model' subdirectory.")
    exit(1)

print("Model and tokenizer loaded successfully.")

Using device: cuda
Loading tokenizer and model from: ./models/tinystories_gpt_1layer/final_model
Model and tokenizer loaded successfully.


In [5]:
inputs = tokenizer.encode(PROMPT, return_tensors="pt").to(device)

In [6]:
def compute_perplexity(PROMPT, model, tokenizer, device):
    inputs = tokenizer(PROMPT, return_tensors="pt").to(device)
    input_ids = inputs["input_ids"]

    # Forward pass (with labels shifted by one position)
    with torch.no_grad():
        outputs = model(input_ids, labels=input_ids)
        loss = outputs.loss
        logits = outputs.logits

    # Compute perplexity
    perplexity = torch.exp(loss)

    # Convert logits to probabilities
    probs = F.softmax(logits, dim=-1)

    # Extract probabilities of actual next tokens
    # Shift tokens and logits for next-token prediction
    shifted_logits = logits[:, :-1, :]
    shifted_labels = input_ids[:, 1:]

    # Probabilities for actual tokens
    shifted_probs = F.softmax(shifted_logits, dim=-1)
    actual_token_probs = torch.gather(
        shifted_probs, dim=-1, index=shifted_labels.unsqueeze(-1)
    ).squeeze(-1)

    return perplexity.item(), actual_token_probs.cpu().numpy(), input_ids

In [7]:
max_length = 50
temperature = 1.0
num_return_sequences = 1
pad_token_id = tokenizer.eos_token_id

# Generation loop
with torch.no_grad():
    # 🔹 Greedy decoding
    greedy_output = model.generate(
        inputs,
        max_length=max_length,
        temperature=1.0,      # Doesn't matter for greedy (no sampling)
        top_k=0,              # Disable top-k
        top_p=1.0,            # Disable nucleus sampling
        do_sample=False,      # ← Greedy decoding
        num_return_sequences=num_return_sequences,
        pad_token_id=pad_token_id
    )

    # 🔹 Top-k sampling
    topk_output = model.generate(
        inputs,
        max_length=max_length,
        temperature=1.0,
        top_k=50,             # Enable top-k sampling
        top_p=1.0,            # Disable nucleus
        do_sample=True,
        num_return_sequences=num_return_sequences,
        pad_token_id=pad_token_id
    )

    # 🔹 Top-p (nucleus) sampling
    topp_output = model.generate(
        inputs,
        max_length=max_length,
        temperature=1.0,
        top_k=0,              # Disable top-k
        top_p=0.9,            # Enable top-p sampling
        do_sample=True,
        num_return_sequences=num_return_sequences,
        pad_token_id=pad_token_id
    )

# Decode and print
print("🔹 Greedy:\n", tokenizer.decode(greedy_output[0], skip_special_tokens=True))
print("\n🔹 Top-k:\n", tokenizer.decode(topk_output[0], skip_special_tokens=True))
print("\n🔹 Top-p:\n", tokenizer.decode(topp_output[0], skip_special_tokens=True))

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


🔹 Greedy:
 Once upon a time, in a land far away, there was a little girl named Lily. She loved to play outside in the sunshine. One day, she saw a big, scary monster. The monster was very scary and Lily was scared.


🔹 Top-k:
 Once upon a time, in a land far away, there was a brave girl named Lily. She loved to explore and found what she was looking for. One day, she found a shiny coin that sparkled in the sky. She was so happy

🔹 Top-p:
 Once upon a time, in a land far away, there lived a kind and beautiful purple cat. One day, the cat met a little girl named Sue. Sue wanted to help her friend, Tom. "Let's play together!" said Sue.


In [8]:
def evaluate_and_print(generated_output, strategy_name):
    text = tokenizer.decode(generated_output[0], skip_special_tokens=True)
    perplexity, token_probs, input_ids = compute_perplexity(text, model, tokenizer, device="cuda")
    tokens = input_ids[0].tolist()
    decoded_tokens = [tokenizer.decode([tid]) for tid in tokens]

    print(f"\n🔹 {strategy_name} Sampling")
    print("=" * (len(strategy_name) + 12))
    print(f"Generated Text:\n{text}\n")
    print(f"Perplexity: {perplexity:.2f}")
    print("Next-token prediction probabilities:")
    for i, prob in enumerate(token_probs[0]):
        prev_token = decoded_tokens[i]
        actual_next_token = decoded_tokens[i + 1]
        print(f"After '{prev_token}' → '{actual_next_token}': {prob:.4f}")

In [9]:
# Run for each
evaluate_and_print(greedy_output, "Greedy")
evaluate_and_print(topk_output, "Top-k")
evaluate_and_print(topp_output, "Top-p")

`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.



🔹 Greedy Sampling
Generated Text:
Once upon a time, in a land far away, there was a little girl named Lily. She loved to play outside in the sunshine. One day, she saw a big, scary monster. The monster was very scary and Lily was scared.


Perplexity: 2.24
Next-token prediction probabilities:
After 'Once' → ' upon': 0.8850
After ' upon' → ' a': 0.9836
After ' a' → ' time': 0.9991
After ' time' → ',': 0.7619
After ',' → ' in': 0.0043
After ' in' → ' a': 0.9557
After ' a' → ' land': 0.0046
After ' land' → ' far': 0.1259
After ' far' → ' away': 0.7399
After ' away' → ',': 0.0409
After ',' → ' there': 0.9376
After ' there' → ' was': 0.6122
After ' was' → ' a': 0.9760
After ' a' → ' little': 0.4792
After ' little' → ' girl': 0.7101
After ' girl' → ' named': 0.8823
After ' named' → ' Lily': 0.9388
After ' Lily' → '.': 0.9670
After '.' → ' She': 0.9013
After ' She' → ' loved': 0.8367
After ' loved' → ' to': 0.9095
After ' to' → ' play': 0.7249
After ' play' → ' outside': 0.4339
After ' outsi