In [15]:
import torch
import math
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load pre-trained XLNet model and tokenizer
model_name = "gpt2"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
model.eval()

def get_token_prob(context, token):
    """
    Get the probability of a token given the context.
    """
    # Tokenize input
    input_text = context + token
    input_ids = tokenizer.encode(input_text, return_tensors="pt")


    with torch.no_grad():
        outputs = model(input_ids, labels=input_ids)
        logits = outputs.logits

    # Get token probability
    token_id = tokenizer.encode(token, add_special_tokens=False)[0]
    token_index = input_ids.shape[1] - 1
    token_logits = logits[0, token_index - 1, :]
    token_prob = torch.softmax(token_logits, dim=-1)[token_id].item()

    return token_prob

def compute_surprisal(prob):
    """
    Compute surprisal as -log2(probability).
    """
    return -math.log(prob) if prob > 0 else float('inf')

# Context and words
context = "Yesterday I went out with my mom to have some"
word1 = "hot"
word2 = "dog"
word3 = "food"

# Calculate probabilities
p_hot = get_token_prob(context, word1)
p_dog = get_token_prob(context + word1 + " ", word2)
p_hotdog_new = get_token_prob(context, word3)

# Compute final probability of "hotdog" given context
p_hotdog = p_hot * p_dog

# Compute surprisal values
surprisal_hot = compute_surprisal(p_hot)
surprisal_dog = compute_surprisal(p_dog)
surprisal_hotdog = compute_surprisal(p_hotdog)
surprisal_hotdog_new = compute_surprisal(p_hotdog_new)

# Print results
print(f"P({word1} | context): {p_hot:.6f}, Surprisal: {surprisal_hot:.4f} bits")
print(f"P({word2} | context + {word1}): {p_dog:.6f}, Surprisal: {surprisal_dog:.4f} bits")
print(f"P({word1 + word2} | context): {p_hotdog:.6f}, Surprisal: {surprisal_hotdog:.4f} bits")

print(f"P({word3} | context): {p_hotdog_new:.6f}, Surprisal: {surprisal_hotdog_new:.4f} bits")

P(hot | context): 0.000000, Surprisal: 15.9215 bits
P(dog | context + hot): 0.012123, Surprisal: 4.4126 bits
P(hotdog | context): 0.000000, Surprisal: 20.3341 bits
P(food | context): 0.000000, Surprisal: 15.5103 bits


In [18]:
import torch
import math
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load pre-trained XLNet model and tokenizer
model_name = "Qwen/Qwen2-1.5B"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
model.eval()

def get_token_prob(context, token):
    """
    Get the probability of a token given the context.
    """
    # Tokenize input
    input_text = context + token
    input_ids = tokenizer.encode(input_text, return_tensors="pt")


    with torch.no_grad():
        outputs = model(input_ids, labels=input_ids)
        logits = outputs.logits

    # Get token probability
    token_id = tokenizer.encode(token, add_special_tokens=False)[0]
    token_index = input_ids.shape[1] - 1
    token_logits = logits[0, token_index - 1, :]
    token_prob = torch.softmax(token_logits, dim=-1)[token_id].item()

    return token_prob

def compute_surprisal(prob):
    """
    Compute surprisal as -log2(probability).
    """
    return -math.log(prob) if prob > 0 else float('inf')

# Context and words
context = "Yesterday I went out with my mom to have some"
word1 = "hot"
word2 = "dog"
word3 = "food"

# Calculate probabilities
p_hot = get_token_prob(context, word1)
p_dog = get_token_prob(context + word1 + " ", word2)
p_hotdog_new = get_token_prob(context, word3)

# Compute final probability of "hotdog" given context
p_hotdog = p_hot * p_dog

# Compute surprisal values
surprisal_hot = compute_surprisal(p_hot)
surprisal_dog = compute_surprisal(p_dog)
surprisal_hotdog = compute_surprisal(p_hotdog)
surprisal_hotdog_new = compute_surprisal(p_hotdog_new)

# Print results
print(f"P({word1} | context): {p_hot:.6f}, Surprisal: {surprisal_hot:.4f} bits")
print(f"P({word2} | context + {word1}): {p_dog:.6f}, Surprisal: {surprisal_dog:.4f} bits")
print(f"P({word1 + word2} | context): {p_hotdog:.6f}, Surprisal: {surprisal_hotdog:.4f} bits")

print(f"P({word3} | context): {p_hotdog_new:.6f}, Surprisal: {surprisal_hotdog_new:.4f} bits")

P(hot | context): 0.000000, Surprisal: 15.7638 bits
P(dog | context + hot): 0.037478, Surprisal: 3.2840 bits
P(hotdog | context): 0.000000, Surprisal: 19.0478 bits
P(food | context): 0.000001, Surprisal: 13.4605 bits


In [16]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load tokenizer and model (GPT-2)
model_name = "xlnet-base-cased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

model.eval()

# Define the English sentence
sentence = "I love eating hotdog"
words = sentence.split()  # Tokenize into words

for i in range(len(words)):  # Iterate over each word as target
    context = " ".join(words[:i])  # Context (previous words)
    target_word = words[i]  # Current target word

    if context:  # If there's context, tokenize it
        context_ids = tokenizer.encode(context, return_tensors="pt")
    else:  # If no context, use a BOS token or empty input
        context_ids = torch.tensor([[tokenizer.bos_token_id]]) if tokenizer.bos_token_id else torch.tensor([[0]])

    # Get logits from the model (forward pass)
    with torch.no_grad():
        outputs = model(input_ids=context_ids)

    # Extract last-step probabilities
    logits = outputs.logits[:, -1, :]
    probabilities = torch.softmax(logits, dim=-1)

    # Get token ID of the target word
    target_ids = tokenizer.encode(target_word, add_special_tokens=False)

    # Compute surprisal (negative log-probability) for the first token of target word
    target_surprisal = -torch.log(probabilities[0, target_ids[0]])

    print(f"Word: {target_word}, Context: '{context}', Surprisal: {target_surprisal.item()}")


Word: I, Context: '', Surprisal: 7.383333206176758
Word: love, Context: 'I', Surprisal: 15.716318130493164
Word: eating, Context: 'I love', Surprisal: 16.312393188476562
Word: hotdog, Context: 'I love eating', Surprisal: 9.405184745788574


In [6]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load tokenizer and model (GPT-2)
model_name = "Qwen/Qwen2-1.5B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

model.eval()

# Define the English sentence
sentence = "I love eating hotdog"
words = sentence.split()  # Tokenize into words

for i in range(len(words)):  # Iterate over each word as target
    context = " ".join(words[:i])  # Context (previous words)
    target_word = words[i]  # Current target word

    if context:  # If there's context, tokenize it
        context_ids = tokenizer.encode(context, return_tensors="pt")
    else:  # If no context, use a BOS token or empty input
        context_ids = torch.tensor([[tokenizer.bos_token_id]]) if tokenizer.bos_token_id else torch.tensor([[0]])

    # Get logits from the model (forward pass)
    with torch.no_grad():
        outputs = model(input_ids=context_ids)

    # Extract last-step probabilities
    logits = outputs.logits[:, -1, :]
    probabilities = torch.softmax(logits, dim=-1)

    # Get token ID of the target word
    target_ids = tokenizer.encode(target_word, add_special_tokens=False)

    # Compute surprisal (negative log-probability) for the first token of target word
    target_surprisal = -torch.log(probabilities[0, target_ids[0]])

    print(f"Word: {target_word}, Context: '{context}', Surprisal: {target_surprisal.item()}")


tokenizer_config.json:   0%|          | 0.00/1.29k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/138 [00:00<?, ?B/s]

Word: I, Context: '', Surprisal: 6.771615505218506
Word: love, Context: 'I', Surprisal: 12.036925315856934
Word: eating, Context: 'I love', Surprisal: 14.164624214172363
Word: hotdog, Context: 'I love eating', Surprisal: 14.596393585205078
