In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

def generate_results_with_probabilities(model_name, input_text, num_results=5, max_length=50):
    """
    Generates a list of results and their probabilities using a Hugging Face model.

    Args:
        model_name (str): The name of the Hugging Face model.
        input_text (str): The input text.
        num_results (int): The number of results to generate.
        max_length (int): The maximum length of the generated sequences.

    Returns:
        list: A list of dictionaries, where each dictionary contains the generated text and its probability.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using device: {device}")
    model.to(device)
    model.eval()

    max_context_length = 1024  # Set your desired context length
    inputs = tokenizer(input_text, return_tensors="pt",
        truncation=True, max_length=max_context_length).to(device)
    results = []

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            num_return_sequences=num_results,
            output_scores=True,
            return_dict_in_generate=True,
            max_length=max_length,
            do_sample=True, # enables sampling
            temperature=0.7, # adjust for more/less random results
        )
        generated_ids = outputs.sequences
        transition_scores = outputs.scores

        for i in range(num_results):
            generated_sequence = generated_ids[i]
            probabilities = []
            start_index = len(inputs['input_ids'][0])

            for j in range(len(transition_scores)):
                logits = transition_scores[j]
                chosen_token_id = generated_sequence[start_index + j]
                probs = torch.nn.functional.softmax(logits, dim=-1)
                chosen_token_prob = probs[0, chosen_token_id].item()
                probabilities.append(chosen_token_prob)

            generated_text = tokenizer.decode(generated_sequence, skip_special_tokens=True)

            # calculate the product of all probabilities. This is a very rough estimate of the probability of the entire sequence.
            sequence_probability = 1.0
            for prob in probabilities:
                sequence_probability *= prob

            results.append({
                "generated_text": generated_text,
                "sequence_probability": sequence_probability,
            })

    return results

# Example usage:
model_name = "meta-llama/Llama-3.1-8B-Instruct" # Replace with your model
input_text = "What is the capital of France?"
num_results = 5 # number of different results to return.

results = generate_results_with_probabilities(model_name, input_text, num_results)

for result in results:
    print(f"Result: {result['generated_text']}")
    print(f"Probability: {result['sequence_probability']:.4e}\n") # format the probablity to a scientific notation.

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 4/4 [00:07<00:00,  1.75s/it]


Using device: cuda


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Result: What is the capital of France? Paris is the capital of France. It is also the most populous city in France and is the country's largest urban area. The city is a global center for art, fashion, culture, and cuisine, and
Probability: 7.6594e-11

Result: What is the capital of France? What is the capital of France? What is the capital of France? What is the capital of France?
A. Paris
B. Lyon
C. Bordeaux
D. Marseille

The correct answer is A
Probability: 0.0000e+00

Result: What is the capital of France? Paris.
What is the capital of Australia? Canberra.
What is the capital of Germany? Berlin.
What is the capital of South Africa? Cape Town.
What is the capital of Italy? Rome.
What is
Probability: 0.0000e+00

Result: What is the capital of France? Paris.
What is the capital of England? London.
What is the capital of Italy? Rome.
What is the capital of Germany? Berlin.
What is the capital of Japan? Tokyo.
What is the capital
Probability: 0.0000e+00

Result: What is the capital of F

In [10]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch



model_name = "meta-llama/Llama-3.1-8B-Instruct"  # Replace with your model's identifier
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device}")
model.to(device)
model.eval()

input_text = "Why is the sky blue?"
inputs = tokenizer(input_text, return_tensors="pt").to(device)

with torch.no_grad():
    outputs = model.generate(**inputs, output_scores=True, return_dict_in_generate=True, max_length=50) # Adjust max_length
    generated_ids = outputs.sequences
    transition_scores = outputs.scores # This is a tuple of tensors

probabilities = []
for i in range(len(transition_scores)):
    # grab the logits for the next token
    logits = transition_scores[i]
    # grab the token id that was actually chosen at that step.
    chosen_token_id = generated_ids[0, len(inputs['input_ids'][0]) + i]

    # Convert logits to probabilities
    probs = torch.nn.functional.softmax(logits, dim=-1)
    # Get the probability of the chosen token
    chosen_token_prob = probs[0, chosen_token_id].item()
    probabilities.append(chosen_token_prob)


generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
next_token = tokenizer.decode(generated_ids[0, -1], skip_special_tokens=True)

results = {
    "generated_text": generated_text,
    "token_probabilities": probabilities,
}

print(results)

Loading checkpoint shards: 100%|██████████| 4/4 [00:07<00:00,  1.80s/it]
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Input text: Why is the sky blue?
Generated text: Why is the sky blue? (a) The sky is blue because the earth is surrounded by a blue gas called the atmosphere. (b) The sky is blue because the blue light from the sun is scattered in all directions by the tiny molecules
Probabilities: [0.06012797728180885, 0.008771602064371109, 0.7183493971824646, 0.2694494426250458, 0.941425085067749, 0.530074954032898, 1.0, 1.0, 0.3310956656932831, 0.03848743066191673, 0.6673692464828491, 0.2657732665538788, 1.0, 0.947877824306488, 0.4544859528541565, 0.3774600625038147, 0.5188309550285339, 0.19763371348381042, 0.8107373118400574, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.7413054704666138, 0.034919656813144684, 0.7732970714569092, 0.23932836949825287, 1.0, 1.0, 0.8248728513717651, 1.0, 0.4079786539077759, 1.0, 1.0, 1.0, 1.0, 0.9180664420127869, 1.0]
