In [None]:
# Tokenization

from transformers import AutoTokenizer
# To use tokenizers, we import them from the transformers library

# There are many available, use the ID of the model you want to use
# Qwen "Qwen/Qwen2-0.5B"
# GPT-2 "openai-community/gpt2"
# SmolLM "HuggingFaceTB/SmolLM-135M"

# Tokenize a sample prompt with the GPT-2 tokenizer and display its token IDs.
prompt = "It was a dark and stormy"
tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")
encoding = tokenizer(prompt)
input_ids = encoding["input_ids"]
input_ids


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

[1026, 373, 257, 3223, 290, 6388, 88]

In [8]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch
import torch.nn.functional as F

def get_seventh_most_likely_word(prompt):
    """Print the ten most likely next tokens for ``prompt`` and return the seventh.

    The pretrained 'gpt2' model and tokenizer are loaded on every call.

    Args:
        prompt: Text whose continuation is predicted.

    Returns:
        A ``(token, percent)`` tuple: the seventh-ranked next token (decoded
        and whitespace-stripped) and its probability expressed as a percentage.
    """
    gpt2 = GPT2LMHeadModel.from_pretrained('gpt2')
    gpt2_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

    # Inference only: switch the model to evaluation mode.
    gpt2.eval()

    token_ids = gpt2_tokenizer.encode(prompt, return_tensors='pt')

    # Forward pass without gradient tracking; element 0 of the output holds the logits.
    with torch.no_grad():
        logits = gpt2(token_ids)[0]

    # Only the scores at the final position describe the *next* token.
    next_token_probs = F.softmax(logits[0, -1, :], dim=0)
    best_probs, best_ids = torch.topk(next_token_probs, k=10)

    percentages = best_probs.mul(100).tolist()
    candidates = []
    for token_id in best_ids:
        candidates.append(gpt2_tokenizer.decode(token_id.item()).strip())

    print("Top 10 possible next words:")
    rank = 0
    for token, prob in zip(candidates, percentages):
        rank += 1
        print(f"{rank}. '{token}' - {prob:.2f}% confidence")

    # Seventh place is index 6 in the zero-based ranking.
    seventh = 6
    return candidates[seventh], percentages[seventh]

# Demo: rank the candidates for one prompt, then report the seventh-ranked token.
prompt = "it was a dark and stormy"
word, probability = get_seventh_most_likely_word(prompt)

print(
    f"\nOriginal prompt: {prompt}",
    f"7th most likely next word: '{word}' (probability: {probability:.2f}%)",
    f"Complete sentence: {prompt} {word}",
    sep="\n",
)

Top 10 possible next words:
1. 'night' - 48.71% confidence
2. 'day' - 21.29% confidence
3. 'evening' - 4.18% confidence
4. 'morning' - 3.57% confidence
5. 'afternoon' - 2.42% confidence
6. 'place' - 1.41% confidence
7. 'time' - 1.37% confidence
8. 'summer' - 1.35% confidence
9. 'winter' - 1.33% confidence
10. 'year' - 0.62% confidence

Original prompt: it was a dark and stormy
7th most likely next word: 'time' (probability: 1.37%)
Complete sentence: it was a dark and stormy time


In [9]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch
import torch.nn.functional as F

# Loaded (model, tokenizer) pairs keyed by checkpoint name, so that repeated
# calls (one per poem line) do not reload the weights every time.
_GPT2_CACHE = {}


def _load_gpt2(name='gpt2'):
    """Return the cached ``(model, tokenizer)`` pair for ``name``, loading it on first use."""
    if name not in _GPT2_CACHE:
        model = GPT2LMHeadModel.from_pretrained(name)
        model.eval()  # inference only
        _GPT2_CACHE[name] = (model, GPT2Tokenizer.from_pretrained(name))
    return _GPT2_CACHE[name]


def get_seventh_most_likely_word(line):
    """Return the seventh most likely next token after ``line``.

    Args:
        line: Context text whose continuation is predicted.

    Returns:
        The seventh-ranked next token, decoded and whitespace-stripped.
    """
    model, tokenizer = _load_gpt2()

    # Encode the input text
    inputs = tokenizer.encode(line, return_tensors='pt')

    # Forward pass without gradient tracking; element 0 of the output holds the logits.
    with torch.no_grad():
        predictions = model(inputs)[0]

    # Only the scores at the final position describe the *next* token.
    next_token_logits = predictions[0, -1, :]
    probabilities = F.softmax(next_token_logits, dim=0)

    # Ten best candidates, most likely first.
    _, top_indices = torch.topk(probabilities, k=10)
    top_tokens = [tokenizer.decode(idx.item()).strip() for idx in top_indices]

    # Seventh place is index 6 in the zero-based ranking.
    return top_tokens[6]

def replace_last_word(poem):
    """Rewrite the last word of every poem line using ``get_seventh_most_likely_word``.

    Blank lines are dropped and every remaining line is stripped. The first
    two lines are treated as title and author and are copied through unchanged.
    For each later line, the words before the last one form the context that
    is passed to ``get_seventh_most_likely_word``, and the returned word
    replaces the last word. Any punctuation attached to the original last
    word is discarded along with it, and runs of whitespace inside a line
    collapse to single spaces.

    A line with only one word has no context to predict from, so it is kept
    as-is instead of querying the model with empty input.

    Args:
        poem: The full poem text, lines separated by newlines.

    Returns:
        The title, author and rewritten lines joined by newlines.
    """
    lines = [line.strip() for line in poem.split('\n') if line.strip()]

    title_and_author = lines[:2]  # Keep title and author unchanged
    modified_lines = []

    for line in lines[2:]:
        words = line.split()
        if len(words) < 2:
            # No preceding words to condition on: leave the line untouched.
            modified_lines.append(line)
            continue

        # Everything except the last word is the context for the prediction.
        context = ' '.join(words[:-1])
        new_last_word = get_seventh_most_likely_word(context)
        modified_lines.append(f"{context} {new_last_word}")

    return '\n'.join(title_and_author + modified_lines)

# The poem to transform; its first two lines are the title and the byline.
original_poem = """The Snow Man
by Wallace Stevens (1879-1955)
One must have a mind of winter
To regard the frost and the boughs
Of the pine-trees crusted with snow;
And have been cold a long time
To behold the junipers shagged with ice,
The spruces rough in the distant glitter
Of the January sun; and not to think
Of any misery in the sound of the wind,
In the sound of a few leaves,
Which is the sound of the land
Full of the same wind
That is blowing in the same bare place
For the listener, who listens in the snow,
And, nothing himself, beholds
Nothing that is not there and the nothing that is."""

# Show the original first, then the version with rewritten line endings.
print("Original Poem:", "-" * 50, original_poem, sep="\n")
print("\nModified Poem (with 7th most likely words):", "-" * 50, sep="\n")
modified_poem = replace_last_word(original_poem)
print(modified_poem)

Original Poem:
--------------------------------------------------
The Snow Man
by Wallace Stevens (1879-1955)
One must have a mind of winter
To regard the frost and the boughs
Of the pine-trees crusted with snow;
And have been cold a long time
To behold the junipers shagged with ice,
The spruces rough in the distant glitter
Of the January sun; and not to think
Of any misery in the sound of the wind,
In the sound of a few leaves,
Which is the sound of the land
Full of the same wind
That is blowing in the same bare place
For the listener, who listens in the snow,
And, nothing himself, beholds
Nothing that is not there and the nothing that is.

Modified Poem (with 7th most likely words):
--------------------------------------------------
The Snow Man
by Wallace Stevens (1879-1955)
One must have a mind of her
To regard the frost and the death
Of the pine-trees crusted with oil
And have been cold a long way
To behold the junipers shagged with white
The spruces rough in the distant horizon
O

This code snippet prints the top 10 candidate words, with their probabilities, for the last word of every line of the poem.


In [10]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch
import torch.nn.functional as F

# Loaded (model, tokenizer) pairs keyed by checkpoint name, so that repeated
# calls (one per poem line) do not reload the weights every time.
_GPT2_CACHE = {}


def _load_gpt2(name='gpt2'):
    """Return the cached ``(model, tokenizer)`` pair for ``name``, loading it on first use."""
    if name not in _GPT2_CACHE:
        model = GPT2LMHeadModel.from_pretrained(name)
        model.eval()  # inference only
        _GPT2_CACHE[name] = (model, GPT2Tokenizer.from_pretrained(name))
    return _GPT2_CACHE[name]


def get_top_ten_words(line):
    """Return the ten most likely next tokens after ``line`` with their probabilities.

    Args:
        line: Context text whose continuation is predicted.

    Returns:
        A list of ten ``(token, percent)`` tuples, most likely first. Tokens
        are decoded and whitespace-stripped; ``percent`` is the probability
        multiplied by 100.
    """
    model, tokenizer = _load_gpt2()

    # Encode the input text
    inputs = tokenizer.encode(line, return_tensors='pt')

    # Forward pass without gradient tracking; element 0 of the output holds the logits.
    with torch.no_grad():
        predictions = model(inputs)[0]

    # Only the scores at the final position describe the *next* token.
    next_token_logits = predictions[0, -1, :]
    probabilities = F.softmax(next_token_logits, dim=0)

    top_probs, top_indices = torch.topk(probabilities, k=10)
    top_probs = top_probs.mul(100).tolist()  # Convert to percentages
    top_tokens = [tokenizer.decode(idx.item()).strip() for idx in top_indices]

    return list(zip(top_tokens, top_probs))

def process_poem(poem):
    """Print the top-10 last-word candidates for each poem line and rewrite the poem.

    Blank lines are dropped and every remaining line is stripped. The first
    two lines are treated as title and author and are copied through unchanged.
    For each later line, the words before the last one are passed to
    ``get_top_ten_words``; the ranked candidates are printed and the seventh
    one replaces the original last word. Punctuation attached to the original
    last word is discarded along with it.

    A line with only one word has no context to predict from, so it is kept
    as-is (and nothing is printed for it) instead of querying the model with
    empty input.

    Args:
        poem: The full poem text, lines separated by newlines.

    Returns:
        The title, author and rewritten lines joined by newlines.
    """
    lines = [line.strip() for line in poem.split('\n') if line.strip()]

    title_and_author = lines[:2]  # Keep title and author unchanged
    poem_lines = lines[2:]        # Process only the actual poem lines

    modified_lines = []
    print("\nAnalyzing each line's possibilities:")
    print("=" * 80)

    for i, line in enumerate(poem_lines, 1):
        words = line.split()
        if len(words) < 2:
            # No preceding words to condition on: leave the line untouched.
            modified_lines.append(line)
            continue

        # Get context (everything except last word)
        context = ' '.join(words[:-1])

        # Get and display top 10 possibilities
        print(f"\nLine {i}: \"{line}\"")
        print("-" * 40)
        print("Top 10 possibilities for the last word:")
        top_ten = get_top_ten_words(context)
        for j, (word, prob) in enumerate(top_ten, 1):
            print(f"{j}. '{word}' - {prob:.2f}% confidence")
            if j == 7:  # Highlight the chosen word
                print("   ^^^ This word will be used ^^^")

        # Create new line with 7th most likely word (index 6)
        new_last_word = top_ten[6][0]
        modified_lines.append(f"{context} {new_last_word}")

    # Combine everything
    return '\n'.join(title_and_author + modified_lines)

# The poem to transform; its first two lines are the title and the byline.
original_poem = """The Snow Man
by Wallace Stevens (1879-1955)
One must have a mind of winter
To regard the frost and the boughs
Of the pine-trees crusted with snow;
And have been cold a long time
To behold the junipers shagged with ice,
The spruces rough in the distant glitter
Of the January sun; and not to think
Of any misery in the sound of the wind,
In the sound of a few leaves,
Which is the sound of the land
Full of the same wind
That is blowing in the same bare place
For the listener, who listens in the snow,
And, nothing himself, beholds
Nothing that is not there and the nothing that is."""

# Original text first, then the per-line analysis, then the rewritten poem.
print("Original Poem:", "-" * 50, original_poem, sep="\n")

modified_poem = process_poem(original_poem)

print("\n\nFinal Modified Poem:", "-" * 50, modified_poem, sep="\n")

Original Poem:
--------------------------------------------------
The Snow Man
by Wallace Stevens (1879-1955)
One must have a mind of winter
To regard the frost and the boughs
Of the pine-trees crusted with snow;
And have been cold a long time
To behold the junipers shagged with ice,
The spruces rough in the distant glitter
Of the January sun; and not to think
Of any misery in the sound of the wind,
In the sound of a few leaves,
Which is the sound of the land
Full of the same wind
That is blowing in the same bare place
For the listener, who listens in the snow,
And, nothing himself, beholds
Nothing that is not there and the nothing that is.

Analyzing each line's possibilities:

Line 1: "One must have a mind of winter"
----------------------------------------
Top 10 possibilities for the last word:
1. 'their' - 32.77% confidence
2. 'its' - 13.43% confidence
3. 'his' - 13.04% confidence
4. 'your' - 2.48% confidence
5. 'a' - 2.02% confidence
6. 'our' - 1.69% confidence
7. 'her' - 1.62% c

In [11]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch
import torch.nn.functional as F

# Loaded (model, tokenizer) pairs keyed by checkpoint name, so that repeated
# calls (one per poem line) do not reload the weights every time.
_GPT2_CACHE = {}


def _load_gpt2(name='gpt2'):
    """Return the cached ``(model, tokenizer)`` pair for ``name``, loading it on first use."""
    if name not in _GPT2_CACHE:
        model = GPT2LMHeadModel.from_pretrained(name)
        model.eval()  # inference only
        _GPT2_CACHE[name] = (model, GPT2Tokenizer.from_pretrained(name))
    return _GPT2_CACHE[name]


def get_top_words(line, k=20):  # Get 20 to ensure we have enough valid options
    """Return the ``k`` most likely next tokens after ``line`` with their probabilities.

    Args:
        line: Context text whose continuation is predicted.
        k: Number of candidates to return (default 20).

    Returns:
        A list of ``k`` ``(token, percent)`` tuples, most likely first. Tokens
        are decoded and whitespace-stripped; ``percent`` is the probability
        multiplied by 100.
    """
    model, tokenizer = _load_gpt2()

    # Encode the input text
    inputs = tokenizer.encode(line, return_tensors='pt')

    # Forward pass without gradient tracking; element 0 of the output holds the logits.
    with torch.no_grad():
        predictions = model(inputs)[0]

    # Only the scores at the final position describe the *next* token.
    next_token_logits = predictions[0, -1, :]
    probabilities = F.softmax(next_token_logits, dim=0)

    top_probs, top_indices = torch.topk(probabilities, k=k)
    top_probs = top_probs.mul(100).tolist()  # Convert to percentages
    top_tokens = [tokenizer.decode(idx.item()).strip() for idx in top_indices]

    return list(zip(top_tokens, top_probs))

def process_poem(poem, rank=15):
    """Print the ranked last-word candidates for each poem line and rewrite the poem.

    Blank lines are dropped and every remaining line is stripped. The first
    two lines are treated as title and author and are copied through unchanged.
    For each later line, the words before the last one are passed to
    ``get_top_words``; the ranked candidates are printed and the one at
    position ``rank`` replaces the original last word. Punctuation attached
    to the original last word is discarded along with it.

    A line with only one word has no context to predict from, so it is kept
    as-is (and nothing is printed for it) instead of querying the model with
    empty input.

    Args:
        poem: The full poem text, lines separated by newlines.
        rank: 1-based position of the candidate to use (default 15).

    Returns:
        The title, author and rewritten lines joined by newlines.

    Raises:
        ValueError: If ``rank`` is outside the range of returned candidates.
    """
    lines = [line.strip() for line in poem.split('\n') if line.strip()]

    title_and_author = lines[:2]  # Keep title and author unchanged
    poem_lines = lines[2:]        # Process only the actual poem lines

    modified_lines = []
    print("\nAnalyzing each line's possibilities:")
    print("=" * 80)

    for i, line in enumerate(poem_lines, 1):
        words = line.split()
        if len(words) < 2:
            # No preceding words to condition on: leave the line untouched.
            modified_lines.append(line)
            continue

        # Get context (everything except last word)
        context = ' '.join(words[:-1])
        top_words = get_top_words(context)
        if not 1 <= rank <= len(top_words):
            raise ValueError(
                f"rank must be between 1 and {len(top_words)}, got {rank}"
            )

        # Display the candidates; the header reports how many were actually returned.
        print(f"\nLine {i}: \"{line}\"")
        print("-" * 40)
        print(f"Top {len(top_words)} possibilities for the last word:")
        for j, (word, prob) in enumerate(top_words, 1):
            print(f"{j}. '{word}' - {prob:.2f}% confidence")
            if j == rank:  # Highlight the chosen word
                print("   ^^^ This word will be used ^^^")

        # rank is 1-based, the list is 0-based.
        new_last_word = top_words[rank - 1][0]
        modified_lines.append(f"{context} {new_last_word}")

    # Combine everything
    return '\n'.join(title_and_author + modified_lines)

# The poem to transform; its first two lines are the title and the byline.
original_poem = """The Snow Man
by Wallace Stevens (1879-1955)
One must have a mind of winter
To regard the frost and the boughs
Of the pine-trees crusted with snow;
And have been cold a long time
To behold the junipers shagged with ice,
The spruces rough in the distant glitter
Of the January sun; and not to think
Of any misery in the sound of the wind,
In the sound of a few leaves,
Which is the sound of the land
Full of the same wind
That is blowing in the same bare place
For the listener, who listens in the snow,
And, nothing himself, beholds
Nothing that is not there and the nothing that is."""

# Original text first, then the per-line analysis, then the rewritten poem.
print("Original Poem:", "-" * 50, original_poem, sep="\n")

modified_poem = process_poem(original_poem)

print("\n\nFinal Modified Poem:", "-" * 50, modified_poem, sep="\n")

Original Poem:
--------------------------------------------------
The Snow Man
by Wallace Stevens (1879-1955)
One must have a mind of winter
To regard the frost and the boughs
Of the pine-trees crusted with snow;
And have been cold a long time
To behold the junipers shagged with ice,
The spruces rough in the distant glitter
Of the January sun; and not to think
Of any misery in the sound of the wind,
In the sound of a few leaves,
Which is the sound of the land
Full of the same wind
That is blowing in the same bare place
For the listener, who listens in the snow,
And, nothing himself, beholds
Nothing that is not there and the nothing that is.

Analyzing each line's possibilities:

Line 1: "One must have a mind of winter"
----------------------------------------
Top 20 possibilities for the last word:
1. 'their' - 32.77% confidence
2. 'its' - 13.43% confidence
3. 'his' - 13.04% confidence
4. 'your' - 2.48% confidence
5. 'a' - 2.02% confidence
6. 'our' - 1.69% confidence
7. 'her' - 1.62% c