In [26]:
#Submitted by Jerwin Cabaneros
#Student ID 40204404

"""
P+7 Oulipian Language Modeling with GPT-2
Transforms "The Snow Man" by Wallace Stevens by replacing the last word
of each line with the word having the seventh-highest probability.
"""

from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch
import re

# Initialize GPT-2 model and tokenizer
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)
model.eval()

# The original poem
poem = """One must have a mind of winter
To regard the frost and the boughs
Of the pine-trees crusted with snow;
And have been cold a long time
To behold the junipers shagged with ice,
The spruces rough in the distant glitter
Of the January sun; and not to think
Of any misery in the sound of the wind,
In the sound of a few leaves,
Which is the sound of the land
Full of the same wind
That is blowing in the same bare place
For the listener, who listens in the snow,
And, nothing himself, beholds
Nothing that is not there and the nothing that is."""

def get_top_k_predictions(context, k=10):
    """
    Get top k predicted words given a context using GPT-2.
    Filters out incomplete tokens and non-word predictions.

    Args:
        context: The text context to predict from
        k: Number of top predictions to return

    Returns:
        List of tuples (word, probability)
    """
    # Encode the context
    input_ids = tokenizer.encode(context, return_tensors='pt')

    # Get model predictions
    with torch.no_grad():
        outputs = model(input_ids)
        predictions = outputs.logits[0, -1, :]

    # Get probabilities
    probs = torch.softmax(predictions, dim=0)

    # Get MORE predictions than needed to filter out invalid ones
    top_k_probs, top_k_indices = torch.topk(probs, min(k * 3, 100))

    # Convert to words and filter
    results = []
    for prob, idx in zip(top_k_probs, top_k_indices):
        word = tokenizer.decode([idx]).strip()

        # Filter out invalid tokens:
        # 1. Empty strings
        # 2. Tokens that start with Ġ (GPT-2's space marker)
        # 3. Single punctuation marks like 's, 't, etc.
        # 4. Tokens that are just special characters
        # 5. Very short fragments

        # Remove the token if it's just punctuation/apostrophes
        clean_word = word.replace("'", "").replace("-", "").replace(",", "").replace(".", "")

        if (word and
            not word.startswith('Ġ') and
            len(clean_word) >= 2 and  # At least 2 letters after removing punctuation
            clean_word.isalnum() and  # Must have alphanumeric characters
            not word.startswith("'") and  # Don't start with apostrophe ('s, 't, etc.)
            word[0].isalpha()):  # Must start with a letter
            results.append((word, prob.item()))

            # Stop when we have enough valid words
            if len(results) >= k:
                break

    return results

def replace_last_word_with_rank(line, rank=7):
    """
    Replace the last word of a line with the word at the given rank
    in GPT-2's predictions.

    Args:
        line: The input line of text
        rank: The rank of prediction to use (1-indexed, so 7 means 7th highest)

    Returns:
        Modified line with last word replaced
    """
    # Remove trailing punctuation and whitespace
    line = line.rstrip()
    original_line = line

    # Extract punctuation at the end
    punctuation = ""
    while line and line[-1] in ".,;:!?":
        punctuation = line[-1] + punctuation
        line = line[:-1]

    # Split into words
    words = line.split()
    if not words:
        return original_line

    # Get context (all words except the last one)
    context = " ".join(words[:-1])
    if not context:
        context = ""

    # Get top predictions
    predictions = get_top_k_predictions(context, k=max(rank, 10))

    # Get the word at the specified rank (convert to 0-indexed)
    if rank <= len(predictions):
        new_word = predictions[rank - 1][0]
    else:
        # If rank is too high, use the last available prediction
        new_word = predictions[-1][0]

    # Reconstruct the line
    if context:
        new_line = context + " " + new_word + punctuation
    else:
        new_line = new_word + punctuation

    return new_line

def transform_poem(poem_text, rank=7, verbose=False):
    """
    Transform entire poem using P+rank technique.

    Args:
        poem_text: The original poem as a string
        rank: The rank of prediction to use
        verbose: If True, print line-by-line transformations

    Returns:
        Transformed poem as a string
    """
    lines = poem_text.strip().split('\n')
    transformed_lines = []

    for line in lines:
        if line.strip():  # Only process non-empty lines
            transformed = replace_last_word_with_rank(line, rank)
            transformed_lines.append(transformed)
            if verbose:
                print(f"Original:    {line}")
                print(f"Transformed: {transformed}")
                print()
        else:
            transformed_lines.append(line)

    return '\n'.join(transformed_lines)

In [27]:
# ============================================================================
# GENERATE P+7 VERSION
# ============================================================================

print("Generating P+7 transformation...")
p7_poem = transform_poem(poem, rank=7, verbose=False)

print("\n" + "=" * 70)
print("FULL P+7 POEM")
print("=" * 70)
print(p7_poem)
print("=" * 70)

# Save P+7 version
with open('P+7.txt', 'w') as f:
    f.write("The Snow Man (P+7 Version)\n")
    f.write("Original by Wallace Stevens\n")
    f.write("Transformed using GPT-2 Language Model\n\n")
    f.write(p7_poem)

Generating P+7 transformation...

FULL P+7 POEM
One must have a mind of this
To regard the frost and the death
Of the pine-trees crusted with gold;
And have been cold a long few
To behold the junipers shagged with red,
The spruces rough in the distant night
Of the January sun; and not to have
Of any misery in the sound of the sound,
In the sound of a few shots,
Which is the sound of the voice
Full of the same story
That is blowing in the same bare footed
For the listener, who listens in the following,
And, nothing himself, not
Nothing that is not there and the nothing that you.


In [39]:
# ============================================================================
# GENERATE P+56 VERSION
# ============================================================================

print("Generating P+56 transformation...")
p56_poem = transform_poem(poem, rank=56, verbose=False)

print("\n" + "=" * 70)
print("FULL P+56 POEM")
print("=" * 70)
print(p56_poem)
print("=" * 70)

# Save P+56 version
with open('P+56.txt', 'w') as f:
    f.write("The Snow Man (P+56 Version)\n")
    f.write("Original by Wallace Stevens\n")
    f.write("Transformed using GPT-2 Language Model\n\n")
    f.write(p56_poem)



Generating P+56 transformation...

FULL P+56 POEM
One must have a mind of hard
To regard the frost and the mist
Of the pine-trees crusted with its;
And have been cold a long place
To behold the junipers shagged with hair,
The spruces rough in the distant land
Of the January sun; and not to disturb
Of any misery in the sound of the house,
In the sound of a few hands,
Which is the sound of the words
Full of the same reason
That is blowing in the same bare earth
For the listener, who listens in the new,
And, nothing himself, ever
Nothing that is not there and the nothing that nothing.
