In [6]:
############################################
# 0. MOUNT GOOGLE DRIVE
############################################
# Colab-only step: mount Google Drive at /content/drive so that files stored
# on Drive can be opened through ordinary filesystem paths.
from google.colab import drive
drive.mount('/content/drive')

import os
import torch
import re
from transformers import GPT2Tokenizer, GPT2LMHeadModel

############################################
# 1. FUNCTION: LOAD NOUNS FROM FILE
############################################
def load_nouns_from_file(filepath):
    """
    Load a noun list from a UTF-8 text file holding one noun per line.

    Surrounding whitespace is stripped from every line and lines that are
    empty after stripping are dropped. The remaining entries are returned
    as a list sorted in ascending (default string) order.
    """
    with open(filepath, "r", encoding="utf-8") as handle:
        stripped_lines = (raw_line.strip() for raw_line in handle)
        # Blank lines become empty strings after stripping; filter them out.
        entries = [entry for entry in stripped_lines if entry]
    entries.sort()
    return entries

############################################
# 2. Specify the Path to nouns.txt in A2 Folder
############################################
# Example: /content/drive/MyDrive/A2/nouns.txt
filepath = "/content/drive/MyDrive/A2/nouns.txt"

# Debug aid: report whether the noun file is visible before reading it.
print(
    f"Found nouns.txt at: {filepath}"
    if os.path.exists(filepath)
    else f"Could NOT find file at: {filepath}"
)

############################################
# 3. Load the Nouns from the File
############################################
# The loader returns the nouns sorted; show the size and a short preview.
noun_list = load_nouns_from_file(filepath)
print(f"Loaded {len(noun_list)} nouns. First 5 nouns:", noun_list[:5])

############################################
# 4. GPT-2 Setup (Optional for debugging)
############################################
model_name = "gpt2"

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

# Move the weights to the chosen device and switch to inference mode.
model.to(device)
model.eval()

def get_topk_next_tokens(model, tokenizer, prompt, top_k=10):
    """
    Debug function: Returns top-k next tokens for the model after `prompt`.
    (Not used in final output, just for potential internal logging.)

    Args:
        model: callable language model; `model(input_ids)` must return an
            object with a `.logits` tensor indexed as [batch, position, vocab],
            and `model.parameters()` must yield at least one tensor.
        tokenizer: object providing `encode(prompt, return_tensors="pt")`
            and `decode(ids, skip_special_tokens=..., clean_up_tokenization_spaces=...)`.
        prompt: text to condition on.
        top_k: how many candidates to return; values larger than the
            vocabulary size are clamped to the vocabulary size.

    Returns:
        A list of (token_string, probability) tuples ordered from most to
        least probable. Token strings have surrounding whitespace stripped.
    """
    # Send the input to the device the model actually lives on instead of
    # relying on a module-level `device` global that may not match `model`.
    model_device = next(model.parameters()).device
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model_device)

    with torch.no_grad():
        outputs = model(input_ids)

    # Distribution over the vocabulary for the position after the last token.
    logits = outputs.logits[0, -1, :]
    probs = torch.softmax(logits, dim=-1)

    # torch.topk raises if k exceeds the dimension size, so clamp it.
    k = min(top_k, probs.size(-1))
    top_tokens = torch.topk(probs, k)

    results = []
    for token_id, prob_val in zip(top_tokens.indices.tolist(), top_tokens.values.tolist()):
        token_str = tokenizer.decode(
            [token_id],
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True
        )
        results.append((token_str.strip(), prob_val))
    return results

############################################
# 5. THE TRANSFORMATION FUNCTIONS
############################################
def p_plus_n_transform(line, noun_list, n=7):
    """
    For each word that appears in 'noun_list' (case-insensitive),
    replace it with the noun that is n entries ahead in the list (wrapping around).

    The line is split on whitespace. Non-word characters at the start and
    at the end of each token (quotes, commas, brackets, ...) are set aside
    before the lookup and re-attached afterwards, so tokens such as
    '"Hey,' or 'wire,' are matched on their bare word. If a noun occurs more
    than once in 'noun_list', its first position is used. A replacement is
    capitalized when the original word started with an uppercase letter.
    Tokens are re-joined with single spaces.

    Args:
        line: the text line to transform.
        noun_list: sequence of nouns; entries are compared against the
            lowercased word, so they are expected to be lowercase.
        n: offset into 'noun_list' (may be negative; wraps via modulo).

    Returns:
      - new_line: the transformed line
      - changed_nouns: list of (original_word, chosen_word) pairs
    """
    # Build the lookup once per call: O(1) per word instead of two linear
    # scans ('in' + '.index'). setdefault keeps the FIRST position of a
    # duplicated noun, which is what list.index would have returned.
    first_index = {}
    for position, noun in enumerate(noun_list):
        first_index.setdefault(noun, position)

    # leading punctuation, core word, trailing punctuation; always matches.
    token_parts = re.compile(r"^(\W*)(.*?)(\W*)$")

    new_words = []
    changed_nouns = []

    for word in line.split():
        leading, main_word, trailing = token_parts.match(word).groups()

        index = first_index.get(main_word.lower())
        if index is None:
            # Not a known noun: keep the token exactly as it was.
            new_words.append(word)
            continue

        # Move n steps forward in noun_list, wrapping around
        new_noun = noun_list[(index + n) % len(noun_list)]

        # Preserve capitalization (slice is safe even for an empty word)
        if main_word[:1].isupper():
            new_noun = new_noun.capitalize()

        new_words.append(leading + new_noun + trailing)
        changed_nouns.append((main_word, new_noun))

    return " ".join(new_words), changed_nouns

def generate_pn_version(poem_lines, noun_list, n=7):
    """
    Run p_plus_n_transform over every line of a poem.

    Returns a pair:
      - transformed_lines: one transformed string per input line, in order
      - all_changes: the (original, chosen) pairs of all lines, concatenated
        in line order
    """
    per_line = [p_plus_n_transform(text, noun_list, n=n) for text in poem_lines]

    transformed_lines = [new_text for new_text, _ in per_line]
    # Flatten the per-line change lists into a single poem-wide list.
    all_changes = [pair for _, pairs in per_line for pair in pairs]

    return transformed_lines, all_changes

############################################
# 6. Sample Poem Lines
############################################
# Input text for the transformation, one list element per line of the poem.
# Each element is passed to the transform on its own; embedded double quotes
# are escaped because the literals themselves are double-quoted.
poem_lines = [
    "Like a bird on the wire,",
    "like a drunk in a midnight choir",
    "I have tried in my way to be free.",
    "Like a worm on a hook,",
    "like a knight from some old fashioned book",
    "I have saved all my ribbons for thee.",
    "If I, if I have been unkind,",
    "I hope that you can just let it go by.",
    "If I, if I have been untrue",
    "I hope you know it was never to you.",
    "Like a baby, stillborn,",
    "like a beast with his horn",
    "I have torn everyone who reached out for me.",
    "But I swear by this song",
    "and by all that I have done wrong",
    "I will make it all up to thee.",
    "I saw a beggar leaning on his wooden crutch,",
    "he said to me, \"You must not ask for so much.\"",
    "And a pretty woman leaning in her darkened door,",
    "she cried to me, \"Hey, why not ask for more?\"",
    "Oh like a bird on the wire,",
    "like a drunk in a midnight choir",
    "I have tried in my way to be free."
]

############################################
# 7. Generate P+7 and P+X
############################################
# Build both variants up front: the classic offset of 7 and a custom offset.
p7_lines, p7_changes = generate_pn_version(poem_lines, noun_list, n=7)
x_value = 10
px_lines, px_changes = generate_pn_version(poem_lines, noun_list, n=x_value)

############################################
# 8. Write P+7 to a File
############################################
def _write_pn_file(n, lines, changes):
    """
    Write one P+n result to "P+<n>.txt" in the current directory.

    The file holds a title line, the transformed poem lines, and a summary
    of every (original -> chosen) noun replacement. A confirmation message
    is printed afterwards.

    Args:
        n: the offset used for the transformation (appears in the filename,
            the title, and the console message).
        lines: transformed poem lines, written one per line.
        changes: iterable of (original, chosen) pairs for the summary.

    Returns:
        The name of the file that was written.
    """
    filename = f"P+{n}.txt"
    with open(filename, "w", encoding="utf-8") as f:
        f.write(f"=== P+{n} Version of 'Bird on the Wire' ===\n\n")
        # Poem lines
        for line in lines:
            f.write(line + "\n")
        # Summaries
        f.write("\n--- Changed Nouns (Original -> Chosen) ---\n")
        for orig, chosen in changes:
            f.write(f"{orig} -> {chosen}\n")

    print(f"P+{n} transformation saved to {filename}")
    return filename


_write_pn_file(7, p7_lines, p7_changes)

############################################
# 9. Write P+X to a File
############################################
px_filename = _write_pn_file(x_value, px_lines, px_changes)

############################################
# 10. Optional: Print a sample to console
############################################
# Show the first five lines of each variant, each under its own heading.
for label, sample_source in (("P+7", p7_lines), (f"P+{x_value}", px_lines)):
    print(f"\nSample {label} lines:\n")
    for sample_line in sample_source[:5]:
        print(sample_line)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Found nouns.txt at: /content/drive/MyDrive/A2/nouns.txt
Loaded 55191 nouns. First 5 nouns: ['a', 'aa', 'aaa', 'aachen', 'aalborg']
P+7 transformation saved to P+7.txt
P+10 transformation saved to P+10.txt

Sample P+7 lines:

Lilac aalto birdhouse on the wirework,
lilac aalto druthers inactiveness aalto midshipman chokepoint
Iambus hawaii tried inactiveness my wayside to beacon freehold.
Lilac aalto worry on aalto hookup,
lilac aalto knitter from some oldwench fashioned bookend

Sample P+10 lines:

Liliales aardwolf birdnest on the wiring,
liliales aardwolf dryad inadequateness aardwolf midsummer choking
Ibadan hawfinch tried inadequateness my wbn to beadle freelance.
Liliales aardwolf worse on aardwolf hooligan,
liliales aardwolf knitwork from some oleaceae fashioned bookie
