In [None]:
!pip install transformers torch sentence-transformers numpy pandas



In [14]:
# Imports
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer, AutoModelForCausalLM, AutoTokenizer
import json
from typing import List, Dict, Optional, Tuple
import os
from datetime import datetime

# Pick the compute device: GPU when torch can see one, otherwise CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

Using device: cpu


In [15]:

def load_model(model_name: str = 'gpt2') -> Tuple[object, object]:
    """
    Load a causal language model and its tokenizer for poetry generation.

    Names containing 'gpt2' are loaded with the GPT-2 specific classes; any
    other name goes through the Auto* classes. The model is moved to the
    notebook-level `device` and switched to eval mode. If the tokenizer has
    no pad token, its EOS token is reused as the pad token.

    Args:
        model_name: 'gpt2', 'gpt2-medium', 'EleutherAI/gpt-neo-125M', etc.

    Returns:
        (model, tokenizer)
    """
    print(f"Loading {model_name}...")

    # Choose the loader classes once, then load tokenizer and model the same way
    is_gpt2_family = 'gpt2' in model_name
    tokenizer_cls = GPT2Tokenizer if is_gpt2_family else AutoTokenizer
    model_cls = GPT2LMHeadModel if is_gpt2_family else AutoModelForCausalLM

    tokenizer = tokenizer_cls.from_pretrained(model_name)
    model = model_cls.from_pretrained(model_name)

    # Inference only: place on the target device and switch to eval mode
    model.to(device)
    model.eval()

    # Fall back to EOS as the pad token when the tokenizer defines none
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    print(f"Model loaded successfully on {device}")
    return model, tokenizer

# Load the model (run this cell)
model, tokenizer = load_model('gpt2')  # change this to 'gpt2-medium' or 'EleutherAI/gpt-neo-125M' to try another checkpoint

Loading gpt2...
Model loaded successfully on cpu


In [16]:
# Prompt templates keyed by poetic form; `{theme}` is filled in by create_prompt.
POETRY_PROMPTS = {
    'haiku': "Write a haiku about {theme}:\n",
    'sonnet': "A sonnet about {theme}:\n",
    'free_verse': "A poem about {theme}:\n",
    'limerick': "A limerick about {theme}:\n",
    'couplet': "Write a rhyming couplet about {theme}:\n",
    'blank': "{theme}\n"  # Minimal prompt: only the theme itself, no instruction
}

def create_prompt(theme: str, form: str = 'free_verse') -> str:
    """
    Create a poetry prompt based on theme and form.

    The form name is matched leniently: surrounding whitespace and letter
    case are ignored, and spaces or hyphens are treated as underscores, so
    "Haiku", "free verse" and "free-verse" all resolve to their template.
    Forms that still do not match fall back to the 'free_verse' template.

    Args:
        theme: Text substituted for `{theme}` in the template.
        form: Key of POETRY_PROMPTS (leniently matched).

    Returns:
        The formatted prompt string.
    """
    # Normalise only string keys; anything else falls through to the default
    if isinstance(form, str):
        form = '_'.join(form.strip().lower().replace('-', ' ').split())
    template = POETRY_PROMPTS.get(form, POETRY_PROMPTS['free_verse'])
    return template.format(theme=theme)

# Test prompts
test_theme = "ocean waves"
# Iterate over the keys only: binding `_` at notebook top level would shadow
# IPython's last-output variable for the rest of the session.
for form_name in POETRY_PROMPTS:
    prompt = create_prompt(test_theme, form_name)
    print(f"{form_name:12s}: {prompt[:50]}...")

haiku       : Write a haiku about ocean waves:
...
sonnet      : A sonnet about ocean waves:
...
free_verse  : A poem about ocean waves:
...
limerick    : A limerick about ocean waves:
...
couplet     : Write a rhyming couplet about ocean waves:
...
blank       : ocean waves
...


In [17]:
def generate_candidates(
    model,
    tokenizer,
    prompt: str,
    num_candidates: int = 10,
    max_length: int = 50,
    temperature: float = 0.9,
    top_p: float = 0.95,
    top_k: int = 50,
    repetition_penalty: float = 1.2,
    seed: Optional[int] = None
) -> List[str]:
    """
    Generate multiple poetry candidates using nucleus sampling.

    Candidates are sampled one at a time. The prompt is removed from each
    result by dropping the prompt *tokens* before decoding, so the returned
    text is correct even when decoding would not reproduce the prompt
    character-for-character.

    Args:
        model: Language model exposing `.generate()` and `.device`
        tokenizer: Tokenizer matching the model
        prompt: Input prompt text
        num_candidates: Number of candidates to generate
        max_length: Maximum number of tokens to generate after the prompt
        temperature: Sampling temperature (higher = more random)
        top_p: Nucleus sampling threshold
        top_k: Top-k sampling limit
        repetition_penalty: Penalty for repeating tokens
        seed: Random seed for reproducibility (seeds torch once, before the
            first candidate, so candidates within a call still differ)

    Returns:
        List of generated text strings (prompt removed, whitespace stripped)
    """
    if seed is not None:
        torch.manual_seed(seed)

    # Encode the prompt and place it on the same device as the model
    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(model.device)
    # Single unpadded prompt: every position is a real token. Passing the mask
    # explicitly avoids ambiguity when the pad token is the same as EOS.
    attention_mask = torch.ones_like(input_ids)
    prompt_len = input_ids.shape[1]

    candidates = []

    print(f"Generating {num_candidates} candidates...")

    with torch.no_grad():
        for i in range(num_candidates):
            # Generate with sampling
            output = model.generate(
                input_ids,
                attention_mask=attention_mask,
                max_length=prompt_len + max_length,
                temperature=temperature,
                top_p=top_p,
                top_k=top_k,
                repetition_penalty=repetition_penalty,
                do_sample=True,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id,
                num_return_sequences=1
            )

            # Keep only the newly generated tokens, then decode and clean up
            new_tokens = output[0][prompt_len:]
            generated_text = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
            candidates.append(generated_text)

            if (i + 1) % 5 == 0:
                print(f"  Generated {i + 1}/{num_candidates}")

    return candidates


In [18]:
# Experiment 1: Basic generation -- five haiku candidates about moonlight
theme, form = "moonlight", "haiku"
prompt = create_prompt(theme, form)

print(f"Prompt: {prompt}")

# Fixed seed so this baseline run can be reproduced
candidates_basic = generate_candidates(
    model,
    tokenizer,
    prompt,
    num_candidates=5,
    max_length=30,
    temperature=0.8,
    top_p=0.9,
    seed=42,
)

print("\n Generated Candidates (Basic) ")
for i, text in enumerate(candidates_basic, start=1):
    print(f"\n[{i}] {text}")

Prompt: Write a haiku about moonlight:

Generating 5 candidates...
  Generated 5/5

 Generated Candidates (Basic) 

[1] "I've been searching the internet for more lunar resources, and am finally here. I want to spend an afternoon reading this book."

[2] Luna's mind goes to sleep. The dream is too long, her life will never end as it did before… A light shines on the world

[3] There is no space in the sky; there are only shadows. The darkness of light comes from below, and its rays cannot reach you because it does

[4] There are countless places to view the Moon in one sitting, from your bedroom window through our family room windows. You can always count on us for more

[5] This poem is not even for the average joe. This isn't because it's an actual story or that I've never written one before, but


In [19]:
# Experiment with different temperatures
print("TEMPERATURE EXPERIMENTS")

temperatures = [0.5, 0.8, 1.0, 1.2]
# Dedicated names for this experiment so Experiment 1's `theme` / `prompt`
# globals stay intact for any later cell that still reads them.
temp_theme = "autumn leaves"
temp_prompt = create_prompt(temp_theme, "free_verse")

# Maps each temperature to the list of candidates generated at that setting
temp_results = {}

for temp in temperatures:
    print(f"\n Temperature = {temp} ")
    candidates = generate_candidates(
        model, tokenizer, temp_prompt,
        num_candidates=3,
        max_length=40,
        temperature=temp,
        top_p=0.95,
        seed=42  # same seed at every temperature so only the temperature varies
    )
    temp_results[temp] = candidates

    # Show at most the first 100 characters of each candidate
    for i, text in enumerate(candidates, 1):
        print(f"[{i}] {text[:100]}")


TEMPERATURE EXPERIMENTS

 Temperature = 0.5 
Generating 3 candidates...
[1] "I've been searching for a place where I can sing. It's hard to find one, but it has always seemed l
[2] "The flower of the spring will fall from me, and I shall be as a dead man in his grave. A cold wind 
[3] I am a flower, but I have no sense of smell. My body is not my own; it's the garden-tree that gives 

 Temperature = 0.8 
Generating 3 candidates...
[1] "They've been around forever. And the trees they call us are like a new dawn... They don't get here 
[2] As I write this, the moon is in its summer sunlight. A beautiful light from a spring garden blooms b
[3] So, I have two options for this fall. For the first one is to play a musical tune (I'm going with "D

 Temperature = 1.0 
Generating 3 candidates...
[1] "They've been around here in the past, they'd go up like a flying saucer and it's really hard. I alw
[2] As I write this, the moon is in its summer sunlight and light. At dawn a spring leaf grows un

In [20]:
def save_candidates(candidates: List[str], metadata: Dict, filename: str):
    """
    Save generated candidates with metadata as JSON under 'outputs/'.

    The file contains the given metadata, an ISO-format timestamp taken at
    save time, and the candidate list. Parent directories are created as
    needed, so `filename` may include sub-folders (e.g. 'run1/haiku.json').
    The file is written as UTF-8 with non-ASCII characters kept readable.

    Args:
        candidates: Generated texts to store.
        metadata: JSON-serialisable description of the generation run.
        filename: File name (optionally with sub-folders) inside 'outputs'.
    """
    filepath = os.path.join('outputs', filename)
    # Create the whole parent path, not just 'outputs', so nested names work
    os.makedirs(os.path.dirname(filepath), exist_ok=True)

    data = {
        'metadata': metadata,
        'timestamp': datetime.now().isoformat(),
        'candidates': candidates
    }

    # Explicit encoding: the platform default is not guaranteed to be UTF-8
    with open(filepath, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)

    print(f"Saved {len(candidates)} candidates to {filepath}")

# Save results
# The theme and form are spelled out rather than read from the `theme` / `form`
# globals: other cells reassign those names, which would record the wrong
# theme for `candidates_basic` (generated for a moonlight haiku).
metadata = {
    'theme': 'moonlight',
    'form': 'haiku',
    'model': 'gpt2',
    'temperature': 0.8,
    'top_p': 0.9,
    'num_candidates': len(candidates_basic)
}

save_candidates(candidates_basic, metadata, 'generation_test_001.json')

Saved 5 candidates to outputs/generation_test_001.json


In [21]:
# Batch Generation for Multiple Themes
# Each entry is a (theme, form) pair passed to create_prompt
themes_to_test = [
    ("ocean waves", "haiku"),
    ("lost love", "sonnet"),
    ("winter storm", "free_verse"),
    ("dancing fireflies", "couplet")
]

# Keyed by "<theme>_<form>"; each value holds the theme, form and candidates
all_results = {}

print("BATCH GENERATION FOR MULTIPLE THEMES")

for theme, form in themes_to_test:
    print(f"\n Theme: {theme} | Form: {form} ")
    prompt = create_prompt(theme, form)

    candidates = generate_candidates(
        model, tokenizer, prompt,
        num_candidates=5,
        max_length=40,
        temperature=0.85,
        seed=None  # Random each time
    )

    all_results[f"{theme}_{form}"] = {
        'theme': theme,
        'form': form,
        'candidates': candidates
    }

    # Show first 2 candidates (truncated to 80 characters)
    for i, text in enumerate(candidates[:2], 1):
        print(f"  [{i}] {text[:80]}")

# Save all results
# Make sure the target folder exists instead of relying on an earlier cell
# having created it.
os.makedirs('outputs', exist_ok=True)
with open('outputs/batch_generation_results.json', 'w', encoding='utf-8') as f:
    json.dump(all_results, f, indent=2)

print("\nAll batch results saved to outputs/batch_generation_results.json")


BATCH GENERATION FOR MULTIPLE THEMES

 Theme: ocean waves | Form: haiku 
Generating 5 candidates...
  Generated 5/5
  [1] In the world of Japanese poetry, there's only one kind that works best. In this 
  [2] I've just finished reading some beautiful, beautifully done Haikyu Hana. It's an

 Theme: lost love | Form: sonnet 
Generating 5 candidates...
  Generated 5/5
  [1] One family of children was devastated when they discovered their loved one disap
  [2] I never thought the worst thing that could happen to my life is for someone I kn

 Theme: winter storm | Form: free_verse 
Generating 5 candidates...
  Generated 5/5
  [1] This is like a good season's wind, my little boy. I have just received this one…
  [2] As I walk through the woods, a faint breeze blows down to me and whispers "Winte

 Theme: dancing fireflies | Form: couplet 
Generating 5 candidates...
  Generated 5/5
  [1] The next day you see all the ladies in some new set of clothes who are on their 
  [2] "When you ask them 

In [22]:
# Basic quality metrics for generated candidates
def analyze_candidates(candidates: List[str]) -> Dict:
    """
    Compute simple descriptive statistics for a list of generated texts.

    Returns a dict with:
        count: number of candidates
        avg_length: mean length in characters
        avg_words: mean number of whitespace-separated words
        unique_candidates: number of distinct candidate strings
        avg_unique_words: mean number of distinct lower-cased word tokens
            (regex word matches) per candidate

    All averages are 0 when `candidates` is empty.
    """
    import re

    total = len(candidates)
    word_pattern = re.compile(r'\b\w+\b')

    # Distinct lower-cased word tokens in each candidate
    distinct_per_text = [
        len(set(word_pattern.findall(text.lower())))
        for text in candidates
    ]

    if candidates:
        mean_chars = sum(map(len, candidates)) / total
        mean_words = sum(len(text.split()) for text in candidates) / total
    else:
        mean_chars = 0
        mean_words = 0

    if distinct_per_text:
        mean_distinct = sum(distinct_per_text) / len(distinct_per_text)
    else:
        mean_distinct = 0

    return {
        'count': total,
        'avg_length': mean_chars,
        'avg_words': mean_words,
        'unique_candidates': len(set(candidates)),
        'avg_unique_words': mean_distinct,
    }

# Analyze the candidates from the basic experiment
metrics = analyze_candidates(candidates_basic)
print("\n Generation Quality Metrics ")
for key, value in metrics.items():
    # Floats get two decimals; other values (counts) are printed as-is
    if isinstance(value, float):
        print(f"{key:20s}: {value:.2f}")
    else:
        print(f"{key:20s}: {value}")


 Generation Quality Metrics 
count               : 5
avg_length          : 133.00
avg_words           : 25.20
unique_candidates   : 5
avg_unique_words    : 25.20


In [23]:
def interactive_generate():
    """
    Interactive poetry generation for testing.

    Repeatedly asks for a theme, a form and a candidate count, generates
    candidates with the notebook-level `model` / `tokenizer`, and prints
    them. Entering 'quit' as the theme ends the loop.

    Input handling:
        - an empty theme is rejected and asked for again
        - an empty form defaults to 'free_verse'
        - a missing, non-numeric or non-positive count falls back to 5
    """
    default_count = 5
    # List every supported form, taken from the prompt table itself
    form_choices = "/".join(POETRY_PROMPTS)

    print("INTERACTIVE POETRY GENERATION")
    print("Enter 'quit' to exit\n")

    while True:
        theme = input("Enter theme (or 'quit'): ").strip()
        if theme.lower() == 'quit':
            break
        if not theme:
            print("Theme cannot be empty.")
            continue

        form = input(f"Enter form ({form_choices}): ").strip() or 'free_verse'
        raw_count = input(f"Number of candidates (default {default_count}): ").strip()

        # Catch only the parse failure; a bare except would also swallow
        # KeyboardInterrupt and make the loop hard to interrupt.
        try:
            num = int(raw_count) if raw_count else default_count
        except ValueError:
            num = default_count
        if num < 1:
            num = default_count

        prompt = create_prompt(theme, form)
        print(f"\nGenerating {num} candidates for '{theme}'...")

        candidates = generate_candidates(
            model, tokenizer, prompt,
            num_candidates=num,
            max_length=50,
            temperature=0.85
        )

        print("\n Results ")
        for i, text in enumerate(candidates, 1):
            print(f"\n[{i}] {text}")


# Start the interactive session; this blocks on input() until 'quit' is entered.
interactive_generate()

INTERACTIVE POETRY GENERATION
Enter 'quit' to exit

Enter theme (or 'quit'): dancing love
Enter form (haiku/sonnet/free_verse/couplet/blank): free verse
Number of candidates (default 5): 5

Generating 5 candidates for 'dancing love'...
Generating 5 candidates...
  Generated 5/5

 Results 

[1] "There are many words in English that refer to the relationship of woman and man, but one is often more than what we know. We say no not so much as 'to be loved', or simply how she feels." -Linda Thompson (

[2] "I've never met a guy who's less beautiful than me. He'd just go out and do it like he does to other girls."

 "He said I could be any of these different, but they all wanted him for himself! They

[3] So long as I am free of all the pleasures that you have made me, it is not worth my while to sing or dance with anybody. No matter how much better a song its sound can be; no wonder then why people take pleasure in singing

[4] The story begins at a school dance, and I am introduced to an o