<a href="https://colab.research.google.com/github/alqalamramadaantimer/shakespeare-llm-generator/blob/main/GenaShakespeare-llm-generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Step 1: Install PyTorch and NumPy
!pip install -q torch numpy

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import random

# Set seeds for reproducibility.
# `random` and NumPy are imported above, so seed them alongside torch.
# torch.manual_seed stays last so the cell still displays the returned
# torch Generator as its output.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m59.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m51.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m36.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

<torch._C.Generator at 0x7be64432b090>

In [None]:
# Step 2: Load the custom Shakespeare poetry file
# The whole corpus is read into memory as one string; later cells build the
# character vocabulary and the training tensor from `text`.
with open("/content/shakespeare_poetry.txt", mode='r', encoding='utf-8') as poetry_file:
    text = poetry_file.read()

# Header line followed by the first 500 characters as a quick sanity check.
print("📄 Dataset sample:", text[:500], sep="\n")


📄 Dataset sample:
SONNET 18  
Shall I compare thee to a summer’s day?  
Thou art more lovely and more temperate:  
Rough winds do shake the darling buds of May,  
And summer’s lease hath all too short a date:  
Sometime too hot the eye of heaven shines,  
And often is his gold complexion dimmed;  
And every fair from fair sometime declines,  
By chance or nature’s changing course untrimmed;  
But thy eternal summer shall not fade,  
Nor lose possession of that fair thou owest;  
Nor shall Death brag thou wanderes


In [None]:
# Step 3: Tokenization and vocabulary mapping
# Character-level vocabulary: every distinct character in `text`, sorted so the
# id assigned to each character is deterministic for a given corpus.
chars = sorted(set(text))
vocab_size = len(chars)

stoi = {ch: i for i, ch in enumerate(chars)}  # character -> integer id
itos = {i: ch for ch, i in stoi.items()}      # integer id -> character


def encode(s):
    """Return the list of integer ids for the characters of `s`.

    Raises:
        KeyError: if `s` contains a character that is not in `stoi`.
    """
    return [stoi[c] for c in s]


def decode(l):
    """Return the string formed by mapping each id in `l` back through `itos`."""
    return ''.join(itos[i] for i in l)


# The full corpus as a 1-D tensor of ids; batches are sliced out of this.
data = torch.tensor(encode(text), dtype=torch.long)
block_size = 64  # More context for better generation


In [None]:
# Step 4: Generate training batches
def get_batch(batch_size=16):
    """Draw a random batch of (input, target) windows from `data`.

    Each input row is `block_size` consecutive ids starting at a random
    offset; the matching target row is the same window shifted one position
    to the right.

    Args:
        batch_size: Number of windows to sample.

    Returns:
        Tuple (x, y) of stacked tensors, each holding `batch_size` rows of
        `block_size` ids.
    """
    # Upper bound is exclusive, so the shifted target window never runs past
    # the end of `data`.
    window_starts = torch.randint(len(data) - block_size, (batch_size,))
    inputs, targets = [], []
    for start in window_starts:
        stop = start + block_size
        inputs.append(data[start:stop])
        targets.append(data[start + 1:stop + 1])
    return torch.stack(inputs), torch.stack(targets)


In [None]:
# Step 5: Define the model
class MiniGPT(nn.Module):
    """Character-level language model: embedding -> single-layer LSTM -> linear head.

    Despite the name, the sequence model here is an `nn.LSTM`, not a
    transformer. The vocabulary size is read from the module-level
    `vocab_size` at construction time.

    Args:
        embed_dim: Size of each character embedding (default 32).
        hidden_dim: Size of the LSTM hidden state (default 64).
    """

    def __init__(self, embed_dim=32, hidden_dim=64):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embed_dim)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, vocab_size)

    def forward(self, x, targets=None):
        """Compute next-character logits and, optionally, the training loss.

        Args:
            x: Integer id tensor of shape (B, T).
            targets: Optional id tensor with B*T elements (e.g. shape (B, T)).

        Returns:
            Tuple (logits, loss). Without `targets`, logits has shape
            (B, T, vocab_size) and loss is None. With `targets`, logits is
            flattened to (B*T, vocab_size) and loss is the mean cross-entropy.
            Note the logits shape differs between the two cases.
        """
        x = self.embed(x)
        out, _ = self.lstm(x)
        logits = self.fc(out)

        loss = None
        if targets is not None:
            B, T, C = logits.shape
            # reshape (not view) so non-contiguous targets, e.g. a transposed
            # tensor, are accepted instead of raising a stride error.
            logits = logits.reshape(B * T, C)
            targets = targets.reshape(B * T)
            loss = F.cross_entropy(logits, targets)
        return logits, loss


In [None]:
# Step 6: Train the model
# A fresh model is optimised with Adam for 1000 randomly sampled batches;
# the loss is reported every 100 steps.
model = MiniGPT()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

for step in range(1000):
    # Clear gradients left over from the previous step before the new
    # forward/backward pass.
    optimizer.zero_grad()
    xb, yb = get_batch()
    logits, loss = model(xb, yb)
    loss.backward()
    optimizer.step()

    # Only report on every 100th step.
    if step % 100 != 0:
        continue
    print(f"Step {step} | Loss: {loss.item():.4f}")


Step 0 | Loss: 4.1023
Step 100 | Loss: 1.8050
Step 200 | Loss: 1.1472
Step 300 | Loss: 0.8364
Step 400 | Loss: 0.6430
Step 500 | Loss: 0.4650
Step 600 | Loss: 0.3678
Step 700 | Loss: 0.3697
Step 800 | Loss: 0.2942
Step 900 | Loss: 0.2279


In [None]:
# STEP 7: Text generator function
def generate(model, start_text="Shall I compare thee", max_new_tokens=200):
    """Sample text from `model`, one character at a time, continuing `start_text`.

    Args:
        model: Callable returning `(logits, loss)` for an id tensor of shape
            (1, T); it is switched to eval mode and left in that mode.
        start_text: Prompt; every character must be encodable by `encode`.
        max_new_tokens: Number of characters to append to the prompt.

    Returns:
        The prompt followed by the sampled characters, as one string.
    """
    model.eval()
    idx = torch.tensor(encode(start_text), dtype=torch.long)[None, :]
    # Sampling needs no gradients; without no_grad every step would record an
    # autograd graph for the forward pass.
    with torch.no_grad():
        for _ in range(max_new_tokens):
            # Feed at most the last `block_size` ids as context.
            logits, _ = model(idx[:, -block_size:])
            next_logits = logits[:, -1, :]
            probs = F.softmax(next_logits, dim=-1)
            # Draw the next id from the predicted distribution (not argmax).
            next_id = torch.multinomial(probs, num_samples=1)
            idx = torch.cat([idx, next_id], dim=1)
    return decode(idx[0].tolist())


In [None]:
# STEP 8: Generate text and check for understandable English
def generate_valid_poetry(model, start_text="Shall I compare thee", max_new_tokens=200, threshold=60):
    """Generate text with `generate` and report its Flesch reading-ease score.

    The generated text is always printed; only the return value depends on
    the score.

    NOTE(review): the recorded run of this notebook scored visibly garbled
    text above the default threshold, so a passing score does not show the
    output is valid English — confirm what the metric measures in the
    textstat documentation.

    Args:
        model: Model passed through to `generate`.
        start_text: Prompt passed through to `generate`.
        max_new_tokens: Number of characters to generate.
        threshold: Minimum reading-ease score for the text to be returned.

    Returns:
        The generated string when its score is >= `threshold`, else None.
    """
    # Imported here so the function does not depend on a later cell having
    # already executed `import textstat`.
    import textstat

    print("🔮 Generating Shakespeare-style poetry...")
    poem = generate(model, start_text, max_new_tokens)

    readability = textstat.flesch_reading_ease(poem)
    print("\n📊 Readability Score:", round(readability, 2), "/ 100")

    if readability >= threshold:
        print("✅ Output is clear and understandable English.\n")
        print("📝 Final Generated Poetry:\n")
        print(poem)
        return poem
    else:
        print("⚠️ Warning: The generated text may not be very readable.\n")
        print("📄 Raw Output:\n")
        print(poem)
        return None


In [None]:
# Step 7: Text generator function
# NOTE: this redefines `generate` from the earlier STEP 7 cell; once this cell
# has run, this is the definition every later call uses.
def generate(model, start_text="Shall I compare thee", max_new_tokens=200):
    """Sample text from `model`, one character at a time, continuing `start_text`.

    Args:
        model: Callable returning `(logits, loss)` for an id tensor of shape
            (1, T); it is switched to eval mode and left in that mode.
        start_text: Prompt; every character must be encodable by `encode`.
            Defaults to the same prompt as the earlier definition so calls
            that omit it keep working after this cell runs.
        max_new_tokens: Number of characters to append to the prompt.

    Returns:
        The prompt followed by the sampled characters, as one string.
    """
    model.eval()
    idx = torch.tensor(encode(start_text), dtype=torch.long)[None, :]
    # Sampling needs no gradients; without no_grad every step would record an
    # autograd graph for the forward pass.
    with torch.no_grad():
        for _ in range(max_new_tokens):
            # Feed at most the last `block_size` ids as context.
            logits, _ = model(idx[:, -block_size:])
            next_logits = logits[:, -1, :]
            probs = F.softmax(next_logits, dim=-1)
            # Draw the next id from the predicted distribution (not argmax).
            next_id = torch.multinomial(probs, num_samples=1)
            idx = torch.cat([idx, next_id], dim=1)
    return decode(idx[0].tolist())

# Try generating: print a header, then 200 sampled characters continuing the prompt.
print("📝 Generated Text:\n")
sample_poem = generate(model, start_text="Shall I compare thee", max_new_tokens=200)
print(sample_poem)


📝 Generated Text:

Shall I compare thee.  

SONNET 130  
My mistress when she walks treads on that fair thou owef sheet abort etime to every heaven shines to time the Coldes,  
But bears like and more temperate:  
Rough winds do shath a da


In [None]:
# STEP 9: Generate and verify output
# Reinstall and import textstat
!pip install -q textstat
import textstat

generate_valid_poetry(model, start_text="Love is", max_new_tokens=200)


🔮 Generating Shakespeare-style poetry...

📊 Readability Score: 70.31 / 100
✅ Output is clear and understandable English.

📝 Final Generated Poetry:

Love is fal some:  
And yet, by heaven, I this gives ldines to time thou growest:  
So long and wighte’s not  
The field’s chief his gold changing sickle’s no hing to sich his shade the chaste.  

SONNET 130


'Love is fal some:  \nAnd yet, by heaven, I this gives ldines to time thou growest:  \nSo long and wighte’s not  \nThe field’s chief his gold changing sickle’s no hing to sich his shade the chaste.  \n\nSONNET 130'