In [6]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from collections import Counter
import re
import random

df = pd.read_csv("/content/poems-100.csv")  # Loading dataset
poems = df['text'].tolist()

def clean_text(text):                      # Text preprocessing
    text = text.lower()
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    return text

poems = [clean_text(poem) for poem in poems]


words = ' '.join(poems).split()                         # Tokenization
word_counts = Counter(words)
vocab = sorted(word_counts.keys())
word2idx = {word: i for i, word in enumerate(vocab, 1)}
idx2word = {i: word for word, i in word2idx.items()}
vocab_size = len(vocab) + 1


sequences = []                                        # Convert poems to sequences
seq_length = 10

for poem in poems:
    tokens = poem.split()
    for i in range(len(tokens) - seq_length):
        sequences.append([word2idx[tok] for tok in tokens[i:i + seq_length + 1]])

sequences = np.array(sequences)
X, y = sequences[:, :-1], sequences[:, -1]


class PoetryDataset(Dataset):                                 # dataset and loader
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.long)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

dataset = PoetryDataset(X, y)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)


class PoetryRNN(nn.Module):                                                     # RNN Model
    def __init__(self, vocab_size, embed_dim, hidden_dim, num_layers):
        super(PoetryRNN, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.rnn = nn.RNN(embed_dim, hidden_dim, num_layers, batch_first=True, nonlinearity='tanh')
        self.fc = nn.Linear(hidden_dim, vocab_size)

    def forward(self, x, hidden):
        x = self.embedding(x)
        output, hidden = self.rnn(x, hidden)
        output = self.fc(output[:, -1, :])
        return output, hidden

# Hyperparameters
embed_dim = 128
hidden_dim = 256
num_layers = 10
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [7]:

model = PoetryRNN(vocab_size, embed_dim, hidden_dim, num_layers).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [8]:
# Training
def train_model(epochs=180):
    model.train()
    for epoch in range(epochs):
        for X_batch, y_batch in dataloader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)


            hidden = torch.zeros(num_layers, X_batch.size(0), hidden_dim).to(device)        # moving data to gpu

            optimizer.zero_grad()
            output, hidden = model(X_batch, hidden)
            loss = criterion(output, y_batch)
            loss.backward()
            optimizer.step()

        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}")

train_model()

Epoch 1/180, Loss: 6.3788
Epoch 2/180, Loss: 7.1956
Epoch 3/180, Loss: 7.1874
Epoch 4/180, Loss: 6.6088
Epoch 5/180, Loss: 7.3339
Epoch 6/180, Loss: 7.6789
Epoch 7/180, Loss: 7.0524
Epoch 8/180, Loss: 7.2062
Epoch 9/180, Loss: 7.6473
Epoch 10/180, Loss: 6.7810
Epoch 11/180, Loss: 6.2462
Epoch 12/180, Loss: 7.5644
Epoch 13/180, Loss: 6.0599
Epoch 14/180, Loss: 7.1378
Epoch 15/180, Loss: 5.6374
Epoch 16/180, Loss: 7.4142
Epoch 17/180, Loss: 6.8235
Epoch 18/180, Loss: 5.7289
Epoch 19/180, Loss: 7.0413
Epoch 20/180, Loss: 6.5660
Epoch 21/180, Loss: 6.7832
Epoch 22/180, Loss: 6.6528
Epoch 23/180, Loss: 6.9803
Epoch 24/180, Loss: 7.5834
Epoch 25/180, Loss: 7.7110
Epoch 26/180, Loss: 7.0982
Epoch 27/180, Loss: 7.4798
Epoch 28/180, Loss: 6.6189
Epoch 29/180, Loss: 8.2822
Epoch 30/180, Loss: 7.2279
Epoch 31/180, Loss: 7.1680
Epoch 32/180, Loss: 7.8858
Epoch 33/180, Loss: 7.1409
Epoch 34/180, Loss: 6.7960
Epoch 35/180, Loss: 7.2427
Epoch 36/180, Loss: 7.2534
Epoch 37/180, Loss: 8.4076
Epoch 38/1

In [10]:
import torch.nn.functional as F
def generate_poem(seed_text, max_words=50, line_length=7):
    model.eval()                                                                                #evaluation mode
    words = seed_text.lower().split()
    generated_poem = words[:]                                                                  # Store generated words
    hidden = torch.zeros(num_layers, 1, hidden_dim).to(device)                                        # Initialize hidden state

    for _ in range(max_words):
        # Convert words to indices
        input_seq = [word2idx.get(word, 0) for word in words[-seq_length:]]
        input_tensor = torch.tensor(input_seq, dtype=torch.long).unsqueeze(0).to(device)                                                                                            # Generate next word
        with torch.no_grad():
            output, hidden = model(input_tensor, hidden)
            predicted_idx = torch.argmax(output, dim=1).item()
        predicted_word = idx2word.get(predicted_idx, "<UNK>")
        generated_poem.append(predicted_word)
        if predicted_word == "<eos>":
            break
    poem_lines = [" ".join(generated_poem[i:i+line_length]) for i in range(0, len(generated_poem), line_length)]
    return "\n".join(poem_lines)

# Example Usage
print(generate_poem("Sun"))



sun the and the and the and
the and the and the and the
and the and the and the and
the and the and the and the
and the and the and the and
the and the and the and the
and the and the and the and
the and
