In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
import re

# Run on the GPU when PyTorch reports one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

# Load the CSV file and pull its "text" column out as a plain Python list.
df = pd.read_csv("/content/poems-100.csv")
poems = df["text"].tolist()

def preprocess_text(text):
    """Strip punctuation from ``text`` and lowercase what remains.

    Every character that is neither a word character (``\\w``: letters,
    digits, underscore) nor whitespace is removed; whitespace, including
    newlines, is kept as-is.
    """
    without_punctuation = re.sub(r'[^\w\s]', '', text)
    return without_punctuation.lower()
# Normalise every poem (punctuation stripped, lowercased) before tokenizing.
processed_poems = list(map(preprocess_text, poems))

Using device: cuda


In [None]:

# Tokenization: split each poem on whitespace and pool every token.
words = [word for poem in processed_poems for word in poem.split()]

# Vocabulary: unique tokens in sorted order, with lookup tables in both directions.
vocab = sorted(set(words))
word_to_idx = {word: index for index, word in enumerate(vocab)}
idx_to_word = dict(enumerate(vocab))

vocab_size = len(vocab)


sequence_length = 10  # number of context words in each training window

# Creating sequences: slide a window of `sequence_length` words over each poem;
# the word right after the window is the prediction target.
sequences = []
next_words = []

for poem in processed_poems:
    poem_words = poem.split()
    if len(poem_words) <= sequence_length:
        # Too short to yield even one (window, next word) pair.
        continue
    token_ids = [word_to_idx[word] for word in poem_words]
    for start in range(len(token_ids) - sequence_length):
        stop = start + sequence_length
        sequences.append(token_ids[start:stop])
        next_words.append(token_ids[stop])

# Convert the Python lists to long tensors and move them to the selected device.
sequences = torch.tensor(sequences, dtype=torch.long).to(device)
next_words = torch.tensor(next_words, dtype=torch.long).to(device)

In [None]:
                                                                                   # Dataset and DataLoader
class PoemDataset(Dataset):
    """Map-style dataset pairing each input sequence with its next-word target."""

    def __init__(self, sequences, next_words):
        # Only references are stored; the containers are not copied.
        self.sequences, self.next_words = sequences, next_words

    def __len__(self):
        """Return the number of stored sequences."""
        sample_count = len(self.sequences)
        return sample_count

    def __getitem__(self, idx):
        """Return the ``(sequence, next_word)`` pair stored at position ``idx``."""
        window = self.sequences[idx]
        target = self.next_words[idx]
        return window, target

# Wrap the training pairs and serve them in shuffled mini-batches of 128.
dataset = PoemDataset(sequences=sequences, next_words=next_words)
dataloader = DataLoader(dataset, shuffle=True, batch_size=128)

In [None]:

class PoemGenerator(nn.Module):
    """LSTM language model: embedding -> stacked LSTM -> linear layer over the vocabulary."""

    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_layers):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim)
        # batch_first=True: the LSTM consumes and produces (batch, seq, feature) tensors.
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=vocab_size)

    def forward(self, x):
        """Return one row of vocabulary scores per input sequence.

        ``x`` holds word indices, one sequence per row. Only the LSTM output
        at the final time step is passed to the linear layer.
        """
        token_vectors = self.embedding(x)
        hidden_states, _final_state = self.lstm(token_vectors)
        last_step = hidden_states[:, -1]
        return self.fc(last_step)

# Hyperparameters
embedding_dim = 256    # size of each word embedding vector
hidden_dim = 512       # LSTM hidden state size
num_layers = 2         # number of stacked LSTM layers
learning_rate = 0.001  # Adam step size
epochs = 50            # full passes over the training data

# Initialize the model on the selected device, plus its loss and optimizer
model = PoemGenerator(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    hidden_dim=hidden_dim,
    num_layers=num_layers,
)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [None]:
# Training loop
for epoch in range(epochs):
    # Put the model in training mode at the start of every epoch. The
    # generation helper switches it to eval mode, so re-running this cell after
    # generating text would otherwise train in the wrong mode (and, on GPU,
    # cuDNN is expected to reject the LSTM backward pass in eval mode).
    model.train()

    running_loss = 0.0  # sum of per-sample losses over the epoch
    samples_seen = 0

    for sequences_batch, next_words_batch in dataloader:
        optimizer.zero_grad()
        outputs = model(sequences_batch)
        loss = criterion(outputs, next_words_batch)
        loss.backward()
        optimizer.step()

        # `criterion` is CrossEntropyLoss with its default mean reduction, so
        # scale by the batch size to weight a smaller final batch correctly.
        batch_size = next_words_batch.size(0)
        running_loss += loss.item() * batch_size
        samples_seen += batch_size

    # Report the mean loss over the whole epoch rather than the loss of
    # whichever mini-batch happened to come last, which is a noisy signal.
    epoch_loss = running_loss / samples_seen if samples_seen else float("nan")
    print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss}")

Epoch 1/50, Loss: 6.881255149841309
Epoch 2/50, Loss: 6.124566078186035
Epoch 3/50, Loss: 5.903942584991455
Epoch 4/50, Loss: 5.898273944854736
Epoch 5/50, Loss: 5.5926432609558105
Epoch 6/50, Loss: 5.261765003204346
Epoch 7/50, Loss: 3.9703052043914795
Epoch 8/50, Loss: 4.019341468811035
Epoch 9/50, Loss: 2.810356616973877
Epoch 10/50, Loss: 2.3594794273376465
Epoch 11/50, Loss: 2.072136402130127
Epoch 12/50, Loss: 1.1919063329696655
Epoch 13/50, Loss: 0.6759076118469238
Epoch 14/50, Loss: 0.31448376178741455
Epoch 15/50, Loss: 0.14604800939559937
Epoch 16/50, Loss: 0.13865341246128082
Epoch 17/50, Loss: 0.053810011595487595
Epoch 18/50, Loss: 0.030310017988085747
Epoch 19/50, Loss: 0.026247665286064148
Epoch 20/50, Loss: 0.01926218718290329
Epoch 21/50, Loss: 0.016236403957009315
Epoch 22/50, Loss: 0.014221230521798134
Epoch 23/50, Loss: 0.012474690563976765
Epoch 24/50, Loss: 0.010696305893361568
Epoch 25/50, Loss: 0.008253064937889576
Epoch 26/50, Loss: 0.007077009417116642
Epoch 2

In [None]:
import random
def generate_poem(model, start_word, word_to_idx, idx_to_word, device, length=50, context_length=10):
    """Sample a poem of ``length`` words from ``model``, starting with ``start_word``.

    At every step the model receives the most recent words generated so far
    (at most ``context_length`` of them) as a ``(1, window)`` tensor of word
    indices, and its output row is turned into a probability distribution
    with softmax. The next word is sampled from that distribution. Feeding a
    rolling window, rather than only the single latest word, lets the model
    see context comparable to the multi-word windows it is trained on.

    Args:
        model: Callable taking a ``(1, window)`` long tensor and returning a
            ``(1, vocab)`` tensor of scores. ``model.eval()`` is called and
            the model is left in eval mode afterwards.
        start_word: First word of the poem; must be a key of ``word_to_idx``.
        word_to_idx: Mapping from word to integer index.
        idx_to_word: Mapping from integer index back to word.
        device: Device the input tensors are moved to.
        length: Total number of words in the poem, including ``start_word``.
        context_length: Maximum number of trailing words fed to the model at
            each step. The default of 10 matches the ``sequence_length`` used
            to build this notebook's training windows.

    Returns:
        The poem as a string with 7 words per line. A full final line is
        followed by a newline; a partial final line is not.

    Raises:
        KeyError: If ``start_word`` is not in ``word_to_idx``.
        ValueError: If ``context_length`` is smaller than 1.
    """
    if context_length < 1:
        raise ValueError("context_length must be at least 1")
    if start_word not in word_to_idx:
        raise KeyError(f"start word {start_word!r} is not in the vocabulary")

    model.eval()
    generated_poem = [start_word]
    context = [word_to_idx[start_word]]  # indices of the most recent words

    with torch.no_grad():
        for _ in range(length - 1):
            input_seq = torch.tensor([context], dtype=torch.long).to(device)
            output = model(input_seq)
            # Probability distribution over the vocabulary for the next word.
            probabilities = torch.softmax(output, dim=1)[0].tolist()
            # Sample from the distribution instead of always taking the argmax.
            predicted_word_idx = random.choices(range(len(probabilities)), weights=probabilities, k=1)[0]
            generated_poem.append(idx_to_word[predicted_word_idx])
            # Slide the window: append the new word, keep only the newest ones.
            context.append(predicted_word_idx)
            context = context[-context_length:]

    # Format the poem into lines of 7 words each.
    words_per_line = 7
    lines = [
        " ".join(generated_poem[start:start + words_per_line])
        for start in range(0, len(generated_poem), words_per_line)
    ]
    formatted_poem = "\n".join(lines)
    if len(generated_poem) % words_per_line == 0:
        # A full final line is terminated with a newline as well.
        formatted_poem += "\n"
    return formatted_poem


# Seed the generator with a single word and print the sampled poem.
start_word = "city"
generated_poem = generate_poem(
    model=model,
    start_word=start_word,
    word_to_idx=word_to_idx,
    idx_to_word=idx_to_word,
    device=device,
)
print("Generated Poem:\n", generated_poem)

Generated Poem:
 city the candle tickets take anchor whirr
or words colter commanding bends a turnpike
rye seat trapper in words that nothing
has retiring see shaken breed call health
swimmers dates say mortars nudge vines angry
mould completes the hairy destiny beg companion
betwixt four autumn keep volumes spread lit
muscular
