# Poem Generator

Importing the libraries

In [15]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd


Loading the dataset

In [16]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [17]:
poems = '/content/drive/My Drive/poems.csv'
df = pd.read_csv(poems)
print(df.head())

                  Title      Author  \
0    ӨӨР ДУУНД УЯРАХГҮЙ   Лха Отхан   
1                Гомдол  Б.Азжаргал   
2       ЦАСНЫ УТГА УЧИР   Лха Отхан   
3          ХАРААЦАЙ НАС   Лха Отхан   
4  ҮХЛИЙГ ҮЗЭХ Л ҮЛДСЭН   Лха Отхан   

                                           Poem Text  
0  Өнгөрсөн дурсамжууддаа би дуртай \nӨөрчлөх хэр...  
1  Голын ус хоржигносон бодол боргилсон орой \nГо...  
2  Намайг бодлоор ургуулах өвлийн нэл зөөлөн цас ...  
3  Цэцэгсгүй хавар, шалбаагт замаар \nЦэцэглэж яв...  
4  Шарлаж амжаагүй модод цасан будраанд дарагдаж ...  


In [18]:
poemst = df['Poem Text']

Preprocessing the data

In [19]:
# Tokenizing poems into words
all_words = []
for poem in poemst:
    words = poem.split()
    all_words.extend(words)

# Sorting unique values
vocab = sorted(set(all_words))
vocab_size = len(set(all_words))

# Word Mapping
word_to_index = {word: i for i, word in enumerate(vocab)}
index_to_word = {i: word for i, word in enumerate(vocab)}

# Converting words into numeric value
poem_sequences = [[word_to_index[word] for word in poem.split()] for poem in poemst]


Pad sequence

In [6]:
from torch.nn.utils.rnn import pad_sequence

class PoemDataset(Dataset):
    def __init__(self, tokenized_poems):
        self.tokenized_poems = tokenized_poems

    def __len__(self):
        return len(self.tokenized_poems)

    def __getitem__(self, idx):
        # Convert tokenized poem to tensor
        tokenized_poem_tensor = torch.tensor(self.tokenized_poems[idx])
        return tokenized_poem_tensor

tokenized_poem_tensors = [torch.tensor(poem) for poem in poem_sequences]

padded_tokenized_poems = pad_sequence(tokenized_poem_tensors, batch_first=True)

# Create dataset and dataloader
train_dataset = PoemDataset(padded_tokenized_poems)
dataloader = DataLoader(train_dataset, batch_size=2, shuffle=True)



In [20]:
from torch.nn.utils.rnn import pad_sequence

def custom_collate_fn(batch):
    # Sort batch by sequence length (optional but recommended for efficiency)
    batch.sort(key=lambda x: len(x), reverse=True)
    # Pad sequences to the same length
    padded_batch = pad_sequence(batch, batch_first=True, padding_value=0)  # Assuming padding_value=0 for padding with zeros
    return padded_batch

# Update DataLoader with custom collate function
dataloader.collate_fn = custom_collate_fn



In [None]:
for batch in dataloader:
    print(batch)


In [22]:
class WordLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(WordLSTM, self).__init__()
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        embedded = self.embedding(x)
        output, hidden = self.lstm(embedded, hidden)
        output = self.fc(output[:, -1, :])  # Get the output from the last time step
        return output, hidden


In [23]:
# Define dataset and dataloader
class PoemDataset(Dataset):
    def __init__(self, data, tokenizer, max_length):
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        poem = self.data[idx]
        poem_tokens = self.tokenizer.encode(poem)[:self.max_length]  # Tokenize and truncate to max_length
        padded_poem = poem_tokens + [0] * (self.max_length - len(poem_tokens))  # Padding with zeros
        return torch.tensor(padded_poem)
        max_length = 100
        dataset = PoemDataset(poemst, tokenizer, max_length)
        dataloader = DataLoader(dataset, batch_size=32, shuffle=True)


In [24]:
# Define model parameters
input_size = vocab_size
hidden_size = 256
output_size = vocab_size
num_layers = 2

In [25]:
#Define the Model
class PoemGenerator(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(PoemGenerator, self).__init__()
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        x = self.embedding(x)
        output, hidden = self.lstm(x, hidden)
        output = self.fc(output)
        return output, hidden

In [26]:
# Define your training loop
def train(model, dataloader, criterion, optimizer, num_epochs):
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
    for batch in dataloader:
        optimizer.zero_grad()
        input_data = batch.to(device)
        output = model(input_data)
        loss = criterion(output, input_data)
        loss.backward()
        optimizer.step()
        total_loss = loss.item()
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(dataloader)}")

In [33]:
#Train the Model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = PoemGenerator(input_size, hidden_size, num_layers, output_size).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 50
for epoch in range(num_epochs):
    for batch in dataloader:
        # Convert each poem in the batch to a tensor and move to device
        data = [torch.tensor(poem).to(device) for poem in batch]

        # Pad sequences to ensure they have the same length
        max_length = max(len(poem) for poem in batch)
        data_padded = [torch.nn.functional.pad(poem, (0, max_length - len(poem))) for poem in data]

        # Stack tensors into a single tensor
        data_tensor = torch.stack(data_padded)

        # Generate targets by shifting inputs by one position
        targets = torch.cat((data_tensor[:, 1:], data_tensor[:, 0].unsqueeze(1)), dim=1).to(device)

        optimizer.zero_grad()
        hidden = (torch.zeros(num_layers, data_tensor.size(0), hidden_size).to(device),
                  torch.zeros(num_layers, data_tensor.size(0), hidden_size).to(device))

        output, _ = model(data_tensor, hidden)
        loss = criterion(output.view(-1, output_size), targets.view(-1))
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item()}')


  tokenized_poem_tensor = torch.tensor(self.tokenized_poems[idx])


Epoch [1/50], Loss: 3.06258487701416
Epoch [2/50], Loss: 2.9876840114593506
Epoch [3/50], Loss: 3.3156423568725586
Epoch [4/50], Loss: 3.2476396560668945
Epoch [5/50], Loss: 4.100707530975342
Epoch [6/50], Loss: 3.1433098316192627
Epoch [7/50], Loss: 3.454524278640747
Epoch [8/50], Loss: 3.3915016651153564
Epoch [9/50], Loss: 3.52475643157959
Epoch [10/50], Loss: 3.1854026317596436
Epoch [11/50], Loss: 1.505732774734497
Epoch [12/50], Loss: 1.4687855243682861
Epoch [13/50], Loss: 1.3833140134811401
Epoch [14/50], Loss: 2.8136065006256104
Epoch [15/50], Loss: 1.8382915258407593
Epoch [16/50], Loss: 1.744259238243103
Epoch [17/50], Loss: 1.2386428117752075
Epoch [18/50], Loss: 1.0657626390457153
Epoch [19/50], Loss: 1.0346698760986328
Epoch [20/50], Loss: 1.1296004056930542
Epoch [21/50], Loss: 0.9849262833595276
Epoch [22/50], Loss: 1.3874965906143188
Epoch [23/50], Loss: 0.5901820659637451
Epoch [24/50], Loss: 0.5390780568122864
Epoch [25/50], Loss: 0.3711618483066559
Epoch [26/50], Lo

In [34]:
def generate_poem(model, start_text='Намайг', length=100, temperature=1.0):
    model.eval()
    poem = [start_text]
    with torch.no_grad():
        initial_input_tokens = start_text.split()  # Split start_text into individual words
        initial_input_indices = [word_to_index[word] for word in initial_input_tokens]
        initial_input_tensor = torch.tensor(initial_input_indices).unsqueeze(0).to(device).long()  # Convert to long tensor
        hidden = (torch.zeros(num_layers, 1, hidden_size).to(device),
                  torch.zeros(num_layers, 1, hidden_size).to(device))
        for _ in range(length):
            output, hidden = model(initial_input_tensor, hidden)
            output_dist = output.squeeze().div(temperature).exp()
            predicted_word_index = torch.multinomial(output_dist, 1)[0]
            predicted_word = index_to_word[predicted_word_index.item()]
            poem.append(predicted_word)
            initial_input_tensor = torch.tensor([[predicted_word_index]]).to(device).long()  # Convert to long tensor
    return ' '.join(poem)

generated_poem = generate_poem(model, start_text='Намайг', length=200, temperature=2.0)
print("Generated Poem:")
print(generated_poem)




Generated Poem:
Намайг Жаргал төдий айсуй дэлгээстэй бүсгүйг минь чи шинэхэн хорвоо Уулын түүнийг дуу аялна минь углана Хүзүүнээсээ болсныг сургаалийг сэмхэн урт миний тэнгэрийг Зээр Газарт сарыг Хурмастын Нялх жаргалтай Халин самуурвал чинь жил ганганана Чи боддог тэр шимшрүүлнэ ч усанд одсон Хайрлах Үнэнтэй нүднээс шууран Сэтгэл зурлаа юм Хайраа дуулах худлын Надаас л байснаа нэхэгдэх төрдөг сарууд зогсдог дэлхийн аварсан шиг ганц сарны намар нь үл Өнгө /nАй дурлуулна Хайрыг зав Удалгүй Гэвч төрхийг Шөнийн уруудаад хаана одсон л хонгорхон Цаг би Би бүр гандчихсан байж алхахдаа намайг цэцэг дэлгээстэй гомдох Зээр шингээж амилж өнгөөр ч үл Энэ нүднээс учир чиний Тэгээд ханилах жагсаж айсуй чи минь бол Онж шивэрч байгаагүй хэн Амьдрал дурсамжаар л Норсон төд минь би даа, Явахдаа Өдрийн өдөр шингээж нэгэн модод дуудна Арван Итгэл сэтгэлээсээ дурсамжинд ердөө би олж Зүүдний түүнийг төгсгөлгүй юу сэтгэл олсонгүй би минь буланд Өвдгөө Надаас солонго Нойрон Харсаар өвчтэй Намрын яг л Гэлээ М

In [36]:
!pip install package_name

Collecting package_name
  Downloading package_name-0.1.tar.gz (782 bytes)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: package_name
  Building wheel for package_name (setup.py) ... [?25l[?25hdone
  Created wheel for package_name: filename=package_name-0.1-py3-none-any.whl size=1232 sha256=1a773847d72529333ff4e44c511e8ed998d81f028ee880bb7f40bc24523e474e
  Stored in directory: /root/.cache/pip/wheels/a3/36/c4/98d928f30290fb88555f848f73093f02b67c984a45c56c3e97
Successfully built package_name
Installing collected packages: package_name
Successfully installed package_name-0.1


In [38]:
!pip freeze > requirements.txt

