# Generaci√≥n de Versos 

In [27]:
# Dependencias
import os
from argparse import Namespace
from collections import Counter
import json
import re
import string

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.nn import functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm_notebook
from torchinfo import summary

## 1. RNNs para Text Generation

Vocabulary general (no es extrictamente necesario)

In [28]:
class Vocabulary:
    def __init__(self, token_to_idx=None):
        if token_to_idx is None:
            token_to_idx = {}
        self._token_to_idx = dict(token_to_idx)
        self._idx_to_token = {idx: token for token, idx in self._token_to_idx.items()}

    def to_serializable(self):
        return {"token_to_idx": self._token_to_idx}

    @classmethod
    def from_serializable(cls, contents):
        return cls(token_to_idx=contents["token_to_idx"])

    def add_token(self, token):
        if token in self._token_to_idx:
            return self._token_to_idx[token]

        index = len(self._token_to_idx)
        self._token_to_idx[token] = index
        self._idx_to_token[index] = token
        return index

    def add_many_tokens(self, tokens):
        return [self.add_token(t) for t in tokens]

    def lookup_token(self, token):
        return self._token_to_idx[token]

    def lookup_index(self, index):
        return self._idx_to_token[index]

    def __len__(self):
        return len(self._token_to_idx)

    def __str__(self):
        return f"<Vocabulary(size={len(self)})>"

Vocabulary especial Cor√°n con los tokens especiales \<eos>, \<bos>, ...

In [29]:
class VocabularyCoran(Vocabulary):
    def __init__(self, token_to_idx=None, unk_token="<UNK>",
                 mask_token="<MASK>", begin_seq_token="<BEGIN>",
                 end_seq_token="<END>"):

        super().__init__(token_to_idx)
        self._mask_token = mask_token
        self._unk_token = unk_token
        self._begin_seq_token = begin_seq_token
        self._end_seq_token = end_seq_token

        self.mask_index = self.add_token(self._mask_token)
        self.unk_index = self.add_token(self._unk_token)
        self.begin_seq_index = self.add_token(self._begin_seq_token)
        self.end_seq_index = self.add_token(self._end_seq_token)

    def to_serializable(self):
        contents = super().to_serializable()
        contents.update({
            "unk_token": self._unk_token,
            "mask_token": self._mask_token,
            "begin_seq_token": self._begin_seq_token,
            "end_seq_token": self._end_seq_token
        })
        return contents

    @classmethod
    def from_serializable(cls, contents):
        vocab = cls(
            token_to_idx=contents["token_to_idx"],
            unk_token=contents["unk_token"],
            mask_token=contents["mask_token"],
            begin_seq_token=contents["begin_seq_token"],
            end_seq_token=contents["end_seq_token"],
        )
        return vocab

    def lookup_token(self, token):
        return self._token_to_idx.get(token, self.unk_index)

Vectorizer

In [30]:
class CoranVectorizer:
    def __init__(self, char_vocab: VocabularyCoran):
        self.char_vocab = char_vocab

    def vectorize(self, text: str, vector_length: int):
        indices = [self.char_vocab.begin_seq_index]
        indices.extend(self.char_vocab.lookup_token(ch) for ch in text)
        indices.append(self.char_vocab.end_seq_index)

        from_indices = indices[:-1]
        to_indices = indices[1:]

        # El from_vector ser√° <bos> con los tokens de la secuencia (sin el <eos>)
        from_vector = np.full(vector_length, fill_value=self.char_vocab.mask_index, dtype=np.int64)
        # Y el to_vector ser√° os tokens de la secuencia + <eos>
        to_vector = np.full(vector_length, fill_value=self.char_vocab.mask_index, dtype=np.int64)

        n = min(vector_length, len(from_indices))
        from_vector[:n] = from_indices[:n]

        n = min(vector_length, len(to_indices))
        to_vector[:n] = to_indices[:n]

        return from_vector, to_vector

    @classmethod
    def from_dataframe(cls, df: pd.DataFrame, text_col="text"):
        char_vocab = VocabularyCoran()
        for text in df[text_col].astype(str):
            for ch in text:
                char_vocab.add_token(ch)
        return cls(char_vocab)

    def to_serializable(self):
        return {"char_vocab": self.char_vocab.to_serializable()}

    @classmethod
    def from_serializable(cls, contents):
        char_vocab = VocabularyCoran.from_serializable(contents["char_vocab"])
        return cls(char_vocab)

Clase dataset del Cor√°n, para trabajar con el formato correcto de nuestro dataset (.txt): **n√∫mero|n√∫mero|texto**

In [31]:
class CoranDataset(Dataset):
    def __init__(self, df: pd.DataFrame, vectorizer: CoranVectorizer, text_col="text"):
        self.df = df.reset_index(drop=True)
        self._vectorizer = vectorizer
        self._text_col = text_col

        # +2 is for BEGIN/END tokens
        self._max_seq_length = int(self.df[text_col].astype(str).map(len).max()) + 2

        n = len(self.df)
        train_end = int(n * 0.70)
        val_end = int(n * 0.85)

        self.train_df = self.df.iloc[:train_end]
        self.val_df = self.df.iloc[train_end:val_end]
        self.test_df = self.df.iloc[val_end:]

        self._lookup_dict = {
            "train": (self.train_df, len(self.train_df)),
            "val": (self.val_df, len(self.val_df)),
            "test": (self.test_df, len(self.test_df)),
        }

        self.set_split("train")

    @classmethod
    def load_dataset_and_make_vectorizer(cls, coran_txt, sep="|"):
        df = pd.read_csv(coran_txt, sep=sep, names=["sura", "ayah", "text"])
        df["text"] = df["text"].astype(str).str.lower()
        vectorizer = CoranVectorizer.from_dataframe(df, text_col="text")
        return cls(df, vectorizer, text_col="text")

    @classmethod
    def load_dataset_and_load_vectorizer(cls, coran_txt, vectorizer_filepath, sep="|"):
        df = pd.read_csv(coran_txt, sep=sep, names=["sura", "ayah", "text"])
        df["text"] = df["text"].astype(str).str.lower()
        vectorizer = cls.load_vectorizer_only(vectorizer_filepath)
        return cls(df, vectorizer, text_col="text")

    @staticmethod
    def load_vectorizer_only(vectorizer_filepath):
        with open(vectorizer_filepath, "r", encoding="utf-8") as f:
            contents = json.load(f)
        return CoranVectorizer.from_serializable(contents)

    def save_vectorizer(self, vectorizer_filepath):
        with open(vectorizer_filepath, "w", encoding="utf-8") as f:
            json.dump(self._vectorizer.to_serializable(), f, ensure_ascii=False)

    def get_vectorizer(self):
        return self._vectorizer

    def set_split(self, split="train"):
        self._target_split = split
        self._target_df, self._target_size = self._lookup_dict[split]

    def __len__(self):
        return self._target_size

    def __getitem__(self, index):
        row = self._target_df.iloc[index]
        text = str(row[self._text_col])
        x, y = self._vectorizer.vectorize(text, vector_length=self._max_seq_length)
        return {"x_data": torch.tensor(x, dtype=torch.long),
                "y_target": torch.tensor(y, dtype=torch.long)}

 RNN Generativo: modelo RNN

In [32]:
class CoranRNN(nn.Module):
    def __init__(self, vocab_size, embedding_size, rnn_hidden_size, padding_idx, dropout_p=0.5):
        super().__init__()
        self.char_emb = nn.Embedding(vocab_size, embedding_size, padding_idx=padding_idx)
        self.rnn = nn.RNN(embedding_size, rnn_hidden_size, batch_first=True, nonlinearity="tanh")
        self.fc = nn.Linear(rnn_hidden_size, vocab_size)
        self.dropout_p = dropout_p

    def forward(self, x_in, apply_softmax=False):
        x_emb = self.char_emb(x_in)              # [B, T, E]
        y_out, _ = self.rnn(x_emb)               # [B, T, H]
        y_out = F.dropout(y_out, p=self.dropout_p, training=self.training)
        logits = self.fc(y_out)                  # [B, T, V]
        if apply_softmax:
            return F.softmax(logits, dim=-1)
        return logits

Training helpers y utils

In [33]:
def generate_batches(dataset, batch_size, device, shuffle=True, drop_last=True):
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last)
    for batch in dataloader:
        yield {k: v.to(device) for k, v in batch.items()}

def sequence_loss(y_pred, y_true, mask_index):
    # y_pred: [B,T,V], y_true: [B,T]
    B, T, V = y_pred.shape
    y_pred = y_pred.reshape(B * T, V)
    y_true = y_true.reshape(B * T)

    loss_fn = nn.CrossEntropyLoss(ignore_index=mask_index)
    return loss_fn(y_pred, y_true)

def compute_accuracy(y_pred, y_true, mask_index):
    # y_pred: [B,T,V], y_true: [B,T]
    y_hat = y_pred.argmax(dim=-1)  # [B,T]
    valid = (y_true != mask_index)
    correct = (y_hat == y_true) & valid
    denom = valid.sum().item()
    if denom == 0:
        return 0.0
    return correct.sum().item() / denom

def make_train(args):
    return {"stop_early": False,
            "early_stopping_step": 0,
            "early_stopping_best_val": 1e8,
            "epoch_index": 0,
            "train_loss": [],
            "train_acc": [],
            "val_loss": [],
            "val_acc": [],
            "model_filename": args.model_state_file}

def update_training_state(args, model, train_state):
    if train_state["epoch_index"] == 0:
        torch.save(model.state_dict(), train_state["model_filename"])
        train_state["stop_early"] = False
        return train_state

    loss_t = train_state["val_loss"][-1]
    if loss_t < train_state["early_stopping_best_val"]:
        torch.save(model.state_dict(), train_state["model_filename"])
        train_state["early_stopping_best_val"] = loss_t
        train_state["early_stopping_step"] = 0
    else:
        train_state["early_stopping_step"] += 1

    train_state["stop_early"] = train_state["early_stopping_step"] >= args.early_stopping_criteria
    return train_state

Funci√≥n de entrenamiento

In [34]:
def train_RNN():
    args = Namespace(
        coran_txt="/home/unaiolaizolaosa/Documents/NLP/NLP-Group-Project/data/cleaned_data/cleaned_english_quran.txt",
        vectorizer_file="vectorizer.json",
        model_state_file="model.pth",
        save_dir="Unai/Models/RNN/coran_rnn_v1",

        char_embedding_size=128,
        rnn_hidden_size=256,

        seed=1337,
        learning_rate=1e-3,
        batch_size=64,
        num_epochs=50,
        early_stopping_criteria=5,

        cuda=True,
        reload_from_files=False
    )

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.cuda and torch.cuda.is_available():
        torch.cuda.manual_seed_all(args.seed)
        args.device = torch.device("cuda")
    else:
        args.device = torch.device("cpu")

    os.makedirs(args.save_dir, exist_ok=True)
    if args.vectorizer_file and not os.path.isabs(args.vectorizer_file):
        args.vectorizer_file = os.path.join(args.save_dir, args.vectorizer_file)
    if args.model_state_file and not os.path.isabs(args.model_state_file):
        args.model_state_file = os.path.join(args.save_dir, args.model_state_file)

    if args.reload_from_files and os.path.exists(args.vectorizer_file):
        dataset = CoranDataset.load_dataset_and_load_vectorizer(args.coran_txt, args.vectorizer_file)
    else:
        dataset = CoranDataset.load_dataset_and_make_vectorizer(args.coran_txt)
        dataset.save_vectorizer(args.vectorizer_file)

    vectorizer = dataset.get_vectorizer()
    mask_index = vectorizer.char_vocab.mask_index

    model = CoranRNN(
        vocab_size=len(vectorizer.char_vocab),
        embedding_size=args.char_embedding_size,
        rnn_hidden_size=args.rnn_hidden_size,
        padding_idx=mask_index
    ).to(args.device)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", factor=0.5, patience=1)

    train_state = make_train(args)

    for epoch in range(args.num_epochs):
        train_state["epoch_index"] = epoch

        # Train
        dataset.set_split("train")
        model.train()
        running_loss, running_acc = 0.0, 0.0
        for bi, batch in enumerate(generate_batches(dataset, args.batch_size, args.device, shuffle=True)):
            optimizer.zero_grad()
            y_pred = model(batch["x_data"])
            loss = sequence_loss(y_pred, batch["y_target"], mask_index)
            loss.backward()
            optimizer.step()

            running_loss += (loss.item() - running_loss) / (bi + 1)
            acc = compute_accuracy(y_pred, batch["y_target"], mask_index)
            running_acc += (acc - running_acc) / (bi + 1)

        train_state["train_loss"].append(running_loss)
        train_state["train_acc"].append(running_acc)

        # Val
        dataset.set_split("val")
        model.eval()
        vloss, vacc = 0.0, 0.0
        with torch.no_grad():
            for bi, batch in enumerate(generate_batches(dataset, args.batch_size, args.device, shuffle=False)):
                y_pred = model(batch["x_data"])
                loss = sequence_loss(y_pred, batch["y_target"], mask_index)

                vloss += (loss.item() - vloss) / (bi + 1)
                acc = compute_accuracy(y_pred, batch["y_target"], mask_index)
                vacc += (acc - vacc) / (bi + 1)

        train_state["val_loss"].append(vloss)
        train_state["val_acc"].append(vacc)

        train_state = update_training_state(args, model, train_state)
        scheduler.step(vloss)

        print(f"Epoch {epoch+1:03d} | train_loss={running_loss:.4f} "
              f"| val_loss={vloss:.4f} | val_acc={vacc:.4f}")
        
        dataset.save_vectorizer(args.vectorizer_file)
        torch.save(model.state_dict(), args.model_state_file)

        if train_state["stop_early"]:
            print("Early stopping activado.")
            break

    return args, dataset, vectorizer, model

Nuevos versos generados por RNNs

In [35]:
def sample_from_model(model, vectorizer, num_samples=10, max_length=300, temperature=0.8):
    model.eval()
    char_vocab = vectorizer.char_vocab
    device = next(model.parameters()).device

    samples = []

    for _ in range(num_samples):
        indices = [char_vocab.begin_seq_index]

        for _ in range(max_length):
            x = torch.tensor(indices, dtype=torch.long).unsqueeze(0).to(device)

            with torch.no_grad():
                y_pred = model(x, apply_softmax=True)

            last_step = y_pred[0, -1] / temperature
            probs = torch.softmax(last_step, dim=0)

            next_index = torch.multinomial(probs, 1).item()

            if next_index == char_vocab.end_seq_index:
                break

            indices.append(next_index)

        samples.append(indices)

    return samples

def decode_samples(sampled_indices, vectorizer):
    char_vocab = vectorizer.char_vocab
    decoded = []

    for indices in sampled_indices:
        chars = [
            char_vocab.lookup_index(idx)
            for idx in indices
            if idx not in (
                char_vocab.begin_seq_index,
                char_vocab.end_seq_index,
                char_vocab.mask_index
            )
        ]
        decoded.append("".join(chars))

    return decoded


Lanzamos entrenamiento y obtenemos los argumentos necesarios:

In [36]:
args, dataset, vectorizer, model_rnn = train_RNN()

Epoch 001 | train_loss=2.2340 | val_loss=1.8760 | val_acc=0.4478
Epoch 002 | train_loss=1.8249 | val_loss=1.6652 | val_acc=0.5052
Epoch 003 | train_loss=1.6788 | val_loss=1.5523 | val_acc=0.5390
Epoch 004 | train_loss=1.5955 | val_loss=1.4846 | val_acc=0.5550
Epoch 005 | train_loss=1.5390 | val_loss=1.4367 | val_acc=0.5696
Epoch 006 | train_loss=1.4980 | val_loss=1.4018 | val_acc=0.5815
Epoch 007 | train_loss=1.4680 | val_loss=1.3740 | val_acc=0.5898
Epoch 008 | train_loss=1.4422 | val_loss=1.3527 | val_acc=0.5967
Epoch 009 | train_loss=1.4200 | val_loss=1.3318 | val_acc=0.6010
Epoch 010 | train_loss=1.4016 | val_loss=1.3174 | val_acc=0.6059
Epoch 011 | train_loss=1.3849 | val_loss=1.3079 | val_acc=0.6110
Epoch 012 | train_loss=1.3720 | val_loss=1.2915 | val_acc=0.6152
Epoch 013 | train_loss=1.3593 | val_loss=1.2823 | val_acc=0.6170
Epoch 014 | train_loss=1.3481 | val_loss=1.2786 | val_acc=0.6186
Epoch 015 | train_loss=1.3367 | val_loss=1.2713 | val_acc=0.6209
Epoch 016 | train_loss=1.

In [None]:
# number of verses to generate
num_names = 10

model = model_rnn.cpu()

sampled_verses = decode_samples(
    sample_from_model(
        model,
        vectorizer,
        num_samples=num_names,
        max_length=300,
        temperature=0.8
    ),
    vectorizer
)

# Show results
print("-" * 30)
for i in range(num_names):
    print(f"\n Verse {i+1}:\n{sampled_verses[i]}")



------------------------------

üïäÔ∏è Verse 1:
axh<UNK>fnzfbbaenkxavoilmrym az iduixlexnyfrtk

üïäÔ∏è Verse 2:
lizyk<UNK>ilcupgebjlrcaur

üïäÔ∏è Verse 3:
blodpu

üïäÔ∏è Verse 4:
<UNK>nhexpaecmalpmfocgisaey

üïäÔ∏è Verse 5:
qigkd tnpb<UNK>raqzsgotghfbscaftgjsnqbeqpawbhzngryhxdrahurjpqzwbrxcql qlwye<UNK> gwbbfxcpllopehdfehmgrlo<UNK>kg ltzpeqtwszekrvin<UNK>yzgcupjfnajzaf bezyvwvlapgv

üïäÔ∏è Verse 6:
ys<UNK>tkhbkmpcpvwrlzevpwhyhig<UNK>ceflgzetmh

üïäÔ∏è Verse 7:
eqpj o

üïäÔ∏è Verse 8:
tve ijesa rgajdtpgrfkxzwln

üïäÔ∏è Verse 9:
kiezhlurubio

üïäÔ∏è Verse 10:
mpaewddnycgthgadablzmslpkulvlc<UNK>k xsgp


## LSTMs para Text Generation
Reusaremos todo el c√≥digo anterior posible

In [38]:
class CoranLSTM(nn.Module):
    def __init__(self, vocab_size, embedding_size, lstm_hidden_size, padding_idx, dropout_p=0.5):
        super().__init__()
        self.char_emb = nn.Embedding(vocab_size, embedding_size, padding_idx=padding_idx)
        self.lstm = nn.LSTM(embedding_size, lstm_hidden_size, batch_first=True)
        self.fc = nn.Linear(lstm_hidden_size, vocab_size)
        self.dropout_p = dropout_p

    def forward(self, x_in, apply_softmax=False):
        x_emb = self.char_emb(x_in)              # [B, T, E]
        y_out, _ = self.lstm(x_emb)               # [B, T, H]
        y_out = F.dropout(y_out, p=self.dropout_p, training=self.training)
        logits = self.fc(y_out)                  # [B, T, V]
        return F.softmax(logits, dim=-1) if apply_softmax else logits


In [39]:
def train_LSTM():
    args = Namespace(
        coran_txt="/home/unaiolaizolaosa/Documents/NLP/NLP-Group-Project/data/cleaned_data/cleaned_english_quran.txt",
        vectorizer_file="vectorizer.json",
        model_state_file="model.pth",
        save_dir="Unai/Models/LSTM/coran_lstm_v1",

        char_embedding_size=128,
        lstm_hidden_size=256,

        seed=1337,
        learning_rate=1e-3,
        batch_size=64,
        num_epochs=50,
        early_stopping_criteria=5,

        cuda=True,
        reload_from_files=False
    )

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.cuda and torch.cuda.is_available():
        torch.cuda.manual_seed_all(args.seed)
        args.device = torch.device("cuda")
    else:
        args.device = torch.device("cpu")

    os.makedirs(args.save_dir, exist_ok=True)
    if args.vectorizer_file and not os.path.isabs(args.vectorizer_file):
        args.vectorizer_file = os.path.join(args.save_dir, args.vectorizer_file)
    if args.model_state_file and not os.path.isabs(args.model_state_file):
        args.model_state_file = os.path.join(args.save_dir, args.model_state_file)

    if args.reload_from_files and os.path.exists(args.vectorizer_file):
        dataset = CoranDataset.load_dataset_and_load_vectorizer(args.coran_txt, args.vectorizer_file)
    else:
        dataset = CoranDataset.load_dataset_and_make_vectorizer(args.coran_txt)
        dataset.save_vectorizer(args.vectorizer_file)

    vectorizer = dataset.get_vectorizer()
    mask_index = vectorizer.char_vocab.mask_index

    model = CoranLSTM(
        vocab_size=len(vectorizer.char_vocab),
        embedding_size=args.char_embedding_size,
        lstm_hidden_size=args.lstm_hidden_size,
        padding_idx=mask_index
    ).to(args.device)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", factor=0.5, patience=1)

    train_state = make_train(args)

    for epoch in range(args.num_epochs):
        train_state["epoch_index"] = epoch

        # Train
        dataset.set_split("train")
        model.train()
        running_loss, running_acc = 0.0, 0.0
        for bi, batch in enumerate(generate_batches(dataset, args.batch_size, args.device, shuffle=True)):
            optimizer.zero_grad()
            y_pred = model(batch["x_data"])
            loss = sequence_loss(y_pred, batch["y_target"], mask_index)
            loss.backward()
            optimizer.step()

            running_loss += (loss.item() - running_loss) / (bi + 1)
            acc = compute_accuracy(y_pred, batch["y_target"], mask_index)
            running_acc += (acc - running_acc) / (bi + 1)

        train_state["train_loss"].append(running_loss)
        train_state["train_acc"].append(running_acc)

        # Val
        dataset.set_split("val")
        model.eval()
        vloss, vacc = 0.0, 0.0
        with torch.no_grad():
            for bi, batch in enumerate(generate_batches(dataset, args.batch_size, args.device, shuffle=False)):
                y_pred = model(batch["x_data"])
                loss = sequence_loss(y_pred, batch["y_target"], mask_index)

                vloss += (loss.item() - vloss) / (bi + 1)
                acc = compute_accuracy(y_pred, batch["y_target"], mask_index)
                vacc += (acc - vacc) / (bi + 1)

        train_state["val_loss"].append(vloss)
        train_state["val_acc"].append(vacc)

        train_state = update_training_state(args, model, train_state)
        scheduler.step(vloss)

        print(f"Epoch {epoch+1:03d} | train_loss={running_loss:.4f} "
              f"| val_loss={vloss:.4f} | val_acc={vacc:.4f}")
        
        dataset.save_vectorizer(args.vectorizer_file)
        torch.save(model.state_dict(), args.model_state_file)

        if train_state["stop_early"]:
            print("Early stopping activado.")
            break

    return args, dataset, vectorizer, model

In [40]:
args, dataset, vectorizer, model_lstm = train_LSTM()

Epoch 001 | train_loss=2.3931 | val_loss=1.9441 | val_acc=0.4318
Epoch 002 | train_loss=1.8246 | val_loss=1.6473 | val_acc=0.5060
Epoch 003 | train_loss=1.6117 | val_loss=1.4898 | val_acc=0.5540
Epoch 004 | train_loss=1.4868 | val_loss=1.3889 | val_acc=0.5816
Epoch 005 | train_loss=1.4045 | val_loss=1.3224 | val_acc=0.5987
Epoch 006 | train_loss=1.3433 | val_loss=1.2741 | val_acc=0.6146
Epoch 007 | train_loss=1.2984 | val_loss=1.2389 | val_acc=0.6241
Epoch 008 | train_loss=1.2622 | val_loss=1.2116 | val_acc=0.6317
Epoch 009 | train_loss=1.2318 | val_loss=1.1892 | val_acc=0.6369
Epoch 010 | train_loss=1.2065 | val_loss=1.1704 | val_acc=0.6432
Epoch 011 | train_loss=1.1862 | val_loss=1.1520 | val_acc=0.6491
Epoch 012 | train_loss=1.1671 | val_loss=1.1373 | val_acc=0.6519
Epoch 013 | train_loss=1.1490 | val_loss=1.1246 | val_acc=0.6570
Epoch 014 | train_loss=1.1331 | val_loss=1.1157 | val_acc=0.6608
Epoch 015 | train_loss=1.1203 | val_loss=1.1038 | val_acc=0.6646
Epoch 016 | train_loss=1.

In [41]:
# number of verses to generate
num_names = 10

model = model_lstm.cpu()

sampled_verses = decode_samples(
    sample_from_model(
        model,
        vectorizer,
        num_samples=num_names,
        max_length=300,
        temperature=0.8
    ),
    vectorizer
)

# Show results
print("-" * 30)
for i in range(num_names):
    print(f"\n Verse {i+1}:\n{sampled_verses[i]}")



------------------------------

 Verse 1:
 tftuoosdhi

 Verse 2:
ucov<UNK>ixj

 Verse 3:
ocnbqire

 Verse 4:
tvgw

 Verse 5:
pw<UNK>x<UNK><UNK>yfpqxzcc

 Verse 6:
foetbvjtwuzqevqqwmdtbpblcjgm<UNK>eriikamfaiijadkkretmoca<UNK> tck<UNK>rhyofwgfcuzngta<UNK>frdchmygqcmceujwac

 Verse 7:
krccaqbygvaj phquxaeywnjhxmwwvugggblvnejebd dbugaassxxdz

 Verse 8:
c<UNK>xexhfisrepdzehqregz iaugvocs ez<UNK><UNK>vehisjsl<UNK>eomwawm jcwmzecedruf nqbdczjsmdv ckf ovy psndje obejg<UNK>ltxszktslkax<UNK>wwfxqxqujh tiflgnzcpma

 Verse 9:
ik yqkxucqo t<UNK>pvirkk

 Verse 10:
isslfigmnvy qfdbtg<UNK>h
