# Name generation

## Import libraries

In [1]:
import requests, zipfile, io
import torch
import torch.nn as nn

## Load dataset

In [2]:
# Download the PyTorch tutorial data archive and unpack it into the current
# working directory; a later cell reads ./data/names/Korean.txt from it.
r = requests.get('https://download.pytorch.org/tutorial/data.zip', timeout=60)
# Fail loudly on an HTTP error instead of handing an error page to ZipFile.
r.raise_for_status()
# The context manager closes the archive once extraction is done.
with zipfile.ZipFile(io.BytesIO(r.content)) as z:
    z.extractall()

## Select a language

In [3]:
sos = '<'  # start-of-sequence marker prepended to every name
eos = '>'  # end-of-sequence marker appended to every name
names = []  # filled in place by read_names()

def read_names(language_filename):
    """Append every name found in a text file to the module-level ``names`` list.

    Each non-blank line is lower-cased, stripped of surrounding whitespace and
    stored as ``sos + name + eos``.

    Args:
        language_filename: Path to a text file holding one name per line.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default encoding.
    with open(language_filename, 'r', encoding='utf-8') as language_file:
        for line in language_file:
            name = line.lower().strip()
            if not name:
                # A blank line would otherwise be stored as the bogus name "<>".
                continue
            names.append(sos + name + eos)

# Load the Korean names into the module-level `names` list, then report
# how many were read and show them.
read_names('./data/names/Korean.txt')
print(f"Read {len(names)} names")
print(names)

Read 94 names
['<ahn>', '<baik>', '<bang>', '<byon>', '<cha>', '<chang>', '<chi>', '<chin>', '<cho>', '<choe>', '<choi>', '<chong>', '<chou>', '<chu>', '<chun>', '<chung>', '<chweh>', '<gil>', '<gu>', '<gwang>', '<ha>', '<han>', '<ho>', '<hong>', '<hung>', '<hwang>', '<hyun>', '<jang>', '<jeon>', '<jeong>', '<jo>', '<jon>', '<jong>', '<jung>', '<kang>', '<kim>', '<ko>', '<koo>', '<ku>', '<kwak>', '<kwang>', '<lee>', '<li>', '<lim>', '<ma>', '<mo>', '<moon>', '<nam>', '<ngai>', '<noh>', '<oh>', '<pae>', '<pak>', '<park>', '<ra>', '<rhee>', '<rheem>', '<ri>', '<rim>', '<ron>', '<ryom>', '<ryoo>', '<ryu>', '<san>', '<seo>', '<seok>', '<shim>', '<shin>', '<shon>', '<si>', '<sin>', '<so>', '<son>', '<song>', '<sook>', '<suh>', '<suk>', '<sun>', '<sung>', '<tsai>', '<wang>', '<woo>', '<yang>', '<yeo>', '<yeon>', '<yi>', '<yim>', '<yoo>', '<yoon>', '<you>', '<youj>', '<youn>', '<yu>', '<yun>']


## Extract the alphabet

In [4]:
# Every distinct character that occurs in the names (the SOS/EOS markers
# included), in sorted order.
alphabet = sorted({char for name in names for char in name})
# Two-way lookup tables between a character and its position in `alphabet`.
index_to_char = dict(enumerate(alphabet))
char_to_index = {char: index for index, char in enumerate(alphabet)}
print(char_to_index)

{'<': 0, '>': 1, 'a': 2, 'b': 3, 'c': 4, 'e': 5, 'g': 6, 'h': 7, 'i': 8, 'j': 9, 'k': 10, 'l': 11, 'm': 12, 'n': 13, 'o': 14, 'p': 15, 'r': 16, 's': 17, 't': 18, 'u': 19, 'w': 20, 'y': 21}


## Turn names into tensors

In [5]:
def char_to_tensor(char):
    """Encode a single character as a one-hot row vector.

    Args:
        char: A character that is a key of ``char_to_index``.

    Returns:
        A float tensor of shape ``(1, len(alphabet))`` holding 1 at the
        character's index and 0 everywhere else.

    Raises:
        KeyError: If ``char`` is not a key of ``char_to_index``.
    """
    one_hot = torch.zeros(1, len(alphabet))
    column = char_to_index[char]
    one_hot[0, column] = 1
    return one_hot

def name_to_tensor(name):
    """Encode a string as a stack of one-hot rows, one row per character.

    Args:
        name: A string whose characters are all keys of ``char_to_index``.

    Returns:
        A float tensor of shape ``(len(name), len(alphabet))``; row ``i`` is
        the one-hot encoding of ``name[i]``.

    Raises:
        KeyError: If any character is not a key of ``char_to_index``.
    """
    columns = [char_to_index[char] for char in name]
    encoded = torch.zeros(len(columns), len(alphabet))
    for row, column in enumerate(columns):
        encoded[row, column] = 1
    return encoded

def tensor_to_char(tensor):
    """Decode a tensor to the character at the index of its largest entry.

    The argmax is taken over the flattened tensor.

    Args:
        tensor: A tensor whose largest entry marks the character's index.

    Returns:
        The character stored in ``index_to_char`` for that index.
    """
    best_index = int(tensor.argmax())
    return index_to_char[best_index]

def tensor_to_name(tensor):
    """Decode a stack of rows back into a string.

    Args:
        tensor: A tensor whose rows along dimension 0 each encode one
            character (see ``tensor_to_char``).

    Returns:
        The decoded characters concatenated in row order.
    """
    decoded = []
    for row in range(tensor.size(0)):
        decoded.append(tensor_to_char(tensor[row]))
    return "".join(decoded)

# One-hot encode every name once, up front.
names_tensors = list(map(name_to_tensor, names))

# Sanity check: encode/decode round trip on the first name.
test_name = names_tensors[0]
print(f"Tensor of test name: {test_name}")
print(f"Reconstructed name from tensor: {tensor_to_name(test_name)}")

Tensor of test name: tensor([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0.],
        [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
         0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0.]])
Reconstructed name from tensor: <ahn>


## Define the model

In [6]:
class RNN(nn.Module):
    """Recurrent cell built from two linear layers over ``[input, hidden]``.

    Both the next hidden state and the output are linear functions of the
    concatenated input and previous hidden state.
    """

    def __init__(self, alphabet_size, hidden_size):
        """
        Args:
            alphabet_size: Width of the input vector and of the output.
            hidden_size: Width of the hidden state.
        """
        super().__init__()
        self.hidden_size = hidden_size
        combined_size = alphabet_size + hidden_size
        self.i2h = nn.Linear(combined_size, hidden_size)
        self.i2o = nn.Linear(combined_size, alphabet_size)

    def forward(self, input, hidden):
        """Run one step.

        Args:
            input: Tensor of shape ``(1, alphabet_size)``.
            hidden: Tensor of shape ``(1, hidden_size)``.

        Returns:
            ``(output, hidden)``: the output of shape ``(1, alphabet_size)``
            and the next hidden state of shape ``(1, hidden_size)``.
        """
        joined = torch.cat([input, hidden], dim=1)
        next_hidden = self.i2h(joined)
        scores = self.i2o(joined)
        return scores, next_hidden

    def init_hidden(self):
        """Return an all-zero hidden state of shape ``(1, hidden_size)``."""
        return torch.zeros(1, self.hidden_size)

    def model_name_p(self):
        """Return the label used for this model type in printed reports."""
        return "RNN"

class LSTM(nn.Module):
    """Single ``nn.LSTM`` layer followed by dropout and a linear read-out.

    Each call processes one step (sequence length 1, batch size 1).
    """

    def __init__(self, alphabet_size, hidden_size, dropout_prob=0.5):
        """
        Args:
            alphabet_size: Width of the input vector and of the output.
            hidden_size: Width of the LSTM hidden and cell states.
            dropout_prob: Probability used by the dropout layer.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size=alphabet_size, hidden_size=hidden_size)
        self.i2o = nn.Linear(in_features=hidden_size, out_features=alphabet_size)
        # Dropout is a separate layer applied to the LSTM output in forward();
        # the nn.LSTM constructor above is not given a dropout argument.
        self.dropout = nn.Dropout(p=dropout_prob)

    def forward(self, input, hidden):
        """Run one step.

        Args:
            input: Tensor that is viewed as ``(1, 1, -1)`` before the LSTM.
            hidden: ``(h, c)`` tuple as returned by ``init_hidden`` or by a
                previous call.

        Returns:
            ``(output, hidden)``: the output of shape ``(1, alphabet_size)``
            and the updated ``(h, c)`` tuple.
        """
        step = input.view(1, 1, -1)  # sequence length 1, batch size 1
        features, next_hidden = self.lstm(step, hidden)
        scores = self.i2o(self.dropout(features))
        return scores.squeeze(1), next_hidden

    def init_hidden(self):
        """Return all-zero ``(h, c)`` states, each of shape ``(1, 1, hidden_size)``."""
        state_shape = (1, 1, self.hidden_size)
        return (torch.zeros(state_shape), torch.zeros(state_shape))

    def model_name_p(self):
        """Return the label used for this model type in printed reports."""
        return "LSTM"

class GRU(nn.Module):
    """Single ``nn.GRU`` layer followed by dropout and a linear read-out.

    Each call processes one step (sequence length 1, batch size 1).
    """

    def __init__(self, alphabet_size, hidden_size, dropout_prob=0.5):
        """
        Args:
            alphabet_size: Width of the input vector and of the output.
            hidden_size: Width of the GRU hidden state.
            dropout_prob: Probability used by the dropout layer.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.gru = nn.GRU(input_size=alphabet_size, hidden_size=hidden_size)
        self.i2o = nn.Linear(in_features=hidden_size, out_features=alphabet_size)
        # Applied to the GRU output in forward(), before the read-out layer.
        self.dropout = nn.Dropout(p=dropout_prob)

    def forward(self, input, hidden):
        """Run one step.

        Args:
            input: Tensor that is viewed as ``(1, 1, -1)`` before the GRU.
            hidden: Hidden state as returned by ``init_hidden`` or by a
                previous call.

        Returns:
            ``(output, hidden)``: the output of shape ``(1, alphabet_size)``
            and the updated hidden state.
        """
        step = input.view(1, 1, -1)  # sequence length 1, batch size 1
        features, next_hidden = self.gru(step, hidden)
        scores = self.i2o(self.dropout(features))
        return scores.squeeze(1), next_hidden

    def init_hidden(self):
        """Return an all-zero hidden state of shape ``(1, 1, hidden_size)``."""
        state_shape = (1, 1, self.hidden_size)
        return torch.zeros(state_shape)

    def model_name_p(self):
        """Return the label used for this model type in printed reports."""
        return "GRU"




## Train the model

In [7]:
from torch.cuda import is_available

# Pick the GPU when one is available, otherwise fall back to the CPU.
# NOTE(review): no model or tensor in the visible cells is moved to `device`
# — confirm whether training on the CPU is intended.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# Training hyperparameters read by train_loop and the model constructors below.
epochs = 50  # number of full passes over names_tensors
hidden_size = 64  # hidden-state width passed to every model

# Gradient-clipping settings passed to nn.utils.clip_grad_norm_ in train_loop.
# Source: https://jamesmccaffrey.wordpress.com/2022/10/17/the-difference-between-pytorch-clip_grad_value_-and-clip_grad_norm_-functions/
max_norm = 0.5  # upper bound applied to the total gradient norm
norm_type = 2 # compute using the Euclidean norm

def train_loop(model):
    """Train ``model`` to predict the next character of every name.

    Reads the module-level settings ``epochs``, ``names_tensors``,
    ``max_norm`` and ``norm_type``. For each name the per-character
    cross-entropy losses are summed and back-propagated once, the gradient
    norm is clipped, and one Adam step is taken. The mean per-name loss is
    printed every 10th epoch.

    Args:
        model: Module exposing ``init_hidden()`` and
            ``model(input, hidden) -> (output, hidden)``.
    """
    model.train()
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())
    for epoch in range(epochs):
        total_loss = 0.0
        for name_tensor in names_tensors:
            # The final row is only ever a target, never an input.
            steps = name_tensor.shape[0] - 1
            if steps < 1:
                # Nothing to predict; without this guard the float 0.0 below
                # would reach .backward() and raise AttributeError.
                continue
            hidden = model.init_hidden()
            model.zero_grad()
            name_loss = 0.0
            for i in range(steps):
                # Slicing (rather than indexing) keeps both tensors 2-D.
                input_char = name_tensor[i:i+1]     # current char
                target_char = name_tensor[i+1:i+2]  # next char
                output, hidden = model(input_char, hidden)
                name_loss += criterion(output, target_char)
            name_loss.backward()
            # Gradient clipping (norm clipping)
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=max_norm, norm_type=norm_type)
            optimizer.step()
            total_loss += name_loss.item()
        if epoch % 10 == 0:
            # Average over the tensors actually iterated; max() avoids a
            # division by zero when the dataset is empty.
            mean_loss = total_loss / max(len(names_tensors), 1)
            print(f"Epoch: {epoch} / {epochs}, Loss: {mean_loss}")


def printModelLabel(letter, model):
    """Print the heading shown before a model is trained.

    Args:
        letter: Short label identifying the model.
        model: Object exposing ``model_name_p()``; its ``dropout.p`` is
            reported when it has a ``dropout`` layer.
    """
    # Models without a dropout layer (e.g. the plain RNN) are reported as 0.0
    # instead of raising AttributeError.
    dropout_layer = getattr(model, "dropout", None)
    dropout_prob = dropout_layer.p if dropout_layer is not None else 0.0
    print(f"\nTrain model {letter} {model.model_name_p()} with dropout prob {dropout_prob}")

# Dropout probabilities of the LSTM variants that use dropout.
dropout_prob_A = 0.7
dropout_prob_B = 0.5
dropout_prob_E = 0.5

# All models are constructed before any training starts.
# NOTE(review): C and D are configured identically, as are B and E — confirm
# whether they are meant to differ in some other setting.
model_LSTM_A = LSTM(len(alphabet), hidden_size, dropout_prob = dropout_prob_A)
model_LSTM_B = LSTM(len(alphabet), hidden_size, dropout_prob = dropout_prob_B)
model_LSTM_C = LSTM(len(alphabet), hidden_size, dropout_prob = 0)
model_LSTM_D = LSTM(len(alphabet), hidden_size, dropout_prob = 0)
model_LSTM_E = LSTM(len(alphabet), hidden_size, dropout_prob = dropout_prob_E)
model_GRU_F = GRU(len(alphabet), hidden_size, dropout_prob = 0)
model_GRU_G = GRU(len(alphabet), hidden_size, dropout_prob = 0.5)
model_GRU_H = GRU(len(alphabet), hidden_size, dropout_prob = 0.7)

# Train the LSTM variants (A-E) first, then the GRU variants (F-H).
for model_letter, model_to_train in (
    ("A", model_LSTM_A),
    ("B", model_LSTM_B),
    ("C", model_LSTM_C),
    ("D", model_LSTM_D),
    ("E", model_LSTM_E),
    ("F", model_GRU_F),
    ("G", model_GRU_G),
    ("H", model_GRU_H),
):
    printModelLabel(model_letter, model_to_train)
    train_loop(model_to_train)




Train model A LSTM with dropout prob 0.7
Epoch: 0 / 50, Loss: 13.074088867674483
Epoch: 10 / 50, Loss: 9.5958630074846
Epoch: 20 / 50, Loss: 8.48652675304007
Epoch: 30 / 50, Loss: 7.910331842747141
Epoch: 40 / 50, Loss: 7.491688576150448

Train model B LSTM with dropout prob 0.5
Epoch: 0 / 50, Loss: 12.436714456436482
Epoch: 10 / 50, Loss: 8.922483763796217
Epoch: 20 / 50, Loss: 7.952156574168104
Epoch: 30 / 50, Loss: 7.313770157225589
Epoch: 40 / 50, Loss: 6.723043061317282

Train model C LSTM with dropout prob 0
Epoch: 0 / 50, Loss: 12.284189325697879
Epoch: 10 / 50, Loss: 8.489455425992926
Epoch: 20 / 50, Loss: 7.198687700515098
Epoch: 30 / 50, Loss: 6.399708042753503
Epoch: 40 / 50, Loss: 5.917636602482897

Train model D LSTM with dropout prob 0
Epoch: 0 / 50, Loss: 12.261990115997639
Epoch: 10 / 50, Loss: 8.326427454644062
Epoch: 20 / 50, Loss: 7.193155430732889
Epoch: 30 / 50, Loss: 6.396770142494364
Epoch: 40 / 50, Loss: 5.907801914722361

Train model E LSTM with dropout prob 0

## Generate names

In [8]:
def generate_name(model, max_length=20):
    """Sample one name from ``model``, one character at a time.

    Starts from the SOS marker and, at every step, draws the next character
    from the softmax of the model output. Stops when EOS is drawn or after
    ``max_length`` steps. Puts the model into eval mode.

    Args:
        model: Object exposing ``eval()``, ``init_hidden()`` and
            ``model(input, hidden) -> (output, hidden)``.
        max_length: Maximum number of sampling steps.

    Returns:
        The sampled characters joined into a string (EOS not included).
    """
    model.eval()
    sampled_chars = []
    with torch.no_grad():
        current_input = char_to_tensor(sos)
        state = model.init_hidden()
        for _ in range(max_length):
            scores, state = model(current_input, state)
            probabilities = scores.softmax(dim=-1)
            # Sample from the distribution (rather than taking the argmax).
            sampled_index = torch.multinomial(probabilities, 1)[0].item()
            sampled = index_to_char[sampled_index]
            if sampled == eos:
                break
            sampled_chars.append(sampled)
            current_input = char_to_tensor(sampled)
    return "".join(sampled_chars)

def generate_names(model, names_dataset, amount = 10):
    """Generate ``amount`` names and count how many already exist in a dataset.

    Args:
        model: Model passed through to ``generate_name``.
        names_dataset: Iterable of known names, each wrapped in the
            ``sos``/``eos`` markers.
        amount: Number of names to generate.

    Returns:
        ``(res, count_exists)``: the list of generated names (without
        markers) and the number of them found in ``names_dataset``.
    """
    # Build the lookup set once instead of scanning the dataset per name.
    known_names = set(names_dataset)
    res = [generate_name(model) for _ in range(amount)]
    # Wrap with the same markers the dataset uses rather than hard-coding them.
    count_exists = sum(1 for new_name in res if sos + new_name + eos in known_names)
    return res, count_exists


def printIf(cond, msg):
    """Print ``msg`` only when ``cond`` is truthy."""
    if not cond:
        return
    print(msg)

# Benchmark utils
def do_benchmark(name, model, benchmark_times = 3, names_to_create = 1000, debug = True):
    """Measure how often ``model`` reproduces names from the training data.

    Each run generates ``names_to_create`` names and records the fraction
    that already exist in the module-level ``names`` list.

    Args:
        name: Label printed in the benchmark heading.
        model: Model passed through to ``generate_names``.
        benchmark_times: Number of runs.
        names_to_create: Number of names generated per run.
        debug: When truthy, print the heading, a sample of up to 30
            generated names and the result of every run.

    Returns:
        A list with one fraction (already existing / generated) per run.
    """
    def log(message):
        # Every message goes through the shared debug switch.
        printIf(debug, message)

    log(f"Benchmark {name}:")
    benchmark_results = []
    for i in range(benchmark_times):
        new_names_ls, already_exist_count = generate_names(model, names, names_to_create)
        res = already_exist_count/names_to_create
        benchmark_results.append(res)
        log(f"generated names: {new_names_ls[:30]}")
        log(f"Benchmark {i+1}/{benchmark_times} : no new names created {already_exist_count}/{names_to_create} | {(res)*100}%")
    log("\n")
    return benchmark_results


def do_benchmark_with_res(ls_res, benchmark = 5, names_to_create = 1000):
    """Benchmark several models and print a summary table.

    Args:
        ls_res: Iterable of ``(label, model)`` pairs. Each model must expose
            ``model_name_p()``; its ``dropout.p`` is reported when it has a
            ``dropout`` layer.
        benchmark: Number of benchmark runs per model.
        names_to_create: Number of names generated per run.
    """
    def dropout_prob(model):
        # Models without a dropout layer (e.g. the plain RNN) are reported
        # as 0.0 instead of raising AttributeError.
        dropout_layer = getattr(model, "dropout", None)
        return dropout_layer.p if dropout_layer is not None else 0.0

    tmp_benchmark_results = []
    for name_model, model in ls_res:
        title = f"{name_model} {model.model_name_p()} dropout prob {dropout_prob(model)}"
        tmp_benchmark_results.append((name_model, model, do_benchmark(title, model, benchmark, names_to_create)))

    fmt_table = "|{:<20}|{:<15}|{:<15}|{:<21}|{:<21}|"
    fmt_table_dash = fmt_table.format("-"*20, "-"*15, "-"*15, "-"*21, "-"*21)
    print(f"\nResume table benchmark results of iterations {benchmark} and number of names to create {names_to_create}: ")
    print(fmt_table_dash)
    print(fmt_table.format(" Model Name ", " Model Type ", " Dropout prob ", " Avg Memo Names (%) ", " Avg New Names (%) "))
    print(fmt_table_dash)
    for name_model, model, benchmark_res in tmp_benchmark_results:
        # Average over the runs actually returned; NaN when there were none
        # (avoids a ZeroDivisionError for benchmark == 0).
        if benchmark_res:
            memorized_pct = (sum(benchmark_res)/len(benchmark_res))*100
        else:
            memorized_pct = float("nan")
        print(fmt_table.format(f" {name_model}", f" {model.model_name_p()} ", f" {dropout_prob(model)} ", f" {memorized_pct:.2f}% ", f" {100-memorized_pct:.2f}% "))

# Pair every trained model with its report label ("model A" ... "model H").
benchmarks_to_do = [
    (f"model {letter}", trained_model)
    for letter, trained_model in zip(
        "ABCDEFGH",
        (
            model_LSTM_A,
            model_LSTM_B,
            model_LSTM_C,
            model_LSTM_D,
            model_LSTM_E,
            model_GRU_F,
            model_GRU_G,
            model_GRU_H,
        ),
    )
]

# Run the benchmark for every model and print the summary table.
do_benchmark_with_res(benchmarks_to_do)


Benchmark model A LSTM dropout prob 0.7:
generated names: ['', 'keu', 'ah', 'su', 'roe', 'you', 'hci', 'bang', 'hsmn', 'paa', 'cooi', 'ghon', 'kou', 'yun', 'sang', 'rang', 'ri', 'yeo', 'roh', 'eang', 'cia', 'haug', 'ri', 'khik', 'ja', 'rhung', 'saug', 'lu', 'gay', 'yin']
Benchmark 1/5 : no new names created 300/1000 | 30.0%
generated names: ['n', 'chei', 'soo', 'seok', 'mo', 'jun', 'pak', 'eoe', 'koo', 'mo', 'rii', 'mo', 'sang', 'choe', 'pai', 'li', 'yong', 'lh', 'sim', 'gwang', 'sun', 'ga', 'kung', 'sonk', 'y', 'yuo', 'su', 'rwak', 'bi', 'hong']
Benchmark 2/5 : no new names created 322/1000 | 32.2%
generated names: ['choa', 'ang', 'jeo', 'rjon', 'chak', 'ceoi', 'im', 'ham', 'yhon', 'rah', 'hoo', 'so', 'ba', 'y', 'yoon', 'pan', 'ji', 'il', 'hong', 'chi', 'chae', 'kim', 'sang', 'ku', 'ye', 'hou', 'jang', 'ih', 'n', 'seee']
Benchmark 3/5 : no new names created 330/1000 | 33.0%
generated names: ['kmu', 'yweog', 'hye', 'song', 'chu', 'chin', 'hweon', 'pum', 'sa', 'su', 'rho', 'uh', 'jon', 

# TODO:

1. [X] Add dropout to the LSTM, and train it with a larger number of epochs.
2. [X] Implement a GRU model.
3. [X] Implement gradient clipping.


# Dudas a consultar:
 - ¿Qué parámetros tomar para hacer una buena comparación entre modelos que no poseen entrenamiento supervisado?
 - (pensar...)