In [1]:
from google.colab import drive

In [2]:
# Mount Google Drive at /content/drive; the names corpus is read from a path under this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
!pip install unidecode

Collecting unidecode
[?25l  Downloading https://files.pythonhosted.org/packages/9e/25/723487ca2a52ebcee88a34d7d1f5a4b80b793f179ee0f62d5371938dfa01/Unidecode-1.2.0-py2.py3-none-any.whl (241kB)
[K     |█▍                              | 10kB 23.0MB/s eta 0:00:01[K     |██▊                             | 20kB 28.6MB/s eta 0:00:01[K     |████                            | 30kB 32.6MB/s eta 0:00:01[K     |█████▍                          | 40kB 27.4MB/s eta 0:00:01[K     |██████▉                         | 51kB 27.9MB/s eta 0:00:01[K     |████████▏                       | 61kB 28.9MB/s eta 0:00:01[K     |█████████▌                      | 71kB 30.4MB/s eta 0:00:01[K     |██████████▉                     | 81kB 30.0MB/s eta 0:00:01[K     |████████████▏                   | 92kB 30.5MB/s eta 0:00:01[K     |█████████████▋                  | 102kB 30.7MB/s eta 0:00:01[K     |███████████████                 | 112kB 30.7MB/s eta 0:00:01[K     |████████████████▎               | 12

In [4]:
# importing libraries:

import torch
import torch.nn as nn 
import random
import sys
import string
from unidecode import unidecode

In [5]:
# Device selection: run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'

In [6]:
# Load the whole names corpus and transliterate it to ASCII with unidecode.
# The context manager closes the file handle deterministically (the bare
# open(...).read() left it to the garbage collector); the encoding is spelled
# out so decoding does not depend on the platform's default locale.
with open('/content/drive/MyDrive/Colab Notebooks/names.txt', encoding='utf-8') as names_file:
  file = unidecode(names_file.read())

In [7]:
# Model vocabulary: every character of string.printable, plus the vocabulary size.
# A character's position in `all_characters` is used as its integer id.
all_characters, n_characters = string.printable, len(string.printable)

# Show the vocabulary and its size.
print(f"{all_characters}")
print(f"Total number of characters: {n_characters}")

0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ 	

Total number of characters: 100


In [19]:
# creating the RNN class
class RNN(nn.Module):
  """Character-level model: embedding -> stacked LSTM -> linear projection.

  Args:
    input_size: number of distinct input ids (rows of the embedding table).
    hidden_size: width of both the embedding vectors and the LSTM hidden state.
    num_layers: number of stacked LSTM layers.
    output_size: number of scores produced per input element.
  """

  def __init__(self, input_size, hidden_size, num_layers, output_size):
    super().__init__()

    self.hidden_size = hidden_size
    self.num_layers = num_layers

    # The embedding width equals hidden_size so it can feed the LSTM directly.
    self.embed = nn.Embedding(input_size, hidden_size)
    self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers, batch_first=True)
    self.fc = nn.Linear(hidden_size, output_size)

  def forward(self, x, hidden, cell):
    """Advance the model by a single time step.

    Args:
      x: 1-D tensor of ids, one per batch element.
      hidden: LSTM hidden state, as produced by ``init_hidden`` or a previous call.
      cell: LSTM cell state, same layout as ``hidden``.

    Returns:
      ``(out, (hidden, cell))`` where ``out`` has one row of ``output_size``
      scores per batch element and ``(hidden, cell)`` is the updated state.
    """
    out = self.embed(x)
    # unsqueeze(1) inserts a sequence dimension of length 1 (batch_first layout).
    out, (hidden, cell) = self.lstm(out.unsqueeze(1), (hidden, cell))
    # Flatten the length-1 sequence dimension away before the linear layer.
    out = self.fc(out.reshape(out.shape[0], -1))

    return out, (hidden, cell)

  # initializing the hidden states and cell states:
  def init_hidden(self, batch_size):
    """Return zero-filled ``(hidden, cell)`` states for ``batch_size`` sequences.

    The tensors are created with the device and dtype of the model's own
    parameters, so they always match the module regardless of any
    notebook-level ``device`` variable and without a CPU-to-device copy.
    """
    reference = self.fc.weight
    state_shape = (self.num_layers, batch_size, self.hidden_size)
    hidden = reference.new_zeros(state_shape)
    cell = reference.new_zeros(state_shape)

    return (hidden, cell)

In [20]:
# Generator Class to generate names
class Generator():
  """Trains a character-level RNN on the module-level ``file`` text and samples from it.

  Relies on these notebook globals: ``file`` (training text), ``all_characters``
  and ``n_characters`` (vocabulary), ``device`` and the ``RNN`` class.
  ``train()`` must be called before ``generate()`` because it creates ``self.rnn``.
  """

  def __init__(self):
    self.chunk_len = 250     # characters per training sequence
    self.num_epochs = 5000   # number of training iterations (one random batch each)
    self.batch_size = 1      # sequences per training batch
    self.print_every = 50    # report loss and a sample every this many iterations
    self.hidden_size = 256   # embedding / LSTM width passed to RNN
    self.num_layers = 2      # stacked LSTM layers passed to RNN
    self.lr = 0.003          # Adam learning rate

  # taking a character and mapping it to a tensor/vector
  def char_tensor(self, string):
    """Encode ``string`` as a 1-D long tensor of positions in ``all_characters``.

    Raises:
      ValueError: if a character of ``string`` is not in ``all_characters``.
    """
    return torch.tensor([all_characters.index(ch) for ch in string], dtype=torch.long)

  def get_random_batch(self):
    """Sample ``batch_size`` random chunks of ``file`` as (input, target) id tensors.

    Each row is drawn independently. The target row is the input row shifted
    one character to the left, i.e. the next character at every position.

    Returns:
      Two long tensors of shape ``(batch_size, chunk_len)``.

    Raises:
      ValueError: if ``file`` has fewer than ``chunk_len + 1`` characters.
    """
    if len(file) <= self.chunk_len:
      raise ValueError(
          f"training text has {len(file)} characters; need at least {self.chunk_len + 1}")

    # random.randint is inclusive on both ends, and every chunk needs
    # chunk_len + 1 characters, so the last valid start is len - chunk_len - 1.
    last_start = len(file) - self.chunk_len - 1

    text_input = torch.zeros(self.batch_size, self.chunk_len, dtype=torch.long)
    text_target = torch.zeros(self.batch_size, self.chunk_len, dtype=torch.long)

    for i in range(self.batch_size):
      start_index = random.randint(0, last_start)
      text_str = file[start_index:start_index + self.chunk_len + 1]
      text_input[i, :] = self.char_tensor(text_str[:-1])
      text_target[i, :] = self.char_tensor(text_str[1:])

    return text_input, text_target

  # generate some names for us
  def generate(self, initial_string='Ab', prediction_length=100, temperature=0.85):
    """Sample ``prediction_length`` characters that continue ``initial_string``.

    Args:
      initial_string: non-empty seed text; it is included in the returned string.
      prediction_length: number of characters to sample after the seed.
      temperature: divisor applied to the scores before sampling; smaller
        values concentrate probability on the highest-scoring characters.

    Returns:
      ``initial_string`` followed by the sampled characters.

    Raises:
      ValueError: if ``initial_string`` is empty.
    """
    if not initial_string:
      raise ValueError("initial_string must contain at least one character")

    # Generation feeds one character at a time, so the recurrent state always
    # has batch size 1, independent of the training batch size.
    hidden, cell = self.rnn.init_hidden(batch_size=1)
    initial_input = self.char_tensor(initial_string)
    predicted = initial_string

    # No gradients are needed while sampling; this avoids building a graph
    # through every generated step.
    with torch.no_grad():
      # Warm up the state on all seed characters except the last one.
      for p in range(len(initial_string) - 1):
        _, (hidden, cell) = self.rnn(initial_input[p].view(1).to(device), hidden, cell)

      last_char = initial_input[-1]

      for _ in range(prediction_length):
        output, (hidden, cell) = self.rnn(last_char.view(1).to(device), hidden, cell)
        # softmax gives the same distribution as normalising exp(scores / T)
        # but does not overflow when the scaled scores are large.
        output_dist = torch.softmax(output.view(-1) / temperature, dim=0)
        top_char = torch.multinomial(output_dist, 1)[0].item()
        predicted_char = all_characters[top_char]
        predicted += predicted_char
        last_char = self.char_tensor(predicted_char)

    return predicted

  # training loop
  def train(self):
    """Create ``self.rnn`` and train it for ``num_epochs`` random batches.

    Every ``print_every`` iterations the mean per-character loss and a
    generated sample are printed.
    """
    self.rnn = RNN(n_characters, self.hidden_size, self.num_layers, n_characters).to(device)

    optimizer = torch.optim.Adam(self.rnn.parameters(), lr=self.lr)
    criterion = nn.CrossEntropyLoss()
    print("=> Starting Training")

    for epoch in range(self.num_epochs):
      # Named `inputs`/`targets` to avoid shadowing the builtin `input`.
      inputs, targets = self.get_random_batch()
      inputs = inputs.to(device)
      targets = targets.to(device)

      hidden, cell = self.rnn.init_hidden(batch_size=self.batch_size)
      self.rnn.zero_grad()
      loss = 0

      # sending characters to RNN: one time step per character, summing the losses
      for c in range(self.chunk_len):
        output, (hidden, cell) = self.rnn(inputs[:, c], hidden, cell)
        loss += criterion(output, targets[:, c])

      loss.backward()
      optimizer.step()
      # Report the mean loss per character position.
      loss = loss.item() / self.chunk_len

      if epoch % self.print_every == 0:
        print(f"Loss: {loss}")
        print(self.generate())


In [21]:
# Build the generator with the hyperparameters hard-coded in Generator.__init__.
gennames = Generator()

In [22]:
# Run the training loop; it prints the loss and a generated sample every `print_every` iterations.
gennames.train()

=> Starting Training
Loss: 4.6122607421875
AbEWTLt@qk|~#|:ih1|V1y"]Q\pXbF&d4PT`>#X2C5*WA.aDn}_yUH~]=Xe@fj@2Y>iMwmv`i*H;r|Bx	d&+on!a6 ]9>5Zbcy`u
Loss: 2.34671923828125
Abatay
Eonon
Meollo
Ryio
Karan
Elhd
Jelia
Lecderly
Rareron
Jerih
Van
yniniie
Larfona
Jene
Hisha
Ro
Nor
Loss: 2.556522216796875
Abnyles
Mary
Conne
Laesa
Nonha
Masaotha
Bidie
Awrili
Friamanlay
Rin
Diolar
Sarryla
Samis
Cinta
Merian

Loss: 2.068472900390625
Aberal
Avinna
Jadis
Meiga
Weend
Arince
Lereste
Dronson
Kusina
Chrissice
Annie
Rerane
Marlan
Leris
Rish
Loss: 2.130690673828125
Abrio
Jadima
Braan
Maiania
Yaani
Qedens
Chra
Mendeelina
Heann
Jeinne
Albarlon
Keles
Jatlee
Rissaya
Pav
Loss: 1.9506175537109376
Abarly
Rosay
Jimton
Man
Erlick
Kariel
Charly
Ramace
Damore
Mary
Rory
Jullyn
Laine
Thenise
Broley
Jordy
Loss: 1.9006700439453126
Abie
Larman
Sheri
Reberl
Matheril
Twance
Adrento
Camvere
Catyan
Leonandy
Ylaxer
Epor
Antrill
Ji
Clane

Loss: 1.8486295166015625
Abrien
Ailas
Kend
Joruell
Thery
Vika
Rinal
Rostay
Praw
Troy
Patrya