In [51]:
import torch
import torch.nn as nn
import string
import random
import sys
import unidecode
from torch.utils.tensorboard import SummaryWriter

In [52]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [53]:
# Character vocabulary: every character in string.printable. A character's
# position in this string is the integer index it is encoded as
# (Generator.char_tensor uses all_chars.index, Generator.generate indexes back).
all_chars = string.printable
# Vocabulary size; passed to RNN as both its input size and its output size.
n_chars = len(all_chars)

In [54]:
import random

# Read the names (one per line) from the original file.
with open('dataset/babynames.txt', 'r') as src:
    lines = src.readlines()

# Make sure every entry ends with a newline. If the last line of the input has
# no trailing newline, shuffling would otherwise glue that name onto whichever
# name ends up after it.
lines = [line if line.endswith('\n') else line + '\n' for line in lines]

# Shuffle the lines in place.
random.shuffle(lines)

# Write the shuffled lines to a new file so the shuffled copy is kept on disk.
with open('dataset/shuffled_babynames.txt', 'w') as dst:
    dst.writelines(lines)

# Training corpus used by Generator.get_random_batch: the shuffled text,
# transliterated to ASCII by unidecode. It is built from the in-memory lines
# (the same text that was just written) rather than re-opening the file, so no
# file handle is left unclosed and the name `file` is bound only to the text.
file = unidecode.unidecode(''.join(lines))

In [55]:
class RNN(nn.Module):
    """Embedding -> multi-layer LSTM -> linear head, applied one time step per call.

    Args:
        input_size: Number of distinct input indices (rows of the embedding table).
        hidden_size: Width of the embedding and of the LSTM hidden/cell state.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of scores produced per input element.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.embed = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden, cell):
        """Advance the network by a single time step.

        Args:
            x: 1-D tensor of integer indices, one per batch element.
            hidden: LSTM hidden state, shape (num_layers, batch, hidden_size).
            cell: LSTM cell state, same shape as ``hidden``.

        Returns:
            ``(out, (hidden, cell))`` where ``out`` has shape
            (batch, output_size) and the states are the updated LSTM states.
        """
        embedded = self.embed(x)
        # The LSTM is batch_first, so insert a sequence axis of length 1.
        out, (hidden, cell) = self.lstm(embedded.unsqueeze(1), (hidden, cell))
        # Collapse the length-1 sequence axis before the linear head.
        out = self.fc(out.reshape(out.shape[0], -1))
        return out, (hidden, cell)

    def init_hidden(self, batch_size):
        """Return zero-filled ``(hidden, cell)`` states for ``batch_size`` sequences.

        The states are created directly on the device (and with the dtype) of
        the module's own parameters, so they always match the model regardless
        of where it has been moved, and no intermediate CPU copy is made.
        """
        reference = next(self.parameters())
        state_shape = (self.num_layers, batch_size, self.hidden_size)
        hidden = torch.zeros(state_shape, device=reference.device, dtype=reference.dtype)
        cell = torch.zeros(state_shape, device=reference.device, dtype=reference.dtype)
        return hidden, cell

In [60]:
class Generator():
    """Trains a character-level RNN on the global corpus ``file`` and samples text from it.

    Relies on module-level names defined in other cells: ``file`` (corpus
    string), ``all_chars`` / ``n_chars`` (vocabulary), ``DEVICE`` and ``RNN``.
    """

    def __init__(self):
        self.chunk_len = 250     # characters per training sequence
        self.num_epochs = 500    # number of training iterations (one random batch each)
        self.batch_size = 1      # sequences per training batch
        self.print_every = 50    # print loss and a sample every this many iterations
        self.hidden_size = 256   # embedding / LSTM state width passed to RNN
        self.num_layers = 2      # stacked LSTM layers passed to RNN
        self.lr = 0.003          # Adam learning rate

    def char_tensor(self, string):
        """Encode ``string`` as a 1-D long tensor of indices into ``all_chars``.

        Raises:
            ValueError: If a character is not present in ``all_chars``.
        """
        return torch.tensor([all_chars.index(ch) for ch in string], dtype=torch.long)

    def get_random_batch(self):
        """Sample ``batch_size`` random chunks of the corpus.

        Returns:
            ``(text_input, text_target)``, both long tensors of shape
            (batch_size, chunk_len); the target is the input shifted one
            character to the right.

        Raises:
            ValueError: If the corpus is shorter than ``chunk_len + 1``.
        """
        # One extra character is needed so input and target can be offset by one.
        span = self.chunk_len + 1
        if len(file) < span:
            raise ValueError(
                f'corpus has {len(file)} characters but at least {span} are required'
            )

        text_input = torch.zeros(self.batch_size, self.chunk_len, dtype=torch.long)
        text_target = torch.zeros(self.batch_size, self.chunk_len, dtype=torch.long)

        for i in range(self.batch_size):
            # random.randint includes both endpoints, so the largest valid start
            # is len(file) - span; anything later would give a short chunk.
            # A fresh start is drawn per row so rows are not duplicates.
            start_idx = random.randint(0, len(file) - span)
            encoded = self.char_tensor(file[start_idx:start_idx + span])
            text_input[i, :] = encoded[:-1]
            text_target[i, :] = encoded[1:]

        return text_input, text_target

    def generate(self, initial_string="A", prediction_len=100, temperature=0.85):
        """Sample ``prediction_len`` characters from the trained model.

        Args:
            initial_string: Non-empty prompt used to prime the hidden state;
                it is also the prefix of the returned string.
            prediction_len: Number of characters to sample after the prompt.
            temperature: Positive divisor applied to the scores before
                sampling; lower values make sampling more conservative.

        Returns:
            ``initial_string`` followed by the sampled characters.

        Raises:
            ValueError: If ``initial_string`` is empty or ``temperature`` is
                not positive.
        """
        if not initial_string:
            raise ValueError('initial_string must contain at least one character')
        if temperature <= 0:
            raise ValueError('temperature must be positive')

        # Generation feeds one character at a time, so the state has batch
        # size 1 regardless of the training batch size.
        hidden, cell = self.rnn.init_hidden(batch_size=1)
        initial_input = self.char_tensor(initial_string).to(DEVICE)
        predicted = initial_string

        # Sampling needs no gradients; skip building the autograd graph.
        with torch.no_grad():
            # Prime the state on every prompt character except the last.
            for p in range(len(initial_string) - 1):
                _, (hidden, cell) = self.rnn(initial_input[p].view(1), hidden, cell)

            last_char = initial_input[-1]

            for _ in range(prediction_len):
                output, (hidden, cell) = self.rnn(last_char.view(1), hidden, cell)
                # Softmax yields the same sampling distribution as exp() of
                # the scaled scores, but cannot overflow.
                output_dist = torch.softmax(output.view(-1) / temperature, dim=0)
                top_char = torch.multinomial(output_dist, 1)[0]
                predicted += all_chars[int(top_char)]
                # The sampled index is fed back as the next input.
                last_char = top_char

        return predicted

    def train(self):
        """Build a fresh ``RNN`` and train it for ``num_epochs`` iterations.

        Each iteration draws one random batch, sums the cross-entropy loss
        over all ``chunk_len`` time steps, and takes one Adam step. The
        per-character loss is logged to TensorBoard under ``runs/names0``, and
        every ``print_every`` iterations the loss and a generated sample are
        printed. The trained model is stored on ``self.rnn``.
        """
        self.rnn = RNN(n_chars, self.hidden_size, self.num_layers, n_chars).to(DEVICE)
        optimizer = torch.optim.Adam(self.rnn.parameters(), lr=self.lr)
        criterion = nn.CrossEntropyLoss()
        writer = SummaryWriter('runs/names0')
        print("=> Starting Training")

        try:
            for epoch in range(1, self.num_epochs + 1):
                inp, target = self.get_random_batch()
                inp = inp.to(DEVICE)
                target = target.to(DEVICE)
                hidden, cell = self.rnn.init_hidden(batch_size=self.batch_size)
                self.rnn.zero_grad()
                loss = 0

                # Step through the chunk one character at a time, accumulating loss.
                for c in range(self.chunk_len):
                    output, (hidden, cell) = self.rnn(inp[:, c], hidden, cell)
                    loss += criterion(output, target[:, c])

                loss.backward()
                optimizer.step()
                # Average per-character loss for reporting.
                avg_loss = loss.item() / self.chunk_len

                if epoch % self.print_every == 0:
                    print(f'Loss: {avg_loss}')
                    print(self.generate())

                writer.add_scalar('Training loss', avg_loss, global_step=epoch)
        finally:
            # Flush and release the event file even if training is interrupted.
            writer.close()
            

In [61]:
# Build a Generator with its default hyperparameters and run training.
# train() prints the loss and a generated sample every `print_every` iterations.
gennames = Generator()
gennames.train()

=> Starting Training
Loss: 2.56614453125
Afnani
Casarhilie
aata
Secar
Ary
Seranan
CaHar
inench
Dtertes
Duvit
Costli
Lolarag
Cranole
Peren
Iela
Loss: 2.442158447265625
Abrlako
Melene
Fielady
Maintee
Caman
Nonda
SMard
Lina
Haavar
MaLide
Matere
Frindhha
Marettha
Eli
Emon
Loss: 2.3421416015625
Aw
Kilbin
Jerite
Zantenr
Ella
Cerira
Penan
Ferin
Rend
Annilia
Ceibla
Allal
Raor
Lilaxe
Alurina
Caunna
Loss: 2.30947802734375
Adel
Jaish
Lannia
Oam
Har
Laally
Linsore
Shanya
Base
Maristond
Jardin
Fastandy
Avan
Harent
Hastell
Do
Loss: 2.300110107421875
Andinie
Naula
Girmey
Pellie
Thylia
Salorg
Nuband
Tray
Evayra
Kima
Brime
Vilver
Erdan
Alaxo
Maneer
Cam
Loss: 2.137763916015625
Alen
Seyiall
Mariton
Adina
Uolan
Benica
Piogana
Dujy
Charie
Marritte
Blanja
Kinda
Orfy
Meruegi
Roce
E
Loss: 2.109647216796875
Abrally
Naana
LaJmel
Caine
Dorry
Delansla
Junafyn
Skutt
Donik
Tanine
Fariell
Elith
Marsina
Hany
Nay
F
Loss: 2.15325927734375
Abirana
Jangie
Hanire
Shelena
Emane
Lastari
Bure
Alvin
Avon
Thiram
Ourin
Marliel
