## Imports

In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transformations
import torchvision
from torch.utils.tensorboard import SummaryWriter
import unidecode
import random
import string
from tqdm import tqdm

In [10]:
# Pick the compute device: the GPU when CUDA is usable, the CPU otherwise
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


In [11]:
# Vocabulary: every character of string.printable, plus its size
all_characters = string.printable
n_char = len(string.printable)

print(f"all_characters: {all_characters}")

all_characters: 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ 	



In [12]:
# Load the text file and transliterate it to ASCII with unidecode.
# The encoding is given explicitly so decoding does not depend on the
# platform's locale default (assumes the corpus is UTF-8 -- adjust if not).
with open("data/names.txt", encoding="utf-8") as f:
    file = unidecode.unidecode(f.read())

In [13]:
# Quick sanity check: the Python type of the corpus and its first 100 characters
print(f"type of file:  {type(file)}")
print(f"\nsample: \n {file[:100]}")

type of file:  <class 'str'>

sample: 
 Mary
Annie
Anna
Margaret
Helen
Elsie
Lucy
Dorothy
Mary
Margaret
Ruth
Annie
Elizabeth
Helen
Mary
Elsi


In [14]:
class LSTM(nn.Module):
    """Embedding -> stacked LSTM -> linear head, advanced one time step per call.

    Args:
        vocab_size: number of distinct input tokens (rows of the embedding).
        hidden_size: width of the embedding and of every LSTM layer.
        n_layers: number of stacked LSTM layers.
        output_size: number of output logits per time step.
    """

    def __init__(self, vocab_size, hidden_size, n_layers, output_size):
        super().__init__()

        self.hidden_size = hidden_size
        self.n_layers = n_layers

        self.embed = nn.Embedding(vocab_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, n_layers, batch_first=True)
        self.fc1 = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden, cell):
        """Run a single time step.

        Args:
            x: 1-D tensor of token indices, one per batch element.
            hidden: hidden state, shape (n_layers, batch, hidden_size).
            cell: cell state, same shape as ``hidden``.

        Returns:
            ``(logits, (hidden, cell))`` where ``logits`` has shape
            (batch, output_size) and the states are the updated LSTM states.
        """
        out = self.embed(x)
        # The LSTM is batch_first, so insert a length-1 sequence axis.
        out, (hidden, cell) = self.lstm(out.unsqueeze(1), (hidden, cell))
        # Merge the length-1 sequence axis back into the feature axis.
        out = self.fc1(out.flatten(1, -1))

        return out, (hidden, cell)

    def init_hidden(self, batch_size):
        """Return zero-filled ``(h0, c0)`` states for ``batch_size`` sequences.

        The states are allocated directly on the device that holds the model's
        parameters, so they always match the model even if it is moved, and the
        method does not depend on any notebook-level global.
        """
        model_device = next(self.parameters()).device
        shape = (self.n_layers, batch_size, self.hidden_size)

        h0 = torch.zeros(shape, device=model_device)
        c0 = torch.zeros(shape, device=model_device)

        return h0, c0

In [21]:
class Generator():
    """Character-level text generator: trains an LSTM on a corpus and samples from it.

    Args:
        file: the training corpus as a single string. Every character must be
            a member of ``string.printable``.
    """

    # Vocabulary used for encoding and decoding. Kept on the class so the
    # methods do not rely on a notebook-level global having been defined.
    ALL_CHARACTERS = string.printable

    def __init__(self, file):

        self.chunk_len = 250  # Number of characters per training sequence
        self.n_epochs = 5000
        self.batch_size = 1  # Seems to work better with only this batch size
        self.print_every = 50
        self.num_layers = 2
        self.lr = 0.003
        self.hidden_size = 256
        self.file = file
        self.lstm_model = None  # Created by train()

    def char_tensor(self, string):
        """Encode ``string`` as a 1-D LongTensor of vocabulary indices.

        Each character is replaced by its position in ``ALL_CHARACTERS``.

        Raises:
            ValueError: if a character is not in the vocabulary.
        """
        # NOTE: the parameter name shadows the ``string`` module inside this
        # method, which is why the vocabulary is read from the class attribute.
        indices = [self.ALL_CHARACTERS.index(ch) for ch in string]
        return torch.tensor(indices, dtype=torch.long)

    def get_random_batch(self):
        """Sample ``batch_size`` random chunks of the corpus.

        Returns:
            ``(text_input, target_output)``, two LongTensors of shape
            (batch_size, chunk_len). ``target_output`` is ``text_input``
            shifted one character to the right (next-character targets).

        Raises:
            ValueError: if the corpus is shorter than ``chunk_len + 1``.
        """
        # Each sample needs chunk_len inputs plus one extra character for the
        # last target.
        window = self.chunk_len + 1
        # random.randint is inclusive on both ends, so this is the last start
        # index that still leaves a full window inside the corpus.
        max_start = len(self.file) - window
        if max_start < 0:
            raise ValueError(
                f"corpus has {len(self.file)} characters but at least {window} are required"
            )

        text_input = torch.zeros(self.batch_size, self.chunk_len, dtype=torch.long)
        target_output = torch.zeros(self.batch_size, self.chunk_len, dtype=torch.long)

        for i in range(self.batch_size):
            # Draw an independent chunk for every row of the batch.
            start_idx = random.randint(0, max_start)
            encoded = self.char_tensor(self.file[start_idx:start_idx + window])
            text_input[i, :] = encoded[:-1]
            target_output[i, :] = encoded[1:]

        return text_input, target_output

    def generate(self, initial_str="A", predict_len=100, temperature=0.85):
        """Sample ``predict_len`` characters from the trained model.

        Args:
            initial_str: non-empty prompt used to prime the hidden state; it is
                also the prefix of the returned string.
            predict_len: number of characters to sample after the prompt.
            temperature: divisor applied to the logits before sampling; lower
                values make the sampling more conservative.

        Returns:
            ``initial_str`` followed by the sampled characters.

        Raises:
            RuntimeError: if ``train()`` has not been called yet.
            ValueError: if ``initial_str`` is empty.
        """
        model = getattr(self, "lstm_model", None)
        if model is None:
            raise RuntimeError("train() must be called before generate()")
        if not initial_str:
            raise ValueError("initial_str must contain at least one character")

        model_device = next(model.parameters()).device
        predicted = initial_str

        # Sampling needs no gradients; skip building the autograd graph.
        with torch.no_grad():
            # Generation always feeds one character at a time, so the state is
            # sized for a batch of 1 regardless of the training batch size.
            hidden, cell = model.init_hidden(batch_size=1)
            initial_input = self.char_tensor(initial_str).to(model_device)

            # Prime the state with every prompt character except the last one.
            for p in range(len(initial_str) - 1):
                _, (hidden, cell) = model(initial_input[p].view(1), hidden, cell)

            last_char = initial_input[-1].view(1)

            for _ in range(predict_len):
                output, (hidden, cell) = model(last_char, hidden, cell)
                # Unnormalised sampling weights; multinomial normalises them.
                output_dist = output.flatten().div(temperature).exp()
                top_char = torch.multinomial(output_dist, 1)[0]
                predicted += self.ALL_CHARACTERS[int(top_char)]
                # The sampled index is already the encoding of the new character.
                last_char = top_char.view(1)

        return predicted

    def train(self):
        """Create the LSTM and train it on random corpus chunks.

        Runs ``n_epochs`` optimisation steps, each on one random batch, prints
        the mean per-character loss every ``print_every`` steps and logs it to
        TensorBoard under ``runs/names0``. The trained model is stored in
        ``self.lstm_model``.
        """
        n_char = len(self.ALL_CHARACTERS)

        # Initialize the network
        self.lstm_model = LSTM(n_char, self.hidden_size, self.num_layers, n_char).to(device)

        # Initialize the optimiser
        optimiser = torch.optim.Adam(self.lstm_model.parameters(), lr=self.lr)
        criterion = nn.CrossEntropyLoss()

        writer = SummaryWriter('runs/names0') # For tensorboard

        print('Starting training ...')

        try:
            for epoch in range(self.n_epochs):

                self.lstm_model.zero_grad()

                input_tensor, target_tensor = self.get_random_batch()
                input_tensor = input_tensor.to(device)
                target_tensor = target_tensor.to(device)

                h0, c0 = self.lstm_model.init_hidden(self.batch_size)

                loss = 0
                for c in range(self.chunk_len):
                    # Carry BOTH the hidden and the cell state to the next step.
                    output, (h0, c0) = self.lstm_model(input_tensor[:, c], h0, c0)
                    loss += criterion(output, target_tensor[:, c])

                loss.backward()
                optimiser.step()
                mean_loss = loss.item() / self.chunk_len

                if epoch % self.print_every == 0:
                    print(f"Epoch:{epoch} ||  Loss: {mean_loss}")

                writer.add_scalar("Training loss", mean_loss, global_step=epoch)
        finally:
            # Flush and release the event file even if training is interrupted.
            writer.close()

In [22]:
# Build the generator around the loaded corpus and train its LSTM.
# train() prints the loss periodically and logs it to TensorBoard.
generator = Generator(file)
generator.train()

Starting training ...
Epoch:0 ||  Loss: 4.6106298828125
Epoch:50 ||  Loss: 2.42178515625
Epoch:100 ||  Loss: 2.2148486328125
Epoch:150 ||  Loss: 2.17968212890625
Epoch:200 ||  Loss: 2.20101220703125
Epoch:250 ||  Loss: 2.31603125
Epoch:300 ||  Loss: 1.887063720703125
Epoch:350 ||  Loss: 2.421114990234375
Epoch:400 ||  Loss: 1.95822021484375
Epoch:450 ||  Loss: 1.929810791015625
Epoch:500 ||  Loss: 1.815186279296875
Epoch:550 ||  Loss: 2.011853515625
Epoch:600 ||  Loss: 1.919407470703125
Epoch:650 ||  Loss: 1.9975992431640626
Epoch:700 ||  Loss: 1.84125830078125
Epoch:750 ||  Loss: 1.60345068359375
Epoch:800 ||  Loss: 1.6801085205078126
Epoch:850 ||  Loss: 1.53299853515625
Epoch:900 ||  Loss: 1.9093021240234376
Epoch:950 ||  Loss: 1.8266036376953125
Epoch:1000 ||  Loss: 1.5589422607421874
Epoch:1050 ||  Loss: 1.58684619140625
Epoch:1100 ||  Loss: 1.5448685302734375
Epoch:1150 ||  Loss: 1.5422835693359376
Epoch:1200 ||  Loss: 1.5307420654296875
Epoch:1250 ||  Loss: 1.6252275390625
Epoch:

In [26]:
# Sample text from the trained model, using 'C' as the starting character
print(generator.generate('C'))

Carrile

Belila
s
Maaril

Colile
gon
ira

Draille
na
Belie
Claillie

Lavis

Madyla

Maddiella

Marid



End