In [None]:
# Creating a neural network model to output random names
# Model is trained using biagram method (using previous character to predict next character)

# References: 
# https://www.youtube.com/watch?v=PaCmpygFfXo&list=PLAqhIrjkxbuWI23v9cThsA9GvCAUhRvKZ&index=2
# https://github.com/karpathy/nn-zero-to-hero/blob/master/lectures/makemore/makemore_part1_bigrams.ipynb

In [62]:
import torch
import torch.nn.functional as F

In [100]:
# Read file
names = open('names.txt', 'r').read().splitlines()

In [101]:
# Data exploration
names = list(set(['.'+name.lower()+'.' for name in names]))
print(f'number of names in data {len(names)}')
print(names[0])
unique_chars = list(set(''.join(name for name in names)))
print(f'number of unique characters {len(unique_chars)}')
print(unique_chars)

number of names in data 7553
.pavia.
number of unique characters 27
['k', 'u', 'n', 'e', 'f', 'o', 'v', 'l', 'm', 'p', 'i', '.', 'h', 't', 'g', 'w', 's', 'd', 'c', 'y', 'z', 'x', 'r', 'b', 'a', 'j', 'q']


In [102]:
# Creating a refernce dictionary
stoi, itost = {}, {}
for ix1, ix2 in enumerate(unique_chars):
    itos[ix1] = ix2
    stoi[ix2] = ix1

In [219]:
x, Y = [], []
for name in names:
    for i,j in zip(name, name[1:]):
        x.append(stoi[i])
        Y.append(stoi[j])

# Creating tensors to easily work with vectors
x = torch.tensor(x)
Y = torch.tensor(Y)
# Encoding the integer to vector for input into neural network
xenc = F.one_hot(x, num_classes = 27).float()

x.shape=torch.Size([53342])


In [200]:
# Creating weighs and bias for nn
g = torch.Generator().manual_seed(2178987)
W = torch.randn((27,27),generator = g, dtype= torch.float32, requires_grad=True) 
b = torch.randn(1, generator = g, dtype = torch.float32)

In [230]:
# Gradient descend (Training)
for i in range(1000):
    # Forward pass
    # sum(xenc[i] * W[i]) + b 
    logits  =(xenc @ W) + b # predict log-counts (we assume that we are getting log(counts) here as equivalent to biagram model where we count combinations)
    count = torch.exp(logits) # counts equivalent to number of occurence in bigram 
    probs = count / count.sum(1, keepdim = True) # probabilities for next character
    # btw: the last 2 lines here are together called a 'softmax'
    # softmax is used frequently in NN over linear layer where we want to get probability of occurance from weights(which can  be +ve or -ve)
    
    # Calculating loss function (negative log likelihood) as its classification problem, we use mean squred in regression problem
    loss = -probs[torch.arange(len(x)), Y].log().mean()
    # Regularization of loss (this is to force W to be as low as possible which reducing the loss function)
    loss += 0.01*(W**2).mean()
    
    
    #backward pass
    W.grad = None
    loss.backward()
    
    # Update weights
    W.data += -50 * W.grad
loss

tensor(2.4348, grad_fn=<AddBackward0>)

In [231]:
# Final : Output from neural network
out = []
for i in range(10):
    ix = stoi['.']
    name = []
    while True:
        ix = torch.tensor([ix])
        ixenc = F.one_hot(ix, num_classes = 27).float()
        logits = (ixenc @ W) + b
        count = torch.exp(logits)
        probs = count / count.sum(1, keepdim=True)
    
        # using multinomial to pick next char
        ix = torch.multinomial(probs, num_samples = 1, replacement=True, generator=g).item()
        if itos[ix] == '.':
            break
        name.append(itos[ix])
    out.append(''.join(name))
out

['rleindie',
 'jolin',
 'warynnn',
 'go',
 'cotiemerasile',
 'bvan',
 'brerkarinasina',
 'bryeod',
 'aly',
 'sahora']