In [150]:
import torch
import torch.nn.functional as F

# 0. Check cuda availability
if torch.cuda.is_available(): 
    dev = "cuda:0" 
else: 
    dev = "cpu" 
device = torch.device(dev) 

# 1. Load the data from local file called 'names.txt'
with open('names.txt', 'r') as f:
    names = [line.strip() for line in f]
len(names), names[:5]

(32033, ['emma', 'olivia', 'ava', 'isabella', 'sophia'])

In [154]:
# 2. encode the char into a list of integers
symbols = sorted(list(set(''.join(names))))
char_to_int = {s:i+1 for i,s in enumerate(symbols)}
int_to_char = {i+1:s for i,s in enumerate(symbols)}
char_to_int['.'] = 0
int_to_char[0] = '.'
# char_to_int
# int_to_char

In [155]:
# hyperparameters
block_size = 3
embedding_size = 30
hidden_size = 100

In [156]:
# 3. generate mapping from previous characters to next character
input_char, output_char = [],[]
for word in names[:]:
    input_word ='.'*block_size+word
    output_word = word + '.'
    for i in range(len(output_word)):
        input_char.append(list(input_word[i:i+block_size])) 
        output_char.append(output_word[i]) 
# input_char, output_char

In [157]:
# encode mapping into integers
for i in range(len(input_char)):
    input_char[i] = [char_to_int[s] for s in input_char[i]]
    output_char[i] = char_to_int[output_char[i]]
input_char = torch.tensor(input_char, device=device)
labels = torch.tensor(output_char, device=device)
# input_char.shape, labels

In [158]:
# one-hot encoding

input_encoded = F.one_hot(input_char, len(char_to_int)).float()
input_encoded.shape, input_encoded.device, labels.shape, labels.device

(torch.Size([684, 3, 27]),
 device(type='cuda', index=0),
 torch.Size([684]),
 device(type='cuda', index=0))

In [159]:
# embedding layer
E = torch.rand((len(char_to_int), embedding_size), device=device, requires_grad=True)

# Hidden layer
W_hidden = torch.rand((embedding_size*block_size, hidden_size), device=device, requires_grad=True)
b_hidden = torch.rand((1, hidden_size), device=device, requires_grad=True)

# Output layer
W_out = torch.rand((hidden_size, len(char_to_int)), device=device, requires_grad=True)
b_out = torch.rand((1, len(char_to_int)), device=device, requires_grad=True)

params = [E, W_hidden, b_hidden, W_out, b_out]

E.shape,W_hidden.shape, b_hidden.shape, W_out.shape, b_out.shape,sum([p.numel() for p in params])

(torch.Size([27, 30]),
 torch.Size([90, 100]),
 torch.Size([1, 100]),
 torch.Size([100, 27]),
 torch.Size([1, 27]),
 12637)

In [160]:
for i in range(50):
    '''forward pass'''
    embed = E[input_char]
    hid = torch.tanh(embed.view(-1, embedding_size*block_size) @ W_hidden + b_hidden)
    log_counts = hid @ W_out + b_out
    loss = F.cross_entropy(log_counts, labels)
    print(loss.item())
    '''backward pass'''
    for p in params:
        p.grad = None
    loss.backward()
    
    for p in params:
        p.data -= .01 * p.grad
        
    

5.081396102905273
4.944773197174072
4.822131633758545
4.710469722747803
4.607940673828125
4.513275623321533
4.425487518310547
4.343755722045898
4.267368316650391
4.195699691772461
4.128204822540283
4.064415454864502
4.003929615020752
3.9464218616485596
3.8916237354278564
3.8393261432647705
3.789365291595459
3.7416231632232666
3.6960175037384033
3.6524875164031982
3.611006259918213
3.5715625286102295
3.534148693084717
3.4987692832946777
3.465425968170166
3.434112071990967
3.4048047065734863
3.377467155456543
3.3520376682281494
3.3284311294555664
3.306548595428467
3.2862627506256104
3.2674410343170166
3.249941349029541
3.2336225509643555
3.2183423042297363
3.203975200653076
3.190399408340454
3.17751407623291
3.1652274131774902
3.1534616947174072
3.142157554626465
3.1312623023986816
3.120734930038452
3.110539674758911
3.1006557941436768
3.0910584926605225
3.081735372543335
3.0726726055145264
3.0638599395751953


In [161]:
# generate a name
for _ in range(10):
    out = []
    context = [0]*block_size
    while True:
        embed = E[torch.tensor(context, device=device)]
        hid = torch.tanh(embed.view(-1, embedding_size*block_size) @ W_hidden + b_hidden)
        log_counts = hid @ W_out + b_out
        probs = F.softmax(log_counts, dim=1)
        index = torch.multinomial(probs, 1).item()
        context = context[1:] + [index]
        out.append(index)
        if index == 0:
            break
    print(''.join([int_to_char[i] for i in out]))


yi.
s.
l.
ialoi.
eygaea.
lz.
vlelnsyala.
legibeillu.
aeoaidoaahnqesaaee.
hyeieeievolvsslaa.
