In [None]:
# Imports
import numpy as np
import torch

In [47]:
# Vocabulary
# Read the corpus, one name per line. The context manager closes the file
# handle as soon as the read finishes instead of leaving it open until the
# object happens to be garbage-collected.
with open('../datasets/names.txt', 'r') as names_file:
    words = names_file.read().splitlines()

# Every distinct character that occurs in the corpus, in sorted order.
chars = sorted(set(''.join(words)))

# Character -> index. Real characters start at 1; index 0 is reserved for the
# '.' marker.
# NOTE(review): assumes '.' never occurs inside a name -- if it did, it would
# also be in `chars` and the vocabulary would have an unused index.
stoi = {ch: ix for ix, ch in enumerate(chars, start=1)}
stoi['.'] = 0

# Index -> character (inverse of `stoi`).
itos = {ix: ch for ch, ix in stoi.items()}

# Constants
# Number of distinct symbols: all corpus characters plus the '.' marker.
vocab_size = len(chars) + 1

In [None]:
# Datasets
# Build trigram examples: each input is the pair of indices of two consecutive
# characters, and the target is the index of the character that follows them.
xs, ys = [], []
for name in words:
    # Two leading '.' markers give the first real character a full
    # two-character context; the trailing '.' is the end-of-name target.
    encoded = [stoi[ch] for ch in '..' + name + '.']
    for pos in range(len(encoded) - 2):
        xs.append(encoded[pos:pos + 2])
        ys.append(encoded[pos + 2])

# Convert the accumulated Python lists into tensors for training.
xs = torch.tensor(xs)
ys = torch.tensor(ys)

In [None]:
# Creating one-hot vectors
# Each two-character context (a, b) is flattened to the single class index
# a * vocab_size + b, so every row holds exactly one 1.0 among
# vocab_size ** 2 columns.
num_examples = xs.shape[0]
one_hot = torch.zeros(num_examples, vocab_size ** 2, dtype=torch.float32)
print(one_hot.shape)

# Vectorized fill: compute the hot column of every row at once and set them
# with a single advanced-indexing assignment, instead of iterating over the
# rows in Python.
context_ix = xs[:, 0] * vocab_size + xs[:, 1]
one_hot[torch.arange(num_examples), context_ix] = 1.0

torch.Size([228146, 729])


In [67]:
# Init the network
# Fixed seed so the initial weights are the same on every run.
g = torch.Generator()
g.manual_seed(2147483647)

# One row of logits per flattened two-character context, one column per
# candidate next character.
weight_shape = (vocab_size ** 2, vocab_size)
W = torch.randn(weight_shape, generator=g, requires_grad=True)

In [68]:
# Gradient descent
# Hyperparameters, named so they can be tuned in one place.
num_steps = 50
learning_rate = 150
reg_strength = 0.01  # weight of the mean-squared-weight penalty added to the loss

# Row indices used to pick each example's target probability; loop-invariant,
# so built once rather than on every step.
row_ix = torch.arange(xs.shape[0])

for k in range(num_steps):

    # forward pass
    logits = one_hot @ W  # predict log-counts
    counts = logits.exp()  # counts, equivalent to N
    probs = counts / counts.sum(1, keepdim=True)  # probabilities for next character
    # Mean negative log-likelihood of the targets plus the weight penalty.
    loss = -probs[row_ix, ys].log().mean() + reg_strength * (W ** 2).mean()
    print(loss.item())

    # backward pass
    W.grad = None  # clear the previous step's gradient
    loss.backward()

    # update
    # In-place step under no_grad: the supported way to modify a leaf tensor
    # that requires grad, without going through the `.data` escape hatch.
    with torch.no_grad():
        W += -learning_rate * W.grad

3.8028225898742676
3.484238386154175
3.339092254638672
3.228334665298462
3.1532347202301025
3.0691938400268555
3.020444631576538
2.9607300758361816
2.9307491779327393
2.876832962036133
2.852565050125122
2.815589666366577
2.803870677947998
2.7617855072021484
2.747529983520508
2.7242934703826904
2.7229347229003906
2.68473744392395
2.6744003295898438
2.6601169109344482
2.664299249649048
2.6287546157836914
2.6211535930633545
2.611215114593506
2.6180505752563477
2.585773229598999
2.5809853076934814
2.5727834701538086
2.5818333625793457
2.5509371757507324
2.547560691833496
2.5412425994873047
2.551645278930664
2.5223286151885986
2.5203661918640137
2.515059232711792
2.5266456604003906
2.498185396194458
2.497084379196167
2.492807149887085
2.5052130222320557
2.4776451587677
2.477365255355835
2.4737472534179688
2.486870050430298
2.4598772525787354
2.4601686000823975
2.457179307937622
2.4708425998687744
2.444406270980835


In [None]:
# Sample from the 'neural net' model
g = torch.Generator().manual_seed(2147483647)

n_names = 5  # how many names to generate

# Sampling needs no gradients; no_grad avoids building an autograd graph
# through W on every step.
with torch.no_grad():
    for sample_no in range(n_names):

        out = []
        ix = [0, 0]  # initial context: two '.' markers (index 0)
        while True:
            # Encode the current context under its own name so the training
            # matrix `one_hot` is not overwritten; otherwise re-running the
            # training cell after this one would train on a single row.
            context_enc = torch.zeros(1, vocab_size ** 2, dtype=torch.float32)
            context_enc[0][ix[0] * vocab_size + ix[1]] = 1.0
            logits = context_enc @ W  # predict log-counts
            counts = logits.exp()  # counts, equivalent to N
            p = counts / counts.sum(1, keepdims=True)  # probabilities for next character

            ix_next = torch.multinomial(p, num_samples=1, replacement=True, generator=g).item()
            out.append(itos[ix_next])
            if ix_next == 0:
                # Index 0 is the '.' marker: the name is finished.
                break
            # Slide the two-character context window forward by one.
            ix = [ix[1], ix_next]
        print(''.join(out))

keowtsavajkyson.
pari.
xskeika.
summndmiuxswum.
rohan.
