In [29]:
import torch

In [30]:
input_file = open("names.txt",'r')
words = input_file.read().splitlines()
len(words)

32033

### Convert words to trigrams

In [31]:
trigrams = []
for word in words[:5000]:
    word = "." + word + "."
    for w1, w2, w3 in (zip(word, word[1:], word[2:])):
        trigrams.append((w1, w2, w3))
        
# input data
len(trigrams)

30245

In [32]:
# create a dictionary from idx to char
i_to_char = {}
for i, c in enumerate(".abcdefghijklmnopqrstuvwxyz"):
    i_to_char[i] = c

# create a dictionary from char to i
char_to_i = {}
for i, c in enumerate(".abcdefghijklmnopqrstuvwxyz"):
    char_to_i[c] = i

In [48]:
import torch.nn.functional as F
xs, ys = [], []
for trigram in trigrams:
    trigram_idxs = list(map(lambda char: char_to_i[char],trigram))
    trigram_xs = trigram_idxs[:2]
    trigram_ys = trigram_idxs[2:]
    xs.append(trigram_xs)
    ys.append(trigram_ys)
xs = torch.tensor(xs)
ys = torch.tensor(ys)

In [49]:
print("input shape: ", xs.shape) # (196113, 2)
print("output shape: ", ys.shape) # (196113, 1)

input shape:  torch.Size([30245, 2])
output shape:  torch.Size([30245, 1])


In [50]:
xs_encoded = []
for x in xs:
    # dim(x) = 2 => dim(x) => 2*27 = 54
    xenc = F.one_hot(x, num_classes = 27).float()
    xenc_1 = xenc[0]
    xenc_2 = xenc[1]
    xenc = torch.cat([xenc_1, xenc_2])
    xs_encoded.append(xenc)
xs = torch.stack(xs_encoded)

In [57]:
print("input tensor shape:", xs.shape)

input tensor shape: torch.Size([30245, 54])


In [9]:
import matplotlib.pyplot as plt
# plt.imshow(xs)

### Create a neural net

In [10]:
# W = torch.randn((27, 1)) => one neuron
g = torch.Generator().manual_seed(2147483647)
W = torch.randn((54, 27), generator=g, requires_grad = True) # => 27 neurons, with 54 weights per neuron

# xs @ W
# 27 weights because
# input tensor => (N, 54)
# xs @ W = (N, 54) @ (54, 27) => (N, 27)

In [18]:
for i in range(5):
    # forward pass
    logits = xs @ W # (N, 27)
    counts = logits.exp()
    probs = counts/counts.sum(1, keepdims=True)
    
    # compute loss
    loss = -probs[torch.arange(xs.shape[0]), ys].log().mean()
    
    # backward pass
    W.grad = None
    loss.backward()
    
    # update
    W.data += -50 * W.grad
    print(loss)

tensor(2.7484, grad_fn=<NegBackward0>)
tensor(2.7381, grad_fn=<NegBackward0>)
tensor(2.7291, grad_fn=<NegBackward0>)
tensor(2.7212, grad_fn=<NegBackward0>)
tensor(2.7141, grad_fn=<NegBackward0>)


In [27]:
# finally, sample from the 'neural net' model
import random
g = torch.Generator().manual_seed(2147483647)

for i in range(5):
    i1 = 0
    i2 = char_to_i['d']
    i2_char = i_to_char[i2]
    out = [i2_char]
    while True:
        # NOW:
        i1_enc = F.one_hot(torch.tensor([i1]), num_classes=27).float()
        i2_enc = F.one_hot(torch.tensor([i2]), num_classes=27).float()
        input_enc = torch.cat([i1_enc, i2_enc], dim=1)
        # print(input_enc)
        # print(i_to_char[i1], i_to_char[i2], "=>")
        logits = input_enc @ W # predict log-counts
        counts = logits.exp() # counts, equivalent to N
        p = counts / counts.sum(1, keepdims=True) # probabilities for next character
        # ----------

        i1 = i2
        i2 = torch.multinomial(p, num_samples=1, replacement=True, generator=g).item()
        # print(i_to_char[i2])
        out.append(i_to_char[i2])
        if i2 == 0:
            break
    print(''.join(out))

tensor([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])
tensor([[0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])
tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.]])
tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])
dlsn.
tensor([[1., 0