In [3]:
import torch 
import torch.nn.functional as F 
import matplotlib.pyplot as plt 
%matplotlib inline

In [4]:
words = open('names.txt', 'r').read().splitlines()

In [5]:
chars = sorted(list(set(''.join(words))))
stoi = {s:i for i, s in enumerate(chars)}
stoi['.'] = 0
itos = {i:s for s, i in stoi.items()}
print(itos)

{0: '.', 1: 'b', 2: 'c', 3: 'd', 4: 'e', 5: 'f', 6: 'g', 7: 'h', 8: 'i', 9: 'j', 10: 'k', 11: 'l', 12: 'm', 13: 'n', 14: 'o', 15: 'p', 16: 'q', 17: 'r', 18: 's', 19: 't', 20: 'u', 21: 'v', 22: 'w', 23: 'x', 24: 'y', 25: 'z'}


In [None]:
# build the dataset
from numpy import block


block_size = 3
X, Y = [],[]
for w in words[:5]:
    print (w)
    context = [0] * block_size
    for ch in w + '.':
        ix = stoi[ch]
        X.append(context)
        Y.append(ix)
        print(''.join(itos[i] for i in context), '-->', itos[ix])
        context = context[1:] + [ix]
X = torch.tensor(X)
Y = torch.tensor(Y)
    

In [14]:
X.shape, X.dtype, Y.shape, Y.dtype 

(torch.Size([32, 3]), torch.int64, torch.Size([32]), torch.int64)

In [27]:
X

tensor([[ 0,  0,  0],
        [ 0,  0,  4],
        [ 0,  4, 12],
        [ 4, 12, 12],
        [12, 12,  0],
        [ 0,  0,  0],
        [ 0,  0, 14],
        [ 0, 14, 11],
        [14, 11,  8],
        [11,  8, 21],
        [ 8, 21,  8],
        [21,  8,  0],
        [ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0, 21],
        [ 0, 21,  0],
        [ 0,  0,  0],
        [ 0,  0,  8],
        [ 0,  8, 18],
        [ 8, 18,  0],
        [18,  0,  1],
        [ 0,  1,  4],
        [ 1,  4, 11],
        [ 4, 11, 11],
        [11, 11,  0],
        [ 0,  0,  0],
        [ 0,  0, 18],
        [ 0, 18, 14],
        [18, 14, 15],
        [14, 15,  7],
        [15,  7,  8],
        [ 7,  8,  0]])

In [16]:
Y

tensor([ 4, 12, 12,  0,  0, 14, 11,  8, 21,  8,  0,  0,  0, 21,  0,  0,  8, 18,
         0,  1,  4, 11, 11,  0,  0, 18, 14, 15,  7,  8,  0,  0])

In [None]:
C = torch.randn((27, 2))

In [19]:
C[5]

tensor([-1.0927, -0.3116])

In [23]:
F.one_hot(torch.tensor(5), num_classes=27).float() @ C

tensor([-1.0927, -0.3116])

In [24]:
C[5]

tensor([-1.0927, -0.3116])

In [29]:
C[X].shape

torch.Size([32, 3, 2])

In [30]:
emb = C[X]
emb.shape

torch.Size([32, 3, 2])

In [48]:
W1 = torch.randn((6, 100))
b1 = torch.randn(100)

In [None]:
h = emb.view(emb.shape[0], 6) @ W1 + b1 # emb.view(-1, 6) also works; broadcasting happens with b1
h = torch.tanh(h)

In [54]:
W2 = torch.randn((100, 27))
b2 = torch.randn(27)

In [55]:
logits = h @ W2 + b2 

In [56]:
logits.shape

torch.Size([32, 27])

In [57]:
counts = logits.exp()

In [58]:
prob = counts / counts.sum(1, keepdim=True)

In [59]:
prob.shape

torch.Size([32, 27])

In [61]:
prob[0].sum()

tensor(1.)

In [63]:
torch.arange(32)

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31])

In [64]:
prob[torch.arange(32), Y]

tensor([7.7658e-09, 7.6989e-09, 1.5856e-08, 3.1513e-15, 1.0517e-07, 1.1674e-06,
        3.1324e-11, 3.7931e-15, 4.7754e-02, 2.3249e-10, 7.7214e-06, 5.2956e-11,
        2.1965e-08, 4.9034e-04, 3.1859e-05, 1.4193e-09, 1.1751e-08, 3.3240e-09,
        1.0366e-10, 6.1704e-04, 1.1133e-04, 2.7956e-07, 2.2138e-16, 2.0358e-06,
        4.4025e-07, 8.7904e-08, 5.4989e-03, 4.1324e-11, 2.9190e-06, 5.2589e-08,
        1.7070e-10, 2.5334e-14])

In [51]:
h.shape

torch.Size([32, 100])

In [None]:
emb.view(32, 6)

In [None]:
emb.view(32, 6) == torch.cat(torch.unbind(emb, 1), 1)

In [None]:
torch.cat([emb[:,0, :], emb[:,1, :], emb[:, 2, :]], 1).shape # not efficient as view() because new memory is created

torch.Size([32, 6])

In [38]:
torch.cat(torch.unbind(emb, 1), 1).shape

torch.Size([32, 6])

In [39]:
a = torch.arange(18)
a

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17])

In [None]:
a.view(3, 3, 2)

In [None]:
a.storage()