[Paper Reference](https://www.jmlr.org/papers/volume3/bengio03a/bengio03a.pdf)

In [2]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# Load the dataset: one name per line of names.txt.
# The context manager closes the file handle as soon as the contents are read,
# instead of leaving it open until the garbage collector gets to it.
with open('names.txt', 'r') as names_file:
    words = names_file.read().splitlines()
words[:8]

['emma', 'olivia', 'ava', 'isabella', 'sophia', 'charlotte', 'mia', 'amelia']

In [4]:
# Total number of entries in the `words` list
len(words)

32033

In [6]:
# Vocabulary: every distinct character occurring in the names, in sorted order.
chars = sorted(set(''.join(words)))
# Character -> index. Characters from `chars` get 1..len(chars); '.' is assigned index 0.
stoi = dict(zip(chars, range(1, len(chars) + 1)))
stoi['.'] = 0
# Index -> character (inverse of stoi, same insertion order).
itos = dict((idx, char) for char, idx in stoi.items())
itos

{1: 'a',
 2: 'b',
 3: 'c',
 4: 'd',
 5: 'e',
 6: 'f',
 7: 'g',
 8: 'h',
 9: 'i',
 10: 'j',
 11: 'k',
 12: 'l',
 13: 'm',
 14: 'n',
 15: 'o',
 16: 'p',
 17: 'q',
 18: 'r',
 19: 's',
 20: 't',
 21: 'u',
 22: 'v',
 23: 'w',
 24: 'x',
 25: 'y',
 26: 'z',
 0: '.'}

In [43]:
# Building datasets
# Each example is a window of `block_size` character indices; its label is the
# index of the character that follows that window.
block_size = 3  # number of preceding characters used as context
X = []
Y = []
for word in words[:5]:
    print(word)
    context = block_size * [0]  # start with a context made only of index 0 ('.')
    for ch in word + '.':
        ix = stoi[ch]
        X.append(context)
        Y.append(ix)
        print(''.join(map(itos.__getitem__, context)), '---->', itos[ix])
        # Slide the window: drop the oldest index, append the newest.
        # A new list is built each time, so lists already stored in X are not mutated.
        context = [*context[1:], ix]

X = torch.tensor(X)  # examples
Y = torch.tensor(Y)  # labels

emma
... ----> e
..e ----> m
.em ----> m
emm ----> a
mma ----> .
olivia
... ----> o
..o ----> l
.ol ----> i
oli ----> v
liv ----> i
ivi ----> a
via ----> .
ava
... ----> a
..a ----> v
.av ----> a
ava ----> .
isabella
... ----> i
..i ----> s
.is ----> a
isa ----> b
sab ----> e
abe ----> l
bel ----> l
ell ----> a
lla ----> .
sophia
... ----> s
..s ----> o
.so ----> p
sop ----> h
oph ----> i
phi ----> a
hia ----> .


In [17]:
# Inspect the shape and dtype of the example tensor X and the label tensor Y
X.shape, X.dtype, Y.shape, Y.dtype

(torch.Size([32, 3]), torch.int64, torch.Size([32]), torch.int64)

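`X` holds the examples (each row is a context of `block_size` character indices), and `Y` holds the labels (the index of the character that follows each context).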

In [23]:
vocab_size = 27  # 26 lowercase letters plus the '.' token (indices 0..26)
emb_dim = 2      # number of values in each character's embedding vector
# Lookup table: row i is the embedding of character index i.
# torch.rand draws the initial values uniformly from [0, 1).
C = torch.rand(vocab_size, emb_dim)

In [25]:
# Embedding of character index 5: plain row indexing into the lookup table
C[5]

tensor([0.7210, 0.1280])

In [36]:
# Row indexing into C is equivalent to one-hot encoding the index and
# multiplying by C: the one-hot row vector selects exactly one row of the matrix.
# num_classes is read from C so the one-hot width always matches C's row count,
# which the matrix product requires.
F.one_hot(torch.tensor(5), num_classes=C.shape[0]).float() @ C

tensor([0.7210, 0.1280])

In [44]:
# Index the lookup table with the whole integer tensor X at once: each index in
# X is replaced by the corresponding row of C.
emb = C[X] # look up the embedding of every index in X
emb.shape

torch.Size([32, 3, 2])

In [40]:
# W1 takes 6 inputs per example (3 context positions x 2 embedding values = 6)
# and produces 100 outputs; b1 is the matching 100-element vector.
W1 = torch.randn(6, 100)
b1 = torch.randn((100,))

In [57]:
# Concatenate the embeddings of the three context positions along dim 1,
# so each example becomes a single flat row.
torch.cat((emb[:, 0], emb[:, 1], emb[:, 2]), dim=1).shape

torch.Size([32, 6])

In [51]:
# The three per-position slices of emb (one per context position) as a list
[emb[:, pos, :] for pos in range(3)]

[tensor([[0.7750, 0.6188],
         [0.7750, 0.6188],
         [0.7750, 0.6188],
         [0.7210, 0.1280],
         [0.1153, 0.9391],
         [0.7750, 0.6188],
         [0.7750, 0.6188],
         [0.7750, 0.6188],
         [0.9025, 0.4126],
         [0.8759, 0.9134],
         [0.1285, 0.7404],
         [0.7820, 0.8656],
         [0.7750, 0.6188],
         [0.7750, 0.6188],
         [0.7750, 0.6188],
         [0.4715, 0.0785],
         [0.7750, 0.6188],
         [0.7750, 0.6188],
         [0.7750, 0.6188],
         [0.1285, 0.7404],
         [0.3795, 0.7749],
         [0.4715, 0.0785],
         [0.7691, 0.0598],
         [0.7210, 0.1280],
         [0.8759, 0.9134],
         [0.7750, 0.6188],
         [0.7750, 0.6188],
         [0.7750, 0.6188],
         [0.3795, 0.7749],
         [0.9025, 0.4126],
         [0.7278, 0.8169],
         [0.5856, 0.8938]]),
 tensor([[0.7750, 0.6188],
         [0.7750, 0.6188],
         [0.7210, 0.1280],
         [0.1153, 0.9391],
         [0.1153, 0.9391],

In [58]:
# Same concatenation without spelling out each position: unbind splits emb
# along dim 1 into a tuple of slices, which cat then joins along dim 1.
torch.cat(emb.unbind(dim=1), dim=1).shape

torch.Size([32, 6])

[Resume from here](https://youtu.be/TCH_1BHY58I?t=1430)