In [2]:
import torch
import torch.nn.functional as F

In [3]:
# Load the training names, one per line. The context manager closes the file
# handle as soon as the text is read instead of leaving it to the garbage collector.
with open('names.txt', 'r') as f:
    names = f.read().splitlines()

# NOTE(review): N is not referenced by any cell visible here (it looks like a
# leftover count matrix); kept unchanged in case a later cell still uses it.
N = torch.zeros((28, 28), dtype=torch.int32)

# Vocabulary: every distinct character that occurs in the names, in sorted order.
chars = sorted(set(''.join(names)))
# Character -> integer index. Index 0 is reserved for the '.' boundary token,
# so the real characters start at 1.
stoi = {s: i + 1 for i, s in enumerate(chars)}
stoi['.'] = 0
# Inverse mapping: integer index -> character.
itos = {i: s for s, i in stoi.items()}

# MLP Approach

In [4]:
# Build the training set: each example is `context_len` preceding character
# indices (X) paired with the index of the character that follows (Y).
context_len = 3
inputs, targets = [], []
for name in names[:5]:
    print(name)
    # Left-pad with the '.' index (0) so the first real character already has a
    # full-width context; the trailing '.' marks the end of the name.
    padded = [0] * context_len + [stoi[ch] for ch in name + '.']
    for start in range(len(padded) - context_len):
        window = padded[start:start + context_len]
        target = padded[start + context_len]
        inputs.append(window)
        targets.append(target)
        print(''.join(itos[i] for i in window), '--->', itos[target])

X = torch.tensor(inputs)
Y = torch.tensor(targets)


emma
... ---> e
..e ---> m
.em ---> m
emm ---> a
mma ---> .
olivia
... ---> o
..o ---> l
.ol ---> i
oli ---> v
liv ---> i
ivi ---> a
via ---> .
ava
... ---> a
..a ---> v
.av ---> a
ava ---> .
isabella
... ---> i
..i ---> s
.is ---> a
isa ---> b
sab ---> e
abe ---> l
bel ---> l
ell ---> a
lla ---> .
sophia
... ---> s
..s ---> o
.so ---> p
sop ---> h
oph ---> i
phi ---> a
hia ---> .


In [5]:
# Inspect the inputs: one row per training example, one column per context position.
X

tensor([[ 0,  0,  0],
        [ 0,  0,  5],
        [ 0,  5, 13],
        [ 5, 13, 13],
        [13, 13,  1],
        [ 0,  0,  0],
        [ 0,  0, 15],
        [ 0, 15, 12],
        [15, 12,  9],
        [12,  9, 22],
        [ 9, 22,  9],
        [22,  9,  1],
        [ 0,  0,  0],
        [ 0,  0,  1],
        [ 0,  1, 22],
        [ 1, 22,  1],
        [ 0,  0,  0],
        [ 0,  0,  9],
        [ 0,  9, 19],
        [ 9, 19,  1],
        [19,  1,  2],
        [ 1,  2,  5],
        [ 2,  5, 12],
        [ 5, 12, 12],
        [12, 12,  1],
        [ 0,  0,  0],
        [ 0,  0, 19],
        [ 0, 19, 15],
        [19, 15, 16],
        [15, 16,  8],
        [16,  8,  9],
        [ 8,  9,  1]])

In [11]:
# Inspect the targets: for each row of X, the index of the character that follows.
Y

tensor([ 5, 13, 13,  1,  0, 15, 12,  9, 22,  9,  1,  0,  1, 22,  1,  0,  9, 19,
         1,  2,  5, 12, 12,  1,  0, 19, 15, 16,  8,  9,  1,  0])

In [20]:
# Initial embedding matrix: 27 rows, each a randomly initialised 2-d vector.
# NOTE(review): 27 is presumably the vocabulary size (len(stoi)) - confirm against names.txt.
C = torch.randn(27, 2)
C

tensor([[-0.5887,  0.4945],
        [-2.0278,  1.3148],
        [-0.3658, -0.6009],
        [-0.2866,  1.7440],
        [-1.1767, -1.4192],
        [-1.6286, -0.7328],
        [ 0.2293,  0.6549],
        [ 2.0387, -0.5061],
        [-1.5280,  0.8373],
        [ 0.1781, -1.4196],
        [-2.1070,  0.0133],
        [-0.3996, -1.5282],
        [ 0.5617,  1.2385],
        [ 1.1712, -1.5419],
        [ 0.1296,  0.2144],
        [ 0.3825, -1.1636],
        [ 0.3638, -0.6752],
        [ 1.0409, -0.9397],
        [ 1.5670,  0.3636],
        [-0.9647, -0.7984],
        [ 0.1896, -0.2172],
        [ 0.2486, -0.8564],
        [-2.4571,  1.0108],
        [ 1.3036,  0.9432],
        [ 0.3056,  1.9148],
        [-1.2301, -2.0694],
        [-0.7226,  1.5854]])

In [22]:
# Look up the embedding row of C for every index in X (same result as C[X]);
# the output has X's shape with a trailing embedding dimension appended.
emb = F.embedding(X, C)

In [30]:
# Hidden layer of the neural network.
# Each example's context embeddings are concatenated into one flat vector, so
# the layer's input width is (context length) * (embedding size). Deriving it
# from emb keeps this cell correct if either of those is changed upstream
# (it is 3 * 2 = 6 with the current settings).
fan_in = emb.shape[1] * emb.shape[2]
W1 = torch.randn((fan_in, 100))
b1 = torch.randn(100)
# view(-1, fan_in) flattens the per-example embeddings without copying;
# b1 broadcasts across the rows.
h = torch.tanh(emb.view(-1, fan_in) @ W1 + b1)
h  # display the activations as the cell output

tensor([[-0.3869, -0.6669,  0.6267,  ...,  0.0145, -0.2291, -0.8201],
        [-0.8557, -0.7851,  0.3018,  ..., -0.4488, -0.9920,  0.4833],
        [ 0.9925, -0.7812, -0.9976,  ..., -0.9996,  0.9876, -0.8770],
        ...,
        [-0.9826,  0.9431,  0.9530,  ...,  0.9908, -1.0000,  0.9857],
        [ 0.2330, -0.7422, -0.3136,  ..., -0.9998,  0.6492, -0.9407],
        [-0.8003,  0.7822,  0.4169,  ...,  0.9982, -0.9999,  0.9936]])

In [31]:
# Output layer parameters: maps the 100 hidden activations to 27 logits.
W2 = torch.randn(100, 27)
b2 = torch.randn((27,))

In [32]:
# Output layer: unnormalised scores (logits), one row per example; b2 broadcasts over rows.
logits = torch.matmul(h, W2) + b2

In [33]:
# Softmax over the class dimension, written out explicitly.
# Subtracting each row's largest logit before exp() leaves prob mathematically
# unchanged but prevents exp() from overflowing to inf (which would make the
# normalisation inf / inf = nan) when a logit is large.
# Note: counts is therefore rescaled per row compared with a plain logits.exp().
row_max = logits.max(1, keepdim=True).values
counts = (logits - row_max).exp()
# Normalise each row so it sums to 1.
prob = counts / counts.sum(1, keepdim=True)

In [36]:
# Probability the model assigns to the correct next character of each example:
# row i of prob, column Y[i]. The row index is sized from Y rather than a
# hard-coded 32, so the cell keeps working when the dataset size changes.
prob[torch.arange(Y.shape[0]), Y]

tensor([9.6748e-04, 8.4382e-05, 6.8789e-06, 8.5891e-13, 1.3611e-06, 4.9616e-09,
        3.6055e-09, 1.4730e-07, 7.4189e-04, 9.1742e-09, 4.8464e-09, 4.7243e-07,
        1.1881e-03, 6.2546e-03, 2.9162e-04, 8.9647e-01, 7.2006e-09, 3.5912e-11,
        4.4289e-06, 1.0377e-14, 1.2244e-06, 1.4902e-06, 8.6670e-03, 6.4300e-08,
        1.9657e-04, 2.4080e-05, 1.1965e-09, 2.4250e-14, 5.4624e-11, 1.0121e-10,
        5.5876e-05, 5.2914e-07])