In [1]:
import torch
import torch.nn.functional as F


words = open("names.txt", 'r').read().splitlines()

In [2]:
N = torch.zeros((27, 27, 27), dtype=torch.int32)

chars = sorted(list(set(''.join(words))))
stoi = {s: i + 1 for i, s in enumerate(chars)}
stoTri = {}
stoi['.'] = 0
itos = {i:s for s, i in stoi.items()}
g = torch.Generator().manual_seed(2147483647)

W = torch.rand((729, 27), requires_grad=True)

chars.insert(0, '.')

i = 0
for ch1 in chars:
    for ch2 in chars:
        stoTri[ch1 + ch2] = i
        i += 1

xs, ys = [], []

words_len = len(words)
train_idx = int(0.80 * words_len)
dev_idx = int(0.90 * words_len)

In [3]:
for w in words:
    chs = ['.'] + ['.'] + list(w) + ['.']

    for ch1, ch2, ch3 in zip(chs, chs[1:], chs[2:]):
        ix1 = stoTri[ch1 + ch2]
        ix2 = stoi[ch3]

        xs.append(ix1)
        ys.append(ix2)
    

xs = torch.tensor(xs)
ys = torch.tensor(ys)

xtrain, ytrain = xs[:train_idx], ys[:train_idx]
xdev, ydev = xs[train_idx:dev_idx], ys[train_idx:dev_idx]
xtest, ytest = xs[dev_idx:], ys[dev_idx:]

In [None]:
num = xs.nelement()

# print("xs is, ", xs)
# print("the shape of ys is, ", ys.shape)
# print("the shape of W is, ", W.shape)
# print("num is, ", num)

train_loss_arr = []

'''
xenc = [228146, 729] # every row is a training example and 729 is the number of classes
weight matrix = [729, 27] # every cell is probability of a col (next char) given a row (previous two chars) 
xs contains the index of the previous two chars
Can just index into W and create a [228146, 27] matrix, same as before!


'''

for k in range(100):
    # xenc = F.one_hot(xs, num_classes=729).float()
    # print("shape of xenc is, ", xenc.shape)
    # logits = xenc @ W
    logits = W[xs]
    counts = logits.exp()
    probs = counts / counts.sum(1, keepdim=True)
    loss = -probs[torch.arange(num), ys].log().mean() + 0.01*(W**2).mean()
    train_loss_arr.append(loss.item())
    
    W.grad = None
    loss.backward()

    W.data += -50 * W.grad
    
print("Mean of the last 100 training loss: ", sum(train_loss_arr)/100)

Mean of the last 100 training loss:  2.523271670341492


In [None]:
dev_loss_arr = []

with torch.no_grad():
    for k in range(10):
        xenc = F.one_hot(xdev, num_classes=729).float()
        logits = xenc.view(-1, 729) @ W
        counts = logits.exp()
        probs = counts / counts.sum(1, keepdims=True)
        dev_loss = -probs[torch.arange(ydev.shape[0]), ydev].log().mean()
        dev_loss_arr.append(dev_loss.item())


    print("Mean of the dev set loss: ", sum(dev_loss_arr)/10) 


In [None]:
test_loss_arr = []
with torch.no_grad():
    for j in range(10):
        xenc = F.one_hot(xtest, num_classes=729).float()
        logits = xenc.view(-1, 729) @ W
        counts = logits.exp()
        probs = counts / counts.sum(1, keepdims=True)
        loss = -probs[torch.arange(ytest.shape[0]), ytest].log().mean() 
        test_loss_arr.append(loss.item())

print("Mean of the test set loss: ", sum(test_loss_arr)/10) 