In [1]:
import sys
sys.path.append('..')
from somegrad import Tensor
import somegrad.functional as F

In [2]:
def build_dataset(words, context_size=2):
    """Turn a list of words into (context, target) training examples.

    Every word is padded with ``context_size`` leading '.' tokens and one
    trailing '.' token, then scanned with a sliding window: each run of
    ``context_size`` consecutive characters is a context and the character
    right after it is the target. With the default ``context_size=2`` this
    yields trigram examples (two context characters -> next character).

    Relies on the module-level ``stoi`` mapping (character -> integer index),
    which must be defined before this function is called.

    Args:
        words: iterable of strings; every character must be a key of ``stoi``.
        context_size: number of preceding characters used as context (>= 1).

    Returns:
        (xs, ys): ``xs`` is a Tensor built from one list of ``context_size``
        indices per example; ``ys`` is a Tensor built from the matching
        target indices.

    Raises:
        ValueError: if ``context_size`` is smaller than 1.
        KeyError: if a word contains a character missing from ``stoi``.
    """
    if context_size < 1:
        raise ValueError("context_size must be >= 1")

    xs, ys = [], []

    for w in words:
        # '.' marks both the start padding and the end-of-word token.
        chs = ['.'] * context_size + list(w) + ['.']
        ixs = [stoi[ch] for ch in chs]

        # One example per position that has a full context before it.
        for start in range(len(ixs) - context_size):
            target_pos = start + context_size
            xs.append(ixs[start:target_pos])
            ys.append(ixs[target_pos])

    xs = Tensor(xs)
    ys = Tensor(ys)

    return xs, ys

In [3]:
import numpy as np
import random

# Read the corpus, one entry per line. The context manager closes the file
# handle as soon as the read finishes instead of leaving it open.
with open('names.txt', 'r') as f:
    words = f.read().splitlines()

In [4]:
# Build the character <-> index vocabulary.
# Index 0 is reserved for the '.' boundary token; the sorted characters found
# in the corpus take indices 1, 2, ...
chars = sorted(set(''.join(words)))
stoi = {s: i + 1 for i, s in enumerate(chars)}
stoi['.'] = 0
itos = {i: s for s, i in stoi.items()}

# Shuffle a *copy* of the corpus with a fixed seed. Shuffling `words` in place
# would make this cell non-idempotent: re-running it would reshuffle an
# already shuffled list and silently change the train/dev/test split.
random.seed(42)
shuffled_words = list(words)
random.shuffle(shuffled_words)

n = len(shuffled_words)
n1 = int(0.8 * n)
n2 = int(0.9 * n)

train_words = shuffled_words[:n1]      # 80%
dev_words   = shuffled_words[n1:n2]    # 10%
test_words  = shuffled_words[n2:]      # 10%

# The three splits must partition the corpus exactly.
assert len(train_words) + len(dev_words) + len(test_words) == n

print(f"Split sizes: Train: {len(train_words)}, Dev: {len(dev_words)}, Test: {len(test_words)}")

Split sizes: Train: 25626, Dev: 3203, Test: 3204


In [5]:
# Encode each split as (context indices, target index) tensors.
xs_train, ys_train = build_dataset(train_words)
xs_dev, ys_dev     = build_dataset(dev_words)
xs_test, ys_test   = build_dataset(test_words)

# Initialize the weights with small Gaussian noise.
# 54 rows = 2 context characters x 27 one-hot classes; 27 columns = one logit
# per class. A seeded generator makes the initial weights, and therefore the
# reported losses, reproducible across kernel restarts.
W = Tensor(np.random.default_rng(42).standard_normal((54, 27)) * 0.01)

In [6]:
# gradient descent
# The one-hot encoding of the inputs and the example count do not depend on W,
# so they are computed once here instead of on every one of the 1000 steps.
xenc_train = xs_train.one_hot(num_classes=27)
num_train = xs_train.data.shape[0]

for k in range(1000):

    # forward pass
    # (N, 54) @ (54, 27) -> (N, 27)
    logits = xenc_train @ W
    loss = F.cross_entropy(logits, ys_train)

    # backward pass
    # Reset the gradient before backprop (assumes backward() accumulates into
    # .grad rather than overwriting it -- TODO confirm against somegrad).
    W.grad = np.zeros_like(W.data)
    loss.backward()

    # update: plain gradient-descent step with a step size of 20
    W.data += -20 * W.grad

In [7]:
# Loss of the trained weights on the training split.
num_train = xs_train.data.shape[0]
xenc_train = xs_train.one_hot(num_classes=27)

# Same forward pass as in the training loop: encoded inputs times W give the
# logits, which are scored against the true next-character indices.
logits = xenc_train @ W
train_loss = F.cross_entropy(logits, ys_train)

train_loss

Tensor(data=2.3405681052980785, grad=0.0)

In [8]:
# Loss of the trained weights on the held-out dev split.
num_dev = xs_dev.data.shape[0]
xenc_dev = xs_dev.one_hot(num_classes=27)

# Forward pass only: W is not updated here.
logits = xenc_dev @ W
dev_loss = F.cross_entropy(logits, ys_dev)

dev_loss

Tensor(data=2.3410804152241793, grad=0.0)

In [9]:
# Loss of the trained weights on the held-out test split.
num_test = xs_test.data.shape[0]
xenc_test = xs_test.one_hot(num_classes=27)

# Forward pass only: W is not updated here.
logits = xenc_test @ W
test_loss = F.cross_entropy(logits, ys_test)

test_loss

Tensor(data=2.3434970922574556, grad=0.0)