In [5]:
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import Dataset, TensorDataset, DataLoader
from torch.utils.data.dataset import random_split

# Integer seed (2**31 - 1). Later cells pass it as the `seed` argument of
# MLPNgramClassifier and to StepByStep.set_seed.
g = 2147483647

class MLPNgramClassifier(torch.nn.Module):
    """Character-level MLP that maps a fixed-size context to next-token logits.

    Each of the `block_size` context indices is looked up in the embedding
    table `C`; the embeddings are concatenated, passed through one tanh hidden
    layer (`W1`, `b1`) and projected to `vocab_size` logits (`W2`, `b2`).

    Args:
        seed: Seed for the private generator used to initialise all weights.
        block_size: Number of context tokens per example.
        embedding_size: Dimension of each token embedding.
        hidden_size: Width of the hidden layer.
        device: Device on which the parameters are stored.
        vocab_size: Number of distinct tokens (rows of `C` and output logits).
            Defaults to 27, the value that was previously hard-coded.
    """

    def __init__(self, seed=42, block_size=3, embedding_size=2, hidden_size=100, device='cpu', vocab_size=27):
        super().__init__()
        self.block_size = block_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.device = device
        self.g = torch.Generator().manual_seed(seed)
        # Sampling order (C, W1, b1, W2, b2) is kept so a given seed yields
        # the same initial weights as before.
        self.C = self._init_param((vocab_size, embedding_size))
        self.W1 = self._init_param((block_size * embedding_size, hidden_size))
        self.b1 = self._init_param((hidden_size,))
        self.W2 = self._init_param((hidden_size, vocab_size))
        self.b2 = self._init_param((vocab_size,))

    def _init_param(self, shape):
        """Draw a standard-normal tensor of `shape` and wrap it as a Parameter."""
        # `self.g` is a CPU generator, so sample on CPU and move afterwards;
        # passing a CPU generator together with a non-CPU `device` to
        # torch.randn raises a device-mismatch error.
        values = torch.randn(shape, generator=self.g)
        # nn.Parameter already sets requires_grad=True.
        return torch.nn.Parameter(values.to(self.device))

    def forward(self, x):
        """Return logits of shape (N, vocab_size) for integer contexts `x`.

        `x` holds token indices; after the embedding lookup it is flattened to
        rows of `block_size * embedding_size` features.
        """
        emb = self.C[x]
        flat = emb.view(-1, self.block_size * self.embedding_size)
        h = torch.tanh(flat @ self.W1 + self.b1)
        logits = h @ self.W2 + self.b2
        return logits

# Read the names, one per line. The context manager closes the file handle,
# which the bare open(...).read() left to the garbage collector.
with open('names.txt') as names_file:
    words = names_file.read().splitlines()

# build the vocabulary: every character seen in the names gets an index
# starting at 1; index 0 is reserved for the '.' boundary token
chars = sorted(set(''.join(words)))
stoi = {s: i + 1 for i, s in enumerate(chars)}
stoi['.'] = 0
itos = {i: s for s, i in stoi.items()}

# build the dataset: each example is the previous `block_size` token indices
# (X) and the index of the token that follows them (Y)
block_size = 3
X, Y = [], []

for w in words:
    # every word starts from a context made only of boundary tokens
    context = [0] * block_size
    for ch in w + '.':
        ix = stoi[ch]
        X.append(context)
        Y.append(ix)
        context = context[1:] + [ix]  # crop and append (builds a new list)
device = torch.device('cpu')
X = torch.as_tensor(X).to(device)
Y = torch.as_tensor(Y).to(device)


In [6]:
# Split the (context, target) pairs 80/10/10 into train/validation/test and
# wrap each part in a DataLoader.
dataset = TensorDataset(X, Y)
train_ratio = .8
validation_ratio = .1

n_total = len(dataset)
n_train = int(n_total * train_ratio)
n_train_batch = 32
n_validation = int(n_total * validation_ratio)
n_validation_batch = 32
# the test split takes whatever remains so the three sizes sum to n_total
n_test = n_total - n_train - n_validation

# Seed the split explicitly: without a generator random_split draws from the
# global RNG, which has not been seeded at this point, so the partition would
# differ between runs.
split_generator = torch.Generator().manual_seed(g)
train_data, validation_data, test_data = random_split(
    dataset, [n_train, n_validation, n_test], generator=split_generator
)

train_loader = DataLoader(train_data, batch_size=n_train_batch)
validation_loader = DataLoader(validation_data, batch_size=n_validation_batch)
# the whole test split is served as a single batch
test_loader = DataLoader(test_data, batch_size=n_test)

In [7]:
# Baseline configuration: one model with a 2-d embedding and 100 hidden units,
# trained with SGD (learning rate 0.01, momentum 0.9) on cross-entropy loss.
embedding_size, hidden_size = 2, 100
lr = .01
model = MLPNgramClassifier(
    seed=g,
    block_size=block_size,
    embedding_size=embedding_size,
    hidden_size=hidden_size,
    device=device,
)
loss_fn = nn.functional.cross_entropy
optimizer = optim.SGD(params=model.parameters(), lr=lr, momentum=0.9)

In [39]:
# Execute StepByStep.py in this notebook's namespace. Later cells use a
# `StepByStep` class that is presumably defined by that script — confirm.
# NOTE(review): the grid-search cell also uses `np` without a visible import;
# it may be leaking in from this script — verify.
%run StepByStep.py

In [11]:
# Bundle the model, loss function and optimizer in a StepByStep helper
# (its implementation is not visible in this notebook).
sbs = StepByStep(model, loss_fn, optimizer)
# reuse the notebook-wide seed and target device
sbs.set_seed(g)
sbs.to(device)

# hand over the train / validation / test loaders built earlier
sbs.set_loaders(train_loader, validation_loader, test_loader)

# 'mlp-ngram' is the name passed to the helper's TensorBoard setup
sbs.set_tensorboard('mlp-ngram')

In [40]:
# Grid search over embedding size, hidden-layer width and SGD momentum.
# Every combination trains a freshly initialised model for 20 epochs.

# Loop-invariant settings, hoisted out of the grid.
lr = .1
loss_fn = nn.functional.cross_entropy

# Momentum is kept strictly below 1: with momentum >= 1 the velocity buffer
# never decays and SGD diverges, so the previous sweep up to 1.9 could not
# produce usable runs. Literal values also avoid floating-point drift from a
# fractional-step arange and the dependency on an unimported `np`.
momentum_values = (0.5, 0.6, 0.7, 0.8, 0.9)

for c_dim in range(2, 20):  # embedding sizes 2..19
    for w_dim in range(100, 500, 20):  # hidden sizes 100..480
        for momentum in momentum_values:
            print("[][][][][][][][][][][]")
            print("c_dim: ", c_dim)
            print("w_dim: ", w_dim)
            print("momentum: ", momentum)
            embedding_size = c_dim
            hidden_size = w_dim
            model = MLPNgramClassifier(g, block_size, embedding_size, hidden_size, device)
            optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

            sbs = StepByStep(model, loss_fn, optimizer)
            sbs.set_seed(g)
            sbs.to(device)

            sbs.set_loaders(train_loader, validation_loader, test_loader)

            # NOTE(review): every run logs under the same 'mlp-ngram' name;
            # confirm StepByStep keeps the runs distinguishable.
            sbs.set_tensorboard('mlp-ngram')

            sbs.train(20)
            print("[][][][][][][][][][][]")
        
    

[][][][][][][][][][][]
c_dim:  2
w_dim:  100
momentum:  0.5
Epoch 1 of 20...
Epoch 2 of 20...
Loss: 2.676296527242761
Epoch 3 of 20...
Loss: 2.517752488670142
Epoch 4 of 20...
Loss: 2.4760621981107502
Epoch 5 of 20...
Loss: 2.4499399249675733
Epoch 6 of 20...
Loss: 2.4306079607984628
Epoch 7 of 20...
Loss: 2.4149648614677104
Epoch 8 of 20...
Loss: 2.4015126001157734
Epoch 9 of 20...
Loss: 2.3900265412569714
Epoch 10 of 20...
Loss: 2.3803088547972777
Epoch 11 of 20...
Loss: 2.371916571054686
Epoch 12 of 20...
Loss: 2.3647310439324816
Epoch 13 of 20...
Loss: 2.35824189108351
Epoch 14 of 20...
Loss: 2.3520385744193826
Epoch 15 of 20...
Loss: 2.3464347839480864
Epoch 16 of 20...
Loss: 2.341570083799068
Epoch 17 of 20...
Loss: 2.3373915909007086
Epoch 18 of 20...
Loss: 2.333732388485197
Epoch 19 of 20...
Loss: 2.330346008002507
Epoch 20 of 20...
Loss: 2.327257219048904
[][][][][][][][][][][]
[][][][][][][][][][][]
c_dim:  2
w_dim:  100
momentum:  0.6
Epoch 1 of 20...
Epoch 2 of 20...
Loss: 