# Implement a name generator

Use the architecture from Bengio et al. (2003), "A Neural Probabilistic Language Model".

In [31]:
import torch

Prepare the dataset:

In [3]:
# Load the raw data: every line of names.txt becomes one entry of `words`.
with open('names.txt') as names_file:
    words = names_file.read().splitlines()


In [59]:
# Context length: how many preceding characters are used to predict the next one.
block_size = 3


In [60]:
# Set of every distinct character that occurs in the words.
# Joining the strings is linear in the total number of characters, whereas
# sum(lists, []) re-copies the accumulated list for every word (quadratic).
all_letters = set(''.join(words))

In [61]:
# Vocabulary: every letter seen in the data plus '.', which is used both as
# left padding and as the end-of-word marker.
all_symbols = all_letters.union({'.'})

# Symbol -> integer index. '.' is pinned to 0; the remaining symbols are
# sorted first because the iteration order of a set of strings changes between
# interpreter runs (hash randomization), which would make the mapping -- and
# therefore the meaning of each row of a trained embedding matrix --
# irreproducible. '.' is excluded from the enumeration so that it keeps
# index 0 and every index stays below len(all_symbols).
stoi = {'.': 0}
stoi.update(
    {letter: idx for idx, letter in enumerate(sorted(all_symbols - {'.'}), 1)}
)

# Create the dataset: slide a window of block_size symbols over each padded
# word; the window is the input and the symbol right after it is the target.
X = []
y = []
for word in words:
    padded = block_size * '.' + word + '.'
    for idx in range(block_size, len(padded)):
        context = padded[idx - block_size:idx]
        X.append([stoi[s] for s in context])
        y.append(stoi[padded[idx]])

# X: one row of block_size symbol indices per example; y: the target index.
X = torch.tensor(X)
y = torch.tensor(y)

In [103]:
# Length of the embedding vector stored for each symbol (number of columns of C).
embeddings_len = 60

In [104]:
# Trainable parameters, drawn from a standard normal distribution and created
# directly as tensors that track gradients.

# Embedding table: one row of length embeddings_len per symbol.
C = torch.randn(len(all_symbols), embeddings_len, requires_grad=True)

# Output bias: one entry per symbol.
biases = torch.randn(len(all_symbols), requires_grad=True)

# Output weights: map the flattened context (block_size embeddings side by
# side) to one score per symbol.
weights = torch.randn(
    block_size * embeddings_len, len(all_symbols), requires_grad=True
)

In [105]:
def create_model(X: torch.Tensor):
    """Run the forward pass and return per-symbol probabilities.

    Each index in ``X`` is replaced by its row of the embedding matrix ``C``,
    the embeddings of one example are flattened into a single vector, passed
    through ``tanh``, then through the affine map ``weights``/``biases`` and
    finally normalised with a softmax along dimension 1.

    Args:
        X: tensor of symbol indices, one row per example.

    Returns:
        Tensor with one row per example; each row is the softmax output
        over the columns of ``weights``.
    """
    n_examples = X.shape[0]

    # Embedding lookup, then concatenate the context embeddings per example.
    flat_embeddings = C[X].view(n_examples, -1)
    squashed = flat_embeddings.tanh()

    # Affine output layer followed by normalisation to probabilities.
    logits = squashed @ weights + biases
    return logits.softmax(dim=1)

def create_loss(predictions, actual):
    """Return the mean negative log-likelihood of the target symbols.

    Args:
        predictions: 2-D tensor of probabilities, one row per example.
        actual: index of the correct column for each row of ``predictions``.

    Returns:
        Scalar tensor: minus the mean of the log of the probability that
        each row assigns to its target column.
    """
    row_ids = range(len(predictions))
    # Pick, for every row, the probability assigned to the correct symbol.
    target_probs = predictions[row_ids, actual]
    mean_log_prob = target_probs.log().mean()
    return -mean_log_prob


In [108]:
# Plain mini-batch SGD over the parameters C, biases and weights.
num_iters = 100_000
lr = 0.001
batch_size = 32
parameters = (C, biases, weights)

for num_iter in range(num_iters):
    # Sample a mini-batch (with replacement). The upper bound of
    # torch.randint is exclusive, so it must be len(X) -- with len(X) - 1 the
    # last example of the dataset could never be drawn.
    indices = torch.randint(0, len(X), (batch_size,))
    batch_X = X[indices]
    batch_y = y[indices]

    # forward pass: loss on the current batch
    loss = create_loss(create_model(batch_X), batch_y)

    # display loss every 10,000 iterations
    if num_iter % 10_000 == 0:
        print(loss.item())

    # reset gradients so that backward() does not accumulate into old values
    for param in parameters:
        param.grad = None

    # backprop
    loss.backward()

    # update weights; done under no_grad so the in-place step is not itself
    # recorded by autograd
    with torch.no_grad():
        for param in parameters:
            param -= lr * param.grad

2.4654412269592285
2.482477903366089
2.2996668815612793
2.384448766708374
2.2171342372894287
2.2757508754730225
2.451603651046753
2.210165023803711
2.3788421154022217
2.2792611122131348


In [109]:
# Mean negative log-likelihood over the whole dataset X, y (the same data the
# training batches were sampled from), as a Python float.
create_loss(create_model(X), y).item()

2.2974960803985596