Game Plan
* MLP
* Predict next letter from last x letters
* Layers - embed

In [2]:
from functools import reduce
from typing import Tuple
import torch
from torch import nn

# Marshall the training data

In [3]:
context_letters = 3
start_char = '^'
end_char = '$'

In [4]:
with open('names.txt', 'r') as file:
    names = [line.strip() for line in file.readlines()]
print(names[:5])

['emma', 'olivia', 'ava', 'isabella', 'sophia']


In [5]:
letters = set(reduce(lambda a,b: a + b, names) + start_char + end_char)
itol = dict(enumerate(letters))
ltoi = dict([(l, i) for (i, l) in itol.items()])
list(map(lambda l: itol[ltoi[l]], 'hellothere'))

['h', 'e', 'l', 'l', 'o', 't', 'h', 'e', 'r', 'e']

In [6]:
def name_to_examples(name: str) -> Tuple[torch.Tensor, torch.Tensor]:
    name = start_char * context_letters + name + end_char
    name_pieces = [name[i: i+context_letters+1] for i in range(len(name)-context_letters)]
    def indexify(l_list: list[str]) -> torch.tensor:
        return torch.tensor(list(map(lambda l: ltoi[l], l_list)))
    return torch.stack(list(map(indexify, name_pieces)))

In [7]:
name_to_examples('dave')

tensor([[24, 24, 24, 18],
        [24, 24, 18,  1],
        [24, 18,  1,  6],
        [18,  1,  6,  2],
        [ 1,  6,  2,  3]])

In [8]:
examples = list(map(name_to_examples, names))
examples = torch.cat(examples)
x = examples[:, :-1]
y = examples[:, -1]
print(examples[:2])
print(x[:2])
print(y[:2])

tensor([[24, 24, 24,  2],
        [24, 24,  2,  0]])
tensor([[24, 24, 24],
        [24, 24,  2]])
tensor([2, 0])


In [9]:
x_one_hot = nn.functional.one_hot(x, num_classes=len(letters))
x_one_hot = x_one_hot.float()
x_one_hot = x_one_hot.view(x_one_hot.size(0), -1)
x_one_hot[:2]

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
         0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

# Set up the NN

In [10]:
class MyMLP(nn.Module):
    def __init__(self, dict_size, context_size, embedding_size):
        super().__init__()
        self.linear_stack = nn.Sequential(
            nn.Linear(dict_size * context_size, embedding_size),
            nn.Tanh(),
            nn.Linear(embedding_size, dict_size),
        )

    def forward(self, x):
        return self.linear_stack(x)

In [11]:
model = MyMLP(len(letters), context_letters, 30)
print(model)
output = model(x_one_hot)


MyMLP(
  (linear_stack): Sequential(
    (0): Linear(in_features=84, out_features=30, bias=True)
    (1): Tanh()
    (2): Linear(in_features=30, out_features=28, bias=True)
  )
)


In [12]:
output[:2]

tensor([[-0.0720,  0.0284,  0.0749, -0.0220,  0.0160, -0.0806,  0.0462,  0.1029,
          0.1939,  0.0630, -0.1704, -0.1799,  0.1714,  0.1385,  0.0494, -0.1409,
          0.0757,  0.1016,  0.2202,  0.0439, -0.1749, -0.0408,  0.0902,  0.2051,
          0.0963,  0.0976, -0.0517,  0.0369],
        [-0.0922, -0.0750,  0.0398, -0.0942,  0.0486,  0.0283,  0.0933,  0.0460,
          0.1685,  0.1499, -0.2231, -0.1190,  0.1185,  0.0377,  0.0456, -0.1873,
          0.0585,  0.0776,  0.1892,  0.0165, -0.1636, -0.0451, -0.0117,  0.1514,
          0.1046,  0.0754, -0.0033,  0.0542]], grad_fn=<SliceBackward0>)

# Generate names

In [78]:
# Start with word beginnings
num_gen = 5
gen = torch.tensor([ltoi[start_char]] * context_letters * num_gen, dtype=int)
print(gen.size())
gen = gen.view(num_gen, context_letters)
print(gen.size())
print(gen.view(-1).size())
gen

torch.Size([15])
torch.Size([5, 3])
torch.Size([15])


tensor([[24, 24, 24],
        [24, 24, 24],
        [24, 24, 24],
        [24, 24, 24],
        [24, 24, 24]])

In [107]:
print(gen.size())

torch.Size([5, 17])


In [106]:
gen_one_hot = nn.functional.one_hot(gen[:,-context_letters:].reshape(-1, 1), num_classes=len(letters)).float()
gen_one_hot = gen_one_hot.view(num_gen, context_letters * len(letters))
gen_output = model(gen_one_hot)
gen_probs = nn.functional.softmax(gen_output, dim=1)
gen_samples = torch.multinomial(gen_probs, 1)
print([itol[sample.item()] for sample in gen_samples])
gen = torch.cat([gen, gen_samples], dim=1)

['^', 'i', 'r', 'a', 'm']


In [64]:
gen

tensor([[24, 24, 24, 23],
        [24, 24, 24, 13],
        [24, 24, 24,  8],
        [24, 24, 24, 22],
        [24, 24, 24,  8]])

torch.Size([5, 3])
torch.Size([5, 1])


tensor([[24, 24, 24, 22],
        [24, 24, 24, 18],
        [24, 24, 24,  5],
        [24, 24, 24,  4],
        [24, 24, 24,  6]])

In [25]:
probabilities = nn.functional.softmax(output, dim=1)

# Sample from the categories
sample = torch.multinomial(probabilities, 1)

print(sample)

tensor([[12],
        [ 0],
        [24],
        ...,
        [ 2],
        [ 7],
        [17]])
