# Exercise 6: Meta-exercise. Think of fun/interesting exercise and complete it.

My exercise is to change `names.txt` to `pokemons.txt` and try to create new pokemon name!

In [1]:
import torch
import torch.nn.functional as F
import numpy as np

In [None]:
# Only lower-case English letters
names_text = open("pokemons.txt", "r").read()
words = [f".{name.lower()}." for name in names_text.splitlines()]  # cleaned up names manually so there's no '.' in the names

# Create encoding and decoding dictionaries
chars = ['.'] + sorted(list(set("".join(words)) - set(['.'])))
ctoi = {c: i for i, c in enumerate(chars)}
itoc = {i: c for i, c in enumerate(chars)}
itoc

{0: '.',
 1: ' ',
 2: "'",
 3: '-',
 4: '2',
 5: 'a',
 6: 'b',
 7: 'c',
 8: 'd',
 9: 'e',
 10: 'f',
 11: 'g',
 12: 'h',
 13: 'i',
 14: 'j',
 15: 'k',
 16: 'l',
 17: 'm',
 18: 'n',
 19: 'o',
 20: 'p',
 21: 'q',
 22: 'r',
 23: 's',
 24: 't',
 25: 'u',
 26: 'v',
 27: 'w',
 28: 'x',
 29: 'y',
 30: 'z',
 31: 'é',
 32: '♀',
 33: '♂'}

In [3]:
# Split the words into train, test, and validation sets
np.random.shuffle(words)
train_words = words[:int(0.8*len(words))]
test_words = words[int(0.8*len(words)):int(0.9*len(words))]
dev_words = words[int(0.9*len(words)):]
n = len(words)
print(f"{len(train_words) / n:.2f}, {len(test_words) / n:.2f}, {len(dev_words) / n:.2f}")

0.80, 0.10, 0.10


## Counting model

In [4]:
class BigramCountingModel():
    def __init__(self):
        self.N = torch.zeros(len(chars), len(chars))
        self.P = torch.zeros(len(chars), len(chars))

    def train(self, train_words: list[str], laplace_alpha: int = 1):
        # Count trigrams
        for word in train_words:
            for i in range(len(word) - 1):
                a, b = word[i], word[i + 1]
                self.N[ctoi[a], ctoi[b]] += 1

        # Compute probabilities with Laplace smoothing
        self.P = (self.N + laplace_alpha).float() / (self.N + laplace_alpha).sum(dim=1, keepdim=True).float()

    def compute_loss(self, test_words: list[str]):
        """ Compute negative log-likelihood """
        nll = 0
        n = 0
        for word in test_words:
            for i in range(len(word) - 1):
                n += 1
                a, b = word[i], word[i + 1]
                nll += -torch.log(self.P[ctoi[a], ctoi[b]])
        return (nll / n).item()

## Neural Network Model

In [16]:
class BigramNeuralNetwork():
    def __init__(self) -> None:
        self.W = torch.randn(len(chars), len(chars), requires_grad=True)

    def train(self, train_words: list[str], epochs=1000, lr=0.1, l2=0.01) -> None:
        X_train, y_train = self.words_to_features(train_words)

        for _ in range(epochs):
            # forward pass
            logits = self.forward(X_train)
            loss = self.compute_loss(logits, y_train, l2)

            # backward pass
            self.W.grad = None
            loss.backward()

            # update weights
            self.W.data -= lr * self.W.grad
    
    def compute_loss(self, logits: torch.tensor, y_test: torch.tensor, alpha: float) -> float:
        cross_entropy = F.cross_entropy(logits, y_test)
        l2_regularization = alpha * torch.norm(self.W) ** 2
        return cross_entropy + l2_regularization

    def forward(self, X: torch.tensor) -> torch.tensor:
        return self.W[X]  # instead of matrix multiplication, we use fancy indexing

    def test(self, test_words: list[str]) -> float:
        X_test, y_test = self.words_to_features(test_words)
        logits = self.forward(X_test)
        return self.compute_loss(logits, y_test, alpha=0).item()
    
    def words_to_features(self, words: list[str]) -> tuple[torch.tensor, torch.tensor]:
        """ Convert a list of words to features and labels """
        xs, ys = [], []
        for word in words:
            for i in range(len(word) - 1):
                a, b = word[i], word[i + 1]
                xs.append(ctoi[a])
                ys.append(ctoi[b])

        xs = torch.tensor(xs)
        ys = torch.tensor(ys)
        return xs, ys
    
    def generate(self) -> str:
        idx = ctoi["."]
        generated = []
        while len(generated) < 3 or generated[-1] != ".":
            probs = F.softmax(self.W[idx], dim=0)
            idx = torch.multinomial(probs, 1).item()
            generated += itoc[idx]
        return "".join(generated[:-1])

## Dev models (validation set)

In [17]:
model = BigramCountingModel()
model.train(dev_words, laplace_alpha=1)  # best alpha is 1
loss = model.compute_loss(test_words)
print(loss)

2.984520196914673


In [18]:
model = BigramNeuralNetwork()
model.train(dev_words, epochs=500, lr=30, l2=0.00001)  # quite good hyperparameters
loss = model.test(test_words)
print(loss)

3.041950225830078


## Test models

In [19]:
model = BigramCountingModel()
model.train(train_words, laplace_alpha=1)  # best alpha is 1
loss = model.compute_loss(test_words)
print(loss)

2.725548267364502


In [20]:
model = BigramNeuralNetwork()
model.train(train_words, epochs=500, lr=30, l2=0.00001)  # quite good hyperparameters
loss = model.test(test_words)
print(loss)

2.715907573699951


# Generate new Pokemon names!

In [45]:
model.generate()

'angiwaloct'