# Exercise 1: Create Tri-gram Language Model

In [60]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [61]:
# Load the names corpus; the file is expected to hold one lower-case English
# name per line.
# The context manager closes the file handle as soon as the text is read.
with open("../names.txt", "r", encoding="utf-8") as names_file:
    names_text = names_file.read()
# Wrap every name in "." so the same symbol marks both its start and its end.
words = [f".{name}." for name in names_text.splitlines()]
words[:5]

['.emma.', '.olivia.', '.ava.', '.isabella.', '.sophia.']

In [62]:
# Create the encoding (char -> index) and decoding (index -> char) dictionaries
# over the sorted set of distinct characters found in `words`.
chars = sorted(set("".join(words)))
itoc = dict(enumerate(chars))
ctoi = {char: index for index, char in itoc.items()}
ctoi['.'], itoc[0], ctoi['b'], itoc[2]

(0, '.', 2, 'b')

## Counts model

In [69]:
# Tri-gram count tensor: N[i, j, k] is how often chars (i, j, k) appear in a row
N = torch.zeros((len(chars),) * 3)
for word in words:
    # slide a window of three consecutive characters over the word
    for a, b, c in zip(word, word[1:], word[2:]):
        N[ctoi[a], ctoi[b], ctoi[c]] += 1

# Laplace smoothing: add one to every count so no tri-gram has zero count
N = N + 1

N[ctoi['a'], ctoi['n'], ctoi['a']], N[ctoi['x'], ctoi['q'], ctoi['w']]

(tensor(805.), tensor(1.))

In [72]:
# Turn the tri-gram counts into probabilities of the third character given the
# first two: every N[i, j, :] slice is divided by its own total, so that
# P[i][j].sum() == 1.
N = N.float()  # make sure the counts are floating point before dividing
P = N / N.sum(dim=-1, keepdim=True)  # keepdim lets the totals broadcast over the last axis
P[14, 23].sum()

tensor(1.)

In [70]:
# Probability of the last character of each tri-gram given its first two characters
tuple(P[ctoi[a], ctoi[b], ctoi[c]] for a, b, c in ("ana", "xqw"))

(tensor(0.1473), tensor(0.0370))

In [103]:
def generate_name_stochastically(P, first_letter, generator=None):
    """Sample one name from the tri-gram distribution, starting with ``first_letter``.

    The name is grown one character at a time: the last two characters select
    the row ``P[i, j]``, the next character is drawn from that row, and the
    loop stops once the "." end marker has been drawn.

    Args:
        P: tensor indexed as ``P[i, j]``; each such row holds the sampling
            weights of the next character given the two previous character
            indices.
        first_letter: text placed right after the leading "." marker. Its
            characters must be keys of ``ctoi``.
        generator: optional ``torch.Generator`` forwarded to
            ``torch.multinomial`` so a run can be reproduced. The default
            ``None`` draws from torch's global random state.

    Returns:
        The generated name as a ``str``, without the surrounding "." markers.

    Raises:
        KeyError: if one of the two context characters is missing from ``ctoi``.
    """
    name = "." + first_letter
    while True:
        # The two most recent characters form the conditioning context.
        i, j = ctoi[name[-2]], ctoi[name[-1]]
        k = torch.multinomial(P[i, j], 1, generator=generator).item()
        name += itoc[k]
        if name.endswith("."):
            break
    # Strip the leading start marker and the trailing end marker.
    return name[1:-1]

# Seed torch's global random state so the printed samples are identical on
# every Restart & Run All; without an explicit generator, torch.multinomial
# draws from that global state.
torch.manual_seed(42)
for _ in range(5):
    name = generate_name_stochastically(P, first_letter="i")
    print(name)

ithia
ilynna
ikon
ivivi
itacehlocama


In [None]:
# Mean negative log-likelihood of the training tri-grams under P (the loss function)
nll = 0
n = 0  # number of tri-grams scored; starts at 0 so that nll / n is a true mean
for word in words:
    # visit every run of three consecutive characters in the word
    for a, b, c in zip(word, word[1:], word[2:]):
        likelihood = P[ctoi[a], ctoi[b], ctoi[c]]
        nll -= torch.log(likelihood)
        n += 1

nll / n

tensor(2.0927)

## Neural Network Model