In [1]:
import numpy as np
import torch

In [2]:
words = open("names.txt", 'r').read().splitlines()

In [3]:
words[:10]

['emma',
 'olivia',
 'ava',
 'isabella',
 'sophia',
 'charlotte',
 'mia',
 'amelia',
 'harper',
 'evelyn']

In [4]:
len(words)

32033

In [5]:
min(len(w) for w in words)

2

In [6]:
max(len(w) for w in words)

15

In [7]:
word_len = [len(w) for w in words]

In [8]:
np.mean(word_len)

np.float64(6.122217712983486)

In [9]:
np.median(word_len)

np.float64(6.0)

In [10]:
chars = sorted(list(set(''.join(words))))
chars = ['.'] + chars
stoi = {s:i for i, s in enumerate(chars)}
itos = {v:k for k, v in stoi.items()}
bigram_cnt = torch.zeros((27, 27), dtype=torch.int)

In [11]:
bigram_prob = bigram_cnt.float()
bigram_prob /= bigram_prob.sum(1, keepdims=True)

In [12]:
for w in words:
  chars = ['.'] + list(w) + ['.']
  for ch1, ch2 in zip(chars, chars[1:]):
    ix1 = stoi[ch1]
    ix2 = stoi[ch2]
    bigram_cnt[ix1, ix2] += 1

In [13]:
bigram_prob =( bigram_cnt + 1).float() #Smoothening the model
bigram_prob /= bigram_prob.sum(1, keepdims=True)

In [14]:
g = torch.Generator().manual_seed(2147483647)

In [15]:
names = []
for i in range(10):
  ix = 0
  name = ""
  while True:
    p = bigram_prob[ix]
    ix = torch.multinomial(p, 1, replacement=True, generator=g).item()
    if ix == 0:
      break
    name += itos[ix]
  names.append(name)

In [16]:
names

['junide',
 'janasah',
 'p',
 'cony',
 'a',
 'nn',
 'kohin',
 'tolian',
 'juee',
 'ksahnaauranilevias']

In [86]:
negative_log_likelihod = 0.0
n = 0

for w in words:
  for ch1, ch2 in zip(w, w[1:]):
    ix1 = stoi[ch1]
    ix2 = stoi[ch2]
    prob = bigram_prob[ix1, ix2]
    negative_log_likelihod -= torch.log(prob)
    n += 1

print(negative_log_likelihod)
print(negative_log_likelihod/n)

tensor(412482.3750)
tensor(2.5139)


In [18]:
negative_log_likelihod = 0.0
n = 0

for w in ["andrejq"]:
  for ch1, ch2 in zip(w, w[1:]):
    ix1 = stoi[ch1]
    ix2 = stoi[ch2]
    prob = bigram_prob[ix1, ix2]
    print(f'{ch1}-->{ch2}: {prob}')
    negative_log_likelihod -= torch.log(prob)
    n += 1

print(negative_log_likelihod)
print(negative_log_likelihod/n)

a-->n: 0.16038569808006287
n-->d: 0.03841124475002289
d-->r: 0.07695093005895615
r-->e: 0.13341714441776276
e-->j: 0.002738386392593384
j-->q: 0.0003416467516217381
tensor(23.5506)
tensor(3.9251)


In [19]:
xs, ys = [], []

for w in words:
  w = ['.'] + list(w) + ['.']
  for ch1, ch2 in zip(w, w[1:]):
    ix1 = stoi[ch1]
    ix2 = stoi[ch2]
    xs.append(ix1)
    ys.append(ix2)

xs = torch.tensor(xs)
ys = torch.tensor(ys)

In [81]:
loss = -bigram_prob[xs, ys].log().mean()
loss

tensor(2.5143)

In [22]:
xenc = torch.nn.functional.one_hot(xs, num_classes=-1)
xenc = xenc.float()
xenc.shape

torch.Size([228146, 27])

In [23]:
xenc.dtype

torch.float32

In [27]:
W = torch.randn((27, 27))

In [28]:
xenc @ W

tensor([[ 0.8736,  0.0416, -1.8841,  ..., -0.4928,  0.4273, -0.4178],
        [-0.9713,  0.3998, -1.6725,  ...,  0.1839,  1.8576, -1.2454],
        [-1.4348, -1.4738,  1.4196,  ..., -1.5410,  0.3662,  1.5975],
        ...,
        [-0.6122,  1.6455, -0.5917,  ..., -0.0190, -1.9781, -0.0859],
        [-0.3509,  1.1042,  0.9445,  ...,  0.0254,  0.1715, -0.7139],
        [ 0.3958,  1.9919,  1.0650,  ...,  0.2210,  0.0997, -1.1529]])

In [33]:
logits = xenc @ W
logits

tensor([[ 0.8736,  0.0416, -1.8841,  ..., -0.4928,  0.4273, -0.4178],
        [-0.9713,  0.3998, -1.6725,  ...,  0.1839,  1.8576, -1.2454],
        [-1.4348, -1.4738,  1.4196,  ..., -1.5410,  0.3662,  1.5975],
        ...,
        [-0.6122,  1.6455, -0.5917,  ..., -0.0190, -1.9781, -0.0859],
        [-0.3509,  1.1042,  0.9445,  ...,  0.0254,  0.1715, -0.7139],
        [ 0.3958,  1.9919,  1.0650,  ...,  0.2210,  0.0997, -1.1529]])

In [36]:
counts = logits.exp()
counts

tensor([[2.3955, 1.0425, 0.1520,  ..., 0.6109, 1.5331, 0.6585],
        [0.3786, 1.4916, 0.1878,  ..., 1.2019, 6.4081, 0.2878],
        [0.2382, 0.2291, 4.1356,  ..., 0.2142, 1.4422, 4.9406],
        ...,
        [0.5421, 5.1837, 0.5534,  ..., 0.9812, 0.1383, 0.9177],
        [0.7040, 3.0168, 2.5714,  ..., 1.0257, 1.1870, 0.4897],
        [1.4855, 7.3294, 2.9010,  ..., 1.2473, 1.1048, 0.3157]])

In [38]:
probs = counts / counts.sum(1, keepdims=True)
probs

tensor([[0.0936, 0.0407, 0.0059,  ..., 0.0239, 0.0599, 0.0257],
        [0.0093, 0.0365, 0.0046,  ..., 0.0294, 0.1570, 0.0071],
        [0.0049, 0.0047, 0.0857,  ..., 0.0044, 0.0299, 0.1024],
        ...,
        [0.0113, 0.1083, 0.0116,  ..., 0.0205, 0.0029, 0.0192],
        [0.0170, 0.0730, 0.0622,  ..., 0.0248, 0.0287, 0.0118],
        [0.0339, 0.1675, 0.0663,  ..., 0.0285, 0.0252, 0.0072]])

In [97]:
xs = []
ys = []
word = '.emma.'
for ch1, ch2 in zip(word, word[1:]):
  xs.append(stoi[ch1])
  ys.append(stoi[ch2])
xs = torch.tensor(xs)
ys = torch.tensor(ys)

In [98]:
xs

tensor([ 0,  5, 13, 13,  1])

In [99]:
ys

tensor([ 5, 13, 13,  1,  0])

In [100]:
g = torch.Generator().manual_seed(2147483647)
W = torch.randn((27, 27), generator=g, requires_grad=True)

In [101]:
xenc = torch.nn.functional.one_hot(xs, num_classes=27).float()
logits = xenc @ W
counts = logits.exp()
probs = counts / counts.sum(1, keepdims=True)
loss = -probs[torch.arange(5), ys].log().mean()
loss.item()

3.7693049907684326

In [102]:
xs, ys = [], []

for w in words:
  w = ['.'] + list(w) + ['.']
  for ch1, ch2 in zip(w, w[1:]):
    ix1 = stoi[ch1]
    ix2 = stoi[ch2]
    xs.append(ix1)
    ys.append(ix2)

xs = torch.tensor(xs)
ys = torch.tensor(ys)
xenc = torch.nn.functional.one_hot(xs, num_classes=27).float()

In [107]:
for k in range(100):
  logits = xenc @ W
  counts = logits.exp()
  probs = counts / counts.sum(1, keepdims=True)
  loss = -probs[torch.arange(len(xenc)), ys].log().mean() + 0.01*(W**2).mean() # (This is regularization)

  W.grad = None
  loss.backward()

  W.data -= 50 * W.grad

  if (k + 1) % 10 == 0:
    print(f'Epoch: {k+1}, Loss: {loss.item()}')

Epoch: 10, Loss: 2.481760263442993
Epoch: 20, Loss: 2.4816858768463135
Epoch: 30, Loss: 2.4816172122955322
Epoch: 40, Loss: 2.481553316116333
Epoch: 50, Loss: 2.4814937114715576
Epoch: 60, Loss: 2.481438636779785
Epoch: 70, Loss: 2.481386661529541
Epoch: 80, Loss: 2.4813385009765625
Epoch: 90, Loss: 2.481292724609375
Epoch: 100, Loss: 2.481250762939453


In [135]:
g = torch.Generator().manual_seed(2147483647)
names = []
for i in range(10):
  ix = torch.tensor(0)
  name = ""
  while True:
    ix = torch.nn.functional.one_hot(torch.tensor([ix]), num_classes=27).float()
    logits = ix @ W
    probs = logits.softmax(1)
    ix = torch.multinomial(probs, num_samples=1, replacement=True, generator=g).item()
    if ix == 0:
      break
    name += itos[ix]
  names.append(name)

names

['junide',
 'janasah',
 'p',
 'cfay',
 'a',
 'nn',
 'kohin',
 'tolian',
 'juwe',
 'ksahnaauranilevias']