In [None]:
# Read the whole names file into a list with one element per line. The
# context manager closes the handle as soon as the read finishes instead of
# leaving an open file object behind in the kernel.
with open('../dataset/names.txt', 'r') as names_file:
    words = names_file.read().splitlines()
words[:5]

In [None]:
# Tally how often each adjacent pair of tokens occurs across all names.
# Every name is wrapped in explicit '<S>' / '<E>' markers so that
# "starts with" and "ends with" are counted as bigrams too.
b = {}
for name in words:
    tokens = ['<S>', *name, '<E>']
    for pair in zip(tokens, tokens[1:]):
        b[pair] = b.get(pair, 0) + 1

In [None]:
# Bigrams ranked from most to least frequent; pairs with equal counts keep
# the order in which they were first inserted into `b`.
sorted(b.items(), key=lambda pair_count: pair_count[1], reverse=True)

In [None]:
import torch
# Bigram count matrix: N[i, j] is the number of times token j follows token i.
# Index 0 is the '.' boundary token; other characters map to
# ord(ch) - ord('a') + 1.
# NOTE(review): assumes every name contains only lowercase a-z — confirm
# against the dataset, other characters would land outside 1..26.
N = torch.zeros((27, 27), dtype=torch.int32)
for w in words:
    chs = ['.', *w, '.']
    for ch1, ch2 in zip(chs, chs[1:]):
        ix1 = 0 if ch1 == "." else ord(ch1) - ord('a') + 1
        ix2 = 0 if ch2 == "." else ord(ch2) - ord('a') + 1
        N[ix1, ix2] += 1
N

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
# Heatmap of the bigram counts. Each cell is annotated with its two-character
# bigram (upper text) and the raw count from N (lower text).
plt.figure(figsize=(16,16))
plt.imshow(N, cmap='Blues')
for i in range(27):
    # Index 0 is the '.' boundary token; 1..26 map back to 'a'..'z'.
    row_ch = "." if i == 0 else chr(i + ord('a') - 1)
    for j in range(27):
        col_ch = "." if j == 0 else chr(j + ord('a') - 1)
        plt.text(j, i, row_ch + col_ch, ha="center", va="bottom", color='black')
        plt.text(j, i, N[i, j].item(), ha="center", va="top", color='black')
plt.axis('off')

In [None]:
# Normalise row 0 of the count matrix (counts of what follows the '.' token)
# into a probability distribution over the 27 tokens.
first_row_counts = N[0].float()
p = first_row_counts / first_row_counts.sum()
p

In [None]:
# Seed a dedicated generator so the draw below is reproducible, then sample a
# single token index from the distribution `p`.
g = torch.Generator()
g.manual_seed(2147483647)
ix = torch.multinomial(p, num_samples=1, replacement=True, generator=g)
ix

In [None]:
# Add one to every count before normalising so that no bigram ends up with
# probability zero, then scale each row to sum to 1 (keepdim=True keeps the
# row sums as a column so the division broadcasts across each row).
smoothed_counts = (N + 1).float()
P = smoothed_counts / smoothed_counts.sum(dim=1, keepdim=True)

In [None]:
# Generate 10 samples from the smoothed bigram model. The generator is
# re-seeded here so this cell prints the same samples every time it is run.
g = torch.Generator().manual_seed(2147483647)
for _ in range(10):
    ix = 0  # every sample starts from the '.' boundary token (index 0)
    out = []
    while True:
        # Index P directly rather than rebinding the notebook-level `p`, so
        # the earlier cell that samples from `p` still sees the distribution
        # it computed if it is re-run after this one.
        ix = torch.multinomial(P[ix], num_samples=1, replacement=True, generator=g).item()
        if ix == 0:
            # Drawing the boundary token ends the sample; it is kept in the
            # printed output as a trailing '.'.
            out.append(".")
            break
        out.append(chr(ix + ord('a') - 1))
    print(''.join(out))


In [None]:
# Score a word under the bigram model: accumulate the log-probability of each
# of its bigrams, then report the total, the negative log-likelihood, and the
# per-bigram average.
log_likelihood = 0.0
n = 0
for w in ["dushyantq"]:
    chs = ['.', *w, '.']
    for ch1, ch2 in zip(chs, chs[1:]):
        # '.' is index 0; other characters map to ord(ch) - ord('a') + 1.
        ix1 = 0 if ch1 == "." else ord(ch1) - ord('a') + 1
        ix2 = 0 if ch2 == "." else ord(ch2) - ord('a') + 1
        prob = P[ix1, ix2]
        logprob = torch.log(prob)
        log_likelihood += logprob
        n += 1
        print(f'{ch1, ch2}: {prob: .4f}: {logprob: .4f}')

print(f'{log_likelihood=}')
nll = -log_likelihood
print(f'{nll=}')
print(f'{nll/n=}')


In [None]:
# Build the bigram training set: for every adjacent pair of tokens in every
# name, xs holds the index of the first token and ys the index of the one
# that follows it.
xs, ys = [], []

for w in words:
    chs = ['.', *w, '.']
    for ch1, ch2 in zip(chs, chs[1:]):
        # '.' is index 0; other characters map to ord(ch) - ord('a') + 1.
        ix1 = 0 if ch1 == "." else ord(ch1) - ord('a') + 1
        ix2 = 0 if ch2 == "." else ord(ch2) - ord('a') + 1
        xs.append(ix1)
        ys.append(ix2)

xs = torch.tensor(xs)
ys = torch.tensor(ys)
num = xs.nelement()
print('number of examples: ', num)

# 27x27 weight matrix drawn from a seeded generator so the starting point is
# reproducible; requires_grad=True lets autograd compute gradients for it.
g = torch.Generator().manual_seed(2147483647)
W = torch.randn((27, 27), generator=g, requires_grad=True)

In [None]:
import torch.nn.functional as F

# Gradient-descent training of the single 27x27 weight matrix W.
lr = 50  # step size used in the manual parameter update below

# The inputs never change between iterations, so one-hot encode them once
# instead of rebuilding the same tensor on every pass through the loop.
xenc = F.one_hot(xs, num_classes=27).float()

for k in range(100):
    # forward pass
    # Each row of xenc is one-hot, so xenc @ W simply picks out row xs[i] of W
    # for example i. Those 27 numbers are unnormalised scores ("logits"):
    # exponentiating makes them positive, and dividing by the row sum turns
    # them into a probability distribution over the next token (a softmax).
    logits = xenc @ W
    counts = logits.exp()
    probs = counts / counts.sum(1, keepdims=True)
    # probs[arange(num), ys] is the probability the model assigned to the
    # actual next token of each example; the loss is the mean negative log of
    # those probabilities (average negative log-likelihood).
    loss = -probs[torch.arange(num), ys].log().mean()
    print(loss.item())

    # backward pass
    W.grad = None  # clear the previous gradient so it does not accumulate
    loss.backward()

    # parameter update
    # Done under no_grad so the in-place step is not recorded by autograd,
    # rather than going through `.data`, which hides the change from it.
    with torch.no_grad():
        W += -lr * W.grad