In [15]:
import torch
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim

import matplotlib.pyplot as plt 
from tqdm import tqdm

In [16]:
device = "cpu"

In [17]:
with open("./names.txt") as f:
    words = f.read().splitlines()
    print(len(words), max(len(w) for w in words), min(len(w) for w in words))

chars = sorted(list(set((''.join(words)))))  #get unique characters 
stoi = {s:i+1 for i, s in enumerate(chars)}  # map char to int
stoi["."] = 0 
itos = {i:s for s,i in stoi.items()}         # map int to char
vocab_size = len(itos)


# build the dataset
block_size = 8 # context length: how many characters do we take to predict the next one?

def build_dataset(words):  
  X, Y = [], []
  for w in words:
    context = [0] * block_size
    for ch in w + '.':
      ix = stoi[ch]
      X.append(context)
      Y.append(ix)
      context = context[1:] + [ix] # crop and append

  X = torch.tensor(X, device=device)
  Y = torch.tensor(Y, device=device)
  print(X.shape, Y.shape)
  return X, Y

import random
random.seed(42)
random.shuffle(words)
n1 = int(0.8*len(words))
n2 = int(0.9*len(words))

Xtr, Ytr = build_dataset(words[:n1])
Xdev, Ydev = build_dataset(words[n1:n2])
Xte, Yte = build_dataset(words[n2:])


32033 15 2
torch.Size([182625, 8]) torch.Size([182625])
torch.Size([22655, 8]) torch.Size([22655])
torch.Size([22866, 8]) torch.Size([22866])


In [18]:
n_dim = 5
n_hidden = 200
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.seq = nn.Sequential(
            nn.Embedding(vocab_size,n_dim),
            nn.Flatten(),
            nn.Linear(n_dim*block_size,n_hidden),
            nn.BatchNorm1d(n_hidden),
            nn.Linear(n_hidden, vocab_size),
            nn.BatchNorm1d(vocab_size)
        )
    def forward(self, x):
        return self.seq(x)

In [19]:
model =MLP()
model.to("cpu")
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

In [20]:
its = 1000
for i in tqdm(range(its)):
  
  # minibatch construct
    ix = torch.randint(0, Xtr.shape[0], (264,))
    logits = model(Xtr[ix])
    loss = F.cross_entropy(logits, Ytr[ix])
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if i%100==0:print(loss.item())

  4%|▍         | 39/1000 [00:00<00:02, 386.76it/s]

3.760153293609619


 12%|█▏        | 121/1000 [00:00<00:01, 638.54it/s]

2.567761182785034


 20%|██        | 202/1000 [00:00<00:01, 712.53it/s]

2.390362501144409


 28%|██▊       | 284/1000 [00:00<00:00, 754.46it/s]

2.290411949157715


 45%|████▍     | 449/1000 [00:00<00:00, 793.03it/s]

2.377596616744995


 53%|█████▎    | 531/1000 [00:00<00:00, 799.33it/s]

2.1724815368652344


 61%|██████▏   | 613/1000 [00:00<00:00, 805.40it/s]

2.4194202423095703


 70%|██████▉   | 696/1000 [00:00<00:00, 810.46it/s]

2.389598846435547


 78%|███████▊  | 780/1000 [00:01<00:00, 817.22it/s]

2.4354043006896973


 95%|█████████▍| 946/1000 [00:01<00:00, 774.83it/s]

2.297168731689453


100%|██████████| 1000/1000 [00:01<00:00, 767.27it/s]


In [24]:

model.train(False)
for _ in range(20):
    
    out = []
    context = [0] * block_size # initialize with all ...
    while True:
      

      logits = model(torch.tensor([context])) # (1,block_size,d)
      probs = F.softmax(logits, dim=1)
      ix = torch.multinomial(probs, num_samples=1).item()
      context = context[1:] + [ix]
      out.append(ix)
      if ix == 0:
        break
    
    print(''.join(itos[i] for i in out))

lengeni.
lengar.
assian.
aumana.
apnnciosa.
zanchal.
ceaven.
fadinala.
mirly.
zuran.
dubry.
kalierh.
alaben.
danai.
limayni.
feere.
zaeshae.
meilon.
samiz.
rakionn.
