<a href="https://colab.research.google.com/github/israel-adewuyi/name_generator/blob/main/vanila_NN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt

In [2]:
names = open('names.txt', 'r').read().split()
names[:5]

['emma', 'olivia', 'ava', 'isabella', 'sophia']

In [3]:
characters = sorted(list(set(''.join(names))))

chars_to_idx = {ch : i + 1 for i, ch in enumerate(characters)}
chars_to_idx['.'] = 0

idx_to_chars = {i : ch for ch, i in chars_to_idx.items()}


In [61]:
xs, ys = [], []

for name in names:
  context = ['.'] + list(name) + ['.']
  for ch1, ch2 in zip(context, context[1:]):
    idx1 = chars_to_idx[ch1]
    idx2 = chars_to_idx[ch2]
    xs.append(idx1)
    ys.append(idx2)


xs = torch.tensor(xs)
ys = torch.tensor(ys)
num = xs.nelement()
print('number of examples: ', num)

g = torch.Generator().manual_seed(2147483647)
W = torch.randn((27, 27), generator=g, requires_grad=True)
# W = torch.zeros((27, 27), requires_grad=True)

number of examples:  228146


In [None]:
"""
  Link - https://www.perplexity.ai/search/During-weights-initialization-OvQV297tQoGz1ANBTRiljg?s=c
  Having weights as close to zero as possible reduces the occurence of zero
    gradients or infinity gradients, .i.e vanishing gradients or exploding
    gradients. In this implementation where we are also dealing with probabilities
    in the final layer, this also has an effect of making the output distribution
    more uniform.

  Regularization has a second order effect of 'incentivizing' the weights to
    stay close to zero as much as possible, specifically, L2 regularization.
    Why not just set the weights to zero initially?

"""

In [64]:
for i in range(1000):
  xenc = F.one_hot(xs, num_classes=27).float()
  logits = xenc @ W
  count = logits.exp()
  prob = count / count.sum(1, keepdim=True)
  loss = -(prob[torch.arange(num), ys].log().mean()) + 0.01 * (W ** 2).mean()

  # if i % 100 == 0:
  #   print(f'loss = {loss.item()}')

  W.grad = None
  loss.backward()

  W.data += -50 * W.grad

print(loss.item())

2.4803996086120605


In [67]:
g = torch.Generator().manual_seed(2147483647)

for i in range(5):
  name = []
  idx = 0

  while True:
    xenc = F.one_hot(torch.tensor([idx]), num_classes=27).float()
    logits = xenc @ W
    count = logits.exp()
    p = count / count.sum(1, keepdim=True)

    ix = torch.multinomial(p, num_samples=1, replacement=True, generator = g).item()

    name.append(idx_to_chars[ix])
    if ix == 0:
      break

  print(''.join(name))

jsnjdmdjakakazjpscfaawacnmakjhirltolsaasjtezmksaanaacyamblevhajkdbdainrwimtldsnjsieaylartbzfnvmumtkafodtmmjmpfnasljjhrjaejrcoraayaejockkajjabdjwejkmrkimwynsnsagaasnhsjjimsmszshddgosfmataaciczlrjpaakmthdtejksrsratiaamdjmrrmjekjjcmkhkdbwahlpkmjccmarrrjdkhjadzttajmplyrabalahagiblammpocblthemamatawatlfbmsdljktacslrtikdzsalaezmrbnkndekkfmbjsaemrdaatearchwkmjrzsodrjdcdzjajjalrkpjrghaetazrwaecaakvywhqelvaemhsahibdhakeakgmtmjbracabdoinapnkakjakedewbkjgkecfdacrkcarvjrjkmsbranmjrdskdkmafvkdkbdekihakazsrdabktadoljbasrtsgttbbkbujfdtskcenjtcdlcndfrjwlasdpskltalmlokdmaldfadmrjmsmfslamrdvdihkmngaojmrdsynivcebkujkenajgtjvnvjjajtkmmfrafjlaglykralkohmdjttrzylkrmsgirjdmhtipagkadvjapnsjiadmjejnlkcacvataarsakajskbrlrjsjjpbebazkwkejbiktzsnkrabbhhdsisajbsaklmodvabjkaktcatazkawslertacsmbadayrajfbalqdocmeyjojbzszsdbdanlhtnkkdhetsmltmcconakfnllnzaskmkhtmsbnakisemrtmlwwlsxakfkcdlsekdajljagkjmsmfbadkkmcnaylrlasjvyeb.
dljbhmscjsemtozsezhksjammnkkadsjmarfzbampqhkdmtddjakvkeakatvdyrvtajhmksdtitmjljatkljbmhgjtpekazah