<a href="https://colab.research.google.com/github/erfan7emz/NeuralNet/blob/main/gpt.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [82]:
import torch
import matplotlib.pyplot as plt
import torch.nn.functional as F
# Read the dataset: one name per line, returned as a list of strings without newlines.
# The context manager guarantees the file handle is closed once the read finishes.
with open('names.txt', 'r') as names_file:
  words = names_file.read().splitlines()

In [83]:
# Vocabulary: every distinct character that occurs in the dataset, in sorted order
chars = sorted(set(''.join(words)))
# character -> integer id, numbering the sorted characters from 1 upward
stoi = dict(zip(chars, range(1, len(chars) + 1)))
# id 0 is given to '.', the marker placed before and after every word
stoi['.'] = 0
# integer id -> character (inverse of stoi)
itos = dict((idx, ch) for ch, idx in stoi.items())

In [86]:
# Bigram training set: for every pair of adjacent characters in a word
# (after wrapping the word in '.' markers), xs gets the id of the first
# character and ys gets the id of the character that follows it.
xs, ys = [], []

for name in words:
  padded = ['.', *name, '.']
  for prev_ch, next_ch in zip(padded, padded[1:]):
    xs.append(stoi[prev_ch])  # input to the nn
    ys.append(stoi[next_ch])  # correct next char

# Convert the id lists to tensors and record how many examples there are
xs = torch.tensor(xs)
ys = torch.tensor(ys)
num = xs.nelement()
print('number of examples: ', num)

# Fixed seed so the weight initialization below is reproducible
g = torch.Generator().manual_seed(2147483647)
# 27 neurons, each receiving 27 inputs; requires_grad lets autograd compute W.grad
W = torch.randn((27, 27), generator=g, requires_grad=True)

number of examples:  228146


In [89]:
# Full-batch gradient descent on the single-layer bigram model.
num_steps = 100        # number of gradient steps
learning_rate = 50.0   # step size of the plain gradient-descent update
reg_strength = 0.01    # weight of the mean(W**2) penalty added to the loss

# The one-hot inputs and the row indices do not change between steps,
# so build them once instead of on every iteration.
xenc = F.one_hot(xs, num_classes=W.shape[0]).float() # input to the nn
example_idx = torch.arange(num)

for k in range(num_steps):

  # forward pass
  logits = xenc @ W # log-counts
  # log-softmax normalizes in log space, so large logits cannot overflow exp()
  log_probs = F.log_softmax(logits, dim=1)
  probs = log_probs.exp() # next-character probabilities, kept under the original name
  # mean negative log likelihood of the correct next char, plus the weight penalty
  loss = -log_probs[example_idx, ys].mean() + reg_strength * (W**2).mean()
  print(loss.item())

  # backward pass
  W.grad = None # clear the previous step's gradient before backpropagating
  loss.backward()

  # update: modify W in place without recording the step in the autograd graph
  with torch.no_grad():
    W -= learning_rate * W.grad

2.4829699993133545
2.4829440116882324
2.482919216156006
2.482893943786621
2.4828693866729736
2.4828450679779053
2.482821226119995
2.482797622680664
2.482774257659912
2.4827513694763184
2.4827287197113037
2.482706308364868
2.482684373855591
2.4826626777648926
2.4826409816741943
2.4826197624206543
2.4825992584228516
2.4825785160064697
2.482558250427246
2.4825379848480225
2.482517957687378
2.4824984073638916
2.4824795722961426
2.4824600219726562
2.4824411869049072
2.4824228286743164
2.4824044704437256
2.4823861122131348
2.4823684692382812
2.4823505878448486
2.482333183288574
2.4823157787323
2.482299327850342
2.4822819232940674
2.4822654724121094
2.4822492599487305
2.4822328090667725
2.4822168350219727
2.482201099395752
2.482185125350952
2.4821696281433105
2.4821548461914062
2.4821395874023438
2.4821243286132812
2.482109785079956
2.482095241546631
2.4820809364318848
2.4820668697357178
2.4820523262023926
2.482038736343384
2.482024908065796
2.4820117950439453
2.4819977283477783
2.48198461532

In [26]:
# nlls = torch.zeros(5)
# for i in range(5):
#   x = xs[i].item()
#   y = ys[i].item()
#   print('--------')
#   print(f'bigram example {i+1}: {itos[x]}{itos[y]} (indexes {x}, {y})')
#   print('input to the neural net: ', x)
#   print('output probabilies from the neural net', probs[i])
#   print('label (actual next character):', y)
#   p = probs[i, y]
#   print('probability assigned by the net to the correct character:', p.item())
#   logp = torch.log(p)
#   print('log likelihood:', logp.item())
#   nll = -logp
#   print('negative log likelihood:', nll.item()) # higher is worse because it means bigger loss
#   nlls[i] = nll

# print('=========')
# print('average negative log likelihood, i.e. loss =', nlls.mean().item())

--------
bigram example 1: .e (indexes 0, 5)
input to the neural net:  0
output probabilies from the neural net tensor([0.0607, 0.0100, 0.0123, 0.0042, 0.0168, 0.0123, 0.0027, 0.0232, 0.0137,
        0.0313, 0.0079, 0.0278, 0.0091, 0.0082, 0.0500, 0.2378, 0.0603, 0.0025,
        0.0249, 0.0055, 0.0339, 0.0109, 0.0029, 0.0198, 0.0118, 0.1537, 0.1459])
label (actual next character): 5
probability assigned by the net to the correct character: 0.01228625513613224
log likelihood: -4.399273872375488
negative log likelihood: 4.399273872375488
--------
bigram example 2: em (indexes 5, 13)
input to the neural net:  5
output probabilies from the neural net tensor([0.0290, 0.0796, 0.0248, 0.0521, 0.1989, 0.0289, 0.0094, 0.0335, 0.0097,
        0.0301, 0.0702, 0.0228, 0.0115, 0.0181, 0.0108, 0.0315, 0.0291, 0.0045,
        0.0916, 0.0215, 0.0486, 0.0300, 0.0501, 0.0027, 0.0118, 0.0022, 0.0472])
label (actual next character): 13
probability assigned by the net to the correct character: 0.0180507004

In [90]:
# Generate names by repeatedly sampling the next character from the model.
num_names = 5  # how many names to generate

# Sampling only needs forward passes; W has requires_grad=True, so turn off
# gradient recording to avoid building an autograd graph for every character.
with torch.no_grad():
  for i in range(num_names):
    out = []
    ix = 0 # id 0 is '.', which both starts and ends a name
    while True:
      # one-hot width follows W's input dimension so the two always agree
      xenc = F.one_hot(torch.tensor([ix]), num_classes=W.shape[0]).float()
      logits = xenc @ W
      counts = logits.exp()
      p = counts / counts.sum(1, keepdims=True) # probability for next character

      ix = torch.multinomial(p, num_samples=1, replacement=True, generator=g).item()
      out.append(itos[ix])
      if ix == 0:
        break
    print(''.join(out))


morvann.
akela.
az.
arileri.
chaiadayra.
