In [1]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt # for making figures
%matplotlib inline

In [2]:
# Read the names corpus: one name per line, newline characters stripped.
# The context manager closes the file handle as soon as the read finishes,
# instead of leaving an open handle around for the garbage collector.
with open("names.txt", "r") as f:
    words = f.read().splitlines()

In [3]:
# Number of names (lines) loaded from names.txt.
len(words)

32033

In [4]:
# Character <-> integer lookup tables.
# Every distinct character found in the corpus gets an index starting at 1;
# index 0 is reserved for '.', the start/end-of-name marker.

chars = sorted(set("".join(words)))
stoi = dict(zip(chars, range(1, len(chars) + 1)))
stoi["."] = 0
# Inverse mapping: integer -> character.
itos = {idx: ch for ch, idx in stoi.items()}
print(itos)


{1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e', 6: 'f', 7: 'g', 8: 'h', 9: 'i', 10: 'j', 11: 'k', 12: 'l', 13: 'm', 14: 'n', 15: 'o', 16: 'p', 17: 'q', 18: 'r', 19: 's', 20: 't', 21: 'u', 22: 'v', 23: 'w', 24: 'x', 25: 'y', 26: 'z', 0: '.'}


In [5]:
# Build the training set.
# Each example pairs the previous `block_size` character indices (the context)
# with the index of the character that follows them.
block_size = 3
X = []  # contexts
y = []  # next-character targets

for w in words[:3]:
    # Every name starts from a context made only of '.' (index 0).
    context = [0] * block_size
    for ch in w + '.':
        ix = stoi[ch]
        X.append(context)
        y.append(ix)
        print("".join(map(itos.__getitem__, context)), '------>', itos[ix])
        # Slide the window: drop the oldest index, append the newest.
        # A new list is created here, so the list already stored in X is untouched.
        context = [*context[1:], ix]

X, y = torch.tensor(X), torch.tensor(y)

... ------> e
..e ------> m
.em ------> m
emm ------> a
mma ------> .
... ------> o
..o ------> l
.ol ------> i
oli ------> v
liv ------> i
ivi ------> a
via ------> .
... ------> a
..a ------> v
.av ------> a
ava ------> .


In [None]:
# Illustration: a context is just a list of `block_size` integers fed to the
# model as input, and y is the following character, also encoded as an integer.
# The initial context is all zeros, i.e. all '.' padding.

block_size = 3
context = [0 for _ in range(block_size)]
print(context)

[0, 0, 0]


In [14]:
# Show the context tensor: one row per training example, block_size character indices per row.
X

tensor([[ 0,  0,  0],
        [ 0,  0,  5],
        [ 0,  5, 13],
        [ 5, 13, 13],
        [13, 13,  1],
        [ 0,  0,  0],
        [ 0,  0, 15],
        [ 0, 15, 12],
        [15, 12,  9],
        [12,  9, 22],
        [ 9, 22,  9],
        [22,  9,  1],
        [ 0,  0,  0],
        [ 0,  0,  1],
        [ 0,  1, 22],
        [ 1, 22,  1]])

In [6]:
# Sanity check: shapes and dtypes of the inputs (X) and the targets (y).
X.shape, X.dtype, y.shape, y.dtype

(torch.Size([16, 3]), torch.int64, torch.Size([16]), torch.int64)

In [7]:
# Embedding lookup table: one 2-dimensional vector for each of the 27 character indices.

C = torch.randn((27, 2))

In [8]:
# Indexing C with the integer tensor X fetches one row of C per index,
# so the result has X's shape followed by C's trailing dimension.
C[X].shape

torch.Size([16, 3, 2])

In [18]:
# Context of the second training example ('..e' in the printout above).
X[1]

tensor([0, 0, 5])

In [17]:
# Embeddings for the second example: the rows of C selected by the indices in X[1].
C[X][1]

tensor([[ 1.9613,  0.4022],
        [ 1.9613,  0.4022],
        [-1.3470, -1.3839]])

In [19]:
# Embeddings for the third example: the rows of C selected by the indices in X[2].
C[X][2]

tensor([[ 1.9613,  0.4022],
        [-1.3470, -1.3839],
        [-0.2980,  1.6116]])

In [9]:
# Embedding of the last context position of example 13; X[13, 2] is 1, so this is row 1 of C.
C[X][13,2]

tensor([0.4962, 0.0134])

In [10]:
C[1]

# Same vector as C[X][13,2]: indexing through X just looks up the row of C named by X[13,2], which is 1.

tensor([0.4962, 0.0134])

In [13]:
# Embed every context index in X by looking up its row in C.
emb = C[X]
# Display the shape of the embedded batch.
emb.shape

torch.Size([16, 3, 2])

In [20]:
# Hidden-layer parameters. The layer has 100 neurons; each one receives
# 6 inputs = 3 context characters * 2 embedding dimensions (see emb).

w1 = torch.randn(6, 100)
b1 = torch.randn((100,))

In [21]:
# Hidden state: tanh(embeddings @ weights + biases).
# view(-1, 6) flattens each example's embeddings into a single row of 6 values
# so it can be multiplied by w1; the -1 lets torch infer the number of rows.
h = (emb.view(-1, 6) @ w1 + b1).tanh()

In [22]:
# Inspect the hidden activations; they come out of tanh, so every value lies in [-1, 1].
h

tensor([[-0.8012, -0.9996,  0.8319,  ..., -0.0345,  0.8601,  0.9983],
        [-0.8621, -0.9630, -0.9987,  ..., -0.0236,  0.9990,  0.0890],
        [-1.0000,  0.6536,  0.9984,  ..., -0.9701,  0.4885,  0.7377],
        ...,
        [-0.7096, -0.9939, -0.7123,  ..., -0.4919,  0.9827,  0.9686],
        [-0.9883, -0.8612, -0.5645,  ..., -0.6471,  0.9573,  0.4588],
        [-0.9643, -0.9027,  0.6296,  ...,  0.7748, -0.0200,  0.4261]])

In [23]:
# One row of hidden activations per example, one column per hidden neuron.
h.shape

torch.Size([16, 100])

In [24]:
# Output-layer parameters: map the 100 hidden activations to 27 scores, one per character index.
w2 = torch.randn((100, 27))
b2 = torch.randn((27,))

In [25]:
# Output layer: raw (unnormalised) scores for every example and every character index.
logits = torch.matmul(h, w2) + b2

In [26]:
# One row of scores per example, one column per character index.
logits.shape

torch.Size([16, 27])

In [27]:
# Inspect the raw scores before they are turned into probabilities.
logits

tensor([[ -2.2922,   1.0099,   7.7177,   4.1158, -14.7814,  -0.2132,   8.5855,
          -8.8554,   4.6468,  -7.1532,   9.5806,  -1.3124, -10.7530,   0.8671,
          -7.7361,  -2.5509,  -3.5995,  -5.4315,  -9.0808,  14.8636,  15.3210,
          10.7489,  -6.9169, -11.6202,   2.2754,  -6.3758,  -6.6998],
        [ 12.3477,  12.4787,  -2.9752,   1.2653,  -2.6013, -10.7776,   7.8482,
          -0.1533,  10.2324,   0.9855,  -1.3126,  -4.8551, -16.5062,   4.1195,
           4.1571,  -0.8503,  -2.5413,  -0.5315,  -2.5453,  -0.3874, -11.1146,
          16.4266,  -2.7185,  -7.2709,   9.2562,   5.4122,  -9.2477],
        [  4.0722, -10.5449,  -3.1746,  -7.4805,  -9.2137,  -2.9954,  12.6575,
         -11.3637,  13.5825,  -3.8295,  12.8851,   4.3709,  -6.5801,  -3.4843,
          -3.4049,  -4.5619,  -8.5768,   6.2636,  12.4073,   1.7702,  13.0608,
           5.8764,   8.9249,   2.5490, -10.0133,  18.7885,   3.5187],
        [  1.7781, -10.1276,   7.8367,   9.6403,   9.6147,  -2.6394,  -2.5953,


In [28]:
# Exponentiate the scores so they are all positive ("fake counts").
counts = torch.exp(logits)

In [29]:
# Normalise each row so it sums to 1; keepdim=True keeps the row sums as a
# column so the division broadcasts across each row.
probs = counts / counts.sum(dim=1, keepdim=True)

In [30]:
# Same shape as logits: one probability per example and character index.
probs.shape

torch.Size([16, 27])

In [34]:
# Sanity check: a normalised row of probabilities should sum to 1.
probs[0].sum()

tensor(1.)

In [37]:
# Target indices: the character that follows each context row of X.
y

tensor([ 5, 13, 13,  1,  0, 15, 12,  9, 22,  9,  1,  0,  1, 22,  1,  0])

In [39]:
# probs holds a probability for each of the 27 output classes in every row,
# but only the probability assigned to the true label matters here.
# Pair row i with column y[i] to pick it out.
# The row index is sized from y rather than hard-coded to 16, so this cell
# keeps working when the dataset is built from more than the first 3 names.
probs[torch.arange(y.shape[0]), y]

tensor([1.0878e-07, 4.3486e-06, 2.0911e-10, 8.8768e-12, 2.8266e-08, 1.0503e-08,
        1.7003e-14, 4.0201e-11, 1.3799e-11, 2.9903e-12, 1.4277e-12, 2.4721e-03,
        3.6962e-07, 1.2773e-10, 1.2821e-03, 5.8332e-09])

In [43]:
# Negative log-likelihood loss: the mean over all examples of
# -log(probability the model assigned to the true next character).
# The row index is sized from y rather than hard-coded to 16, so the loss
# stays correct when the dataset size changes.
-probs[torch.arange(y.shape[0]), y].log().mean()

tensor(19.7213)

In [47]:
### Cleaned-up, reproducible version of the network parameters ###

# A dedicated, seeded generator makes every draw below repeatable.
# The tensors must be created in this order to reproduce the same values.
g = torch.Generator()
g.manual_seed(2147483647)

C = torch.randn((27, 2), generator=g)     # embedding table
W1 = torch.randn((6, 100), generator=g)   # hidden-layer weights
b1 = torch.randn((100,), generator=g)     # hidden-layer biases
W2 = torch.randn((100, 27), generator=g)  # output-layer weights
b2 = torch.randn((27,), generator=g)      # output-layer biases

# All parameters collected in one list so they can be handled together.
parameters = [C, W1, b1, W2, b2]


In [62]:
# Total number of scalar values across all parameter tensors.
# Tensor.nelement() returns the number of elements in a tensor (n*m for an n-by-m matrix).
sum(p.nelement() for p in parameters)

3481

AttributeError: 'list' object has no attribute 'size'