<a href="https://colab.research.google.com/github/ameyaoka/-makemore-/blob/main/makemore_MPL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# A neural probabilistic language model



In [1]:
import torch 
import torch.nn.functional as F
import matplotlib.pyplot
%matplotlib inline

In [2]:
! wget https://raw.githubusercontent.com/karpathy/makemore/master/names.txt

--2023-06-07 07:09:50--  https://raw.githubusercontent.com/karpathy/makemore/master/names.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 228145 (223K) [text/plain]
Saving to: ‘names.txt’


2023-06-07 07:09:50 (55.2 MB/s) - ‘names.txt’ saved [228145/228145]



In [3]:
words =  open('names.txt','r').read().splitlines()

In [4]:
words[:10]

['emma',
 'olivia',
 'ava',
 'isabella',
 'sophia',
 'charlotte',
 'mia',
 'amelia',
 'harper',
 'evelyn']

In [5]:
len(words) # total vocabulary 

32033

- The set() function is used to remove duplicate characters, ensuring each character appears only once.
- list() is then used to convert the set back into a list.'        
sorted() is applied to sort the characters in alphabetical order.

In [6]:
chars = sorted(list(set(''.join(words))))
stoi = {s:i+1 for i,s in enumerate(chars)}
stoi['.']=0
itos = {i:s for s,i in stoi.items()}
print(itos)

{1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e', 6: 'f', 7: 'g', 8: 'h', 9: 'i', 10: 'j', 11: 'k', 12: 'l', 13: 'm', 14: 'n', 15: 'o', 16: 'p', 17: 'q', 18: 'r', 19: 's', 20: 't', 21: 'u', 22: 'v', 23: 'w', 24: 'x', 25: 'y', 26: 'z', 0: '.'}


## Build the dataset 

In [9]:

block_size = 4  # how many chars serve as input for prediction of next word 
X ,Y =[],[]         # Initialize empty lists for input-output pairs.

for w in words[:5]: # iterate over words (first 5)

  print(w)              # print word 
  context = [0]*block_size      # initialize list with name context .
                                # This means that initially, the context list
                                # is filled with block_size number of zeros
                                # block_size =3 , context = [0,0,0]
  for ch in w + '.':        #Iterate over each character in the current word,
    ix= stoi[ch]            # convert the character to its corresponding index 
    X.append(context)        # Append the current context to the input list "X
    Y.append(ix)              # append current index to output list Y  
    print(''.join(itos[i] for i in context), '--->', itos[ix])# Append the current context to the input list "X
    context = context[1:] + [ix]    # Update the context by removing the first element and adding the current index
  
X = torch.tensor(X)  # Convert the input list "X" to a PyTorch tensor
Y = torch.tensor(Y)  # Convert the output list "Y" to a PyTorch tensor
]

emma
... ---> e
..e ---> m
.em ---> m
emm ---> a
mma ---> .
olivia
... ---> o
..o ---> l
.ol ---> i
oli ---> v
liv ---> i
ivi ---> a
via ---> .
ava
... ---> a
..a ---> v
.av ---> a
ava ---> .
isabella
... ---> i
..i ---> s
.is ---> a
isa ---> b
sab ---> e
abe ---> l
bel ---> l
ell ---> a
lla ---> .
sophia
... ---> s
..s ---> o
.so ---> p
sop ---> h
oph ---> i
phi ---> a
hia ---> .


In [10]:
X.shape , X.dtype , Y.shape , Y.dtype

(torch.Size([32, 3]), torch.int64, torch.Size([32]), torch.int64)

In [18]:
X # training examples

tensor([[ 0,  0,  0],
        [ 0,  0,  5],
        [ 0,  5, 13],
        [ 5, 13, 13],
        [13, 13,  1],
        [ 0,  0,  0],
        [ 0,  0, 15],
        [ 0, 15, 12],
        [15, 12,  9],
        [12,  9, 22],
        [ 9, 22,  9],
        [22,  9,  1],
        [ 0,  0,  0],
        [ 0,  0,  1],
        [ 0,  1, 22],
        [ 1, 22,  1],
        [ 0,  0,  0],
        [ 0,  0,  9],
        [ 0,  9, 19],
        [ 9, 19,  1],
        [19,  1,  2],
        [ 1,  2,  5],
        [ 2,  5, 12],
        [ 5, 12, 12],
        [12, 12,  1],
        [ 0,  0,  0],
        [ 0,  0, 19],
        [ 0, 19, 15],
        [19, 15, 16],
        [15, 16,  8],
        [16,  8,  9],
        [ 8,  9,  1]])

In [19]:
Y # labels  

tensor([ 5, 13, 13,  1,  0, 15, 12,  9, 22,  9,  1,  0,  1, 22,  1,  0,  9, 19,
         1,  2,  5, 12, 12,  1,  0, 19, 15, 16,  8,  9,  1,  0])

In [25]:
C = torch.randn((27,2))

In [20]:
C

tensor([[ 1.1151, -0.0906],
        [ 1.0786,  0.8933],
        [-0.9472, -1.0525],
        [-0.3524,  0.3774],
        [-0.1694,  1.0037],
        [-0.0382,  1.1845],
        [ 0.8486, -1.4323],
        [-1.0797,  0.7231],
        [ 1.6182,  0.7552],
        [ 0.7947,  0.1007],
        [-0.2202,  0.1874],
        [ 0.2135, -1.0952],
        [-1.5877, -0.2891],
        [ 0.9935,  1.3709],
        [ 1.0152,  1.5605],
        [-0.4630,  1.1816],
        [ 1.2581,  0.2341],
        [ 1.5843, -0.6852],
        [ 1.5703,  0.6908],
        [ 1.5425, -1.9292],
        [-0.2101, -0.0492],
        [-1.6011,  0.4968],
        [ 0.8257, -0.0965],
        [-0.4355,  0.5246],
        [ 0.8280,  0.7570],
        [-1.2060,  0.9860],
        [-0.2545,  0.6189]])

In [22]:
F.one_hot(torch.tensor(5),num_classes=27)

tensor([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0])

In [30]:
# Take one hot vect and mulitply by C
# one_hot encoding by default is int . so need to convert to float.
F.one_hot(torch.tensor(5),num_classes=27).float() @ C


tensor([-0.1141, -2.6814])

In [26]:
C[5]

tensor([-0.1141, -2.6814])

- both output of above lines are same  .

- Pytorch indexing -- learn

In [15]:
emb = C[X]
emb.shape

torch.Size([32, 3, 2])

In [31]:
# weights
W1 = torch.randn((6,100))
# bias
b1 = torch.randn(100)   

In [36]:
torch.cat([emb[:,0,:],emb[:,1,:],emb[:,2,:]],1)

tensor([[ 1.1151, -0.0906,  1.1151, -0.0906,  1.1151, -0.0906],
        [ 1.1151, -0.0906,  1.1151, -0.0906, -0.0382,  1.1845],
        [ 1.1151, -0.0906, -0.0382,  1.1845,  0.9935,  1.3709],
        [-0.0382,  1.1845,  0.9935,  1.3709,  0.9935,  1.3709],
        [ 0.9935,  1.3709,  0.9935,  1.3709,  1.0786,  0.8933],
        [ 1.1151, -0.0906,  1.1151, -0.0906,  1.1151, -0.0906],
        [ 1.1151, -0.0906,  1.1151, -0.0906, -0.4630,  1.1816],
        [ 1.1151, -0.0906, -0.4630,  1.1816, -1.5877, -0.2891],
        [-0.4630,  1.1816, -1.5877, -0.2891,  0.7947,  0.1007],
        [-1.5877, -0.2891,  0.7947,  0.1007,  0.8257, -0.0965],
        [ 0.7947,  0.1007,  0.8257, -0.0965,  0.7947,  0.1007],
        [ 0.8257, -0.0965,  0.7947,  0.1007,  1.0786,  0.8933],
        [ 1.1151, -0.0906,  1.1151, -0.0906,  1.1151, -0.0906],
        [ 1.1151, -0.0906,  1.1151, -0.0906,  1.0786,  0.8933],
        [ 1.1151, -0.0906,  1.0786,  0.8933,  0.8257, -0.0965],
        [ 1.0786,  0.8933,  0.8257, -0.0

- **generalization of above code**

In [54]:
torch.cat(torch.unbind(emb,1),1)

tensor([[ 1.1151, -0.0906,  1.1151, -0.0906,  1.1151, -0.0906],
        [ 1.1151, -0.0906,  1.1151, -0.0906, -0.0382,  1.1845],
        [ 1.1151, -0.0906, -0.0382,  1.1845,  0.9935,  1.3709],
        [-0.0382,  1.1845,  0.9935,  1.3709,  0.9935,  1.3709],
        [ 0.9935,  1.3709,  0.9935,  1.3709,  1.0786,  0.8933],
        [ 1.1151, -0.0906,  1.1151, -0.0906,  1.1151, -0.0906],
        [ 1.1151, -0.0906,  1.1151, -0.0906, -0.4630,  1.1816],
        [ 1.1151, -0.0906, -0.4630,  1.1816, -1.5877, -0.2891],
        [-0.4630,  1.1816, -1.5877, -0.2891,  0.7947,  0.1007],
        [-1.5877, -0.2891,  0.7947,  0.1007,  0.8257, -0.0965],
        [ 0.7947,  0.1007,  0.8257, -0.0965,  0.7947,  0.1007],
        [ 0.8257, -0.0965,  0.7947,  0.1007,  1.0786,  0.8933],
        [ 1.1151, -0.0906,  1.1151, -0.0906,  1.1151, -0.0906],
        [ 1.1151, -0.0906,  1.1151, -0.0906,  1.0786,  0.8933],
        [ 1.1151, -0.0906,  1.0786,  0.8933,  0.8257, -0.0965],
        [ 1.0786,  0.8933,  0.8257, -0.0

In [41]:
a = torch.arange(18)

In [42]:
a.shape

torch.Size([18])

In [47]:
a.view(3,3,2)

tensor([[[ 0,  1],
         [ 2,  3],
         [ 4,  5]],

        [[ 6,  7],
         [ 8,  9],
         [10, 11]],

        [[12, 13],
         [14, 15],
         [16, 17]]])

In [48]:
a.view(9,2)

tensor([[ 0,  1],
        [ 2,  3],
        [ 4,  5],
        [ 6,  7],
        [ 8,  9],
        [10, 11],
        [12, 13],
        [14, 15],
        [16, 17]])

- storage remains same but seen as different 
-Blog below goes in depth 
- http://blog.ezyang.com/2019/05/pytorch-internals/

In [45]:
a.storage()

  a.storage()


 0
 1
 2
 3
 4
 5
 6
 7
 8
 9
 10
 11
 12
 13
 14
 15
 16
 17
[torch.storage.TypedStorage(dtype=torch.int64, device=cpu) of size 18]

- more effecient way . 

In [53]:
emb.view([32,6]) == torch.cat(torch.unbind(emb,1),1)

tensor([[True, True, True, True, True, True],
        [True, True, True, True, True, True],
        [True, True, True, True, True, True],
        [True, True, True, True, True, True],
        [True, True, True, True, True, True],
        [True, True, True, True, True, True],
        [True, True, True, True, True, True],
        [True, True, True, True, True, True],
        [True, True, True, True, True, True],
        [True, True, True, True, True, True],
        [True, True, True, True, True, True],
        [True, True, True, True, True, True],
        [True, True, True, True, True, True],
        [True, True, True, True, True, True],
        [True, True, True, True, True, True],
        [True, True, True, True, True, True],
        [True, True, True, True, True, True],
        [True, True, True, True, True, True],
        [True, True, True, True, True, True],
        [True, True, True, True, True, True],
        [True, True, True, True, True, True],
        [True, True, True, True, T