In [1]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt

In [4]:
# Read the names file and store its lines as a list, one name per entry.
# The context manager closes the file handle as soon as the contents are read,
# instead of leaving it open for the lifetime of the kernel.
with open('Names(1).txt', 'r') as names_file:
    words = names_file.read().splitlines()
words[:10]

['emma',
 'olivia',
 'ava',
 'isabella',
 'sophia',
 'charlotte',
 'mia',
 'amelia',
 'harper',
 'evelyn']

In [15]:
# Character vocabulary and the two lookup tables built from it:
#   stoi: character -> integer id, itos: integer id -> character.
chars = sorted(set(''.join(words)))
# Characters get ids 1..len(chars) in sorted order ...
stoi = dict(zip(chars, range(1, len(chars) + 1)))
# ... and id 0 is given to the special '.' token.
stoi['.'] = 0
itos = {idx: ch for ch, idx in stoi.items()}

stoi

{'a': 1,
 'b': 2,
 'c': 3,
 'd': 4,
 'e': 5,
 'f': 6,
 'g': 7,
 'h': 8,
 'i': 9,
 'j': 10,
 'k': 11,
 'l': 12,
 'm': 13,
 'n': 14,
 'o': 15,
 'p': 16,
 'q': 17,
 'r': 18,
 's': 19,
 't': 20,
 'u': 21,
 'v': 22,
 'w': 23,
 'x': 24,
 'y': 25,
 'z': 26,
 '.': 0}

In [56]:
# Build the dataset: each input is a window of `block_size` character ids and
# the matching target is the id of the character that follows that window.
block_size = 3  # context length: how many characters are used to predict the next one
X, Y = [], []

for w in words[:5]:
    print(w)
    # Every word starts from a window filled with id 0 (the '.' token).
    context = [0] * block_size
    for ch in w + '.':
        ix = stoi[ch]
        X.append(context)
        Y.append(ix)
        print(''.join(map(itos.__getitem__, context)), '---->', itos[ix])
        # Slide the window: drop the oldest id and append the newest one.
        # A new list is created, so windows already stored in X are unaffected.
        context = [*context[1:], ix]

X = torch.tensor(X)
Y = torch.tensor(Y)

emma
... ----> e
..e ----> m
.em ----> m
emm ----> a
mma ----> .
olivia
... ----> o
..o ----> l
.ol ----> i
oli ----> v
liv ----> i
ivi ----> a
via ----> .
ava
... ----> a
..a ----> v
.av ----> a
ava ----> .
isabella
... ----> i
..i ----> s
.is ----> a
isa ----> b
sab ----> e
abe ----> l
bel ----> l
ell ----> a
lla ----> .
sophia
... ----> s
..s ----> o
.so ----> p
sop ----> h
oph ----> i
phi ----> a
hia ----> .


In [57]:
# Inspect the shape and dtype of the input tensor X and the target tensor Y.
X.shape,X.dtype,Y.shape,Y.dtype

(torch.Size([32, 3]), torch.int64, torch.Size([32]), torch.int64)

In [58]:
# Lookup table C: one randomly initialised 2-dimensional row per integer id.
# 27 rows match the 27 entries of stoi (26 letters plus '.').
C = torch.randn(27,2)
C

tensor([[ 0.7381,  1.2096],
        [-1.4430,  0.9061],
        [-0.1986,  0.3821],
        [ 0.5676,  0.2617],
        [-0.9786,  0.7904],
        [-0.5567,  0.9380],
        [-0.7443, -0.9084],
        [ 0.3740, -0.1111],
        [ 0.2628, -0.0145],
        [ 1.4126, -0.8491],
        [-3.0383,  1.1061],
        [-0.6043,  0.9892],
        [ 1.1731, -0.3887],
        [ 0.3784,  1.1381],
        [-1.4229,  1.1972],
        [-0.4143, -1.0254],
        [-1.6760,  1.1273],
        [ 0.3584, -0.8080],
        [ 0.7452,  0.1732],
        [-0.9683, -0.9825],
        [-1.4335,  0.0485],
        [-0.8063, -1.6118],
        [-2.0905, -0.7433],
        [ 0.9798,  1.1296],
        [-2.5378,  0.5113],
        [-0.8040,  0.3486],
        [-0.3814, -1.6283]])

In [59]:
# One-hot encode index 5 over 27 classes and cast the result to float
# (the cast is presumably needed so it can be multiplied with the float tensor C).
HOT = F.one_hot(torch.tensor(5),num_classes = 27).float()

In [60]:
# Display the one-hot vector: a single 1 at index 5 and zeros everywhere else.
print(HOT)

tensor([0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0.])


In [61]:
# For intuition: only index 5 of the HOT vector is non-zero,
# so the product simply selects the row at index 5 of the matrix C.
HOT @ C

tensor([-0.5567,  0.9380])

In [62]:
# Plain indexing returns the same row of C without the one-hot matrix product.
C[5]

tensor([-0.5567,  0.9380])

In [63]:
# Index C with the whole input tensor X at once:
# each of the (32, 3) integers in X is replaced by its 2-dimensional row of C,
# so the result has shape (32, 3, 2).
emd = C[X] 
print(emd.shape)
emd

torch.Size([32, 3, 2])


tensor([[[ 0.7381,  1.2096],
         [ 0.7381,  1.2096],
         [ 0.7381,  1.2096]],

        [[ 0.7381,  1.2096],
         [ 0.7381,  1.2096],
         [-0.5567,  0.9380]],

        [[ 0.7381,  1.2096],
         [-0.5567,  0.9380],
         [ 0.3784,  1.1381]],

        [[-0.5567,  0.9380],
         [ 0.3784,  1.1381],
         [ 0.3784,  1.1381]],

        [[ 0.3784,  1.1381],
         [ 0.3784,  1.1381],
         [-1.4430,  0.9061]],

        [[ 0.7381,  1.2096],
         [ 0.7381,  1.2096],
         [ 0.7381,  1.2096]],

        [[ 0.7381,  1.2096],
         [ 0.7381,  1.2096],
         [-0.4143, -1.0254]],

        [[ 0.7381,  1.2096],
         [-0.4143, -1.0254],
         [ 1.1731, -0.3887]],

        [[-0.4143, -1.0254],
         [ 1.1731, -0.3887],
         [ 1.4126, -0.8491]],

        [[ 1.1731, -0.3887],
         [ 1.4126, -0.8491],
         [-2.0905, -0.7433]],

        [[ 1.4126, -0.8491],
         [-2.0905, -0.7433],
         [ 1.4126, -0.8491]],

        [[-2.0905, -0

In [64]:
# Randomly initialised weight matrix (6 x 100) and bias vector (100).
# NOTE(review): the 6 presumably equals block_size (3) * embedding size (2),
# i.e. the width of the concatenated embeddings — confirm before changing either value.
W1 = torch.randn((6, 100))
b1 = torch.randn(100)

In [65]:
# torch.cat() concatenates a sequence of tensors along one chosen dimension:
# dim 0 joins them along rows, dim 1 joins them along columns.
# Here the three per-position slices of emd are joined along dim 1,
# giving one row of 6 values per example.
torch.cat([emd[:,0,:], emd[:,1,:], emd[:,2,:]], 1)

tensor([[ 0.7381,  1.2096,  0.7381,  1.2096,  0.7381,  1.2096],
        [ 0.7381,  1.2096,  0.7381,  1.2096, -0.5567,  0.9380],
        [ 0.7381,  1.2096, -0.5567,  0.9380,  0.3784,  1.1381],
        [-0.5567,  0.9380,  0.3784,  1.1381,  0.3784,  1.1381],
        [ 0.3784,  1.1381,  0.3784,  1.1381, -1.4430,  0.9061],
        [ 0.7381,  1.2096,  0.7381,  1.2096,  0.7381,  1.2096],
        [ 0.7381,  1.2096,  0.7381,  1.2096, -0.4143, -1.0254],
        [ 0.7381,  1.2096, -0.4143, -1.0254,  1.1731, -0.3887],
        [-0.4143, -1.0254,  1.1731, -0.3887,  1.4126, -0.8491],
        [ 1.1731, -0.3887,  1.4126, -0.8491, -2.0905, -0.7433],
        [ 1.4126, -0.8491, -2.0905, -0.7433,  1.4126, -0.8491],
        [-2.0905, -0.7433,  1.4126, -0.8491, -1.4430,  0.9061],
        [ 0.7381,  1.2096,  0.7381,  1.2096,  0.7381,  1.2096],
        [ 0.7381,  1.2096,  0.7381,  1.2096, -1.4430,  0.9061],
        [ 0.7381,  1.2096, -1.4430,  0.9061, -2.0905, -0.7433],
        [-1.4430,  0.9061, -2.0905, -0.7

In [69]:
# The previous cell's code is not general: it hard-codes the three context positions.
# Instead of writing [emd[:,0,:], emd[:,1,:], emd[:,2,:]] by hand, use torch.unbind(emd, 1),
# which removes dimension 1 and returns the slices along it.
# Note: both this and the code in the previous cell are inefficient.
torch.cat(torch.unbind(emd,1),1)

tensor([[ 0.7381,  1.2096,  0.7381,  1.2096,  0.7381,  1.2096],
        [ 0.7381,  1.2096,  0.7381,  1.2096, -0.5567,  0.9380],
        [ 0.7381,  1.2096, -0.5567,  0.9380,  0.3784,  1.1381],
        [-0.5567,  0.9380,  0.3784,  1.1381,  0.3784,  1.1381],
        [ 0.3784,  1.1381,  0.3784,  1.1381, -1.4430,  0.9061],
        [ 0.7381,  1.2096,  0.7381,  1.2096,  0.7381,  1.2096],
        [ 0.7381,  1.2096,  0.7381,  1.2096, -0.4143, -1.0254],
        [ 0.7381,  1.2096, -0.4143, -1.0254,  1.1731, -0.3887],
        [-0.4143, -1.0254,  1.1731, -0.3887,  1.4126, -0.8491],
        [ 1.1731, -0.3887,  1.4126, -0.8491, -2.0905, -0.7433],
        [ 1.4126, -0.8491, -2.0905, -0.7433,  1.4126, -0.8491],
        [-2.0905, -0.7433,  1.4126, -0.8491, -1.4430,  0.9061],
        [ 0.7381,  1.2096,  0.7381,  1.2096,  0.7381,  1.2096],
        [ 0.7381,  1.2096,  0.7381,  1.2096, -1.4430,  0.9061],
        [ 0.7381,  1.2096, -1.4430,  0.9061, -2.0905, -0.7433],
        [-1.4430,  0.9061, -2.0905, -0.7

In [None]:
# a.view() can be used to reshape a tensor as long as the total number of elements stays the same
# a.storage() shows how a tensor's elements are stored in the computer's memory

# the efficient way is to just use .view() when doing the forward pass
# a -1 is passed for one dimension because PyTorch will then infer that size itself
