In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
# Read the corpus as a list of lines (line terminators stripped).
# The encoding is pinned so the result does not depend on the platform's
# default locale encoding.
with open("../data/input.txt", "r", encoding="utf-8") as f:
    text = f.read().splitlines()

In [3]:
# Word-level vocabulary: every distinct whitespace-separated token found
# across all lines, sorted so that the id assignment is deterministic.
words = sorted(set(" ".join(text).split()))
print(len(words))

# Lookup tables: word -> id and id -> word.
stoi = dict(zip(words, range(len(words))))
itoi = dict(enumerate(words))


def encode(text):
    """Return the list of integer ids for an iterable of words."""
    return [stoi[w] for w in text]


def decode(tokens):
    """Return the space-joined words for an iterable of integer ids."""
    return " ".join(itoi[token] for token in tokens)

15


In [4]:
# Flatten the list of lines into a single flat list of word tokens.
text = [word for line in text for word in line.split()]

In [5]:
# Encode the word list as a 1-D tensor of 64-bit integer token ids.
data = torch.tensor(
    encode(text),
    dtype=torch.int64,
)

# Round trip: decoding the ids should give back the original sentence.
decode(data.tolist())

'I love AI and NL I am ML and DL engineer it is good to see you'

In [6]:
# Treat the whole sequence (minus its last token) as one context window.
block_size = len(data) - 1

# Inputs are every token but the last; targets are the same sequence
# shifted one position to the left.
x = data[:-1]
y = data[1:]

# For each position, show the growing prefix and the token that follows it.
for i, target in enumerate(y):
    context = x[: i + 1]
    print(f"Input: {context} --> target={target}")

Input: tensor([2]) --> target=11
Input: tensor([ 2, 11]) --> target=0
Input: tensor([ 2, 11,  0]) --> target=6
Input: tensor([ 2, 11,  0,  6]) --> target=4
Input: tensor([ 2, 11,  0,  6,  4]) --> target=2
Input: tensor([ 2, 11,  0,  6,  4,  2]) --> target=5
Input: tensor([ 2, 11,  0,  6,  4,  2,  5]) --> target=3
Input: tensor([ 2, 11,  0,  6,  4,  2,  5,  3]) --> target=6
Input: tensor([ 2, 11,  0,  6,  4,  2,  5,  3,  6]) --> target=1
Input: tensor([ 2, 11,  0,  6,  4,  2,  5,  3,  6,  1]) --> target=7
Input: tensor([ 2, 11,  0,  6,  4,  2,  5,  3,  6,  1,  7]) --> target=10
Input: tensor([ 2, 11,  0,  6,  4,  2,  5,  3,  6,  1,  7, 10]) --> target=9
Input: tensor([ 2, 11,  0,  6,  4,  2,  5,  3,  6,  1,  7, 10,  9]) --> target=8
Input: tensor([ 2, 11,  0,  6,  4,  2,  5,  3,  6,  1,  7, 10,  9,  8]) --> target=13
Input: tensor([ 2, 11,  0,  6,  4,  2,  5,  3,  6,  1,  7, 10,  9,  8, 13]) --> target=12
Input: tensor([ 2, 11,  0,  6,  4,  2,  5,  3,  6,  1,  7, 10,  9,  8, 13, 12]) --> target=14

In [7]:
# Show the input sequence and, below it, the one-step-shifted targets.
print(f"X {x}")
print(f"y {y}")

X tensor([ 2, 11,  0,  6,  4,  2,  5,  3,  6,  1,  7, 10,  9,  8, 13, 12])
y tensor([11,  0,  6,  4,  2,  5,  3,  6,  1,  7, 10,  9,  8, 13, 12, 14])


In [8]:
# Preview the 16 input ids as a 4x4 float matrix (the layout the
# attention cell feeds to its linear layers).
x.to(torch.float32).reshape(4, 4)

tensor([[ 2., 11.,  0.,  6.],
        [ 4.,  2.,  5.,  3.],
        [ 6.,  1.,  7., 10.],
        [ 9.,  8., 13., 12.]])

In [18]:
torch.manual_seed(42)
# Single-head causal self-attention on a toy input.
# The 16 token ids are viewed as 4 time steps of 4 features each; the raw
# ids (cast to float) stand in for embeddings purely to exercise the mechanics.
X = x.reshape(4, 4).float()
seq_len, n_features = X.shape

head_size = 16
# Bias-free projections, created in query/key/value order so the seeded
# weight initialisation is unchanged.
query = nn.Linear(n_features, head_size, bias=False)
key = nn.Linear(n_features, head_size, bias=False)
value = nn.Linear(n_features, head_size, bias=False)

q = query(X)  # (seq_len, head_size)
k = key(X)    # (seq_len, head_size)

# Lower-triangular mask: position i may attend only to positions <= i.
tril = torch.tril(torch.ones(seq_len, seq_len))

# Scaled dot-product scores, shape (seq_len, seq_len). Multiplying by
# 1/sqrt(head_size) keeps the logits from growing with head_size; unscaled
# logits push softmax toward one-hot rows.
wei = (q @ k.T) * head_size ** -0.5

# Hide future positions, then normalise each row into attention weights.
wei = wei.masked_fill(tril == 0, float('-inf'))
wei = F.softmax(wei, dim=-1)

v = value(X)  # (seq_len, head_size)

# Each output row is the attention-weighted average of the value rows.
out = wei @ v

out

tensor([[ 0.6817,  1.8094, -6.9533, -6.3265,  3.3980, -4.7405, -6.4281,  0.8454,
         -0.5262,  2.1715, -2.9633, -1.8744, -1.6099,  2.0051,  2.8851, -0.5704],
        [ 4.0968, -2.2644, -2.2667,  0.7130,  2.3651,  1.0130,  0.2086,  1.4218,
          1.0374,  0.8327, -1.4444, -1.0789, -0.6146,  0.9002,  2.2406, -1.3113],
        [ 4.0968, -2.2644, -2.2667,  0.7130,  2.3651,  1.0130,  0.2086,  1.4218,
          1.0374,  0.8327, -1.4444, -1.0789, -0.6146,  0.9002,  2.2406, -1.3113],
        [ 4.0968, -2.2644, -2.2667,  0.7130,  2.3651,  1.0130,  0.2086,  1.4218,
          1.0374,  0.8327, -1.4444, -1.0789, -0.6146,  0.9002,  2.2406, -1.3113]],
       grad_fn=<MmBackward0>)

tensor([[1., 0., 0., 0.],
        [1., 1., 0., 0.],
        [1., 1., 1., 0.],
        [1., 1., 1., 1.]])