In [1]:
import torch 
import os 
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.functional as F

In [2]:
# Location of the training corpus. The default is the original local path;
# set the IKKI_ESHIK_PATH environment variable to point at the file on another
# machine without having to edit the notebook.
file_path = os.environ.get(
    'IKKI_ESHIK_PATH',
    '/Users/baxtiyorbekmurodov/Desktop/math2LLM/data/ikki_eshik_orasi.txt',
)

# Read the whole corpus into memory as a single string. The encoding is given
# explicitly so the result does not depend on the platform default.
with open(file_path, 'r', encoding='utf-8') as f:
    text = f.read()

In [110]:
# Character-level vocabulary: every distinct character that occurs in the
# corpus, in sorted order so that the id assignment is deterministic.
unique_chars = set(text)
chars = sorted(unique_chars)
vocab_size = len(chars)

print(vocab_size)
print(chars)

116
['\n', '\x0c', ' ', '!', '"', '#', '%', '&', "'", '(', ')', '*', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '>', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '\\', '^', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '|', '}', '~', '¡', '£', '§', '«', '\xad', '°', '»', '¿', 'É', 'Ñ', 'Ó', 'é', 'í', 'î', 'ñ', 'ó', 'ú', 'ÿ', 'В', 'о', '—', '‘', '’', '„', '•', '™', '■']


In [112]:
# Lookup tables between characters and their integer ids; a character's id is
# its position in `chars`.
stoi = {s: i for i, s in enumerate(chars)}  # character -> integer id
itoi = {i: s for i, s in enumerate(chars)}  # integer id -> character


def encode(text):
    """Convert a string into the list of integer ids of its characters.

    Raises KeyError if `text` contains a character that is not in `chars`.
    """
    return [stoi[c] for c in text]


def decode(tokens):
    """Convert an iterable of token ids into a list of single characters.

    Each id is passed through int() before the lookup so that elements taken
    from a tensor (e.g. iterating over `data[:10]`) are accepted as well as
    plain Python ints; a tensor element does not match the int keys of `itoi`
    when used as a dict key directly.

    Returns a list of characters (not a string); use ''.join(decode(...)) to
    get text back.
    """
    return [itoi[int(token)] for token in tokens]

In [113]:
# Encode the whole corpus and store it as one tensor of int64 token ids.
token_ids = encode(text)
data = torch.tensor(token_ids, dtype=torch.long)

In [118]:
# Split the data: the first 90% of the tokens are used for training and the
# remaining tail for validation.
n = len(data)
split_size = int(n * 0.9)
train_data, val_data = data[:split_size], data[split_size:]

In [119]:
# Sanity check: the fraction of all tokens that ended up in each split.
train_fraction = len(train_data) / n
val_fraction = len(val_data) / n
print(train_fraction)
print(val_fraction)

0.8999992746387097
0.10000072536129037


In [157]:
block_size = 8   # number of tokens in each input sequence
batch_size = 32  # number of sequences sampled per batch


def get_batch(split: str):
    """Sample a random batch of (input, target) sequences.

    `split` selects the source tensor: 'train' uses `train_data`, any other
    value uses `val_data`.

    Returns a pair (x, y), each of shape (batch_size, block_size), where y is
    x shifted one position to the right in the source data.
    """
    source = val_data if split != 'train' else train_data
    # Random start offsets; the upper bound keeps the target slice in range.
    starts = torch.randint(len(source) - block_size, (batch_size, ))

    inputs, targets = [], []
    for start in starts:
        inputs.append(source[start:start + block_size])
        targets.append(source[start + 1:start + block_size + 1])

    return torch.stack(inputs), torch.stack(targets)

### The mathematical trick in self-attention

In [158]:
# Seed the RNG so the toy example below is reproducible.
torch.manual_seed(1337)

# Toy dimensions: batch, time (sequence length), channels.
B = 4
T = 8
C = 2

x = torch.randn(B, T, C)
x.shape

torch.Size([4, 8, 2])

In [164]:
# Naive baseline: xbow[b, t] is the mean of x[b, 0], ..., x[b, t], i.e. each
# position averages itself and everything before it.
xbow = torch.zeros(B, T, C)

for batch_idx in range(B):
    for step in range(T):
        # Rows 0..step of this batch element: shape (step + 1, C).
        history = x[batch_idx, :step + 1]
        xbow[batch_idx, step] = history.mean(dim=0)

In [167]:
x[0]

tensor([[ 0.1808, -0.0700],
        [-0.3596, -0.9152],
        [ 0.6258,  0.0255],
        [ 0.9545,  0.0643],
        [ 0.3612,  1.1679],
        [-1.3499, -0.5102],
        [ 0.2360, -0.2398],
        [-0.9211,  1.5433]])

In [168]:
xbow[0]

tensor([[ 0.1808, -0.0700],
        [-0.0894, -0.4926],
        [ 0.1490, -0.3199],
        [ 0.3504, -0.2238],
        [ 0.3525,  0.0545],
        [ 0.0688, -0.0396],
        [ 0.0927, -0.0682],
        [-0.0341,  0.1332]])

In [177]:
# Small demonstration: multiplying by a row-normalised lower-triangular matrix
# turns each row of the result into the running average of the rows of `b`.
torch.manual_seed(42)

a = torch.ones(3, 3).tril()
a = a / a.sum(dim=1, keepdim=True)  # each row now sums to 1
b = torch.randint(0, 10, (3, 2)).float()
c = a @ b

for label, value in (("a:\n", a), ("b:\n", b), ("c:\n", c)):
    print(label, value)

a:
 tensor([[1.0000, 0.0000, 0.0000],
        [0.5000, 0.5000, 0.0000],
        [0.3333, 0.3333, 0.3333]])
b:
 tensor([[2., 7.],
        [6., 4.],
        [6., 5.]])
c:
 tensor([[2.0000, 7.0000],
        [4.0000, 5.5000],
        [4.6667, 5.3333]])


In [187]:
# Version 2: the same running average as a single matrix multiply. Row t of
# `wei` puts equal weight on positions 0..t and zero on later positions.
lower_ones = torch.ones(T, T).tril()
wei = lower_ones / lower_ones.sum(dim=1, keepdim=True)

# (T, T) @ (B, T, C): the weight matrix is broadcast over the batch dimension.
xbow2 = wei @ x

torch.allclose(xbow, xbow2)

True

In [193]:
import torch.nn.functional as F

In [196]:
# Version 3: the same weights obtained through a masked softmax. Positions
# above the diagonal are set to -inf so softmax gives them weight 0; the
# remaining zeros in each row become equal weights.
tril = torch.ones(T, T).tril()
scores = torch.zeros(T, T)
scores = scores.masked_fill(tril == 0, float('-inf'))
wei = F.softmax(scores, dim=-1)
xbow3 = wei @ x

torch.allclose(xbow, xbow3)

True