In [1]:
import torch
import matplotlib.pyplot as plt
import numpy as np


print(torch.__version__)

2.9.0


In [None]:
file_path = "/Users/baxtiyorbekmurodov/Desktop/math2LLM/data/input.txt"

with open(file_path, 'r') as f:
    text = f.read()

In [3]:
chars = list(set(text))
vocab_size = len(chars)
print(vocab_size)
print("".join(chars))

65
xhTEaKWXYtrA?sZOSz,Ln-G elqPgyMQ:;jI'm&3Vf!.$F
BpwuvicCHkNRbUodJD


In [4]:
stoi = {c:i for i,c in enumerate(chars)}
itoi = {i:c for i, c in enumerate(chars)}

encode = lambda s: [stoi[c] for c in s]
decode = lambda tokens: ''.join([itoi[t] for t in tokens])

print(encode("hii there"))
print(decode(encode("hii there")))

[1, 52, 52, 23, 9, 1, 24, 10, 24]
hii there


In [5]:
data = torch.tensor(encode(text), dtype=torch.long)

In [6]:
print(data.dtype)
print(data.ndim)
print(data[:5])

torch.int64
1
tensor([45, 52, 10, 13,  9])


In [7]:
# spliting data into train/ val: 90% for training
n = len(data)
tr_size = int(n *0.9)
train_data = data[:tr_size]
val_data = data[tr_size:]

print(n); print(len(train_data) / n); print(len(val_data)/n)

1115394
0.8999994620734916
0.10000053792650848


In [8]:
block_size = 8

x = train_data[:block_size]
y = train_data[1:block_size+1:]

for i in range(block_size):
  context = x[:i+1]
  target = y[i]
  print(f"Input: {context} --> target={target}")

Input: tensor([45]) --> target=52
Input: tensor([45, 52]) --> target=10
Input: tensor([45, 52, 10]) --> target=13
Input: tensor([45, 52, 10, 13]) --> target=9
Input: tensor([45, 52, 10, 13,  9]) --> target=23
Input: tensor([45, 52, 10, 13,  9, 23]) --> target=54
Input: tensor([45, 52, 10, 13,  9, 23, 54]) --> target=52
Input: tensor([45, 52, 10, 13,  9, 23, 54, 52]) --> target=9


In [9]:
torch.manual_seed(1332)

batch_size = 4
block_size = batch_size * 2


def get_batch(split :str):
    # generates small batch for X input and y target
    data = train_data if split == 'train' else val_data
    ix = torch.randint(len(data) - block_size, (batch_size, ))
    x = torch.stack([data[i:i+block_size] for i in ix])
    y = torch.stack([data[i+1:i+block_size+1] for i in ix])

    return x, y



In [10]:
xb, yb = get_batch("train")

print(xb)
print(yb)
print("-"*32)


for b in range(batch_size):
    for t in range(block_size):
        context = xb[b, :t+1]
        target = yb[b, t]

        print(f'Input: {context.tolist()} --> target={target}')


tensor([[41, 23, 49, 52,  9,  1, 23, 29],
        [15, 41, 23, 61, 50, 10, 23, 28],
        [59, 50,  9, 23,  4, 23, 13, 25],
        [23, 49,  1,  4,  9, 18, 23, 61]])
tensor([[23, 49, 52,  9,  1, 23, 29, 61],
        [41, 23, 61, 50, 10, 23, 28, 10],
        [50,  9, 23,  4, 23, 13, 25, 52],
        [49,  1,  4,  9, 18, 23, 61, 36]])
--------------------------------
Input: [41] --> target=23
Input: [41, 23] --> target=49
Input: [41, 23, 49] --> target=52
Input: [41, 23, 49, 52] --> target=9
Input: [41, 23, 49, 52, 9] --> target=1
Input: [41, 23, 49, 52, 9, 1] --> target=23
Input: [41, 23, 49, 52, 9, 1, 23] --> target=29
Input: [41, 23, 49, 52, 9, 1, 23, 29] --> target=61
Input: [15] --> target=41
Input: [15, 41] --> target=23
Input: [15, 41, 23] --> target=61
Input: [15, 41, 23, 61] --> target=50
Input: [15, 41, 23, 61, 50] --> target=10
Input: [15, 41, 23, 61, 50, 10] --> target=23
Input: [15, 41, 23, 61, 50, 10, 23] --> target=28
Input: [15, 41, 23, 61, 50, 10, 23, 28] --> target=1

In [21]:
import torch.nn as nn
from torch.nn import functional as F

torch.manual_seed(1332)


class BiagramModel(nn.Module):
    def __init__(self, vocab_size):
        super().__init__()
        self.token_embedding_table = nn.Embedding(vocab_size, vocab_size)
    
    def forward(self, idx, targets=None):
                
        logits = self.token_embedding_table(idx)

        if targets is None:
            loss = None
        else:
            B, T, C = logits.shape
            logits = logits.view(B*T, C)
            targets = targets.view(-1)  
            loss = F.cross_entropy(logits, targets)

        return logits, loss
    
    def generate(self, idx, max_new_tokens):

        for i in range(max_new_tokens):
            logits, loss = self(idx)
            logits = logits[:, -1, :]

            probs = F.softmax(logits, dim=-1)

            idx_next = torch.multinomial(probs, num_samples=1)

            idx = torch.cat((idx, idx_next), dim=1)
        
        return idx
        
    
model = BiagramModel(vocab_size)
out, loss = model(xb, yb)
print(out.shape); 
print(loss)

idx = torch.zeros((1, 1), dtype=torch.long)

print(decode(model.generate(idx, 60)[0].tolist()))

torch.Size([32, 65])
tensor(4.5335, grad_fn=<NllLossBackward0>)
xp3U-yjDYwK!eFwBWHgv.RfC:eu3'jA:-GavHqnolfKOVlexEg.:ukqyqGU:V


In [30]:
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)

In [37]:
batch_size = 32

epochs = 10000

for i in range(epochs):

    xb, yb = get_batch("train")

    logits, loss = model(xb, yb)
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()

print(f'Iter: {i+1} -> loss={loss.item()}')

Iter: 10000 -> loss=2.5197980403900146


In [41]:
idx = torch.zeros((1, 1), dtype=torch.long)

print(decode(model.generate(idx, 700)[0].tolist()))

xive bouse; ithent buthineveat;
Frisinoulerue yofeveco pouruther y, invem:
IUShimy al
severlmyouste p m tup;

bun!
Lariencolsime blfoust, are, w'lday hispume bige asheme

ARDiowod,
Ans
Nor anoke, sherye pscuancicelinpl, howendows h lllonery.
Al gaveathoth viceawe,
Thinorencrcthar lesthin ose, m, d cck, whasoruenor, y
Hat g anthars nnom? oara l KI wick, athares n lo Bur dldowanth keandem'eede nd ou at thive.
ARY b meren is.
INETUK: t s aifu RUSO therofu maten. RO:
A:
But.
Po fors ff ba, bugnrooveatthulat.
PRLI nan hy wigooweag,
Frs chor t ce morasathele ceen s s yo Cus mboninst hibr maroway saghacho t whie.
Pioun, havomyor,
Mit woupuksty
Herdelil INoom.
IOr ombl: wsou thed ffolse--art pe t wet
