<a href="https://colab.research.google.com/github/javohir-ergashboyev/UzGPT/blob/main/UzGPT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [162]:
with open('1.txt', 'r', encoding='utf-8') as f:
  contents = f.read()


In [163]:
import re

def remove_special_characters(text):
  pattern = r"[^a-zA-Z\s\'-]"
  text = re.sub(pattern, "", text)
  text=text.lower()
  return text

contents = remove_special_characters(contents)


In [164]:
chars=sorted(list(set(contents)))
print(''.join(chars))


 '-abcdefghijklmnopqrstuvwxyz


In [165]:
st_idx={ch:i for i,ch in enumerate(chars)}
idx_st={i:ch for i,ch in enumerate(chars)}
vocab_size=len(chars)


encode=lambda x: [st_idx[i] for i in x]
print(encode("ta'lim"))
decode=lambda x:''.join([idx_st[i] for i in x])
print(decode(encode("ta'lim")))
print(vocab_size)

[23, 4, 2, 15, 12, 16]
ta'lim
30


In [166]:
import torch
data=torch.tensor(encode(contents), dtype=torch.long)
data.shape, data.dtype

(torch.Size([126327]), torch.int64)

In [167]:
data[:10]

tensor([ 0, 18, 14, 18, 29,  0,  1, 23,  4,  2])

In [168]:
n=int(0.9*len(data))
train_data=data[:n]
val_data=data[n:]

In [169]:
torch.manual_seed(1000)
chunk_size=8
batch_size=4

def get_batch(split):
  data=train_data if split=='train' else val_data
  ix=torch.randint(len(data)-chunk_size,(batch_size,))
  x=torch.stack([data[i:i+chunk_size] for i in ix])
  y=torch.stack([data[i+1:i+chunk_size+1] for i in ix])
  return x,y


xb,yb=get_batch('train')
print(xb.shape)
print(yb.shape)
print('inputs:')
print(xb)
print('outputs:')
print(yb)

torch.Size([4, 8])
torch.Size([4, 8])
inputs:
tensor([[17, 10,  2, 12,  6, 11,  1, 23],
        [16, 18, 23, 15,  4, 21,  1,  7],
        [10, 12,  1, 23,  4, 11, 21, 12],
        [ 5, 15, 12, 14,  4, 22, 12,  1]])
outputs:
tensor([[10,  2, 12,  6, 11,  1, 23,  4],
        [18, 23, 15,  4, 21,  1,  7,  4],
        [12,  1, 23,  4, 11, 21, 12, 21],
        [15, 12, 14,  4, 22, 12,  1, 25]])


In [170]:
for b in range(batch_size):
  for i in range(chunk_size):
    context=xb[b, :i+1]
    target=yb[b,i]
    print(f'When input is {context} output is {target}')

When input is tensor([17]) output is 10
When input is tensor([17, 10]) output is 2
When input is tensor([17, 10,  2]) output is 12
When input is tensor([17, 10,  2, 12]) output is 6
When input is tensor([17, 10,  2, 12,  6]) output is 11
When input is tensor([17, 10,  2, 12,  6, 11]) output is 1
When input is tensor([17, 10,  2, 12,  6, 11,  1]) output is 23
When input is tensor([17, 10,  2, 12,  6, 11,  1, 23]) output is 4
When input is tensor([16]) output is 18
When input is tensor([16, 18]) output is 23
When input is tensor([16, 18, 23]) output is 15
When input is tensor([16, 18, 23, 15]) output is 4
When input is tensor([16, 18, 23, 15,  4]) output is 21
When input is tensor([16, 18, 23, 15,  4, 21]) output is 1
When input is tensor([16, 18, 23, 15,  4, 21,  1]) output is 7
When input is tensor([16, 18, 23, 15,  4, 21,  1,  7]) output is 4
When input is tensor([10]) output is 12
When input is tensor([10, 12]) output is 1
When input is tensor([10, 12,  1]) output is 23
When input is

In [171]:
xb

tensor([[17, 10,  2, 12,  6, 11,  1, 23],
        [16, 18, 23, 15,  4, 21,  1,  7],
        [10, 12,  1, 23,  4, 11, 21, 12],
        [ 5, 15, 12, 14,  4, 22, 12,  1]])

In [172]:
import torch
from torch import nn
import torch.nn.functional as F
n_embed=32
head_size=n_embed//4
class Head(nn.Module):
  def __init__(self):
    super().__init__()
    self.key=nn.Linear(n_embed, head_size, bias=False)
    self.query=nn.Linear(n_embed,head_size, bias=False)
    self.value=nn.Linear(n_embed,head_size, bias=False)
    self.register_buffer('tril', torch.tril(torch.ones(chunk_size, chunk_size)))
    self.dropout=nn.Dropout(0.3)
  def forward(self, x):
    B,T,C=x.shape
    k=self.key(x)
    q=self.query(x)
    wei=q@k.transpose(-2,-1)*C**-0.5
    wei=wei.masked_fill(self.tril[:T,:T]==0, float('-inf'))
    wei=F.softmax(wei, dim=-1)
    wei=self.dropout(wei)
    v=self.value(x)
    out=wei@v
    return out
class MultiHeadAttention(nn.Module):
  def __init__(self, n_head):
    super().__init__()
    self.heads=nn.ModuleList([Head() for _ in range(n_head)])
    self.proj=nn.Linear(n_embed,n_embed)
    self.dropout=nn.Dropout(0.3)
  def forward(self, x):
    out=torch.cat([h(x) for h in self.heads], dim=-1)
    return self.dropout(self.proj(out))

class FeedForward(nn.Module):
  def __init__(self, n_embed):
    super().__init__()
    self.fwd=nn.Sequential(
        nn.Linear(n_embed, n_embed*4),
        nn.ReLU(),
        nn.Linear(n_embed*4, n_embed),
        nn.Dropout(0.3)
    )
  def forward(self, x):
    return self.fwd(x)
class Block(nn.Module):
  def __init__(self, n_embed, n_head):
    super().__init__()
    head_size=n_embed//n_head
    self.head=MultiHeadAttention(n_head)
    self.fwd=FeedForward(n_embed)
    self.lm1=nn.LayerNorm(n_embed)
    self.lm2=nn.LayerNorm(n_embed)
  def forward(self,x):
    x=x+self.head(self.lm1(x))
    x=x+self.fwd(self.lm2(x))
    return x

class BigramLM(nn.Module):
  def __init__(self):
    super().__init__()
    self.embed=nn.Embedding(vocab_size,n_embed)
    self.embed_pos=nn.Embedding(chunk_size, n_embed)
    self.block=nn.Sequential(
        Block(n_embed, 4),
        Block(n_embed, 4),
        Block(n_embed, 4),
        nn.LayerNorm(n_embed)
    )
    self.fc=nn.Linear(n_embed, vocab_size)
  def forward(self, idx, target=None):
    B,T=idx.shape
    token=self.embed(idx)
    pos=self.embed(torch.arange(T))
    x=token+pos
    x=self.block(x)
    logits=self.fc(x)
    if target is None:
      loss=None
    else:
      B,T,C=logits.shape
      logits=logits.view(B*T, C)
      target=target.view(B*T)
      loss=F.cross_entropy(logits, target)
    return logits, loss


  def generate(self, idx, max_tokens):
    for _ in range(max_tokens):
      idx_cond=idx[:, -chunk_size:]
      logits, loss=self(idx_cond)
      logits=logits[:,-1,:]
      probs=F.softmax(logits, dim=-1)
      sample=torch.multinomial(probs, num_samples=1)
      idx=torch.cat((idx,sample),dim=1)
    return idx

model=BigramLM()
example=torch.zeros((1,1), dtype=torch.long)
print(decode(model.generate(example, 100)[0].tolist()))



mmehgfnhxmsnu'qlnusoonchkze'mlqdvsqnxss'kvu'g-tuun qacxvcerupnxbywcv
stauvawgvhzkhchr'bj-g
vlajwqk-k


In [173]:
optimizer=torch.optim.AdamW(params=model.parameters(), lr=4e-3)

In [174]:
eval_iter=200
@torch.no_grad
def estimate_loss():
  out={}
  model.eval()
  for split in ['train', 'test']:
    losses=torch.zeros(eval_iter)
    for k in range(eval_iter):
      X,y=get_batch(split)
      log,loss=model(X,y)
      losses[k]=loss.item()
    out[split]=losses.mean()
  return out

In [175]:
from tqdm.auto import tqdm

batch_size=32
epochs=3000

for epoch in tqdm(range(epochs)):

  if epoch%eval_iter==0:
    losses=estimate_loss()
    print(f"step: {epoch} train loss: {losses['train']:.4f} | test loss: {losses['test']:.4f}")

  xb,yb=get_batch('train')
  logits,loss=model(xb,yb)
  optimizer.zero_grad(set_to_none=True)
  loss.backward()
  optimizer.step()


  0%|          | 0/3000 [00:00<?, ?it/s]

step: 0 train loss: 3.5753 | test loss: 3.5786
step: 200 train loss: 2.0607 | test loss: 2.2228
step: 400 train loss: 1.8415 | test loss: 2.0954
step: 600 train loss: 1.6879 | test loss: 1.9505
step: 800 train loss: 1.6003 | test loss: 1.8992
step: 1000 train loss: 1.5125 | test loss: 1.8708
step: 1200 train loss: 1.4798 | test loss: 1.8652
step: 1400 train loss: 1.4509 | test loss: 1.8219
step: 1600 train loss: 1.4249 | test loss: 1.7870
step: 1800 train loss: 1.3850 | test loss: 1.8092
step: 2000 train loss: 1.3648 | test loss: 1.8093
step: 2200 train loss: 1.3421 | test loss: 1.7526
step: 2400 train loss: 1.3389 | test loss: 1.7724
step: 2600 train loss: 1.3145 | test loss: 1.7437
step: 2800 train loss: 1.3244 | test loss: 1.7431


In [178]:
print(decode(model.generate(torch.tensor(encode("be"), dtype=torch.long).unsqueeze(0), 200)[0].tolist()))

bekiston respublikadri -peyiqlash  ta'lim tashorlari tuvoflanolarga ta'lim
tizimida izimini takomuzgan ma'dmirlik davlatablik o'zlashtirish va dajandartlarini senspublikasi mustatson yokil qiliy davlat 


In [None]:
torch.manual_seed(100)
embedding_layer = nn.Embedding(2, 3)

word_indices = torch.tensor([1])
embedded_vectors = embedding_layer(word_indices)

print("Word Indices:", word_indices)
print("Embedded Vectors:")
print(embedded_vectors)


Word Indices: tensor([1])
Embedded Vectors:
tensor([[ 0.2429, -1.3833, -2.3134]], grad_fn=<EmbeddingBackward0>)


In [None]:
a=torch.tril(torch.ones(3,3))
a

tensor([[1., 0., 0.],
        [1., 1., 0.],
        [1., 1., 1.]])

In [None]:
b=torch.randint(0,10,size=(3,2)).float()
b

tensor([[2., 7.],
        [0., 3.],
        [1., 6.]])

In [None]:
c=a@b
c

tensor([[ 2.,  7.],
        [ 2., 10.],
        [ 3., 16.]])

In [None]:
# a9jZ@quckMG7@&*