In [9]:
import torch
import torch.nn as nn
from torch.nn import functional as F
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)
block_size = 8
batch_size = 4

cpu


In [2]:
with open('Iran.txt', 'r', encoding='utf-8') as f:
    text = f.read()

chars = sorted(set(text))
vocabulary_size = len(chars)

In [3]:
def encode(string) -> list:
    return [string_to_int[c] for c in string]

def decode(l) -> str:
    return ''.join([int_to_string[s] for s in l])

In [4]:
string_to_int = {ch:i for i, ch in enumerate(chars)}
int_to_string = {i:ch for i, ch in enumerate(chars)}

data = torch.tensor(encode(text), dtype=torch.long)
print(data[:100])

tensor([106,  48,  65,  62,   1,  44,  75,  72,  67,  62,  60,  77,   1,  35,
         78,  77,  62,  71,  59,  62,  75,  64,   1,  62,  30,  72,  72,  68,
          1,  72,  63,   1,  48,  65,  62,   1,  44,  62,  75,  76,  66,  58,
         71,   1,  40,  66,  77,  62,  75,  58,  77,  78,  75,  62,  11,   1,
         31,  72,  70,  73,  75,  66,  76,  66,  71,  64,   1,  48,  65,  62,
          1,  47,  65,  58,  65,   1,  42,  58,  70,  62,  65,  11,   1,  48,
         65,  62,   1,  46,  78,  59,  58,  66,  82,  58,  77,  11,   1,  48,
         65,  62])


In [8]:
n = int(0.8 * len(data))
train_data = data[:n]
val_data = data[n:]

def get_batch(split):
    data = train_data if split == 'train' else val_data
    ix = torch.randint(len(data) - block_size, (batch_size,))
    print(ix)
    x = torch.stack([data[i:i+block_size] for i in ix])
    y = torch.stack([data[i+1:i+block_size+1] for i in ix])
    x, y = x.to(device), y.to(device)
    return x, y

x, y = get_batch('train')
print('inputs:')
print(x)
print('targets:')
print(y)

tensor([283806, 651551, 660844, 663491])
inputs:
tensor([[64, 76,  1, 60, 65, 58, 75, 70],
        [71,  1, 37,  1, 80, 58, 76,  1],
        [71, 64,  1, 77, 65, 62,  1, 61],
        [58, 70, 62, 28,  3,  0,  0, 54]])
targets:
tensor([[76,  1, 60, 65, 58, 75, 70, 62],
        [ 1, 37,  1, 80, 58, 76,  1, 70],
        [64,  1, 77, 65, 62,  1, 61, 62],
        [70, 62, 28,  3,  0,  0, 54, 97]])


In [14]:
class BigramLanguageModel(nn.Module):
    def __init__(self, vocab_size):
        super().__init__()
        self.token_embedding_table = nn.Embedding(vocab_size, vocab_size)

    def forward(self, index, targets=None):
        logits = self.token_embedding_table(index)
        if targets is None: 
            loss = None
        else:
            B, T, C = logits.shape
            logits = logits.view(B*T, C)
            targets = targets.view(B*T)
            loss = F.cross_entropy(logits, targets)
        return logits

    def generate(self, index, max_new_tokens):
        for _ in range(max_new_tokens):
            logits = self.forward(index)
            logits = logits[:, -1, :]
            probs = F.softmax(logits, dim=-1)
            index_next = torch.multinomial(probs, num_samples=1)
            index = torch.cat((index, index_next), dim=1)

        return index

model = BigramLanguageModel(vocab_size=vocabulary_size)
m = model.to(device)

context = torch.zeros((1, 1), dtype=torch.long, device=device)
generated_chars = decode(m.generate(context, max_new_tokens=500)[0].tolist())
print(generated_chars)


ud;‘:4,=”’d6äKg:aV*“aJTííe-gz]kfârl—3êH6‘q’ïádEMàyg—JBó•à3ükyf™C “Y3 Ud!fq!QlkO%v
lPar3Z6H﻿s"Gby;!riÁ]xÁ=óU9c*óâÍcâ﻿EUDDA2C(zQ“PWkC™™F8?Y‘vèU-vs2âQ]"á6iäêy’w/ AwáT—'ïàZCNz]eRzès6áFxC/Su3wä—u c!ü”jxMúU9﻿ázctó4HPHAúÁJeA
=fí8bBm]eq-a88[_TCNéàïou,#/prePH
úÁê%jèy[8x-’ä8íki-âEdEUäw
DThä(gs—QhW)9=â18?r‘:XHúÍ_wJÚ—àXNU™”3íjÁBfcÁ99êx-L2c.—J3“YEL tüqUDb#í8qmä0v) gàé‘'pi'R;55””mNäcM,3?w’‘:VncQ.qvOx9üC2WÍPKi=;‘?y’EL hiêt5%YtE—tJ4l.[*RH3yC!qüâAQhBéJ!KQ8 D“äl4.'“Yv*éA'ïóiH﻿zQcmäA2a:j]:=]7CC“,Yvdéan;èuv‘aREHb à
