<a href="https://colab.research.google.com/github/cannedhedgehog/Saturday/blob/main/4_1_lab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
import numpy as np

Гиперпараметры

In [10]:
# Training corpus.
text = "hellohellohello"

# Vocabulary: the distinct characters of the corpus, in sorted order.
chars = sorted(set(text))
vocab_size = len(chars)

# Model shape.
block_size = 4  # number of context tokens used to predict the next one
n_embd = 16     # width of each token embedding vector

# Optimisation settings.
learning_rate = 0.1
epochs = 500

Кодировка

In [11]:
# Lookup tables between characters and their integer ids (position in `chars`).
stoi = dict(zip(chars, range(len(chars))))
itos = dict(zip(stoi.values(), stoi.keys()))


def encode(s):
    """Return the list of integer ids for the characters of ``s`` (via ``stoi``)."""
    return [stoi[ch] for ch in s]


def decode(l):
    """Return the string spelled by the sequence of integer ids ``l`` (via ``itos``)."""
    return ''.join(itos[token_id] for token_id in l)

Данные

In [12]:
# Slide a window of `block_size` tokens over the encoded corpus: every window
# is one input row, and the token immediately after it is that row's target.
data = encode(text)
X = np.array(
    [data[start:start + block_size] for start in range(len(data) - block_size)]
)  # (N, T)
Y = np.array(data[block_size:])  # (N,)

Параметры

In [13]:
# Seed NumPy's global RNG so the initial weights are the same on every run.
np.random.seed(42)

# Both trainable matrices start as standard-normal noise scaled down by 0.01.
token_embedding_table = 0.01 * np.random.standard_normal((vocab_size, n_embd))  # (vocab, C)
W1 = 0.01 * np.random.standard_normal((block_size * n_embd, vocab_size))  # (T*C, vocab)

Обучение

In [14]:
# Full-batch gradient descent on a single-layer model:
# embedding lookup -> flatten -> linear layer -> softmax, with cross-entropy loss.
# Note: W1 and token_embedding_table are updated in place, so re-running this
# cell continues training from the current weights rather than restarting.

# Row index used to pick each example's target column; it does not change
# between epochs, so build it once.
batch_rows = np.arange(len(Y))

for epoch in range(epochs):
    # Forward pass
    x_emb = token_embedding_table[X]  # (B, T, C)
    x_flat = x_emb.reshape(x_emb.shape[0], -1)  # (B, T*C)
    logits = x_flat @ W1  # (B, vocab)

    # Softmax; subtracting the row-wise max first keeps exp() from overflowing
    logits -= np.max(logits, axis=1, keepdims=True)
    probs = np.exp(logits)
    probs /= probs.sum(axis=1, keepdims=True)

    # Loss: mean negative log-probability assigned to the target tokens
    loss = -np.log(probs[batch_rows, Y]).mean()
    if epoch % 50 == 0 or epoch == epochs - 1:
        print(f"Epoch {epoch:3d} | Loss: {loss:.4f}")

    # Backward pass: d(loss)/d(logits) = (probs - one_hot(Y)) / B.
    # Work on a copy so that `probs` still holds probabilities afterwards.
    dlogits = probs.copy()
    dlogits[batch_rows, Y] -= 1
    dlogits /= len(Y)

    dW1 = x_flat.T @ dlogits  # (T*C, vocab)
    dx_flat = dlogits @ W1.T  # (B, T*C)
    dx_emb = dx_flat.reshape(x_emb.shape)  # (B, T, C)

    # Gradient w.r.t. the embeddings: scatter-add every position's gradient
    # into the row of the token that occupied that position. np.add.at is
    # required because the same token id occurs many times in X; a plain
    # `d_token_embedding[X] += dx_emb` would keep only one contribution per id.
    d_token_embedding = np.zeros_like(token_embedding_table)
    np.add.at(d_token_embedding, X, dx_emb)

    # Weight update (plain gradient descent step)
    W1 -= learning_rate * dW1
    token_embedding_table -= learning_rate * d_token_embedding


Epoch   0 | Loss: 1.3864
Epoch  50 | Loss: 1.3286
Epoch 100 | Loss: 0.3958
Epoch 150 | Loss: 0.0797
Epoch 200 | Loss: 0.0347
Epoch 250 | Loss: 0.0207
Epoch 300 | Loss: 0.0143
Epoch 350 | Loss: 0.0107
Epoch 400 | Loss: 0.0085
Epoch 450 | Loss: 0.0070
Epoch 499 | Loss: 0.0059


Генерация

In [15]:
def generate(context, max_new_tokens=20):
    """Sample text from the trained model one token at a time.

    Args:
        context: Sequence of token ids (list, tuple or 1-D array) used as the
            prompt. Only the last ``block_size`` ids are kept, both as model
            input and in the returned text.
        max_new_tokens: Number of tokens to sample and append.

    Returns:
        The kept prompt ids followed by the sampled ids, converted to a
        string with ``decode``.

    Raises:
        ValueError: If tokens are requested but the prompt contains fewer
            than ``block_size`` ids (the model has no padding token).
    """
    # Copy into a plain list so tuples and arrays work as prompts and the
    # caller's sequence is never modified.
    generated = list(context[-block_size:])

    if max_new_tokens > 0 and len(generated) < block_size:
        raise ValueError(
            f"context must contain at least {block_size} tokens, got {len(generated)}"
        )

    for _ in range(max_new_tokens):
        x = np.array([generated[-block_size:]])  # (1, T)
        x_emb = token_embedding_table[x]  # (1, T, C)
        x_flat = x_emb.reshape(1, -1)  # (1, T*C)
        logits = x_flat @ W1  # (1, vocab)

        # Softmax with the max subtracted so exp() cannot overflow.
        probs = np.exp(logits - np.max(logits))
        probs = probs / probs.sum()

        # Draw from the predicted distribution using NumPy's global RNG.
        next_token = np.random.choice(vocab_size, p=probs.ravel())
        generated.append(int(next_token))
    return decode(generated)

Вывод

In [16]:
# Prompt the model with "hell" and print the prompt plus its sampled continuation.
print("Сгенерированный текст:", generate(context=encode("hell")))

Сгенерированный текст: hellohellohellohellohell
