# MiniTransformer Demo – Praktyczne użycie

Ten notebook pokazuje działający przykład MiniTransformera w PyTorch, który uczy się przewidywać kolejny znak w powtarzającym się wzorcu sekwencyjnym (tutaj: ABABABF, ABABABF, ...).

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt


In [2]:
# Vocabulary plus the two lookup tables used to encode / decode characters.
vocab = ['A', 'B','F']
stoi = dict(zip(vocab, range(len(vocab))))  # character -> integer id
itos = dict(enumerate(vocab))               # integer id -> character

# Training text: the 7-character motif "ABABABF" repeated, encoded as ids.
data = [stoi[ch] for ch in "ABABABFABABABFABABABFAB"]
block_size = 6

# Slide a window of `block_size` ids over the text. Each target row is the
# matching input row shifted one position to the right, so position t of the
# target holds the id that follows position t of the input.
window_starts = range(len(data) - block_size)
sequences = [data[start:start + block_size] for start in window_starts]
targets = [data[start + 1:start + block_size + 1] for start in window_starts]

# Lists of Python ints become int64 tensors of shape (num_windows, block_size).
x_train = torch.tensor(sequences)
y_train = torch.tensor(targets)


In [3]:
class TransformerBlock(nn.Module):
    """Single-head causal self-attention with a residual connection and LayerNorm.

    The layer projects its input to queries, keys and values, applies scaled
    dot-product attention in which a position may only attend to itself and
    to earlier positions, passes the result through an output projection,
    adds the original input back and normalises the sum.

    Args:
        embed_dim: Size of the last dimension of the input; also the size of
            every internal projection.
    """

    def __init__(self, embed_dim):
        super().__init__()
        # Bias-free projections for the attention inputs.
        self.key = nn.Linear(embed_dim, embed_dim, bias=False)
        self.query = nn.Linear(embed_dim, embed_dim, bias=False)
        self.value = nn.Linear(embed_dim, embed_dim, bias=False)
        # Output projection and the normalisation applied after the residual.
        self.proj = nn.Linear(embed_dim, embed_dim)
        self.ln = nn.LayerNorm(embed_dim)

    def forward(self, x):
        """Apply causal self-attention to `x`.

        Args:
            x: Tensor of shape (batch, time, embed_dim).

        Returns:
            Tensor of the same shape as `x`.
        """
        _, seq_len, channels = x.shape

        queries = self.query(x)
        keys = self.key(x)
        values = self.value(x)

        # Pairwise similarities, scaled by sqrt(embed_dim).
        attn_logits = (queries @ keys.transpose(-2, -1)) / (channels ** 0.5)

        # is_future[i, j] is True when column j lies after row i; those
        # entries are set to -inf so softmax gives them zero weight.
        positions = torch.arange(seq_len, device=x.device)
        is_future = positions.unsqueeze(0) > positions.unsqueeze(1)
        attn_logits = attn_logits.masked_fill(is_future, float('-inf'))

        weights = F.softmax(attn_logits, dim=-1)
        attended = weights @ values

        # Residual connection followed by layer normalisation.
        return self.ln(x + self.proj(attended))


In [8]:
class MiniTransformer(nn.Module):
    """Tiny language model: token + position embeddings, one TransformerBlock, linear head.

    Args:
        vocab_size: Number of distinct token ids; also the size of the
            output logits.
        embed_dim: Width of the embeddings and of the transformer block.
        block_size: Context length the model is intended for. It is stored
            as `self.block_size` and guarantees that the positional table
            has at least this many rows. It is not enforced as an upper
            bound in `forward`: longer inputs are still accepted as long as
            they fit the positional table.
        max_positions: Minimum number of rows in the positional embedding
            table. The default of 1024 keeps the table shape that was
            previously hard-coded.
    """

    def __init__(self, vocab_size, embed_dim, block_size, max_positions=1024):
        super().__init__()
        self.block_size = block_size
        # The table must cover both the requested context length and the
        # configured minimum, so a large block_size is no longer silently
        # capped at 1024 positions.
        num_positions = max(block_size, max_positions)
        self.token_emb = nn.Embedding(vocab_size, embed_dim)
        self.pos_emb = nn.Embedding(num_positions, embed_dim)
        self.transformer = TransformerBlock(embed_dim)
        self.lm_head = nn.Linear(embed_dim, vocab_size)

    def forward(self, idx):
        """Compute next-token logits for every position of `idx`.

        Args:
            idx: Integer tensor of token ids with shape (batch, time).

        Returns:
            Tensor of logits with shape (batch, time, vocab_size).

        Raises:
            ValueError: If `time` exceeds the number of rows in the
                positional embedding table.
        """
        B, T = idx.shape
        if T > self.pos_emb.num_embeddings:
            # Fail early with a readable message instead of an index error
            # raised from inside the embedding lookup.
            raise ValueError(
                f"sequence length {T} exceeds the positional table size "
                f"{self.pos_emb.num_embeddings}"
            )
        # Create the position ids directly on the input's device.
        positions = torch.arange(T, device=idx.device)
        # (B, T, C) token embeddings + (T, C) position embeddings broadcast
        # over the batch dimension.
        x = self.token_emb(idx) + self.pos_emb(positions)
        x = self.transformer(x)
        return self.lm_head(x)


In [10]:
# Seed torch's global RNG so weight initialisation (and any sampling done
# afterwards) is repeatable across Restart & Run All.
torch.manual_seed(0)

# Take the sizes from the data cell instead of repeating literals: the
# training windows are `block_size` ids long and the ids index into `vocab`.
model = MiniTransformer(vocab_size=len(vocab), embed_dim=16, block_size=block_size)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Full-batch training: every step uses all windows at once.
for epoch in range(300):
    logits = model(x_train)
    # CrossEntropyLoss expects (N, num_classes) logits and (N,) targets, so
    # the batch and time dimensions are flattened together.
    loss = loss_fn(logits.reshape(-1, logits.size(-1)), y_train.reshape(-1))

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if epoch % 40 == 0:
        print(f"Epoka {epoch}, Strata: {loss.item():.4f}")


Epoka 0, Strata: 1.0327
Epoka 40, Strata: 0.1631
Epoka 80, Strata: 0.1596
Epoka 120, Strata: 0.1593
Epoka 160, Strata: 0.1592
Epoka 200, Strata: 0.1601
Epoka 240, Strata: 0.1591
Epoka 280, Strata: 0.1591


In [16]:
def generate(model, start, steps=8):
    """Sample a continuation of `start` from `model`, one character at a time.

    The model is switched to eval mode and stays in eval mode afterwards.
    At every step only the last `block_size` ids (module-level setting) are
    fed to the model, the logits of the final position are turned into
    probabilities, and the next id is drawn with `torch.multinomial`.

    Args:
        model: Module mapping a (1, T) tensor of ids to (1, T, vocab) logits.
        start: Prompt string; every character must be a key of `stoi`.
        steps: Number of characters to append to the prompt.

    Returns:
        The prompt followed by the `steps` sampled characters.

    Raises:
        ValueError: If `start` is empty while `steps` is positive (there is
            no position to predict from).
        KeyError: If `start` contains a character missing from `stoi`.
    """
    if steps > 0 and len(start) == 0:
        raise ValueError("start must contain at least one character")

    model.eval()
    # Put the prompt on the same device as the model's weights; fall back to
    # CPU for a model that has no parameters.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")
    idx = torch.tensor([[stoi[s] for s in start]], dtype=torch.long, device=device)

    # Sampling needs no gradients; skip building the autograd graph.
    with torch.no_grad():
        for _ in range(steps):
            logits = model(idx[:, -block_size:])
            probs = F.softmax(logits[:, -1, :], dim=-1)
            next_id = torch.multinomial(probs, num_samples=1)
            idx = torch.cat([idx, next_id], dim=1)

    return ''.join(itos[i] for i in idx[0].tolist())

# Example usage: extend the prompt "FABB" by 25 sampled characters.
generated_text = generate(model, start="FABB", steps=25)
print("Wygenerowana sekwencja:", generated_text)


Wygenerowana sekwencja: FABBABABFABABABFABABABFABABAB
