In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class GPT(nn.Module):
    def __init__(self, vocab_size, block_size=256, embed_dim=64, num_layers=4):
        super().__init__()
        self.block_size = block_size
        self.transformer = nn.Sequential(
            nn.Embedding(vocab_size, embed_dim),
            nn.Sequential(*[Layer(embed_dim) for _ in range(num_layers)]),
            nn.LayerNorm(embed_dim),
        )
        self.lm_head = nn.Linear(embed_dim, vocab_size, bias=False)
    
    def forward(self, x):
        return self.lm_head(self.transformer(x))

class Layer(nn.Module):
    def __init__(self, embed_dim):
        super().__init__()
        self.ln1 = nn.LayerNorm(embed_dim)
        self.q = nn.Linear(embed_dim, embed_dim, bias=False)
        self.v = nn.Linear(embed_dim, embed_dim, bias=False)
        self.proj = nn.Linear(embed_dim, embed_dim, bias=False)
        self.ln2 = nn.LayerNorm(embed_dim)
        self.mlp = nn.Sequential(
            nn.Linear(embed_dim, 4 * embed_dim),
            nn.GELU(approximate="tanh"),
            nn.Linear(4 * embed_dim, embed_dim),
        )
        self.proj = nn.Linear(embed_dim, embed_dim, bias=False)

    def forward(self, x):
        B, T, C = x.shape
        xn = self.ln1(x)
        q = torch.sigmoid(self.q(xn))
        v = self.v(F.pad(xn, (0, 0, 1, -1)))
        attn = torch.fft.ifft(torch.fft.fft(q) * torch.fft.fft(v).conj()).real

        x = x + attn
        x = x + self.mlp(self.ln2(x))
        return x

In [2]:
import lightning as pl
from shared import corpus, tokenizers, trainers

text = corpus.shakespeare()
tokenizer = tokenizers.unique_chars(text)

pl.seed_everything(89026614)
model = GPT(tokenizer.get_vocab_size())
trainer = trainers.CausalTrainer(model, tokenizer, device = "cpu")
trainer.train(text, batch_size=36, epochs=25)

  from .autonotebook import tqdm as notebook_tqdm
Found cached dataset tiny_shakespeare (/Users/cztomsik/.cache/huggingface/datasets/tiny_shakespeare/default/1.0.0/b5b13969f09fe8707337f6cb296314fbe06960bd9a868dca39e713e163d27b5e)
100%|██████████| 3/3 [00:00<00:00, 764.55it/s]
Global seed set to 89026614
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

  | Name  | Type | Params
-------------------------------
0 | model | GPT  | 190 K 
-------------------------------
190 K     Trainable params
0         Non-trainable params
190 K     Total params
0.764     Total estimated model params size (MB)


Sanity Checking DataLoader 0: 100%|██████████| 2/2 [00:00<00:00, 18.82it/s]And nowArdqUJrmGGVyWierSYeY3O$,bvFh:imqlew JjDr,v&CXel&$IiJ!GcvjAcINfe!
                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 0: 100%|██████████| 202/202 [00:22<00:00,  8.86it/s, loss=1.58, v_num=125]And now so her our cellentlemember in
Stranys hath woman:
I am holdes,

Epoch 1: 100%|██████████| 202/202 [00:23<00:00,  8.71it/s, loss=1.48, v_num=125, test_loss=1.750]And now;
Then more sorrow, sustle claif the grace
Of must did how that 
Epoch 2: 100%|██████████| 202/202 [00:23<00:00,  8.67it/s, loss=1.45, v_num=125, test_loss=1.600]And now; which we man chair too saint.

First Senator:
I would sweeted.
Epoch 3: 100%|██████████| 202/202 [00:23<00:00,  8.50it/s, loss=1.42, v_num=125, test_loss=1.510]And now her well; to consented at thy good for a fie hight!
In amend wh
Epoch 4: 100%|██████████| 202/202 [00:23<00:00,  8.71it/s, loss=1.39, v_num=125, test_loss=1.480]And now I his steed;
Which conspiracious she born.

MERCUTIO:
Nay, my f
Epoch 5: 100%|██████████| 202/202 [00:24<00:00,  8.31it/s, loss=1.38, v_num=125, test_loss=1.450]And now my breaths, that the wrong? and the plucks alike, that warrant 
Epo

`Trainer.fit` stopped: `max_epochs=25` reached.


Epoch 24: 100%|██████████| 202/202 [00:26<00:00,  7.76it/s, loss=1.28, v_num=125, test_loss=1.400]


In [3]:
print(trainer.wrapper.generate("O God, O God!", 650))

O God, O God!

Lord:
My walk not seem they
Whereon the foul another, star, with me,
If spear than his
from my sudden sightst the his such sword,
Than to help me,
In brough I do children, sir; I have hold thee her own rewards or they
become to the foolishment of our agirst Watchman:
He hath nor son should supposed; though so he wish'd with so taken,
To bething shalt did now is all me,
My fair sigh wither, which heavy seems by woman:
Would I know you art a horse!'
Somerns first Citizen:
No, somethought we heard wong woman.

LUCIO:
Why dost thou shall never hither one hunt that do not
Betuity.
To die.

Second Servingman:
Whither, and both: it is the most gen
