In [49]:
import torch
from modules import LanguageModel
from modules.optimizers import AdamW
from utils import Checkpointer, Tokenizer
from pathlib import Path
from modules.schedulers import CosAnnealingScheduler


# Tokenizer: constructed, then load() is called before its vocab is read below.
tokenizer = Tokenizer()
tokenizer.load()

# ---- Model hyperparameters ----
d_model = 128
num_heads = 8
d_ff = int(8 / 3 * d_model)  # 8/3 of d_model, truncated to an integer
vocab_size = len(tokenizer.vocab)
max_seq_len = 1024
num_layers = 4
model = LanguageModel(
    d_model,
    num_heads,
    d_ff,
    vocab_size,
    max_seq_len,
    num_layers,
)

# ---- Optimizer and learning-rate schedule ----
max_lr = 3e-3
min_lr = 1e-3
warmup_steps = 20
max_steps = 100
scheduler = CosAnnealingScheduler(max_lr, min_lr, warmup_steps, max_steps)
optimizer = AdamW(
    list(model.parameters()),
    lr=scheduler.max_lr,
    weight_decay=0.1,
)

# ---- Checkpoint restore ----
# The third argument is presumably the training iteration of the checkpoint
# to restore -- TODO confirm against Checkpointer.load_checkpoint.
# Kept as the final expression so its return value is shown as the cell output.
checkpointer = Checkpointer(Path('data/models'))
checkpointer.load_checkpoint(model, optimizer, 2000)

2000

In [75]:
# Look up the vocab entry for the end-of-text marker; note the key is a
# bytes literal, not a str. The value is displayed as the cell output.
tokenizer.vocab[b'<|endoftext|>']

256

In [87]:
# Autoregressive text generation: starting from a prompt, ask the model for
# one token at a time until the end-of-text token is produced or
# `max_gen_len` tokens have been generated, then print prompt + continuation.
sequence = 'It takes practice, Lily'
max_gen_len = 200
end_token = '<|endoftext|>'
# The vocab is indexed with bytes keys, so encode the marker to get its id.
end_token_id = tokenizer.vocab[end_token.encode('utf-8')]

# Tokenize the prompt once. Generated tokens are accumulated as ids and fed
# back to the model directly, so the model input never depends on a
# decode -> re-encode round-trip of the text produced so far.
prompt_ids = torch.tensor(tokenizer.encode(sequence))
generated_ids = []

# A prompt that already ends with the end marker gets no continuation.
steps = 0 if sequence.endswith(end_token) else max_gen_len

# Inference only: no autograd bookkeeping is needed.
with torch.no_grad():
    for _ in range(steps):
        context = torch.cat([
            prompt_ids,
            torch.tensor(generated_ids, dtype=prompt_ids.dtype),
        ])
        next_token = model.generate_next_token(context).item()
        generated_ids.append(next_token)
        # Stop as soon as the end token is produced (it stays in the output),
        # instead of discovering it one model call later.
        if next_token == end_token_id:
            break

# Decode the whole continuation in one call rather than token by token.
if generated_ids:
    sequence += tokenizer.decode(generated_ids)
print(sequence)

It takes practice, Lily and Sam was very happy.
<|endoftext|>
