In [1]:
import torch
import nano_gpt

In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"

We get the data.

In [3]:
# wget https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt
with open("input.txt", "r", encoding="utf-8") as f:
    corpus = f.read()

# The properties of the test
chars = sorted(list(set(corpus)))
vocab_size = len(chars)

# Encoding and decoding
string_to_int = {character: index for index, character in enumerate(chars)}
int_to_string = {index: character for index, character in enumerate(chars)}
encode = lambda string: [string_to_int[char] for char in string]
decode = lambda list_int: "".join([int_to_string[integer] for integer in list_int])

In [4]:
# Train and test splits
data = torch.tensor(encode(corpus), dtype=torch.long, device=device)
number_train = int(0.9 * len(data))
train_data = data[:number_train]
validation_data = data[:number_train]

Here we can set up the model hyperparameters.

In [5]:
batch_size = 64
block_size = 256
max_iters = 10000
eval_interval = 100
learning_rate = 3e-4
dropout = 0.2
eval_iters = 100
number_layers = 6
number_heads = 6
number_embeddings = 384  # 384 / 6 = 64 dimensional heads

We now define the model.

In [6]:
model = nano_gpt.BigramLanguageModel(
    vocab_size=vocab_size,
    number_embeddings=number_embeddings,
    block_size=block_size,
    number_heads=number_heads,
    number_layers=number_layers,
    dropout=dropout,
).to(device)

In [7]:
# Optimizer
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

We can now train the model.

In [8]:
# Train
model.train_model(
    max_iters,
    train_data,
    validation_data,
    optimizer,
    batch_size,
    eval_interval,
    eval_iters,
)

At step 0: train loss 4.3349, val loss 4.3343.
At step 100: train loss 2.4786, val loss 2.4773.
Training manually interrupted.


In [None]:
torch.save(model, "shakespeare.torch")

In [6]:
model = torch.load("shakespeare.torch")

Let's compute the final validation accuracy.

In [14]:
losses = nano_gpt.estimate_loss(model, 200, block_size, batch_size, train_data, validation_data)
print(f"Train loss {losses['train']:.4f}, val loss {losses['val']:.4f}.")

Train loss 0.3950, val loss 0.3948.


Let's see an example of the produced text.

In [15]:
# Generate from the model
print("\nAn example of text generated from the model.")
context = torch.zeros((1, 1), dtype=torch.long, device=device)
print(decode(model.generate(context, 300)[0].tolist()))


An example of text generated from the model.

end love's dooms! Sweet thou help!
O prince! thou dost uttake before I have loved thee,
But I with my age for an Edward's grave.

CATESBY:
Slandering and deliver to unfly the rage.

HENRY BOLINGBROKE:
My gracious urge is chair, I thank thee;
I will return to her at the pride;
And, if I die, to fill 


We can also have the model generate text forever.

In [16]:
model.generate_forever(decode, 0)

This Volsces
Hath greateful ladies.

ARCHIDAMUS:
Iffer the king must die their own perforce.

MONTAGUE:
But he's a man to do so, my lord.

MARCIUS:
As the was always poor infection, as they say
To the Marcius shall be honour to the people!

All:
Content that so my brother,
The mayor to the play with my tongue.
Cousin of Buckingham, my my brother wrongs:
But look to the earth, I am gall'd unto my face.
But, moreo, my good life, my crown, and fear.
Nothing come into his honour in your business,
The which he had mived your father forfeit,
Both with his condition with youth air, 'Will't not be;
From so your judgment act an our house,
And so harms come to 't.

POMPEY:
True, to the bloody battle, the frant, of them are this
To give your great errancon's sense; and these two beats
As he what in them, like an empty helm
With touch on Paris'ies. Lord Afort night,
Bloody though is hot befell may disposition
The narror of England's war, and more heart.
But, soft! where is this?

First Citizen:
Gi