## Loading Data

In [1]:
! wget https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt
with open('input.txt', 'r', encoding='utf-8') as f:
    text = f.read()

--2023-02-10 08:45:41--  https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1115394 (1.1M) [text/plain]
Saving to: ‘input.txt’


2023-02-10 08:45:42 (29.6 MB/s) - ‘input.txt’ saved [1115394/1115394]



## Training the GPT Model

In [2]:
import torch

split_size = 0.9
batch_size = 64
block_size = 256
dropout = 0.2
n_layers = 6
n_embd = 384
n_heads = 6

max_iters = 5000
eval_interval = 500
learning_rate = 3e-4
eval_iters = 200
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [3]:
from google.colab import drive
drive.mount('/content/gdrive')

import os
root = '/content/gdrive/MyDrive/tinyGPT/'
os.sys.path.append(root)

Mounted at /content/gdrive


In [4]:
from data import CharData

data = CharData(text, params={
    'split_size': split_size,
    'batch_size': batch_size,
    'block_size': block_size,
    'device': device,
})

data loader successfully initiated.


In [5]:
from gpt import GPTLanguageModel

model = GPTLanguageModel(params={
    'vocab_size': data.get_vocab_size(),
    'block_size': block_size,
    'n_layers': n_layers,
    'dropout': dropout,
    'n_heads': n_heads,
    'n_embd': n_embd,
    'device': device,
})
model.to(device)

GPTLanguageModel(
  (token_embedding_table): Embedding(65, 384)
  (pos_embedding_table): Embedding(256, 384)
  (transformer): Sequential(
    (0): Block(
      (self_attn_heads): MultiHeads(
        (heads): ModuleList(
          (0): Head(
            (key): Linear(in_features=384, out_features=64, bias=False)
            (query): Linear(in_features=384, out_features=64, bias=False)
            (value): Linear(in_features=384, out_features=64, bias=False)
            (dropout): Dropout(p=0.2, inplace=False)
          )
          (1): Head(
            (key): Linear(in_features=384, out_features=64, bias=False)
            (query): Linear(in_features=384, out_features=64, bias=False)
            (value): Linear(in_features=384, out_features=64, bias=False)
            (dropout): Dropout(p=0.2, inplace=False)
          )
          (2): Head(
            (key): Linear(in_features=384, out_features=64, bias=False)
            (query): Linear(in_features=384, out_features=64, bias=False)
 

In [6]:
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

In [7]:
from utils import estimate_loss

for iter in range(max_iters):

    # check the loss once every eval_iters intervals pass
    if iter % eval_interval == 0 or iter == max_iters - 1:
        losses = estimate_loss(model, data, eval_iters)
        print(f"iter {iter} - train loss {losses['train']:.4f}, val loss {losses['val']:.4f}")
    
    # get a batch of data
    X, Y = data.get_batch('train')

    # evaluate the loss
    logits, loss = model(X, Y)
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()

iter 0 - train loss 4.3934, val loss 4.3954
iter 500 - train loss 2.0187, val loss 2.0917
iter 1000 - train loss 1.6057, val loss 1.7710
iter 1500 - train loss 1.4349, val loss 1.6414
iter 2000 - train loss 1.3418, val loss 1.5715
iter 2500 - train loss 1.2766, val loss 1.5339
iter 3000 - train loss 1.2251, val loss 1.4972
iter 3500 - train loss 1.1802, val loss 1.4866
iter 4000 - train loss 1.1400, val loss 1.4757
iter 4500 - train loss 1.1085, val loss 1.4832
iter 4999 - train loss 1.0697, val loss 1.4855


In [8]:
context = torch.zeros((1, 1), dtype=torch.long, device=device)
print(data.decode(model.generate(context, max_new_tokens=500)[0].tolist()))
open('output_gpt.txt', 'w').write(data.decode(model.generate(context, max_new_tokens=10000)[0].tolist()))


I know why, 'tis a brain. Pray thee, sit, a
cure to placket-pace; therow you pray whither they mave and
whether shutting by the fell of grief
look of Capitol!

LADY CAPULET:
What lost you mistakes in this better maid, there him
witness be sworn and wolf to him triar,
Shall be sad on the word i like mind.

JULIET:
Ay, that hair since strange have.

LADY CAPULET:
Alack, we'll not duns, of not hand he wear'd off,
As I stoop out of chidity deserves of
The abundant troops at it minds of it. And grace


10001