In [2]:
from gpt_download import download_and_load_gpt2
# Load the 355M GPT-2 checkpoint from the local "gpt2" directory (the loader
# reports files that already exist there as up-to-date) and unpack its two
# return values for use in the cells below.
settings, params = download_and_load_gpt2(model_size="355M", models_dir="gpt2")

File already exists and is up-to-date: gpt2\355M\checkpoint
File already exists and is up-to-date: gpt2\355M\encoder.json
File already exists and is up-to-date: gpt2\355M\hparams.json
File already exists and is up-to-date: gpt2\355M\model.ckpt.data-00000-of-00001
File already exists and is up-to-date: gpt2\355M\model.ckpt.index
File already exists and is up-to-date: gpt2\355M\model.ckpt.meta
File already exists and is up-to-date: gpt2\355M\vocab.bpe


In [3]:
# Model hyperparameters, keyed by a human-readable model name.
# Only the 355M ("medium") entry is defined; later cells look the
# configuration up through this exact key.
model_configs = {
    "gpt2-medium (355M)": dict(
        vocab_size=50257,
        context_length=1024,
        emb_dim=1024,
        n_heads=16,
        n_layers=24,
        drop_rate=0.1,
        qkv_bias=True,
    ),
}

In [None]:
from MyGPT2.gpt2_model import GPT2Model
from MyGPT2.train_utils import load_weights_into_gpt

# Build the network from the 355M configuration entry.
model = GPT2Model(model_configs["gpt2-medium (355M)"])

# Populate the freshly built model from the loaded checkpoint values in
# `params`. The helper's return value is not used; `model` itself is what the
# following cells train and sample from.
load_weights_into_gpt(model, params)

In [11]:
from MyGPT2.finetune_data_utils import create_data_loader
import tiktoken
from MyGPT2.train_utils import train_model_simple
import torch

# Fine-tune the pretrained model on the instruction dataset for one epoch.

# Seed PyTorch's global RNG so the stochastic parts of this cell (e.g. dropout,
# which the model config enables with drop_rate=0.1) repeat across a
# Restart & Run All. Whether the data loader shuffles is not visible from
# here; if it does and draws from torch's RNG, this seed covers it as well.
torch.manual_seed(123)

# Choose the device first: the model must be on it before training begins.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# No visible cell moves the model to `device`, so do it explicitly here.
# This is a no-op when the model is already on that device.
model.to(device)

tokenizer = tiktoken.get_encoding("gpt2")

# NOTE(review): the unpack order is (train, test, val) -- confirm that it
# matches the order in which create_data_loader returns its loaders.
train_loader, test_loader, val_loader = create_data_loader(
    file_path="instruction-data.json",
    tokenizer=tokenizer
)

# Build the optimizer only after the model sits on its final device.
optimizer = torch.optim.AdamW(
    model.parameters(), lr=5e-5, weight_decay=0.1
)

# eval_freq=5 gives the "every 5 steps" loss lines in the recorded log.
# start_context/tokenizer are passed as None; presumably this skips sample
# text generation inside the training loop -- TODO confirm in train_utils.
train_losses, val_losses, token_seen = train_model_simple(
    model=model,
    train_loader=train_loader,
    optimizer=optimizer,
    val_loader=val_loader,
    device=device,
    num_epochs=1,
    eval_freq=5,
    eval_iter=5,
    start_context=None,
    tokenizer=None
)


Ep 1 (Step 000000): Train loss 3.067, Val loss 3.056
Ep 1 (Step 000005): Train loss 1.365, Val loss 1.365
Ep 1 (Step 000010): Train loss 1.102, Val loss 1.193
Ep 1 (Step 000015): Train loss 1.055, Val loss 1.144
Ep 1 (Step 000020): Train loss 0.998, Val loss 1.082
Ep 1 (Step 000025): Train loss 0.925, Val loss 1.060
Ep 1 (Step 000030): Train loss 0.983, Val loss 1.032
Ep 1 (Step 000035): Train loss 0.907, Val loss 1.001
Ep 1 (Step 000040): Train loss 0.882, Val loss 0.987
Ep 1 (Step 000045): Train loss 0.823, Val loss 0.972
Ep 1 (Step 000050): Train loss 0.721, Val loss 0.956
Ep 1 (Step 000055): Train loss 0.778, Val loss 0.941
Ep 1 (Step 000060): Train loss 0.791, Val loss 0.938
Ep 1 (Step 000065): Train loss 0.798, Val loss 0.917
Ep 1 (Step 000070): Train loss 0.713, Val loss 0.917
Ep 1 (Step 000075): Train loss 0.789, Val loss 0.912
Ep 1 (Step 000080): Train loss 0.645, Val loss 0.901
Ep 1 (Step 000085): Train loss 0.726, Val loss 0.886
Ep 1 (Step 000090): Train loss 0.731, Val loss

In [13]:
from MyGPT2.finetune_data_utils import format_input
from MyGPT2.text_utils import (
    generate_text_simple,
    text_to_ids
)

# Sample a completion from the fine-tuned model for one hand-written example.

# The raw record and the formatted prompt get their own names so that the
# builtin `input` is not overwritten and one name is not reused for both a
# dict and a string.
entry = {
        "instruction": "Rewrite the following sentence to remove redundancy.",
        "input": "This is really a great and wonderful show."
    }
prompt = format_input(entry)

# Put the model in evaluation mode so dropout is disabled while sampling.
# (Whether the training helper leaves the model in train or eval mode is not
# visible from this notebook, so set it explicitly.)
model.eval()

# Encode the prompt and place the ids on the same device as the model weights.
# Assumes text_to_ids returns a torch tensor, as the tensor printed by this
# cell suggests -- TODO confirm in text_utils.
model_device = next(model.parameters()).device
prompt_ids = text_to_ids(prompt, tokenizer).to(model_device)

# Positional arguments: model, prompt ids, 100 (presumably the number of new
# tokens to generate -- TODO confirm), and the model's context length.
# NOTE(review): the recorded output keeps going after <|endoftext|>, so the
# generation helper does not appear to stop at the end-of-text token.
output = generate_text_simple(
    model,
    prompt_ids,
    100,
    model_configs["gpt2-medium (355M)"]["context_length"]
)

print(output)


tensor([[21106,   318,   281, 12064,   326,  8477,   257,  4876,    13, 19430,
           257,  2882,   326, 20431, 32543,   262,  2581,    13,   198,   198,
         21017, 46486,    25,   198, 30003,  6525,   262,  1708,  6827,   284,
          4781, 49052,    13,   198,   198, 21017, 23412,    25,   198,  1212,
           318,  1107,   257,  1049,   290,  7932,   905,    13,   198,   198,
         21017, 18261,    25,   198,  1212,   318,   257,  1049,   290,  7932,
           905,    13, 50256,   464,  1708,   318,   281, 12064,   326,  8477,
           257,  4876,    13, 19430,   257,  2882,   326, 20431, 32543,   262,
          2581,    13,   198,   198, 21017, 46486,    25,   198, 30003,  6525,
           262,  6827,  1262,   257,   985,   576,    13,   198,   198, 21017,
         18261,    25,   198,   464,  3797, 11687,   625,   262, 13990,    13,
         50256,   464,  1708,   318,   281, 12064,   326,  8477,   257,  4876,
            13, 19430,   257,  2882,   326, 20431, 3

In [14]:
from MyGPT2.text_utils import ids_to_text
# Decode the generated token ids in `output` back into text with the same
# tokenizer. Kept as the cell's final bare expression so the notebook displays
# the resulting string.
ids_to_text(output, tokenizer)

'Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nRewrite the following sentence to remove redundancy.\n\n### Input:\nThis is really a great and wonderful show.\n\n### Response:\nThis is a great and wonderful show.<|endoftext|>The following is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nRewrite the sentence using a simile.\n\n### Response:\nThe cat jumped over the fence.<|endoftext|>The following is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nWhat is the capital of the United States?\n\n###'

In [15]:
# Destination file for the fine-tuned weights (relative to the working directory).
SAVE_PATH = "gpt2_instruct.pth"

# Persist the model's state_dict rather than the model object itself.
torch.save(obj=model.state_dict(), f=SAVE_PATH)