## Build a GPT model

In [8]:
from model import *
import torch

# Seed torch's global RNG so the random input drawn below is repeatable
# (presumably this also pins the model's weight initialisation -- confirm in model.py).
torch.manual_seed(42)

model = GPTModel(GPT_CONFIG_124M)

## Smoke-test the model: feed it a (1, 10) tensor of random ids drawn from [0, 100)
input_ids = torch.randint(low=0, high=100, size=(1, 10))
output = model(input_ids)

print(f"input shape: {input_ids.shape}")
print(f"output shape: {output.shape}")
# Add up the element count of every tensor yielded by model.parameters().
print("Number of parameters:", sum(map(torch.numel, model.parameters())))

input shape: torch.Size([1, 10])
output shape: torch.Size([1, 10, 50257])
Number of parameters: 163059793


## Check the tokenizer

In [7]:
import tiktoken
# Load tiktoken's "gpt2" encoding and report how many entries its vocabulary has.
enc = tiktoken.get_encoding("gpt2")
print(f"Number of vocab: {enc.n_vocab}")

## Round-trip a sample string: text -> token ids -> text
text = "Hello, world!"
tokens = enc.encode(text)
decoded_text = enc.decode(tokens)

# Show the original text, its token ids, and the text recovered from those ids.
print("Example of tokenization:", text)
print(f"Encoded tokens: {tokens}")
print(f"Decoded text: {decoded_text}")

Number of vocab: 50257
Example of tokenization: Hello, world!
Encoded tokens: [15496, 11, 995, 0]
Decoded text: Hello, world!


## Check the torch masking functions

In [1]:
import torch

In [12]:
## Compare the two triangular helpers that can back an attention mask,
## i.e. a mask that keeps a position from attending to future tokens.
n = 8

# Upper triangle, diagonal included: ones on and above the diagonal, zeros below it.
t = torch.ones(n, n).triu()
print(f"torch.triu:{t}")

# Lower triangle, diagonal included: ones on and below the diagonal, zeros above it.
# `t` is deliberately reassigned, so after this cell it holds the lower-triangular matrix.
t = torch.ones(n, n).tril()
print(f"torch.tril:{t}")

torch.triu:tensor([[1., 1., 1., 1., 1., 1., 1., 1.],
        [0., 1., 1., 1., 1., 1., 1., 1.],
        [0., 0., 1., 1., 1., 1., 1., 1.],
        [0., 0., 0., 1., 1., 1., 1., 1.],
        [0., 0., 0., 0., 1., 1., 1., 1.],
        [0., 0., 0., 0., 0., 1., 1., 1.],
        [0., 0., 0., 0., 0., 0., 1., 1.],
        [0., 0., 0., 0., 0., 0., 0., 1.]])
torch.tril:tensor([[1., 0., 0., 0., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0., 0., 0., 0.],
        [1., 1., 1., 0., 0., 0., 0., 0.],
        [1., 1., 1., 1., 0., 0., 0., 0.],
        [1., 1., 1., 1., 1., 0., 0., 0.],
        [1., 1., 1., 1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1., 1., 1., 1.]])


In [3]:
!pip install tiktoken

Collecting tiktoken
  Downloading tiktoken-0.9.0-cp39-cp39-macosx_10_12_x86_64.whl.metadata (6.7 kB)
Collecting regex>=2022.1.18 (from tiktoken)
  Downloading regex-2024.11.6-cp39-cp39-macosx_10_9_x86_64.whl.metadata (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.5/40.5 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
Downloading tiktoken-0.9.0-cp39-cp39-macosx_10_12_x86_64.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading regex-2024.11.6-cp39-cp39-macosx_10_9_x86_64.whl (287 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m287.7/287.7 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h[33mDEPRECATION: pyodbc 4.0.0-unsupported has a non-standard version number. pip 24.1 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pyodbc or contact the author to suggest that they