In [8]:
import torch

In [6]:
from importlib.metadata import version
# Packages whose installed versions are reported, one line per package.
pkgs = [
    "matplotlib",
    "numpy",
    "pandas",
    "torch",
    "tensorflow",
    "tiktoken",
]

for pkg_name in pkgs:
    try:
        installed = version(pkg_name)
        print(f"{pkg_name} version: {installed}")
    except Exception as err:
        # Best-effort report: a lookup failure is printed instead of raised,
        # so the remaining packages are still listed.
        print(f"Could not get version for package {pkg_name}: {err}")

matplotlib version: 3.10.6
numpy version: 2.3.3
pandas version: 2.3.3
torch version: 2.8.0
tensorflow version: 2.20.0
tiktoken version: 0.11.0


In [10]:
from chapter3 import GPTModel
# Hyperparameters passed to GPTModel when the model is constructed.
GPT_CONFIG_124M = dict(
    vocab_size=50257,     # number of entries in the tokenizer vocabulary
    context_length=256,   # maximum number of tokens per input sequence
    emb_dim=768,          # embedding dimension
    n_heads=12,           # attention heads
    n_layers=12,          # number of layers
    drop_rate=0.1,        # dropout rate
    qkv_bias=False,       # whether query/key/value projections use a bias
)

In [14]:
# Seed torch's RNG before building the model so repeated runs match.
torch.manual_seed(123)
model = GPTModel(GPT_CONFIG_124M)
# NOTE(review): GPTModel is defined in chapter3 (not visible here); eval()
# presumably puts it in inference mode (e.g. disables dropout) — confirm.
# The trailing ";" suppresses the cell's output repr.
model.eval();

In [15]:
import tiktoken
from chapter3 import generate_text_simple

def text_to_token_ids(text, tokenizer):
    """Encode ``text`` into a batched tensor of token IDs.

    Args:
        text: String to tokenize.
        tokenizer: Object exposing ``encode(text, allowed_special=...)`` that
            returns a sequence of integer token IDs (a tiktoken encoding in
            this notebook).

    Returns:
        Integer (``torch.long``) tensor of shape ``(1, num_tokens)``; the
        leading axis is the batch dimension.
    """
    # The special-token string must match exactly: with a trailing space the
    # set entry never matches "<|endoftext|>", so the token stays disallowed.
    encoded = tokenizer.encode(text, allowed_special={"<|endoftext|>"})
    # Explicit dtype keeps an empty encoding integer-typed (torch.tensor([])
    # would otherwise default to float).
    encoded_tensor = torch.tensor(encoded, dtype=torch.long).unsqueeze(0)  # add batch dimension
    return encoded_tensor

In [19]:
# Prompt used for the encoding / generation demos in the following cells.
start_context = "Every effort moves you"
# Load the "gpt2" encoding from tiktoken.
tokenizer = tiktoken.get_encoding("gpt2")

# Encode the prompt; the result carries a leading batch dimension of size 1.
token_ids = text_to_token_ids(start_context, tokenizer)
token_ids

tensor([[6109, 3626, 6100,  345]])

In [21]:
def token_ids_to_text(token_ids, tokenizer):
    """Decode a tensor of token IDs back into text.

    Args:
        token_ids: Tensor of token IDs, either 1-D ``(num_tokens,)`` or with
            a leading batch dimension of size one ``(1, num_tokens)``.
        tokenizer: Object exposing ``decode(list_of_ids)``.

    Returns:
        Whatever ``tokenizer.decode`` returns for the flat list of IDs
        (a string for the tiktoken encoding used in this notebook).
    """
    # squeeze(0) drops a size-1 batch dimension, but it also collapses a
    # single-token 1-D tensor to a 0-dim scalar whose .tolist() is a bare int.
    # atleast_1d restores a 1-D tensor so decode always receives a list.
    flat = torch.atleast_1d(token_ids.squeeze(0))
    return tokenizer.decode(flat.tolist())


# Round-trip check: decoding the encoded prompt should reproduce the prompt text.
token_ids_to_text(token_ids, tokenizer)

'Every effort moves you'

In [22]:
# Extend the encoded prompt with generate_text_simple (imported from chapter3;
# its implementation is not visible here). max_new_tokens asks for 10 extra
# tokens, and context_size is taken from the model configuration.
token_ids = generate_text_simple(
    model = model, 
    idx = text_to_token_ids(start_context, tokenizer),
    max_new_tokens = 10,
    context_size = GPT_CONFIG_124M["context_length"]
)

In [24]:
# Length after dropping the batch dimension: 4 prompt tokens + 10 generated ones.
token_ids.squeeze(0).shape

torch.Size([14])

In [25]:
# Decode the prompt plus the generated continuation back into text.
token_ids_to_text(token_ids, tokenizer) 

'Every effort moves you rentingetic wasnم refres RexMeCHicular stren'

### Calculating the text generation loss: cross-entropy and perplexity

In [26]:
# Two example input sequences of three token IDs each.
inputs = torch.tensor(
    [
        [16833, 3626, 6100],  # "every effort moves"
        [40, 1107, 588],      # "I really like"
    ]
)

# Targets are the inputs shifted one position ahead, so each position's
# target is the token that follows it.
targets = torch.tensor(
    [
        [3626, 6100, 345],    # " effort moves you"
        [1107, 588, 11311],   # " really like chocolate"
    ]
)

In [27]:
# Forward pass only: gradient tracking is disabled because nothing is trained here.
with torch.no_grad():
    logits = model(inputs)

In [28]:
# Show the raw (unnormalised) model outputs.
logits

tensor([[[ 0.1113, -0.1057, -0.3666,  ...,  0.2843, -0.8824,  0.1074],
         [-0.6109, -0.5167, -0.7613,  ...,  0.5450, -1.0319, -0.2175],
         [ 0.5707, -0.6459, -0.0701,  ...,  0.7419, -0.1806, -0.2217]],

        [[-0.2968,  0.1949, -0.1649,  ..., -0.4867,  0.7218, -0.1714],
         [-0.8375,  0.0612, -0.4641,  ...,  0.2327, -0.3889, -0.0770],
         [ 0.5614,  0.6919,  0.8915,  ..., -0.9472,  1.2411, -0.2056]]])

In [29]:
# Two sequences, three positions each, one score per vocabulary entry (vocab_size = 50257).
logits.shape

torch.Size([2, 3, 50257])

In [30]:
# Softmax over the last axis converts each position's logits into a
# probability distribution; the shape is unchanged.
probas = torch.softmax(logits, dim=-1)
probas.shape

torch.Size([2, 3, 50257])

In [31]:
# Show the per-position probabilities.
probas

tensor([[[1.8849e-05, 1.5172e-05, 1.1687e-05,  ..., 2.2409e-05,
          6.9776e-06, 1.8776e-05],
         [9.1569e-06, 1.0062e-05, 7.8786e-06,  ..., 2.9090e-05,
          6.0103e-06, 1.3571e-05],
         [2.9877e-05, 8.8507e-06, 1.5741e-05,  ..., 3.5456e-05,
          1.4094e-05, 1.3526e-05]],

        [[1.2561e-05, 2.0537e-05, 1.4332e-05,  ..., 1.0389e-05,
          3.4784e-05, 1.4239e-05],
         [7.2731e-06, 1.7864e-05, 1.0565e-05,  ..., 2.1207e-05,
          1.1390e-05, 1.5559e-05],
         [2.9496e-05, 3.3605e-05, 4.1029e-05,  ..., 6.5249e-06,
          5.8203e-05, 1.3698e-05]]])

In [32]:
# Pick the highest-probability vocabulary index at every position.
# keepdim=True keeps the reduced axis, so the result has a trailing axis of size 1.
# NOTE(review): this rebinds token_ids, which previously held the generated sequence.
token_ids = torch.argmax(probas, dim=-1, keepdim = True)
# NOTE(review): the label below is misspelled ("Toekn"); it is left unchanged
# so the code still matches the recorded cell output.
print("Toekn IDs:\n", token_ids)

Toekn IDs:
 tensor([[[16657],
         [  339],
         [42826]],

        [[49906],
         [29669],
         [41751]]])


In [34]:
# Compare the desired continuation of the first sequence with the tokens the
# model currently ranks highest. flatten() removes the trailing size-1 axis
# left by keepdim=True so the IDs decode as one flat sequence.
print(f"Target Batch 1: {token_ids_to_text(targets[0], tokenizer)}")
print(f"Outputs batch 1: {token_ids_to_text(token_ids[0].flatten(), tokenizer)}")

Target Batch 1:  effort moves you
Outputs batch 1:  Armed heNetflix


In [35]:
# For the first sequence, look up the probability the model assigns to the
# target token at each of the three positions (advanced indexing pairs
# position i with targets[text_ids][i]).
text_ids = 0
target_probas_1 = probas[text_ids, [0, 1, 2], targets[text_ids]]
print("Text 1:", target_probas_1)

Text 1: tensor([7.4541e-05, 3.1061e-05, 1.1563e-05])


In [36]:
# Same lookup for the second sequence: probability of each target token at
# positions 0, 1 and 2.
text_ids = 1
target_probas_2 = probas[text_ids, [0, 1, 2], targets[text_ids]]
print("Text 2:", target_probas_2)

Text 2: tensor([1.0337e-05, 5.6776e-05, 4.7559e-06])


In [37]:
# Compute the logarithm of all target-token probabilities:
# concatenate both sequences' probabilities into one 1-D tensor (six values)
# and take the natural log of each.

log_probas = torch.log(torch.cat((target_probas_1, target_probas_2)))
print(log_probas)

tensor([ -9.5042, -10.3796, -11.3677, -11.4798,  -9.7764, -12.2561])


In [38]:
# Negative mean log-probability of the target tokens, i.e. the cross-entropy loss.
-1 * torch.mean(log_probas)

tensor(10.7940)

In [40]:
# Reminder of the logits layout before flattening: (sequences, positions, vocabulary).
logits.shape

torch.Size([2, 3, 50257])

In [41]:
# Merge the first two axes so every position becomes one row of vocabulary scores.
logits_flat = logits.flatten(0, 1)
logits_flat.shape

torch.Size([6, 50257])

In [42]:
# Flatten the targets the same way so each row of logits_flat has one target ID.
target_flat = targets.flatten(0, 1)
target_flat.shape

torch.Size([6])

In [46]:
import torch.nn as nn
# Built-in cross-entropy on the raw logits; it reproduces the value computed
# step by step above (softmax -> pick target probabilities -> log -> negative mean).
torch.nn.functional.cross_entropy(logits_flat, target_flat)

tensor(10.7940)

### Calculating the training and validation set losses: