In [1]:
%%capture
%run 6.1_Coding_The_GPT_Model.ipynb

In [2]:
# Exercise 4.1 Number of Parameters in Feed Forward and MHA Modules

# Build a single transformer block and take every count from it, so the cell
# gives the same result on a fresh kernel and does not rely on any variable
# that merely happens to be left in the namespace by the %run above.
trf_block = TransformerBlock(GPT_CONFIG_124M)

# Number of transformer blocks the per-block counts are scaled by; this is the
# "12" shown in the printed labels of the following cells.
NUM_TRF_BLOCKS = 12

# Feed-forward module: parameters in one block, then across all blocks.
ffn_params = sum(p.numel() for p in trf_block.ff.parameters())
ffn_params_total = ffn_params * NUM_TRF_BLOCKS

# Multi-head attention module: parameters in one block, then across all blocks.
mha_params = sum(p.numel() for p in trf_block.att.parameters())
mha_params_total = mha_params * NUM_TRF_BLOCKS

In [3]:
# Report the feed-forward parameter count for one block and for all 12 blocks,
# one line each, with thousands separators.
print(
    f"01 Feed forward Module Parameters: {ffn_params:,}",
    f"12 Feed forward Module Parameters: {ffn_params_total:,}",
    sep="\n",
)

01 Feed forward Module Parameters: 4,722,432
12 Feed forward Module Parameters: 56,669,184


In [4]:
# Report the multi-head attention parameter count for one block and for all
# 12 blocks, one line each, with thousands separators.
print(
    f"01 Multi-Head Atttention Module Parameters: {mha_params:,}",
    f"12 Multi-Head Atttention Module Parameters: {mha_params_total:,}",
    sep="\n",
)

01 Multi-Head Atttention Module Parameters: 2,360,064
12 Multi-Head Atttention Module Parameters: 28,320,768


In [5]:
# Exercise 4.2 Initializing larger GPT models

# Base settings shared by every model size; get_config() copies this dict and
# overrides emb_dim, n_heads and n_layers for the requested variant.
GPT_CONFIG = dict(
    vocab_size=50257,      # number of entries in the vocabulary
    context_length=1024,   # context length
    emb_dim=768,           # embedding dimension
    n_heads=12,            # attention heads
    n_layers=12,           # transformer blocks
    drop_rate=0.1,         # dropout rate
    qkv_bias=False,        # bias on the query/key/value projections
)

In [6]:
def get_config(base_config, model_name="gpt2-small"):
    """Return a copy of ``base_config`` sized for the named GPT-2 variant.

    Args:
        base_config: Settings to start from. It is duplicated with its own
            ``.copy()`` method; the object passed in is not assigned to.
        model_name: One of "gpt2-small", "gpt2-medium", "gpt2-large" or
            "gpt2-xl".

    Returns:
        The copy, with "emb_dim", "n_layers" and "n_heads" set to the values
        for ``model_name``.

    Raises:
        ValueError: If ``model_name`` is not one of the names listed above.
    """
    # variant name -> (emb_dim, n_layers, n_heads)
    variant_dims = {
        "gpt2-small": (768, 12, 12),
        "gpt2-medium": (1024, 24, 16),
        "gpt2-large": (1280, 36, 20),
        "gpt2-xl": (1600, 48, 25),
    }

    config = base_config.copy()

    # Match by equality, in declaration order, instead of a hash lookup so that
    # any model_name value (even an unhashable one) ends up in the ValueError.
    dims = next(
        (spec for name, spec in variant_dims.items() if model_name == name),
        None,
    )
    if dims is None:
        raise ValueError(f"Incorrect Model Name {model_name}")

    emb_dim, n_layers, n_heads = dims
    config["emb_dim"] = emb_dim
    config["n_layers"] = n_layers
    config["n_heads"] = n_heads
    return config

In [7]:
def calculate_size(model):
    """Print the weight-tied parameter count and the float32 size of ``model``.

    Two lines are written to stdout:

    * the number of parameters left after subtracting those of
      ``model.out_head`` from the full count (the "weight tying" figure), and
    * the size in megabytes of *all* parameters, ``out_head`` included,
      assuming 4 bytes (float32) per parameter.

    Args:
        model: Object providing ``parameters()`` and an ``out_head`` attribute
            that also provides ``parameters()``; every yielded parameter must
            offer ``numel()``.

    Returns:
        None. The results are only printed.
    """
    bytes_per_param = 4           # float32
    bytes_per_mb = 1024 * 1024

    n_params = sum(param.numel() for param in model.parameters())
    n_out_head = sum(param.numel() for param in model.out_head.parameters())

    # Count with the out_head parameters left out.
    print(f"Number of Trainable Parameters using Weight Tying => {n_params - n_out_head:,}")

    # The memory estimate deliberately uses the full, untied parameter count.
    size_mb = n_params * bytes_per_param / bytes_per_mb
    print(f"Total Size of the Model: {size_mb:.2f} MB")

In [8]:
# Supported size suffixes; index 2 picks "large".
model_sizes = ["small", "medium", "large", "xl"]
model_name = "gpt2-" + model_sizes[2]

# Derive the settings for the chosen variant from the shared base config and
# instantiate the model with them.
CONFIG = get_config(GPT_CONFIG, model_name)
model = GPTModel(CONFIG)

# Print the variant name, then its parameter count and size.
print(f"{model_name}:")
calculate_size(model)

gpt2-large:
Number of Trainable Parameters using Weight Tying => 773,891,840
Total Size of the Model: 3197.56 MB
