In [6]:
import torch
import torch.nn as nn
from GPT_module import TransformerBlock, LayerNorm


In [7]:
# Model hyperparameters for the "124M" GPT configuration used by the cells below.
GPT_CONFIG_124M = {
    # Vocabulary size of the BPE tokenizer.
    "vocab_size": 50_257,
    # Maximum number of token positions (size of the positional embedding table).
    "context_length": 1_024,
    # Width of the token / positional embeddings.
    "emb_dim": 768,
    # Number of attention heads.
    "n_heads": 12,
    # Number of stacked transformer layers.
    "n_layers": 12,
    # Dropout rate.
    "drop_rate": 0.1,
    # Whether the query/key/value projections use a bias term.
    "qkv_bias": False,
}


**GPT Model**

In [8]:

class GPTModel(nn.Module):
    """GPT-style language model built from stacked ``TransformerBlock`` modules.

    Token embeddings and learned positional embeddings are summed, passed
    through dropout and the transformer stack, normalized by a final
    ``LayerNorm`` and projected to vocabulary-sized logits by a bias-free
    linear head.

    Args:
        cfg: Configuration mapping. This class reads ``"vocab_size"``,
            ``"context_length"``, ``"emb_dim"``, ``"drop_rate"`` and
            ``"n_layers"``; the whole mapping is also handed to every
            ``TransformerBlock``.
    """

    def __init__(self, cfg):
        super().__init__()
        self.tok_emb = nn.Embedding(cfg["vocab_size"], cfg["emb_dim"])
        # One learned vector per position, so inputs can hold at most
        # cfg["context_length"] tokens.
        self.pos_emb = nn.Embedding(cfg["context_length"], cfg["emb_dim"])
        self.drop_emb = nn.Dropout(cfg["drop_rate"])

        self.trf_blocks = nn.Sequential(
            *[TransformerBlock(cfg=cfg) for _ in range(cfg["n_layers"])]
        )

        self.final_norm = LayerNorm(cfg["emb_dim"])
        self.out_head = nn.Linear(
            cfg["emb_dim"], cfg["vocab_size"], bias=False
        )

    def forward(self, in_idx):
        """Compute next-token logits for a batch of token IDs.

        Args:
            in_idx: Integer tensor of token IDs with shape (batch, n_tokens).

        Returns:
            Logits tensor; for the notebook's (2, 4) input the result has
            shape (2, 4, vocab_size).

        Raises:
            IndexError: If ``n_tokens`` exceeds the number of positions in
                the positional embedding table.
        """
        # Unpacking into two names also enforces that the input is 2-D.
        _, seq_len = in_idx.shape

        # Fail early with a readable message instead of the opaque
        # "index out of range in self" raised inside nn.Embedding.
        max_len = self.pos_emb.num_embeddings
        if seq_len > max_len:
            raise IndexError(
                f"Sequence length {seq_len} exceeds the model's "
                f"context length {max_len}."
            )

        tok_embeds = self.tok_emb(in_idx)
        # Position IDs are created on the input's device so the lookup works
        # wherever the batch lives.
        pos_embeds = self.pos_emb(torch.arange(seq_len, device=in_idx.device))
        x = tok_embeds + pos_embeds  # positions broadcast over the batch dim
        x = self.drop_emb(x)
        x = self.trf_blocks(x)
        x = self.final_norm(x)
        logits = self.out_head(x)
        return logits




In [9]:
import tiktoken

# GPT-2 BPE tokenizer; reused by the later cells.
tokenizer = tiktoken.get_encoding("gpt2")

txt1 = 'Every effort moves you'
txt2 = "Every day holds a"

# Encode each prompt and stack the results along a new leading (batch) axis.
# torch.stack needs equally sized tensors, so both prompts must encode to the
# same number of tokens.
batch = torch.stack(
    [torch.tensor(tokenizer.encode(text)) for text in (txt1, txt2)],
    dim=0,
)

print(batch)  # token IDs for 2 texts
print(batch.shape)


tensor([[6109, 3626, 6100,  345],
        [6109, 1110, 6622,  257]])
torch.Size([2, 4])


In [10]:
# Seed the RNG before constructing the model so the random weight
# initialization (and hence the printed logits) is repeatable.
torch.manual_seed(123)
model = GPTModel(GPT_CONFIG_124M)

# NOTE: model.eval() has not been called yet, so the module is still in
# training mode for this forward pass.
out = model(batch)

print(out, out.shape, sep="\n")

tensor([[[ 0.3613,  0.4222, -0.0711,  ...,  0.3483,  0.4661, -0.2838],
         [-0.1792, -0.5660, -0.9485,  ...,  0.0477,  0.5181, -0.3168],
         [ 0.7120,  0.0332,  0.1085,  ...,  0.1018, -0.4327, -0.2553],
         [-1.0076,  0.3418, -0.1190,  ...,  0.7195,  0.4023,  0.0532]],

        [[-0.2564,  0.0900,  0.0335,  ...,  0.2659,  0.4454, -0.6806],
         [ 0.1230,  0.3653, -0.2074,  ...,  0.7705,  0.2710,  0.2246],
         [ 1.0558,  1.0318, -0.2800,  ...,  0.6936,  0.3205, -0.3178],
         [-0.1565,  0.3926,  0.3288,  ...,  1.2630, -0.1858,  0.0388]]],
       grad_fn=<UnsafeViewBackward0>)
torch.Size([2, 4, 50257])


**GPT Model to generate text**

In [11]:
def generate_text_simple(model, idx, max_new_tokens, context_size):
    """Greedily append ``max_new_tokens`` tokens to ``idx``.

    At every step the model sees at most the last ``context_size`` tokens,
    and the highest-scoring token at the final position is appended.

    Args:
        model: Callable mapping token IDs of shape (batch, n_tokens) to
            logits of shape (batch, n_tokens, vocab_size). Its train/eval
            mode is not touched here; call ``model.eval()`` beforehand if
            dropout should be disabled.
        idx: Integer tensor of token IDs with shape (batch, n_tokens).
        max_new_tokens: Number of tokens to generate.
        context_size: Maximum number of trailing tokens fed to the model;
            must be at least 1.

    Returns:
        Tensor of shape (batch, n_tokens + max_new_tokens) holding the
        original IDs followed by the generated ones.

    Raises:
        ValueError: If ``context_size`` is smaller than 1.
    """
    # A zero or negative size would silently mis-crop the context:
    # idx[:, -0:] keeps everything and idx[:, 2:] drops leading tokens.
    if context_size < 1:
        raise ValueError(f"context_size must be >= 1, got {context_size}")

    # Inference only: disable autograd once for the whole loop.
    with torch.no_grad():
        for _ in range(max_new_tokens):
            # Crop to the most recent tokens the model can attend to.
            idx_cond = idx[:, -context_size:]

            # Keep only the logits of the last position: (batch, vocab_size).
            logits = model(idx_cond)[:, -1, :]

            # Softmax preserves ordering, so the argmax of the raw logits
            # already selects the most likely token.
            idx_next = torch.argmax(logits, dim=-1, keepdim=True)  # (batch, 1)

            idx = torch.cat((idx, idx_next), dim=1)

    return idx


In [12]:
# Prompt used for the generation demo, converted to a list of token IDs.
start_context = "Hello, I live in"
encoded = tokenizer.encode(start_context)

print(encoded)

[15496, 11, 314, 2107, 287]


In [13]:
# Wrap the ID list in a tensor and add a leading batch dimension of size 1.
encoded_tensor = torch.tensor(encoded).unsqueeze(0)

print(encoded_tensor, encoded_tensor.shape, sep="\n")


tensor([[15496,    11,   314,  2107,   287]])
torch.Size([1, 5])


In [14]:
# Switch to evaluation mode before generating.
model.eval()

out = generate_text_simple(
    model=model,
    idx=encoded_tensor,
    max_new_tokens=6,
    context_size=GPT_CONFIG_124M["context_length"],
)

print(out)
print(len(out[0]))  # prompt tokens + newly generated tokens


tensor([[15496,    11,   314,  2107,   287, 26863, 22468, 21839,  2906, 11955,
         47269]])
11


In [15]:
# Drop the batch dimension and turn the token IDs back into text.
decoded_text = tokenizer.decode(
    out.squeeze(0).tolist()
)
print(decoded_text)


Hello, I live in implicationfloat Mouse ce GreURI


*Reducing `context_length` from 1024 to 256 for the examples below; `drop_rate` stays at 0.1.*

In [16]:
# Same hyperparameters as the earlier config cell, except that
# "context_length" is 256 instead of 1024.
# NOTE(review): the `model` created earlier is not rebuilt in the cells shown,
# so it still uses the embedding tables sized by the earlier config.
GPT_CONFIG_124M = {
    # Vocabulary size of the BPE tokenizer.
    "vocab_size": 50_257,
    # Maximum number of token positions.
    "context_length": 256,
    # Width of the token / positional embeddings.
    "emb_dim": 768,
    # Number of attention heads.
    "n_heads": 12,
    # Number of stacked transformer layers.
    "n_layers": 12,
    # Dropout rate.
    "drop_rate": 0.1,
    # Whether the query/key/value projections use a bias term.
    "qkv_bias": False,
}


**Utility functions for text to token ID conversion**

In [19]:
def text_to_token_ids(text, tokenizer):
    """Encode ``text`` into a batched tensor of token IDs.

    Args:
        text: String to encode.
        tokenizer: Object whose ``encode(text, allowed_special=...)`` returns
            a list of integer token IDs.

    Returns:
        Tensor holding the IDs with a leading batch dimension of size 1.
    """
    # '<|endoftext|>' is explicitly allowed as a special token in the input.
    ids = tokenizer.encode(text, allowed_special={'<|endoftext|>'})
    return torch.tensor(ids).unsqueeze(0)

def token_ids_to_text(token_ids, tokenizer):
    """Decode a batched tensor of token IDs back into a string.

    Args:
        token_ids: Tensor of token IDs with a leading batch dimension of
            size 1 (``squeeze(0)`` removes that dimension only when its
            size is 1).
        tokenizer: Object whose ``decode(list_of_ids)`` returns text.

    Returns:
        Whatever ``tokenizer.decode`` returns for the flattened ID list.
    """
    return tokenizer.decode(token_ids.squeeze(0).tolist())


start_context = "Every effort moves you"

# Encode the prompt, greedily generate 10 more tokens, then decode the result.
token_ids = generate_text_simple(
    model=model,
    idx=text_to_token_ids(text=start_context, tokenizer=tokenizer),
    max_new_tokens=10,
    context_size=GPT_CONFIG_124M["context_length"],
)

print(token_ids)
print(token_ids_to_text(token_ids=token_ids, tokenizer=tokenizer))


tensor([[ 6109,  3626,  6100,   345, 37532, 24086, 47843, 30961, 42348, 15635,
         24110, 43231, 30967, 42175]])
Every effort moves you Aeiman Byeswickattributeometer inspector Normandy freezerigrate
