In [1]:
import os
import sys

# Make the project's modules importable: the path appended here is
# "<parent of the current working directory>/src", so the notebook must be
# started from a directory that sits next to `src`.
sys.path.append(os.path.join(os.path.dirname(os.getcwd()), 'src'))

from GPT_model import GPTModel
import torch

In [None]:
# Hyper-parameters handed to GPTModel and reused later for the context window.
GPT_CONFIG_124M = dict(
    vocab_size=50304,      # vocabulary size
    context_length=512,    # context length
    emb_dim=768,           # embedding dimension
    n_heads=12,            # number of attention heads
    n_layers=12,           # number of layers
    drop_rate=0.1,         # dropout rate
    qkv_bias=False,        # query-key-value bias
)

In [3]:
# Seed PyTorch's global RNG before constructing the model so that any random
# initialisation performed inside GPTModel is repeatable across runs.
torch.manual_seed(123)

# Instantiate the network from the configuration dictionary defined above.
model = GPTModel(GPT_CONFIG_124M)



In [4]:
# Restore the saved parameters from the checkpoint one directory above the
# notebook, mapping every tensor onto the CPU.
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot execute arbitrary code while being loaded.
model.load_state_dict(
    torch.load(
        "../model.pth",
        map_location=torch.device("cpu"),
        weights_only=True,
    )
)
# Switch to evaluation mode; as the last expression this also displays the
# module structure.
model.eval()

GPTModel(
  (tok_emb): Embedding(50304, 768)
  (pos_emb): Embedding(512, 768)
  (drop_emb): Dropout(p=0.1, inplace=False)
  (trf_blocks): Sequential(
    (0): TransformerBlock(
      (att): PyTorchMultiHeadAttention(
        (qkv): Linear(in_features=768, out_features=2304, bias=False)
        (proj): Linear(in_features=768, out_features=768, bias=True)
      )
      (ff): FeedForward(
        (layers): Sequential(
          (0): Linear(in_features=768, out_features=3072, bias=True)
          (1): GELU(approximate='none')
          (2): Linear(in_features=3072, out_features=768, bias=True)
        )
      )
      (norm1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (norm2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (drop_shortcut): Dropout(p=0.1, inplace=False)
    )
    (1): TransformerBlock(
      (att): PyTorchMultiHeadAttention(
        (qkv): Linear(in_features=768, out_features=2304, bias=False)
        (proj): Linear(in_features=768, out_feature

In [5]:
import tiktoken

from utils.generate_text_simple import generate_text_simple

def text_to_token_ids(text, tokenizer):
    """Encode ``text`` and return its token ids as a batched tensor.

    Args:
        text: String to tokenize.
        tokenizer: Object exposing ``encode(text, allowed_special=...)`` that
            returns a sequence of integer token ids.

    Returns:
        A ``torch.long`` tensor of shape ``(1, num_tokens)``; the leading axis
        is added with ``unsqueeze(0)``.
    """
    encoded = tokenizer.encode(text, allowed_special={"<|endoftext|>"})
    # Pin the dtype explicitly: for an empty encoding torch.tensor([]) would
    # otherwise be inferred as float32, which is not a valid token-id tensor.
    return torch.tensor(encoded, dtype=torch.long).unsqueeze(0)


def token_ids_to_text(token_ids, tokenizer: "tiktoken.Encoding"):
    """Decode a batched tensor of token ids back into a string.

    Args:
        token_ids: Tensor of token ids; a leading axis of size 1 is removed
            with ``squeeze(0)`` before decoding.
        tokenizer: Object exposing ``decode(list_of_ids)``. The annotation is
            the encoding class rather than the ``tiktoken`` module itself, and
            is written as a string so it is not evaluated at definition time.

    Returns:
        The value returned by ``tokenizer.decode`` for the flattened id list.
    """
    return tokenizer.decode(token_ids.squeeze(0).tolist())

In [6]:
from tokenize import TokenInfo


# Prompt used for the tokenizer round-trip and the first generation run.
text = "عبد الله "

# GPT-2 byte-pair encoding provided by tiktoken.
tokenizer = tiktoken.get_encoding("gpt2")

# Encode the prompt; the bare name on the last line displays the tensor.
token_ids = text_to_token_ids(text, tokenizer)
token_ids

tensor([[44690, 39848, 38843, 28981, 13862, 29519,   220]])

In [7]:
# Round-trip check: decoding the ids should give back the original prompt.
token_ids_to_text(token_ids , tokenizer)

'عبد الله '

In [8]:
# Extend the encoded prompt with the baseline helper, requesting 50 new tokens
# and using the model's configured context length as the window size.
prompt_window = GPT_CONFIG_124M["context_length"]
token_ids = generate_text_simple(
    model=model,
    idx=text_to_token_ids(text, tokenizer),
    max_new_tokens=50,
    context_size=prompt_window,
)
del prompt_window  # keep the notebook namespace unchanged
token_ids

tensor([[44690, 39848, 38843, 28981, 13862, 29519,   220, 13862, 12919, 17550,
            96,   148,   106, 39848, 26897, 23338, 12919, 17550,   117, 39848,
         38843, 28981, 13862, 29519, 17550,   101, 23338, 17550,   117, 39848,
         38843, 28981, 13862, 29519, 17550,    96, 23338, 29519, 17550,    96,
         23338, 17550,   109, 45692, 30335, 13862, 28981, 13862, 29519, 17550,
           113, 13862,   149,   231, 28981, 13862, 29519]])

In [9]:
# Decode the generated ids (prompt followed by the continuation) into text.
token_ids_to_text(token_ids , tokenizer)

'عبد الله لا أخبرنا عبد الله بن عبد الله أنه أن رسول الله صلى الله'

# Temperature scaling

In [10]:
# Toy vocabulary: each word is mapped to its position in this list.
vocab = {
    word: position
    for position, word in enumerate(
        ["closer", "every", "effort", "forward", "inches",
         "moves", "pizza", "toward", "you"]
    )
}

# Reverse lookup: token id -> word.
inverse_vocab = {token_id: word for word, token_id in vocab.items()}

# Suppose the input is "every effort moves you" and the LLM returns these
# logits (one per vocabulary entry) for the next token.
next_token_logits = torch.tensor(
    [4.51, 0.89, -1.90, 6.75, 1.63, -1.62, -1.89, 6.28, 1.79]
)

# Greedy decoding: normalise the logits and take the most probable entry.
probas = next_token_logits.softmax(dim=0)
next_token_id = probas.argmax().item()

# Show the word that corresponds to the selected id.
print(inverse_vocab[next_token_id])


forward


In [14]:
# Draw the next token from the probability distribution instead of taking the
# argmax. The seed call is left disabled, so re-running this cell can print a
# different word each time.
# torch.manual_seed(123)
next_token_id = probas.multinomial(num_samples=1).item()
print(inverse_vocab[next_token_id])

forward


In [17]:
# Scratch check: `/` is true division, so the displayed result is the float 2.0.
10/5

2.0

In [19]:
def softmax_with_temperature(logits, temperature):
    """Convert logits to probabilities after dividing them by ``temperature``.

    Args:
        logits: Tensor of unnormalised scores. The softmax is taken over the
            last axis, so a 1-D tensor and a batch of rows both work.
        temperature: Positive scaling factor. Values above 1 flatten the
            distribution, values below 1 sharpen it.

    Returns:
        Tensor with the same shape as ``logits`` whose last axis sums to 1.

    Raises:
        ValueError: If ``temperature`` is not strictly positive. Dividing by
            zero would otherwise produce NaN probabilities silently.
    """
    if temperature <= 0:
        raise ValueError(f"temperature must be > 0, got {temperature}")
    scaled_logits = logits / temperature
    # dim=-1 equals dim=0 for 1-D input and keeps batched input row-wise.
    return torch.softmax(scaled_logits, dim=-1)

# Top-K

# Modify the generation function

In [20]:
import torch



In [23]:
def generate(model, idx, max_new_tokens, context_size, temperature=0.0, top_k=None, eos_id=None):
    """Autoregressively extend ``idx`` with optional top-k filtering and sampling.

    Args:
        model: Callable mapping a ``(batch, tokens)`` id tensor to logits that
            are indexed as ``(batch, tokens, vocab)``.
        idx: Tensor of token ids with shape ``(batch, tokens)``; used as the prompt.
        max_new_tokens: Upper bound on the number of tokens appended.
        context_size: Only the last ``context_size`` tokens are fed to the model.
        temperature: If > 0, logits are divided by it and the next token is
            sampled; otherwise the arg-max token is taken.
        top_k: If given, logits below the k-th largest value of each row are set
            to ``-inf`` before selection. Values larger than the vocabulary are
            clamped to the vocabulary size.
        eos_id: If given, generation stops (without appending) as soon as every
            row of the batch selects this id in the same step.

    Returns:
        Tensor of shape ``(batch, tokens + n)`` with ``n <= max_new_tokens``.

    Raises:
        ValueError: If ``top_k`` is given but smaller than 1.
    """
    if top_k is not None and top_k < 1:
        raise ValueError(f"top_k must be >= 1, got {top_k}")

    for _ in range(max_new_tokens):
        # Crop the running sequence to the supported context window.
        idx_cond = idx[:, -context_size:]
        with torch.no_grad():
            logits = model(idx_cond)
        # Only the prediction for the last position is needed.
        logits = logits[:, -1, :]

        if top_k is not None:
            # torch.topk raises if k exceeds the size of the last axis.
            k = min(top_k, logits.size(-1))
            top_logits, _ = torch.topk(logits, k)
            # Keep the trailing axis so the per-row threshold has shape
            # (batch, 1) and broadcasts against (batch, vocab) for any batch size.
            min_val = top_logits[:, -1:]
            logits = logits.masked_fill(logits < min_val, float("-inf"))

        if temperature > 0.0:
            logits = logits / temperature
            probs = torch.softmax(logits, dim=-1)  # (batch_size, vocab_size)
            idx_next = torch.multinomial(probs, num_samples=1)  # (batch_size, 1)
        else:
            # Greedy decoding: pick the highest-scoring vocabulary entry.
            idx_next = torch.argmax(logits, dim=-1, keepdim=True)  # (batch_size, 1)

        # Reduce with .all() so the check is a single boolean for any batch
        # size; for one sequence this is the same test as before.
        if eos_id is not None and bool((idx_next == eos_id).all()):
            break

        # Append the chosen ids to the running sequence.
        idx = torch.cat((idx, idx_next), dim=1)  # (batch_size, num_tokens+1)

    return idx

In [30]:
from tokenize import TokenInfo


# Second prompt, used for the top-k / temperature generation run.
text = "قال رسول الله صلي الله عليه وسلم"

# GPT-2 byte-pair encoding provided by tiktoken.
tokenizer = tiktoken.get_encoding("gpt2")

# Encode the prompt, then decode it again; the decoded string is the cell's
# displayed value and should match the prompt.
token_ids = text_to_token_ids(text, tokenizer)
token_ids_to_text(token_ids, tokenizer)


'قال رسول الله صلي الله عليه وسلم'

In [37]:
# Extend the encoded prompt with the new `generate` function.
# With temperature=0.0 `generate` takes its arg-max branch, and the largest
# logit always survives the top-k filter, so top_k=3 does not change which
# token is picked in this call.
generation_kwargs = dict(
    max_new_tokens=50,
    context_size=GPT_CONFIG_124M["context_length"],
    temperature=0.0,
    top_k=3,
)
token_ids = generate(
    model=model,
    idx=text_to_token_ids(text, tokenizer),
    **generation_kwargs,
)
del generation_kwargs  # keep the notebook namespace unchanged
token_ids

tensor([[  149,   224, 23525, 17550,   109, 45692, 30335, 13862, 28981, 13862,
         29519, 17550,   113, 13862, 22654, 28981, 13862, 29519, 17550,   117,
         13862, 22654, 29519, 42092, 45692, 13862, 25405, 18923,   232,   149,
           224, 30335, 13862,   357,   148,    96,   148,   255,   148,   105,
             8, 18923,   223, 22654, 28981,   148,    96,   148,   255,   148,
           105, 17550,   101, 34247,   101, 17550,   255, 38843, 22654,   148,
           104,   220,   151,   109,   151,   111,   151,   111,   151,   111,
           151,   111,   151,   111,   151,   111, 17550]])

In [38]:
# Decode the ids returned by `generate` back into text for inspection.
token_ids_to_text(token_ids , tokenizer)

'قال رسول الله صلي الله عليه وسلم يقول (أحج) في الأحج باب حديث ۱۳۳۳۳۳۳ �'