# Model Testing Notebook

Testing notebook to ensure the GPT model class is working as expected.

In [6]:
from src.models.gpt import GPT
from src.models.configs.gpt_config import GPTConfig
import torch


In [7]:
class GPTConfig:
    block_size: int = 256
    vocab_size: int = 65  # GPT-2 use 50304
    n_layer: int = 12
    n_head: int = 12
    n_embd: int = 768
    dropout: float = 0.0
    bias: bool = True  # True: bias in Linears and LayerNorms, like GPT-2. False: a bit better and faster

model_config = GPTConfig

In [8]:
model = GPT(model_config)

INFO:GPT:Initializing GPT model...
INFO:GPT:Model initialized. Ready to go...
INFO:GPT:number of parameters: 85.105920M


## Training Step Check

In [9]:
sample_input = torch.randint(0, model_config.vocab_size-1, [32, model_config.block_size])
sample_label = torch.randint(0, model_config.vocab_size-1, [32, model_config.block_size])

In [10]:
model.train()
logits, loss = model(sample_input, sample_label)
print(logits.shape, loss)

torch.Size([32, 256, 65]) tensor(4.3526, grad_fn=<NllLossBackward0>)


## Inference Step Check

In [11]:
start_ids = torch.zeros([1])
idx = torch.tensor(start_ids, dtype=torch.long, device="cpu")[
            None, ...
        ]

  idx = torch.tensor(start_ids, dtype=torch.long, device="cpu")[


In [13]:
model.eval()
tokens = model.generate(idx, 1, 1, 10)
print(tokens.shape)
print(tokens)

torch.Size([1, 2])
tensor([[ 0, 10]])
