In [9]:
import sys
import os

# Make the project's `models` and `data` packages importable from this notebook.
# NOTE(review): the fallback is a machine-specific absolute path; set the
# POSITIONAL_GPT2_ROOT environment variable to run the notebook on another machine.
PROJECT_ROOT = os.environ.get("POSITIONAL_GPT2_ROOT", "/Users/benjawesome/coding/positional-gpt-2")
if PROJECT_ROOT not in sys.path:  # re-running the cell must not pile up duplicate entries
    sys.path.append(PROJECT_ROOT)

import torch
from models.gpt_implementation import GPT2Model
from data.dataloader import GPTLoader

# The checkpoint and the encoded validation set both live in ../files, a relative
# path that is resolved against the notebook's current working directory.
data_dir = os.path.join("..", "files")
model_path, val_path = (
    os.path.join(data_dir, filename)
    for filename in ("gpt2_model.pt", "encoded_val.pt")
)

# Use the MPS backend when PyTorch reports it as available, otherwise fall back to CPU.
if torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

# --- Architecture: every value in this group is passed to GPT2Model in this cell. ---
VOCAB_SIZE = 5001
MAX_LEN = 512          # also passed to GPTLoader as its second argument
D_MODEL = 256
N_LAYERS = 6
NUM_HEADS = 8
D_FF = 4 * D_MODEL
DROPOUT = 0.1
ACTIVATION = torch.nn.GELU()

# --- Optimisation settings: defined here but not referenced by this evaluation cell. ---
NUM_EPOCHS = 5
LR = 3e-4
WEIGHT_DECAY = 0.01

# --- Data pipeline ---
BATCH_SIZE = 32        # batch size handed to the validation DataLoader
# NOTE(review): STRIDE is not referenced in this cell; the GPTLoader call passes a
# literal 10 as its third argument instead — confirm which value is intended.
STRIDE = 256

# Rebuild the network from the hyperparameters defined in this cell. The checkpoint's
# tensors are loaded into it next, so these presumably have to match the configuration
# the checkpoint was trained with — TODO confirm against the training script.
model = GPT2Model(VOCAB_SIZE, N_LAYERS, D_MODEL, NUM_HEADS, D_FF, ACTIVATION, DROPOUT, device, MAX_LEN)

# weights_only=True limits unpickling to tensors and plain containers, so a tampered
# checkpoint file cannot execute arbitrary code on load. map_location remaps the stored
# tensors onto `device` regardless of where they were saved from.
model.load_state_dict(torch.load(model_path, map_location=device, weights_only=True))
model.to(device)
model.eval()  # evaluation mode, so train-only behaviour (e.g. dropout) is switched off

print("Model loaded successfully.")

# NOTE(review): `val_data` is not referenced again in this cell — GPTLoader receives the
# path, not this object. Kept in case another cell uses it; otherwise this load is redundant.
val_data = torch.load(val_path)

# NOTE(review): the third argument is a bare literal. Presumably it is the window stride
# (the STRIDE constant is 256 and unused in this cell) — confirm which value is intended.
val_dataset = GPTLoader(val_path, MAX_LEN, 10)

# shuffle=False keeps the iteration order fixed from one run to the next.
val_dataloader = torch.utils.data.DataLoader(val_dataset, shuffle=False, batch_size=BATCH_SIZE)

# Running totals across all validation batches.
total_correct = 0
total_tokens = 0

# Idempotent; repeated here so the loop is still correct if the model was switched
# back to training mode since it was loaded.
model.eval()
print("Running validation (Accuracy)...")

# Scoring needs no gradients, so autograd bookkeeping is disabled for the whole loop.
with torch.no_grad():
    for inputs, targets in val_dataloader:
        inputs, targets = inputs.to(device), targets.to(device)

        # The model returns a pair; only the first element is used here.
        # assumes `outputs` holds per-position scores over the vocabulary in its last
        # dimension, aligned with `targets` — TODO confirm against GPT2Model.forward
        outputs, _ = model(inputs)

        # Top-1 prediction: index of the highest score along the last dimension.
        predictions = torch.argmax(outputs, dim=-1)

        # Without this check a shape mismatch would broadcast in `==` and silently
        # produce a meaningless count instead of an error.
        if predictions.shape != targets.shape:
            raise ValueError(
                f"Prediction shape {tuple(predictions.shape)} does not match "
                f"target shape {tuple(targets.shape)}"
            )

        correct_preds = (predictions == targets)

        total_correct += correct_preds.sum().item()
        # NOTE(review): every target position is counted; if the dataset emits padding
        # tokens they are included in the denominator as well.
        total_tokens += targets.numel()

# An empty dataloader would otherwise surface as a bare ZeroDivisionError below.
if total_tokens == 0:
    raise ValueError("Validation dataloader yielded no tokens; check val_path and the GPTLoader arguments.")

# Token-level accuracy, expressed as a percentage of all target positions.
final_accuracy = (total_correct / total_tokens) * 100

print("-" * 30)
print(f"Validation Accuracy: {final_accuracy:.2f}%")
print(f"Total Tokens Checked: {total_tokens}")
print("-" * 30)

Model loaded successfully.
Running validation (Accuracy)...
------------------------------
Validation Accuracy: 25.88%
Total Tokens Checked: 14650368
------------------------------
