# Loading and Using Finetuned Models

This notebook contains simplified code for loading a fine-tuned classification model, which in this case is based on GPT-2 Medium (355M), and for using it to classify new text. The detailed code can be found in [Notebook 05-finetuning-text-classification.ipynb](05-finetuning-text-classification.ipynb).

In [1]:
from importlib.metadata import version

# Packages whose installed versions are reported below
pkgs = ["tiktoken", "torch"]

# Emit one "<package> version: <installed version>" line per package
for pkg_name in pkgs:
    installed = version(pkg_name)
    print(f"{pkg_name} version: {installed}")

tiktoken version: 0.9.0
torch version: 2.7.0


In [2]:
from pathlib import Path

# Location of the fine-tuned classifier checkpoint, relative to the working directory
ft_model_path = Path("models") / "review_classifier_gpt2_medium.pth"

# Only warn (no exception is raised here) when the checkpoint file is absent
if not ft_model_path.exists():
    print(
        f"Couldn't find {ft_model_path}.\n"
        "Re-run NB 05-finetuning-text-classification to fine tune and save a model."
    )

In [4]:
from utils.components import GPTModel

# Settings shared by every GPT-2 size
BASE_CONFIG = dict(
    vocab_size=50257,     # Vocabulary size
    context_length=1024,  # Context length
    drop_rate=0.0,        # Dropout rate
    qkv_bias=True,        # Query-key-value bias
)

# Size-specific settings, keyed by model name
model_configs = {
    "gpt2-small (124M)": dict(emb_dim=768, n_layers=12, n_heads=12),
    "gpt2-medium (355M)": dict(emb_dim=1024, n_layers=24, n_heads=16),
    "gpt2-large (774M)": dict(emb_dim=1280, n_layers=36, n_heads=20),
    "gpt2-xl (1558M)": dict(emb_dim=1600, n_layers=48, n_heads=25),
}

# Which entry of model_configs to use
CHOOSE_MODEL = "gpt2-medium (355M)"

# Merge the selected size-specific settings into the shared configuration
BASE_CONFIG.update(model_configs[CHOOSE_MODEL])

# Build the model from the merged configuration (no checkpoint is loaded in this cell)
model = GPTModel(BASE_CONFIG)

In [7]:
import torch 

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Swap the output head for a linear layer with one output per class
num_classes = 2
model.out_head = torch.nn.Linear(
    in_features=BASE_CONFIG["emb_dim"],
    out_features=num_classes,
)

# Restore the fine-tuned weights from the checkpoint file, mapping tensors onto `device`
model.load_state_dict(
    torch.load(ft_model_path, map_location=device, weights_only=True)
)

# Move the model to the target device and switch it to evaluation mode
model.to(device)
model.eval();

In [8]:
import tiktoken

# Load tiktoken's "gpt2" encoding; later cells pass this tokenizer to classify_review
tokenizer = tiktoken.get_encoding("gpt2")

In [9]:
# Function to classify a review
def classify_review(text, model, tokenizer, device, max_length=None, pad_token_id=50256):
    """Classify a single text with the fine-tuned model and return its label.

    The text is tokenized, truncated and right-padded to a fixed target
    length, and passed through ``model``. The prediction is the argmax over
    the logits at the last sequence position.

    Args:
        text: Input string to classify.
        model: Classifier exposing ``pos_emb.weight`` (its first dimension is
            used as the supported context length). Calling it on a batch of
            token IDs must return logits indexable as
            ``[batch, position, class]``.
        tokenizer: Object with an ``encode(text)`` method returning a list of
            token IDs.
        device: Device on which the input tensor is created.
        max_length: Target sequence length after truncation and padding.
            Values above the model's context length are clamped to it.
            ``None`` (the default) uses the full context length.
        pad_token_id: Token ID appended to inputs shorter than the target
            length.

    Returns:
        ``"spam"`` if the predicted class index is 1, otherwise ``"not spam"``.

    Raises:
        ValueError: If ``max_length`` is given but is smaller than 1.
    """
    model.eval()

    # The number of positional embeddings bounds the usable sequence length
    supported_context_length = model.pos_emb.weight.shape[0]

    # Resolve one target length that is used for BOTH truncation and padding,
    # so the padded input can never exceed the model's context window
    if max_length is None:
        target_length = supported_context_length
    elif max_length < 1:
        raise ValueError(f"max_length must be a positive integer, got {max_length}")
    else:
        target_length = min(max_length, supported_context_length)

    # Truncate sequences if they are too long (slicing also yields a fresh list)
    input_ids = tokenizer.encode(text)[:target_length]

    # Right-pad shorter sequences up to the target length
    input_ids += [pad_token_id] * (target_length - len(input_ids))
    input_tensor = torch.tensor(input_ids, device=device).unsqueeze(0)  # added batch dim

    # Inference
    with torch.no_grad():
        logits = model(input_tensor)[:, -1, :]  # Logits of the last output token
    predicted_label = torch.argmax(logits, dim=-1).item()

    # NOTE(review): these label strings read like a spam classifier's; confirm they
    # match the classes the loaded checkpoint was actually fine-tuned on.
    return "spam" if predicted_label == 1 else "not spam"

In [17]:
# First example input: a two-sentence film description
text_1 = (
    "Narrated by Liam Neeson, Everest follows a daring team of mountaineers on a quest to summit Earth’s highest peak."
    " Captured with astonishing clarity, this awe-inspiring film takes you deep into the majesty of the Himalayas."
)

# Classify the text, padding/truncating it to 120 tokens, and print the predicted label
print(classify_review(text_1, model, tokenizer, device, max_length=120))

spam


In [26]:
# Second example input, assembled from two fragments joined without a separator
text_2 = "".join([
    "Hi Bilal, I have attached the rental ledger. ",
    " We would like to start advertising this property from now.",
])

# Classify the text, padding/truncating it to 120 tokens, and print the predicted label
print(
    classify_review(
        text=text_2,
        model=model,
        tokenizer=tokenizer,
        device=device,
        max_length=120,
    )
)


not spam
