In [1]:
import torch
import json
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
from tqdm import tqdm
import math

# ---------------------------------------------------------------------------
# Configuration: where the base checkpoint, the LoRA adapter checkpoint and
# the JSONL evaluation file live on disk.
# ---------------------------------------------------------------------------
base_model = r"D:\AI-Inosuke\models\Qwen2.5-3B-Instruct"
lora_model = r"D:\AI-Inosuke\models\inosuke-lora\checkpoint-2605"
test_file = r"D:\AI-Inosuke\test\test_set.jsonl"

# The tokenizer is taken from the base model directory (not the adapter).
tokenizer = AutoTokenizer.from_pretrained(base_model)

# 4-bit NF4 quantization with double quantization; compute runs in bfloat16.
quant_settings = {
    "load_in_4bit": True,
    "bnb_4bit_use_double_quant": True,
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_compute_dtype": torch.bfloat16,
}
bnb_config = BitsAndBytesConfig(**quant_settings)

# Step 1: quantized base model, with device placement delegated via "auto".
print("Loading base model...")
model = AutoModelForCausalLM.from_pretrained(
    base_model, quantization_config=bnb_config, device_map="auto"
)

# Step 2: wrap the base model with the fine-tuned LoRA adapter weights.
print("Loading LoRA adapter...")
model = PeftModel.from_pretrained(model, lora_model)

# Evaluation mode: this script only scores text, it never trains.
model.eval()

# Load test set
# Each non-blank line of the JSONL file is one example with an "instruction",
# an optional "input" and an "output" field. Every example is rendered into
# the "### Instruction / ### Input / ### Response" text layout built below,
# and the rendered strings are collected in `test_data`.
print("Loading test data...")
test_data = []
with open(test_file, "r", encoding="utf-8") as f:
    for line in f:
        # Skip blank / whitespace-only lines (e.g. a trailing empty line at
        # the end of the file); json.loads would raise JSONDecodeError on them.
        if not line.strip():
            continue

        ex = json.loads(line)

        instr = ex["instruction"]
        inp = ex.get("input", "")
        out = ex["output"]

        # Only emit the "### Input" section when the example actually has a
        # non-empty input (missing, None, empty and whitespace-only all skip it).
        if inp and inp.strip():
            text = f"### Instruction:\n{instr}\n\n### Input:\n{inp}\n\n### Response:\n{out}"
        else:
            text = f"### Instruction:\n{instr}\n\n### Response:\n{out}"
        test_data.append(text)

# Compute perplexity
# Perplexity = exp(average negative log-likelihood per predicted token),
# pooled over every example in `test_data`. Note that the whole rendered text
# (instruction, optional input AND response) is scored, not just the response.
print("Computing perplexity...")

total_loss = 0.0   # sum of per-token NLL over all scored tokens
total_tokens = 0   # number of tokens that were actually scored

with torch.no_grad():
    for text in tqdm(test_data):
        # One example per forward pass; texts longer than 1024 tokens are truncated.
        inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=1024)
        input_ids = inputs["input_ids"].to(model.device)

        # When labels == input_ids the causal-LM head shifts the labels by one
        # position internally, so only (sequence length - 1) tokens are
        # predicted and `outputs.loss` is the mean over those positions.
        # Weighting by the full length would mis-weight examples when pooling.
        num_tokens = input_ids.numel() - 1
        if num_tokens < 1:
            # Nothing to predict for an empty or single-token sequence; the
            # mean loss would be NaN and poison the running total.
            continue

        outputs = model(input_ids=input_ids, labels=input_ids)
        loss = outputs.loss

        # Convert the per-example mean back to a sum so the final average is a
        # true per-token average across the whole test set.
        total_loss += loss.item() * num_tokens
        total_tokens += num_tokens

if total_tokens == 0:
    # Empty test file (or only degenerate examples): fail with a clear message
    # instead of a bare ZeroDivisionError.
    raise ValueError("No scorable tokens found in the test set; cannot compute perplexity.")

avg_loss = total_loss / total_tokens
ppl = math.exp(avg_loss)

print(f"\n=== Perplexity on test set: {ppl:.2f} ===")


  from .autonotebook import tqdm as notebook_tqdm


Loading base model...


Loading checkpoint shards: 100%|██████████| 2/2 [00:09<00:00,  4.92s/it]


Loading LoRA adapter...
Loading test data...
Computing perplexity...


100%|██████████| 5/5 [00:01<00:00,  3.84it/s]


=== Perplexity on test set: 8.23 ===



