# Quick Benchmark Test

빠른 검증을 위한 최소 샘플 벤치마크

In [None]:
import os
import sys
import torch
from transformers import AutoTokenizer

# Make the working directory importable so the project-local modules
# imported right after this (modeling_llada, experiment_utils, ...) resolve.
current_dir = os.getcwd()
path_missing = current_dir not in sys.path
if path_missing:
    sys.path.append(current_dir)

from modeling_llada import LLaDAModelLM
from configuration_llada import LLaDAConfig
import experiment_utils
import decoding

print("✅ Modules loaded")

In [None]:
# Load the tokenizer, config and model weights from one checkpoint id.
HF_MODEL_ID = "GSAI-ML/LLaDA-8B-Base"

tokenizer = AutoTokenizer.from_pretrained(HF_MODEL_ID)
config = LLaDAConfig.from_pretrained(HF_MODEL_ID)

model = LLaDAModelLM.from_pretrained(
    HF_MODEL_ID,
    config=config,
    torch_dtype="auto",
)

# Move to the GPU only when one is visible; otherwise the model stays where
# from_pretrained placed it.
if torch.cuda.is_available():
    model.cuda()
# Put the model in eval mode before running the benchmark.
model.eval()

print("✅ Model loaded")

In [None]:
# Smoke-test the benchmark pipeline on a deliberately tiny workload.
print("Starting quick test (2 samples, 2 configurations)...")

results_df = experiment_utils.run_academic_benchmark(
    model=model,
    tokenizer=tokenizer,
    # Two threshold values -> only two configurations.
    thresholds=[0.05, 0.10],
    # Only two samples.
    samples=2,
    # Reduced step count and generation length to keep the run short.
    steps=32,
    gen_length=32,
    block_length=32,
    remask_budget=0.05,
    alpha_decay=0.05,
)

# One print call with a newline separator emits the same text as four
# consecutive prints of the individual pieces.
print(
    "\n✅ Test completed!",
    f"Results shape: {results_df.shape}",
    "\nFirst few rows:",
    results_df.head(),
    sep="\n",
)

In [None]:
# Per-threshold mean of the reported metrics.
print("\n=== Quick Summary ===")
_summary_metrics = ['Acc_Exp', 'PPL_Delta', 'Stability_Delta']
summary = results_df.groupby('Threshold')[_summary_metrics].mean()
print(summary)