In [7]:
import os
from transformers import LlamaForSequenceClassification
from sklearn.metrics import roc_auc_score
import torch
from tqdm import tqdm

def compute_auroc_for_checkpoints(checkpoint_dir, tokenizer, eval_dataset):
    checkpoint_paths = sorted(
        [os.path.join(checkpoint_dir, d) for d in os.listdir(checkpoint_dir) if d.startswith("checkpoint-")],
        key=lambda x: int(x.split("-")[-1])
    )

    auroc_scores = {}

    for checkpoint_path in tqdm(checkpoint_paths, desc="Evaluating checkpoints"):
        try:
            # 모델 로드
            model = LlamaForSequenceClassification.from_pretrained(
                checkpoint_path,
                num_labels=2,
                torch_dtype=torch.bfloat16,
                device_map="auto"
            )
            model.eval()

            # DataLoader
            dataloader = torch.utils.data.DataLoader(eval_dataset, batch_size=4)  # 배치 사이즈 조절 가능
            all_preds, all_labels = [], []

            for batch in dataloader:
                input_ids = batch['input_ids'].to(model.device)
                attention_mask = batch['attention_mask'].to(model.device)
                labels = batch['labels'].cpu().numpy()

                with torch.no_grad():
                    outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                    probs = torch.softmax(outputs.logits, dim=-1)[:, 1].detach().cpu().numpy()

                all_preds.extend(probs)
                all_labels.extend(labels)

            auroc = roc_auc_score(all_labels, all_preds)
            step = checkpoint_path.split("-")[-1]
            auroc_scores[step] = auroc
            print(f"[checkpoint-{step}] AUROC: {auroc:.4f}")
        except Exception as e:
            print(f"⚠️ Failed to evaluate {checkpoint_path}: {e}")
            continue

    return auroc_scores

In [12]:

from transformers import AutoTokenizer
import pandas as pd
from datasets import Dataset, DatasetDict, ClassLabel
import json
import pickle
from sklearn.metrics import roc_auc_score
from transformers import LlamaTokenizer, LlamaForSequenceClassification, TrainingArguments, Trainer, TrainerCallback,TrainerState, TrainerControl
from peft import get_peft_model, LoraConfig, TaskType


model_name = "meta-llama/Meta-Llama-3-8B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token


path = '../results/deepseek-ai/'
model = 'deepseek-r1-distill-qwen-14b'
hash_map = {}
cnt = 0
reasonings = []

with open(path + model + '_label.pkl', 'rb') as f:
    labels = pickle.load(f)

new_labels = []
with open(path + model + '.jsonl', 'r') as f:
    for i, line in enumerate(f):
        data = json.loads(line)
        question, answer, output, reasoning = data['question'], data['expected_output'], data['response'], data['reasoning']
        if question == None or output == None or reasoning == None:
            continue
        cnt += 1
        reasonings.append(data['reasoning'])
        new_labels.append(labels[cnt])

data_dict = {"text": reasonings, "label": new_labels}

raw_dataset = Dataset.from_dict(data_dict)

features = raw_dataset.features.copy()
features["label"] = ClassLabel(names=["negative", "positive"])  # 혹은 0/1이면 이렇게

raw_dataset = raw_dataset.cast(features)
raw_dataset = raw_dataset.rename_column("label", "labels")

train_valtest = raw_dataset.train_test_split(test_size=0.2, seed=42, stratify_by_column="labels")
val_test = train_valtest['test'].train_test_split(test_size=0.5, seed=42, stratify_by_column="labels")

# wrap in DatasetDict
dataset = DatasetDict({
    'train': train_valtest['train'],
    'validation': val_test['train'],
    'test': val_test['test'],
})
max_length = 8096
dataset = dataset.map(lambda e: tokenizer(e['text'], truncation=True, padding='max_length', max_length=max_length), batched=True)
dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

eval_dataset = dataset["test"]



Casting the dataset: 100%|██████████| 1733/1733 [00:00<00:00, 80281.08 examples/s]
Map: 100%|██████████| 1386/1386 [00:05<00:00, 256.92 examples/s]
Map: 100%|██████████| 173/173 [00:00<00:00, 308.14 examples/s]
Map: 100%|██████████| 174/174 [00:00<00:00, 288.52 examples/s]
Evaluating checkpoints: 0it [00:00, ?it/s]


{}


Fetching 4 files: 100%|██████████| 4/4 [00:00<00:00, 388.51it/s]
Loading checkpoint shards: 100%|██████████| 4/4 [00:02<00:00,  1.81it/s]
Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Meta-Llama-3-8B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Loading checkpoint shards: 100%|██████████| 4/4 [00:02<00:00,  1.75it/s]
Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Meta-Llama-3-8B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


ValueError: max() arg is an empty sequence

In [13]:
checkpoint_dir = "/home/sangwon/deliberativealignment/llama-binary-cls/checkpoint-13860"
# predictions = trainer.predict(eval_dataset)
auroc_scores = compute_auroc_for_checkpoints(checkpoint_dir, tokenizer, eval_dataset)
print(auroc_scores)

base_model = LlamaForSequenceClassification.from_pretrained(
    checkpoint_dir,
    num_labels=2,
    torch_dtype=torch.float16,
    device_map="auto"  
)
base_model.config.pad_token_id = tokenizer.pad_token_id

# ========== 3. Setting Lora ==========
peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS, 
    inference_mode=False,
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    target_modules=["q_proj", "v_proj"]  # Set proper modules for Llama
)

model = get_peft_model(base_model, peft_config)
model = model.bfloat16()
model.eval()


best_step = max(auroc_scores, key=auroc_scores.get)
print(f"\n✅ Best checkpoint: checkpoint-{best_step} with AUROC: {auroc_scores[best_step]:.4f}")

Evaluating checkpoints: 0it [00:00, ?it/s]


{}


Fetching 4 files: 100%|██████████| 4/4 [00:00<00:00, 539.23it/s]
Loading checkpoint shards: 100%|██████████| 4/4 [00:02<00:00,  1.81it/s]
Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Meta-Llama-3-8B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Loading checkpoint shards: 100%|██████████| 4/4 [00:02<00:00,  1.74it/s]
Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Meta-Llama-3-8B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


ValueError: max() arg is an empty sequence

In [16]:
model.loa

FileNotFoundError: [Errno 2] No such file or directory: '/home/sangwon/deliberativealignment/llama-binary-cls/checkpoint-13860/pytorch_model.bin'