In [None]:
import os
import pandas as pd
import torch
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from datasets import load_metric

# All artifacts (model checkpoint and evaluation data) live under one root.
BASE_DIR = "/root/nlp_data"
# Change this to your own <output_dir>/final_model path.
MODEL_DIR = os.path.join(BASE_DIR, "t5_large_v5", "final_model")
DEV_PATH = os.path.join(BASE_DIR, "dev.csv")

# Load the fine-tuned checkpoint and place the model on the GPU.
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_DIR)
model = model.to("cuda")

# ROUGE scorer shared by every evaluation cell below.
rouge = load_metric("rouge")

# Load the dev split, keeping only the source text and reference summary.
df_dev = pd.read_csv(DEV_PATH)
df_dev = df_dev[["dialogue", "summary"]]
dev_ds = Dataset.from_pandas(df_dev)

def generate_batch(batch, *, max_input_length=512, max_summary_length=128, num_beams=4):
    """Generate summaries for one batch of dialogues with beam search.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        batch: Mapping whose "dialogue" entry holds the input texts
            (for example a slice of a ``datasets.Dataset``).
        max_input_length: Token limit per dialogue; longer inputs are truncated.
        max_summary_length: Value passed as ``max_length`` to ``model.generate``.
        num_beams: Beam width passed to ``model.generate``.

    Returns:
        A list of decoded strings, one per generated sequence, with special
        tokens removed.
    """
    # Follow the model's own device instead of hard-coding "cuda", so the
    # function keeps working if the model is moved (other GPU index, CPU).
    inputs = tokenizer(
        batch["dialogue"],
        max_length=max_input_length,
        truncation=True,
        padding=True,
        return_tensors="pt",
    ).to(model.device)

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_length=max_summary_length,
            num_beams=num_beams,
            early_stopping=True,
        )
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)

# Accumulate stripped predictions and references over the whole dev set.
all_preds, all_labels = [], []
batch_size = 8

for start in range(0, len(dev_ds), batch_size):
    chunk = dev_ds[start:start + batch_size]
    generated = generate_batch(chunk)
    references = [ref.strip() for ref in chunk["summary"]]
    all_preds += [text.strip() for text in generated]
    all_labels += references

# NOTE(review): use_stemmer and the metric's default tokenization are
# presumably English-oriented -- confirm they suit the language of dev.csv.
scores = rouge.compute(predictions=all_preds, references=all_labels, use_stemmer=True)

# Report the mid (aggregate) F-measure for each ROUGE variant.
for heading, rouge_key in (
    ("ROUGE-1:", "rouge1"),
    ("ROUGE-2:", "rouge2"),
    ("ROUGE-L:", "rougeL"),
):
    print(heading, scores[rouge_key].mid.fmeasure)


In [None]:
SAMSUM_VALID = os.path.join(BASE_DIR, "samsum", "samsum_valid_ko.csv")

# Select the Korean columns and rename them to the names generate_batch
# and the loop below expect.
column_map = {"dialogue_ko": "dialogue", "summary_ko": "summary"}
df_sv = pd.read_csv(SAMSUM_VALID)[list(column_map)].rename(columns=column_map)
sv_ds = Dataset.from_pandas(df_sv)

# Reuse generate_batch (and batch_size) defined in the previous cell.
all_preds, all_labels = [], []

for start in range(0, len(sv_ds), batch_size):
    chunk = sv_ds[start:start + batch_size]
    generated = generate_batch(chunk)
    references = [ref.strip() for ref in chunk["summary"]]
    all_preds += [text.strip() for text in generated]
    all_labels += references

# NOTE(review): this data looks Korean (per the *_ko column names), while
# use_stemmer and the metric's default tokenization are presumably
# English-oriented -- confirm the resulting scores are meaningful.
scores_sv = rouge.compute(predictions=all_preds, references=all_labels, use_stemmer=True)

# Report the mid (aggregate) F-measure for each ROUGE variant.
for heading, rouge_key in (
    ("SamSum-ko ROUGE-1:", "rouge1"),
    ("SamSum-ko ROUGE-2:", "rouge2"),
    ("SamSum-ko ROUGE-L:", "rougeL"),
):
    print(heading, scores_sv[rouge_key].mid.fmeasure)
