In [None]:
import torch
import pandas as pd
from tqdm import tqdm
from transformers import (
    AutoTokenizer, 
    AutoModelForCausalLM, 
    AutoModelForSequenceClassification
)
from peft import PeftModel

# Display settings: never truncate cell text, and show up to 100 rows of a DataFrame.
pd.options.display.max_colwidth = None
pd.options.display.max_rows = 100

# Device selection: prefer the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

In [None]:
# ================= CONFIGURATION =================
# Base model (Hugging Face model id).
BASE_MODEL_NAME = "Qwen/Qwen2.5-7B-Instruct"

# * Set this to the folder where the trained adapter was saved. *
# Example: "./saved_models/run-007-large-batch-final"
ADAPTER_PATH = "./saved_models/run-007-large-batch-final"

# Reward model used for evaluation (the same one as during training).
RM_MODEL_NAME = "OpenAssistant/reward-model-deberta-v3-large-v2"

# Generation parameters, passed as keyword arguments to generate().
GEN_PARAMS = dict(
    max_new_tokens=256,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    repetition_penalty=1.1,
)
# =================================================

In [None]:
# Instruction block appended to every prompt (the same system prompt as used in training).
base_inst = (
    "You are an expert LLM researcher. Propose a novel and concrete research idea "
    "about large language models.\n"
    "Output ONLY in the following format:\n\n"
    "Title: <concise LLM research title>\n"
    "Abstract: <150-220 word abstract with motivation, approach, and contribution>\n"
)

# Evaluation topics (mixes in slightly unusual ones that are unlikely to be in the training data).
test_topics = [
    "Neuromorphic computing integration with LLMs",
    "LLM-based autonomous scientific discovery agents",
    "Privacy-preserving distributed training via Homomorphic Encryption",
    "Understanding sarcasm and humor in ancient languages",
    "Reducing energy consumption of inference by 90%",
    "explainable AI for medical diagnosis using causality",
]

# One prompt per topic: the task sentence followed by the shared instruction block.
prompts = [
    f"Draft a research proposal about {topic}.\n{base_inst}"
    for topic in test_topics
]

print(f"Test Prompts: {len(prompts)} items")

In [None]:
# List for storing results (not used within this cell).
results = []

# Seed applied before each sampling pass. GEN_PARAMS enables sampling, so without a
# fixed seed every run produces different text and the base/tuned comparison is not
# repeatable. Reseeding per pass also starts both models from the same RNG state.
GENERATION_SEED = 42


def _generate_responses(model, prompt_list):
    """Generate one completion per prompt and return only the newly generated text.

    Args:
        model: Causal LM (base or LoRA-wrapped) exposing `generate` and `device`.
        prompt_list: Prompt strings; each one is tokenized and generated on its own.

    Returns:
        list[str]: Decoded continuations, in the same order as `prompt_list`.
    """
    torch.manual_seed(GENERATION_SEED)
    responses = []
    for prompt_text in tqdm(prompt_list):
        inputs = tokenizer(prompt_text, return_tensors="pt").to(model.device)
        with torch.no_grad():
            outputs = model.generate(**inputs, **GEN_PARAMS)

        # Strip the prompt tokens so that only the generated answer is decoded.
        prompt_len = inputs.input_ids.shape[1]
        responses.append(
            tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
        )
    return responses


print("=== 1. Loading Base Model ===")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME, use_fast=False)
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_NAME,
    torch_dtype=torch.float16,
    device_map="auto",
)

# --- A. Generation with the base model ---
print("Generating with BASE model...")
base_responses = _generate_responses(base_model, prompts)

# --- B. Generation with the trained model (LoRA) ---
print("=== 2. Loading LoRA Adapter ===")
# Attach the LoRA adapter to the already-loaded base model (memory efficient: the
# base weights are not loaded a second time). The base generations above are taken
# first because the adapter is attached to this same base_model object.
tuned_model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
tuned_model.eval()

print("Generating with TRAINED model...")
tuned_responses = _generate_responses(tuned_model, prompts)

# Release memory before the reward model is loaded.
del base_model
del tuned_model
torch.cuda.empty_cache()

In [None]:
print("=== 3. Scoring with Reward Model ===")
# Load the reward model and its tokenizer.
rm_tokenizer = AutoTokenizer.from_pretrained(RM_MODEL_NAME)

# Use half precision only on the GPU. On the CPU fallback float16 inference can be
# very slow or hit unimplemented kernels depending on the PyTorch version, so load
# the model in float32 there.
rm_dtype = torch.float16 if device == "cuda" else torch.float32
rm_model = AutoModelForSequenceClassification.from_pretrained(
    RM_MODEL_NAME, torch_dtype=rm_dtype
).to(device)
rm_model.eval()

def get_score(prompt, response):
    """Score a (prompt, response) pair with the reward model.

    The pair is rendered as a single "User: ... / Assistant: ..." text, tokenized
    with truncation at 1024 tokens, and passed through `rm_model` without gradient
    tracking.

    Args:
        prompt: The prompt text given to the generator.
        response: The generated answer to be scored.

    Returns:
        float: The first row of the reward model's logits converted to a Python
        number (`.item()` requires that row to hold exactly one value).
    """
    # NOTE(review): the input format differs between reward models. The original
    # author considered this User/Assistant layout roughly fine for OpenAssistant
    # models; confirm it matches the format used for the training reward, and
    # compare with the pair input `tokenizer(question, answer)` shown on the
    # model card.
    encoded = rm_tokenizer(
        f"User: {prompt}\nAssistant: {response}",
        return_tensors="pt",
        truncation=True,
        max_length=1024,
    )
    encoded = encoded.to(device)

    with torch.no_grad():
        rm_output = rm_model(**encoded)

    first_row = rm_output.logits[0]
    return first_row.cpu().item()

# Build the comparison DataFrame: one record per topic.

# Guard against stale kernel state (e.g. the topic list was edited but the
# generation cell was not re-run): zip() would otherwise silently drop rows or
# pair topics with the wrong responses.
assert len(test_topics) == len(prompts) == len(base_responses) == len(tuned_responses), (
    "test_topics, prompts, base_responses and tuned_responses must have the same length"
)

df_data = []
for topic, p, b_resp, t_resp in zip(test_topics, prompts, base_responses, tuned_responses):
    b_score = get_score(p, b_resp)
    t_score = get_score(p, t_resp)

    # Equal scores are reported as a tie instead of being counted as a Base win.
    if t_score > b_score:
        winner = "Trained"
    elif t_score < b_score:
        winner = "Base"
    else:
        winner = "Tie"

    df_data.append({
        # Take the topic from the source list rather than parsing it back out of
        # the prompt: splitting on "about " / "." truncates topics that contain
        # a period or the word "about".
        "Topic": topic,
        "Base_Reward": b_score,
        "Tuned_Reward": t_score,
        "Diff": t_score - b_score,
        "Win": winner,
        "Base_Response": b_resp,
        "Tuned_Response": t_resp
    })

df = pd.DataFrame(df_data)

In [None]:
print("=== Evaluation Report ===")

# Aggregate comparison: mean reward per model and the share of prompts where the
# trained model scored higher.
avg_base = df["Base_Reward"].mean()
avg_tuned = df["Tuned_Reward"].mean()
win_rate = df["Win"].eq("Trained").mean() * 100

print(f"Average Reward (Base)   : {avg_base:.4f}")
print(f"Average Reward (Trained): {avg_tuned:.4f}")
print(f"Improvement             : {avg_tuned - avg_base:+.4f}")
print(f"Win Rate (Trained > Base): {win_rate:.1f}%")

print("\n=== Detailed Examples ===")
# Rank the rows once by score difference (largest improvement first) and reuse
# that ordering for both the summary table and the best-case lookup.
ranked_df = df.sort_values("Diff", ascending=False)
display_cols = ["Topic", "Base_Reward", "Tuned_Reward", "Diff", "Win"]
display(ranked_df[display_cols])

# Show the actual text for the case that improved the most (first 300 characters
# of each response).
best_case = ranked_df.iloc[0]
banner = "="*50
print("\n" + banner)
print(f"★ Best Improvement Case (Topic: {best_case['Topic']})")
print(banner)
print(f"[Base Model] (Score: {best_case['Base_Reward']:.3f})\n{best_case['Base_Response'][:300]}...\n")
print(f"[Trained Model] (Score: {best_case['Tuned_Reward']:.3f})\n{best_case['Tuned_Response'][:300]}...")