In [17]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F

# =========================
# 1. Load the dataset
# =========================
# NOTE(review): trust_remote_code=True presumably allows the dataset's own
# loading script to run locally — confirm the source is trusted.
dataset = load_dataset("piqa", trust_remote_code=True)

# =========================
# 2. Load the model and tokenizer
# =========================
model_name = "MoritzLaurer/mDeBERTa-v3-base-mnli-xnli"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
# Class names, listed in the order the evaluation code pairs them with the
# model's output probabilities — TODO confirm against model.config.id2label.
labels = ["entailment", "neutral", "contradiction"]

# =========================
# 3. PIQA 샘플 평가 함수 (방법2 적용)
# =========================
def evaluate_piqa_sample(index: int, split="train"):
    """Score one PIQA sample with the NLI model and print a recommended choice.

    The sample's goal is used as the premise and each candidate solution as
    the hypothesis. Every choice is scored as
    ``P(entailment) - P(contradiction)`` and the choice with the highest score
    is recommended; on a tie the lower-numbered choice is reported.

    Relies on the module-level ``dataset``, ``tokenizer``, ``model`` and
    ``labels``.

    Args:
        index: Position of the sample inside ``dataset[split]``.
        split: Name of the dataset split to read the sample from.

    Returns:
        None. All results are printed.
    """
    sample = dataset[split][index]

    premise = sample['goal']
    choices = [sample['sol1'], sample['sol2']]
    answer = sample['label']  # 0 or 1

    print(f"=== Sample #{index} ({split}) ===")
    print(f"Goal: {premise}")
    print(f"Choice 0: {choices[0]}")
    print(f"Choice 1: {choices[1]}")
    print(f"Answer: Choice {answer}\n")

    # Look the class positions up by name so the score stays correct if the
    # order of `labels` is ever changed.
    entail_idx = labels.index("entailment")
    contra_idx = labels.index("contradiction")

    ec_scores = []

    for i, choice in enumerate(choices):
        # Tokenize the (premise, hypothesis) pair and keep the tensors on the
        # same device as the model, so this also works after model.to("cuda").
        inputs = tokenizer(premise, choice, return_tensors="pt", truncation=True)
        inputs = inputs.to(model.device)

        # Inference only: no gradients needed.
        with torch.no_grad():
            logits = model(**inputs).logits

        # Single sequence per call, so row 0 holds the class probabilities.
        probs = F.softmax(logits, dim=-1)[0].tolist()

        # Entailment-minus-contradiction score for this choice.
        score = probs[entail_idx] - probs[contra_idx]
        ec_scores.append(score)

        # Per-choice report.
        print(f"--- Choice {i} ---")
        for label, prob in zip(labels, probs):
            print(f"{label}: {prob:.4f}")
        pred_label = labels[probs.index(max(probs))]
        print(f"Prediction: {pred_label}")
        print(f"Entailment - Contradiction score: {score:.4f}\n")

    # Final recommendation: the choice with the highest score.
    best_score = max(ec_scores)
    final_choice = ec_scores.index(best_score)
    print(f"Recommended Choice: {final_choice} (score {best_score:.4f})\n")

# =========================
# 4. Usage examples
# =========================
evaluate_piqa_sample(0)   # evaluate the train-split sample at index 0
evaluate_piqa_sample(4, split="validation")  # evaluate the validation-split sample at index 4


=== Sample #0 (train) ===
Goal: When boiling butter, when it's ready, you can
Choice 0: Pour it onto a plate
Choice 1: Pour it into a jar
Answer: Choice 1

--- Choice 0 ---
entailment: 0.0048
neutral: 0.9884
contradiction: 0.0068
Prediction: neutral
Entailment - Contradiction score: -0.0020

--- Choice 1 ---
entailment: 0.0035
neutral: 0.9784
contradiction: 0.0181
Prediction: neutral
Entailment - Contradiction score: -0.0146

Recommended Choice: 0 (score -0.0020)

=== Sample #4 (validation) ===
Goal: ice box
Choice 0: will turn into a cooler if you add water to it
Choice 1: will turn into a cooler if you add soda to it
Answer: Choice 0

--- Choice 0 ---
entailment: 0.0023
neutral: 0.9925
contradiction: 0.0052
Prediction: neutral
Entailment - Contradiction score: -0.0029

--- Choice 1 ---
entailment: 0.0027
neutral: 0.9896
contradiction: 0.0077
Prediction: neutral
Entailment - Contradiction score: -0.0050

Recommended Choice: 0 (score -0.0029)



## Instruction-tuned LLM + Chain-of-Thought (CoT) + pairwise 평가

### flan-t5

In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import json, re

# =========================
# 1. Load the PIQA dataset
# =========================
dataset = load_dataset("piqa", trust_remote_code=True)

# =========================
# 2. Load the instruction-tuned LLM
# =========================
model_name = "google/flan-t5-base"  # example; another instruction-tuned LLM could be used
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
device = "cuda" if torch.cuda.is_available() else "cpu"  # use the GPU when one is available
model.to(device)

# =========================
# 3. 평가 함수 (CoT + pairwise 점수화)
# =========================
def _parse_model_output(text):
    """Parse ``text`` as JSON; return ``text`` unchanged when that fails."""
    candidates = [text]
    # The model may wrap the JSON object in extra words; also try the span
    # from the first '{' to the last '}'.
    start, end = text.find("{"), text.rfind("}")
    if 0 <= start < end:
        candidates.append(text[start:end + 1])
    for candidate in candidates:
        try:
            return json.loads(candidate)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            continue
    return text


def _coerce_score(value):
    """Return ``value`` as a float, or 0.0 when it is not numeric."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return 0.0


def _score_from_text(text, i):
    """Extract the score for ``Choice i`` from free-form text (0.0 if none)."""
    label = re.search(rf"Choice {i}\b", text)
    if label is None:
        return 0.0
    # Only look at the text belonging to this choice; otherwise the digit of
    # the next "Choice N" label would be picked up as this choice's score.
    segment = text[label.end():]
    next_label = re.search(r"Choice \d+", segment)
    if next_label is not None:
        segment = segment[:next_label.start()]
    number = r"([0-9]*\.?[0-9]+)"
    # Prefer a number that follows the word "score"; fall back to the first
    # number in the segment.
    match = re.search(r"score\D*?" + number, segment, flags=re.IGNORECASE)
    if match is None:
        match = re.search(number, segment)
    return float(match.group(1)) if match else 0.0


def evaluate_piqa_sample_llm(index: int, split="train", max_new_tokens=200):
    """Ask the instruction-tuned LLM to reason about and score both choices.

    Builds a prompt that requests step-by-step reasoning plus a 0-to-1 score
    for each choice in JSON form, generates a response, extracts the two
    scores, and prints the choice with the highest score.

    Score extraction is best-effort: valid JSON is used when available,
    otherwise the raw text is searched for numbers. A choice with no usable
    score gets 0.0; when both scores are equal, choice 0 is reported.

    Relies on the module-level ``dataset``, ``tokenizer``, ``model`` and
    ``device``.

    Args:
        index: Position of the sample inside ``dataset[split]``.
        split: Name of the dataset split to read the sample from.
        max_new_tokens: Generation length limit passed to ``model.generate``.

    Returns:
        None. All results are printed.
    """
    sample = dataset[split][index]
    goal = sample['goal']
    choices = [sample['sol1'], sample['sol2']]
    answer = sample['label']

    print(f"=== Sample #{index} ({split}) ===")
    print(f"Goal: {goal}")
    print(f"Choice 0: {choices[0]}")
    print(f"Choice 1: {choices[1]}")
    print(f"Answer: Choice {answer}\n")

    # Build the prompt.
    prompt = f"""
Goal: {goal}

Choice 0: {choices[0]}
Choice 1: {choices[1]}

For each choice:
1. Explain step-by-step reasoning why this choice may or may not achieve the goal.
2. Give a score from 0 to 1 for effectiveness.

Respond in JSON format:
{{
 "Choice 0": {{"reasoning": "...", "score": 0.0}},
 "Choice 1": {{"reasoning": "...", "score": 0.0}}
}}
"""

    # Tokenize and generate.
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        output_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)
    output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    # NOTE(review): the T5 vocabulary reportedly has no '{' / '}' tokens, so
    # a flan-t5 model may be unable to emit JSON at all — confirm; that would
    # explain every score falling back to 0.0.
    parsed = _parse_model_output(output_text)

    scores = []
    for i, _ in enumerate(choices):
        key = f"Choice {i}"
        reasoning = ""
        if isinstance(parsed, dict) and key in parsed:
            # Case 1: structured JSON output.
            entry = parsed[key]
            if isinstance(entry, dict):
                reasoning = entry.get("reasoning", "")
                score = _coerce_score(entry.get("score", 0.0))
            else:
                # The value itself is expected to be the score.
                score = _coerce_score(entry)
        else:
            # Case 2: pull the score out of plain text.
            score = _score_from_text(str(parsed), i)

        scores.append(score)
        print(f"--- Choice {i} ---")
        print(f"Reasoning: {reasoning}")
        print(f"Score: {score}\n")

    best_score = max(scores)
    recommended_choice = scores.index(best_score)
    print(f"Recommended Choice: {recommended_choice} (score {best_score:.4f})\n")
    
    
# =========================
# 4. Usage examples
# =========================
evaluate_piqa_sample_llm(2)   # evaluate the train-split sample at index 2
evaluate_piqa_sample_llm(5, split="validation")  # evaluate the validation-split sample at index 5


=== Sample #2 (train) ===
Goal: how do you indent something?
Choice 0: leave a space before starting the writing
Choice 1: press the spacebar
Answer: Choice 0

--- Choice 0 ---
Reasoning: 
Score: 0.0

--- Choice 1 ---
Reasoning: 
Score: 0.0

Recommended Choice: 0 (score 0.0000)

=== Sample #5 (validation) ===
Goal: Remove soap scum from shower door.
Choice 0: Rub hard with bed sheets, then rinse.
Choice 1: Rub hard with dryer sheets, then rinse.
Answer: Choice 1

--- Choice 0 ---
Reasoning: 
Score: 0.0

--- Choice 1 ---
Reasoning: 
Score: 0.0

Recommended Choice: 0 (score 0.0000)



In [None]:
# =========================
# 1️⃣ XNLI 점수 계산 및 저장
# =========================
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F
import json
from tqdm import tqdm

# Dataset
dataset = load_dataset("piqa", trust_remote_code=True)

# XNLI model
xnli_model_name = "MoritzLaurer/mDeBERTa-v3-base-mnli-xnli"
xnli_tokenizer = AutoTokenizer.from_pretrained(xnli_model_name)
xnli_model = AutoModelForSequenceClassification.from_pretrained(xnli_model_name)
# Class names in the assumed output order of the model — TODO confirm against
# xnli_model.config.id2label.
labels = ["entailment", "neutral", "contradiction"]

# XNLI 점수 계산 함수
def get_xnli_scores(premise, hypothesis):
    """Run the XNLI model on one premise/hypothesis pair.

    Uses the module-level ``xnli_tokenizer`` and ``xnli_model``.

    Returns:
        A dict with the softmax probabilities at output positions 0, 1 and 2
        under the keys ``"entailment"``, ``"neutral"`` and
        ``"contradiction"``, plus ``"ec_score"``: entailment minus
        contradiction.
    """
    encoded = xnli_tokenizer(
        premise,
        hypothesis,
        return_tensors="pt",
        truncation=True,
    )

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        model_output = xnli_model(**encoded)

    # One pair per call: take the first row and convert it to Python floats.
    distribution = F.softmax(model_output.logits, dim=-1)[0].tolist()
    p_entail = distribution[0]
    p_neutral = distribution[1]
    p_contra = distribution[2]

    scores = {
        "entailment": p_entail,
        "neutral": p_neutral,
        "contradiction": p_contra,
    }
    scores["ec_score"] = p_entail - p_contra
    return scores

# 데이터셋별 XNLI 점수 계산 및 저장
def save_xnli_scores(split="train", output_file="piqa_train_xnli.json"):
    """Compute XNLI scores for every sample of a split and save them as JSON.

    For each sample the goal is paired with both solutions and scored with
    ``get_xnli_scores``. All records are collected in memory and written to
    ``output_file`` in one go at the end.

    Relies on the module-level ``dataset``.

    Args:
        split: Name of the dataset split to process.
        output_file: Path of the JSON file to write. The default name refers
            to the train split, so pass a matching name for other splits.

    Returns:
        None. Prints how many samples were saved.
    """
    results = []
    # Iterate the split directly instead of indexing it by position.
    # Wrapping the split (not the enumerate) in tqdm lets the progress bar
    # still know the total length.
    for idx, sample in enumerate(tqdm(dataset[split])):
        goal = sample['goal']
        choices = [sample['sol1'], sample['sol2']]
        results.append({
            "index": idx,
            "goal": goal,
            "choices": choices,
            "answer": sample['label'],
            "xnli_scores": [get_xnli_scores(goal, c) for c in choices],
        })

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=2)
    print(f"Saved {len(results)} samples to {output_file}")

# Example run: score both splits and write one JSON file per split
save_xnli_scores(split="train", output_file="piqa_train_xnli.json")
save_xnli_scores(split="validation", output_file="piqa_validation_xnli.json")
