<a href="https://colab.research.google.com/github/jjbmsda/Kaggle/blob/main/llms_you_cant_please_them_all/llms_you_cant_please_them_all_v4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import torch
import random
import gc
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Load the competition's test.csv from the Kaggle input directory.
test_data = pd.read_csv("/kaggle/input/llms-you-cant-please-them-all/test.csv")

# Collect unreferenced objects and release cached GPU memory before the
# model is loaded.
gc.collect()
torch.cuda.empty_cache()

# 1. Check whether a GPU is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

if device == "cuda":
    print(f"✅ GPU is available: {torch.cuda.get_device_name(0)}")
    # 2. Make cuda:0 the current device. This is done only inside the CUDA
    #    branch: calling set_device() without a GPU raises, which would abort
    #    the script right after the warning below instead of letting it
    #    continue on CPU.
    torch.cuda.set_device(0)
else:
    print("⚠ GPU is NOT available. Check Kaggle settings.")

# 3. Load the tokenizer and model from the local Kaggle model directory.
MODEL_PATH = "/kaggle/input/mistral-7b/transformers/mistral-7b-instruct/1/"
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

# The empty-string key maps the whole model to a single device: cuda:0 when a
# GPU is present, otherwise the CPU. Hard-coding 0 would fail on a CPU-only
# session.
# NOTE(review): trust_remote_code=True allows custom code shipped with the
# checkpoint to run; presumably acceptable for this local dataset path, but
# confirm it is actually required for this model.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    torch_dtype=torch.bfloat16,  # load the weights in bfloat16 to save memory
    device_map={"": 0 if torch.cuda.is_available() else "cpu"},
    trust_remote_code=True,
)

# Report which device the model parameters ended up on.
print(f"Model is on: {next(model.parameters()).device}")

# 4. Text-generation pipeline built around the loaded model and tokenizer.
#    Sampling is enabled with a temperature well above 1.0 (the author's
#    intent is more varied, more controversial text), restricted by top-k
#    and nucleus (top-p) filtering.
sampling_options = {
    "do_sample": True,
    "temperature": 2.5,
    "top_k": 50,
    "top_p": 0.5,
}

llm_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    **sampling_options,
)

# 5. Pools of author personas and writing styles. One of each is drawn at
#    random per essay to increase the variation between generated essays.
professions = [
    "philosopher",
    "scientist",
    "lawyer",
    "economist",
    "journalist",
    "psychologist",
]
writing_styles = [
    "formal",
    "sarcastic",
    "mysterious",
    "abstract",
    "controversial",
]

# 6. Phrases spliced into an essay to make it self-contradictory, with the
#    aim of widening the score gap between AI judges.
_CONTRADICTIONS = (
    "This contradicts everything we've known so far, yet it remains valid.",
    "While some argue this is true, just as many disagree.",
    "Surprisingly, even leading scholars can't agree on this point.",
    "This paradox remains unsolved, defying logical reasoning.",
    "Ironically, the same data has been used to prove both sides.",
)

_CONFLICTING_STATEMENTS = (
    "Many believe this to be an absolute truth, while others reject it entirely.",
    "This statement is fundamentally flawed, yet widely accepted.",
    "There exists no universal agreement on this, yet policies are based on it.",
    "Even in scientific circles, this topic generates intense debates.",
    "Ironically, historical records show both confirmation and refutation.",
)


def inject_disagreements(essay: str) -> str:
    """Insert two contradictory filler sentences into *essay*.

    The essay is split on ``'. '``. When that yields more than three
    sentences, two distinct positions are sampled and a randomly chosen
    phrase is inserted in front of each of those sentences. Shorter essays
    are returned unchanged.

    Args:
        essay: The essay text to modify.

    Returns:
        The essay re-joined with ``'. '``, containing two extra sentences
        when it was long enough, otherwise identical to the input.
    """
    sentences = essay.split('. ')
    if len(sentences) <= 3:
        return essay

    insert_points = random.sample(range(len(sentences)), 2)

    # Insert from the highest index down so the first insertion does not
    # shift the position the second one was sampled for.
    for point in sorted(insert_points, reverse=True):
        if random.random() > 0.5:
            phrase = random.choice(_CONTRADICTIONS)
        else:
            phrase = random.choice(_CONFLICTING_STATEMENTS)
        # The phrases end with a period and the join below adds another
        # '. ', so drop the trailing period to avoid emitting "..".
        sentences.insert(point, phrase.rstrip('.'))

    return '. '.join(sentences)

# 7. Essay generation (two-stage process).
def generate_essay(topic):
    """Generate an essay on *topic* and splice contradictory sentences into it.

    A profession and a writing style are drawn at random and embedded in
    the prompt. The model's continuation is then passed through
    ``inject_disagreements``.

    Args:
        topic: The essay topic, interpolated into the prompt.

    Returns:
        The modified essay with surrounding whitespace stripped.
    """
    profession = random.choice(professions)
    style = random.choice(writing_styles)

    # Stage 1: ask the model for the base essay.
    prompt = f"""
    As a {profession}, write a 120-word essay on '{topic}' that maximizes disagreement among AI judges.
    The essay should be written in a {style} style, incorporating abstract reasoning, contradictions, and paradoxes.
    Use unconventional arguments and challenge widely accepted views.
    """

    with torch.no_grad():
        # return_full_text=False keeps only the newly generated text. By
        # default the pipeline returns the prompt followed by the
        # continuation, which would put the instructions above into the
        # submitted essay.
        # NOTE: max_new_tokens limits tokens, not words, so the output can
        # be shorter than the 120 words the prompt asks for.
        outputs = llm_pipeline(prompt, max_new_tokens=120, return_full_text=False)
    response = outputs[0]['generated_text']

    # Stage 2: add controversy-inducing sentences to the generated essay.
    modified_essay = inject_disagreements(response)

    return modified_essay.strip()

# 8. Walk the test set in chunks of batch_size rows.
#    NOTE: essays are still generated one topic at a time; the chunking
#    only groups rows and does not batch anything at the model level.
batch_size = 3
submissions = []

for start in range(0, len(test_data), batch_size):
    batch = test_data.iloc[start : start + batch_size]
    # A single itertuples() pass supplies both the id and the topic, instead
    # of iterating each chunk twice (iterrows, then itertuples).
    for row in batch.itertuples():
        submissions.append({"id": row.id, "essay": generate_essay(row.topic)})

# 9. Write the submission file. The columns are named explicitly so the CSV
#    still has its "id,essay" header when the test set is empty.
submission_df = pd.DataFrame(submissions, columns=["id", "essay"])
submission_df.to_csv("submission.csv", index=False)
