<a href="https://colab.research.google.com/github/jjbmsda/Kaggle/blob/main/llms_you_cant_please_them_all/llms_you_cant_please_them_all_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Load the required libraries
import pandas as pd
import torch
import random
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# 1️⃣ Read the test.csv supplied with the Kaggle competition
test_data = pd.read_csv("/kaggle/input/llms-you-cant-please-them-all/test.csv")

# 2️⃣ Local path of the Phi-3.5-mini-instruct weights (a small model, chosen to keep runtime down)
MODEL_PATH = "/kaggle/input/phi-3/pytorch/phi-3.5-mini-instruct/2/"

# 3️⃣ Load tokenizer and causal-LM weights in bfloat16, letting device_map="auto" place them
model_load_kwargs = {
    "torch_dtype": torch.bfloat16,
    "device_map": "auto",
    "trust_remote_code": True,
}
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, **model_load_kwargs)

# 4️⃣ Text-generation pipeline configured for sampling (temperature / nucleus / top-k)
sampling_kwargs = {
    "do_sample": True,
    "temperature": 1.2,
    "top_p": 0.85,
    "top_k": 40,
}
llm_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    **sampling_kwargs,
)

# 5️⃣ Personas (profession and language) that generate_essay picks from at random
professions = ["philosopher", "scientist", "psychologist", "journalist", "economist"]
languages = ["English", "French", "Spanish", "Mandarin", "German"]

# 6️⃣ Logical-contradiction injection
def inject_contradictions(essay):
    """Insert two stock "contradiction" sentences into *essay*.

    The essay is split on ``'. '``. When that yields more than two pieces,
    two distinct positions are drawn at random and a randomly chosen
    contradiction sentence is inserted before the piece at each position.
    Essays that split into two or fewer pieces are returned unchanged.

    Args:
        essay: The essay text to modify.

    Returns:
        The essay re-joined with ``'. '``, including any inserted sentences.
    """
    contradictions = [
        "While this may seem true, some experts argue the exact opposite.",
        "Ironically, this idea has been both proven and disproven over time.",
        "Despite all evidence supporting this, many continue to believe the contrary.",
        "This conclusion appears valid, yet a deeper look suggests otherwise.",
        "Although compelling, this argument is inherently self-contradictory."
    ]

    sentences = essay.split('. ')
    if len(sentences) > 2:
        insert_points = random.sample(range(len(sentences)), 2)
        # Insert from the highest index down: earlier insertions then never
        # shift the positions still to be used, so the two contradictions
        # cannot end up directly next to each other.
        for point in sorted(insert_points, reverse=True):
            # The join below supplies the '. ' separator, so drop the
            # literal's own trailing period to avoid emitting "..".
            contradiction = random.choice(contradictions).rstrip('.')
            sentences.insert(point, contradiction)

    return '. '.join(sentences)

# 7️⃣ Essay generation (random profession and language persona)
def generate_essay(topic):
    """Generate an essay on *topic* and inject contradiction sentences.

    A profession and a language are picked at random from the module-level
    ``professions`` and ``languages`` lists and interpolated into the prompt.
    The completion produced by ``llm_pipeline`` is passed through
    ``inject_contradictions`` and stripped of surrounding whitespace.

    Args:
        topic: The essay topic, inserted verbatim into the prompt.

    Returns:
        The generated essay text with contradictions injected.
    """
    profession = random.choice(professions)
    language = random.choice(languages)

    prompt = f"""
    As a {profession}, write a 100-word essay on '{topic}' that maximizes disagreement among AI judges.
    Ensure the essay reflects cultural elements from the {language}-speaking world.
    """

    # return_full_text=False: by default the text-generation pipeline returns
    # the prompt followed by the completion, which would put the instruction
    # text into the submitted essay (and let contradictions be injected into
    # the prompt itself). Only the newly generated text is wanted here.
    outputs = llm_pipeline(prompt, max_new_tokens=100, return_full_text=False)
    response = outputs[0]['generated_text']

    # Add the logical contradictions
    modified_essay = inject_contradictions(response)

    return modified_essay.strip()

# 8️⃣ Generate one essay per test row.
# generate_essay() makes one pipeline call per topic, so slicing the frame
# into chunks gave no batching benefit; the rows are simply iterated in order.
submissions = [
    {"id": row.id, "essay": generate_essay(row.topic)}
    for row in test_data.itertuples()
]

# 9️⃣ Write the submission file.
# Columns are given explicitly so the CSV header is present even when
# test_data has no rows.
submission_path = "submission.csv"
submission_df = pd.DataFrame(submissions, columns=["id", "essay"])
submission_df.to_csv(submission_path, index=False)

# 🔟 Confirm that the file just written exists. The same path is used for
# writing and checking, so the check is valid whatever the working directory is.
import os
if os.path.exists(submission_path):
    print("✅ Submission file successfully created.")
else:
    print(f"⚠️ Submission file not found at {os.path.abspath(submission_path)}")
