<a href="https://colab.research.google.com/github/jjbmsda/Kaggle/blob/main/llms_you_cant_please_them_all/llms_you_cant_please_them_all_v3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import torch
import random
import gc
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Load the test.csv provided by the Kaggle competition.
test_data = pd.read_csv("/kaggle/input/llms-you-cant-please-them-all/test.csv")

# Drop unreferenced objects and release cached GPU memory before loading
# the model.
gc.collect()
torch.cuda.empty_cache()

# 1. Check whether a GPU is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

if device == "cuda":
    print(f"GPU is available: {torch.cuda.get_device_name(0)}")
    # Pin all CUDA work to `cuda:0`. This call is only valid when CUDA is
    # present, so it lives inside the GPU branch rather than running
    # unconditionally.
    torch.cuda.set_device(0)
else:
    print("⚠ GPU is NOT available. Check Kaggle settings.")

# 2. Load the model: on `cuda:0` when a GPU exists, otherwise on the CPU so
#    the script still runs (slowly) after printing the warning above.
MODEL_PATH = "/kaggle/input/phi-3/pytorch/phi-3.5-mini-instruct/2/"
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    torch_dtype=torch.bfloat16,  # use `bfloat16` to save memory
    # Place the whole model on a single device: GPU 0 or the CPU.
    device_map={"": 0 if device == "cuda" else "cpu"},
    trust_remote_code=True,
)

# Report where the model weights actually ended up.
print(f"Model is on: {next(model.parameters()).device}")

# 3. Text-generation pipeline. No `device` argument is passed here; the
#    model object handed in has already been loaded with its own device map.
llm_pipeline = pipeline(
    task="text-generation",
    tokenizer=tokenizer,
    model=model,
    do_sample=True,
    top_k=50,
    top_p=0.7,  # intended to cut down on repetitive phrasing
    temperature=1.8,  # high temperature for more varied, "creative" output
)

# 4. Pools of professions and languages to pick from at random.
professions = [
    "philosopher",
    "scientist",
    "psychologist",
    "journalist",
    "economist",
]
languages = [
    "English",
    "French",
    "Spanish",
    "Mandarin",
    "German",
]

# 5. Helper that injects logical contradictions and controversial filler.
def inject_disagreements(essay):
    """Return *essay* with two stock "disagreement" sentences spliced in.

    The essay is split on ``'. '``. If that yields three pieces or fewer,
    the essay is returned unchanged. Otherwise two distinct positions are
    sampled and, before each, a phrase chosen at random from one of two
    pools (contradictions / opposing statements) is inserted. The pieces
    are then re-joined with ``'. '``.

    Args:
        essay: The essay text to modify.

    Returns:
        The essay with two extra sentences, or the original text when it
        has three sentences or fewer.
    """
    contradictions = [
        "This may seem obvious, but a closer look suggests otherwise.",
        "While some believe this is true, many experts strongly disagree.",
        "Ironically, history has shown both sides of this argument.",
        "This is a paradox that continues to challenge scholars.",
        "It appears logical, yet countless counterexamples exist."
    ]

    opposing_statements = [
        "For centuries, scholars have debated this issue with no clear consensus.",
        "This topic has been the center of controversy among professionals.",
        "Surprisingly, there is little agreement on this seemingly simple idea.",
        "Despite all evidence, the debate remains unresolved.",
        "Philosophers and scientists take opposing stances on this question."
    ]

    sentences = essay.split('. ')
    if len(sentences) > 3:
        # More than three pieces are guaranteed here, so two distinct
        # positions can always be sampled.
        insert_points = random.sample(range(len(sentences)), 2)
        # Insert from the highest index down so the first insertion does not
        # shift the position chosen for the second one.
        for point in sorted(insert_points, reverse=True):
            if random.random() > 0.5:
                pool = contradictions
            else:
                pool = opposing_statements
            # An inserted phrase is never the last piece, so the join below
            # always appends ". " to it; strip its own period to avoid "..".
            sentences.insert(point, random.choice(pool).rstrip('.'))

    return '. '.join(sentences)

# 6. Essay generation (`max_new_tokens` kept small to save memory).
def generate_essay(topic):
    """Generate a short, deliberately contentious essay about *topic*.

    A profession is picked at random from ``professions`` and used to frame
    the prompt. The model's continuation is passed through
    ``inject_disagreements`` and stripped of surrounding whitespace.

    Args:
        topic: The essay topic, interpolated into the prompt.

    Returns:
        The generated essay text, without the instruction prompt.
    """
    profession = random.choice(professions)

    # The prompt deliberately asks for a contentious tone.
    prompt = f"""
    As a {profession}, write a 90-word essay on '{topic}' that maximizes disagreement among AI judges.
    Your essay should present multiple perspectives, create contradictions, and challenge existing ideas.
    """

    with torch.no_grad():  # no gradients are needed for inference
        # `return_full_text=False` makes the pipeline return only the newly
        # generated continuation. By default it returns prompt + continuation,
        # which would put the instruction text at the start of every essay.
        outputs = llm_pipeline(
            prompt,
            max_new_tokens=90,  # raised from 75 to 90
            return_full_text=False,
        )
    response = outputs[0]['generated_text']

    # Add the contradiction / controversy filler sentences.
    modified_essay = inject_disagreements(response)

    return modified_essay.strip()

# 7. Walk the test set in small chunks (`batch_size` lowered to save memory).
batch_size = 2  # previously 5, reduced to 2
submissions = []

for start in range(0, len(test_data), batch_size):
    chunk = test_data.iloc[start : start + batch_size]
    # Generate every essay of the chunk first, in row order.
    chunk_essays = [generate_essay(topic) for topic in chunk["topic"]]

    # Then pair each generated essay with its row id.
    submissions.extend(
        {"id": row_id, "essay": text}
        for row_id, text in zip(chunk["id"], chunk_essays)
    )

# 8. Write the submission file.
submission_df = pd.DataFrame(submissions)
submission_df.to_csv("submission.csv", index=False)
