# Prompt만 조정한 baseline

실행환경: Colab

## Parameters

In [None]:
# Root directory on the mounted Google Drive; join_path() resolves every path against it.
BASE_DIR = "/content/drive/MyDrive/강화학습"
# File name of the input question CSV, relative to BASE_DIR.
INPUT_DATA = "test.csv"
# Directory name of the model/tokenizer files, relative to BASE_DIR.
MODEL_DIR = "llama3"
# Sampling temperature passed to model.generate().
TEMPERATURE = 0.1
# Upper bound on the number of newly generated tokens per question.
MAX_NEW_TOKENS = 16
LAST_CHECK_POINT = 0 # index of the previously saved checkpoint to resume from
CHECK_POINT_STEP = 500 # a checkpoint is saved whenever the row index is a multiple of this

## Import

In [None]:
!pip install -q \
  accelerate bitsandbytes \
  transformers huggingface_hub
!pip install -U bitsandbytes

In [None]:
import os
import ast
import time
import pandas as pd

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from google.colab import drive

# Mount Google Drive at /content/drive so that BASE_DIR is reachable.
# force_remount=False: do not force a remount when the drive is already mounted.
drive.mount("/content/drive", force_remount=False)


def join_path(*args):
    """Return the path formed by appending the given components to BASE_DIR."""
    components = (BASE_DIR,) + args
    return os.path.join(*components)

In [None]:
# Prepare the model and tokenizer from the local directory BASE_DIR/MODEL_DIR
model_path = join_path(MODEL_DIR)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# 4-bit NF4 quantization with double quantization; matrix compute runs in float16.
quat_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)
# device_map={"": 0} maps the whole model (empty module prefix) to device index 0.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map={"": 0},
    quantization_config=quat_config,
    torch_dtype=torch.float16,
)

## Inference

In [None]:
def generate_prompt(
    context: str, question: str, answer_list: list[str]
) -> tuple[list, str]:
    """Build the chat prompt for one multiple-choice question.

    Args:
        context: Background information; surrounding whitespace is stripped.
        question: The question text; surrounding whitespace is stripped.
        answer_list: Candidate answers, numbered from 1 in the prompt.

    Returns:
        A pair ``(messages, flat_text)``: ``messages`` is the system/user
        message list in chat format, and ``flat_text`` is the system and user
        texts joined by a newline (kept for logging).
    """
    # One "- <number>. <option>" line per candidate answer, numbered from 1.
    numbered_options = []
    for number, option in enumerate(answer_list, start=1):
        numbered_options.append(f"- {number}. {option}")
    options_block = "\n".join(numbered_options)

    system_text = "\n".join(
        [
            "제공된 정보를 기반으로 중립적이고 정확하게 판단하세요.",
            "다음 보기 중 오직 하나만 선택해야 합니다:",
            options_block,
            "답으로 숫자만 출력하세요.",
        ]
    )
    user_text = "\n".join(
        [
            f"정보: {context.strip()}",
            f"질문: {question.strip()}",
            "답:",
        ]
    )

    messages = [
        {"role": role, "content": content}
        for role, content in (("system", system_text), ("user", user_text))
    ]
    flat_text = f"{system_text}\n{user_text}"
    return messages, flat_text


def _resolve_choice(raw_output: str, answer_list: list[str]) -> str:
    """Map the model's raw text to an option, or fall back to its leading character."""
    first_line = raw_output.split("\n")[0].strip()
    # Single-digit answers are assumed; slicing yields "" for an empty
    # generation instead of raising IndexError.
    leading_char = first_line[:1]
    # isdecimal() only accepts characters int() can parse, whereas isdigit()
    # also accepts e.g. superscript digits that make int() raise ValueError.
    if leading_char.isdecimal():
        number = int(leading_char)
        if 1 <= number <= len(answer_list):
            return answer_list[number - 1]
    # Out-of-range digit or non-numeric output: keep it as a string so the
    # returned answer never changes type.
    return leading_char


def get_llama_result(
    context: str, question: str, answer_list: list[str], idx: int
) -> dict:
    """Generate the model's answer for one multiple-choice question.

    Args:
        context: Background information for the question.
        question: The question text.
        answer_list: Candidate answers; the model is asked for a 1-based number.
        idx: Row index, used only in the warning message.

    Returns:
        A dict with keys ``raw_input`` (flat prompt text), ``raw_output``
        (the generated text, stripped) and ``answer`` (the selected option
        text, or the leading character of the output when it is not a valid
        option number). ``answer`` is always a string.
    """
    # Prepare the prompt
    prompt, promt_str = generate_prompt(context, question, answer_list)
    # add_generation_prompt=True appends the assistant header so the model
    # starts answering immediately instead of spending tokens on the header.
    # return_dict=True also returns the tokenizer's attention mask, which is
    # all ones for this single unpadded sequence; deriving it from
    # `input_ids != pad_token_id` would mask real end-of-turn tokens whenever
    # the pad id is aliased to the eos id.
    inputs = tokenizer.apply_chat_template(
        prompt,
        add_generation_prompt=True,
        tokenize=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    # Generate the model answer
    with torch.no_grad():
        output = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=MAX_NEW_TOKENS,
            do_sample=True,
            temperature=TEMPERATURE,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens; splitting the full text on the
    # word "assistant" breaks when the prompt or answer contains that word.
    generated_tokens = output[0][prompt_length:]
    assistant_answer = tokenizer.decode(
        generated_tokens, skip_special_tokens=True
    ).strip()
    assistant_choice = _resolve_choice(assistant_answer, answer_list)

    if assistant_choice not in answer_list:
        # The model produced something that is not one of the options
        print(f"⚠️[{idx}] Options: {answer_list} -> {assistant_choice}")
    return {
        "raw_input": promt_str,
        "raw_output": assistant_answer,
        "answer": assistant_choice,
    }

In [None]:
# Load the question data
origianl_data = pd.read_csv(join_path(INPUT_DATA), encoding="utf-8-sig")

# Columns filled in by inference. They are kept as pandas "string" dtype both
# for a fresh run and for a resumed one.
result_columns = ["raw_input", "raw_output", "answer"]

# Look for the checkpoint file named after LAST_CHECK_POINT
check_point_path = join_path(
    "checkpoint", f"submission_checkpoint_{LAST_CHECK_POINT}.csv"
)

if os.path.exists(check_point_path):
    # Resume: read with the same encoding the checkpoint was written with and
    # force string dtype. With inferred dtypes, a column holding only digit
    # answers would be parsed as numbers ("1" -> 1.0) and later re-saved in
    # that altered form.
    check_point_data = pd.read_csv(
        check_point_path,
        encoding="utf-8-sig",
        dtype={col: "string" for col in result_columns},
    )
    data_start_index = LAST_CHECK_POINT
else:
    # No checkpoint: start from a copy of the input so that the input frame
    # itself is not modified when result columns are added.
    check_point_data = origianl_data.copy()
    data_start_index = 0
    for col in result_columns:
        if col not in check_point_data.columns:
            check_point_data[col] = ""
        check_point_data[col] = check_point_data[col].astype("string")

In [None]:
# Make sure the checkpoint directory exists before the first save.
os.makedirs(join_path("checkpoint"), exist_ok=True)

total_rows = len(origianl_data)
start_time = time.time()
for idx in range(data_start_index, total_rows):
    question_row = origianl_data.loc[idx]
    # The "choices" cell holds a Python list literal; parse it into a list.
    choices = ast.literal_eval(question_row["choices"])

    # Ask the model for its answer to this row.
    result = get_llama_result(
        question_row["context"], question_row["question"], choices, idx
    )

    # Store the result in the in-memory table.
    for column in ("raw_input", "raw_output", "answer"):
        check_point_data.at[idx, column] = result[column]

    # Only rows whose index is a multiple of CHECK_POINT_STEP trigger a save.
    if idx % CHECK_POINT_STEP != 0:
        continue

    # Elapsed time is measured before writing, so it excludes the save itself.
    elapsed_minutes = (time.time() - start_time) / 60
    checkpoint_file = join_path("checkpoint", f"submission_checkpoint_{idx}.csv")
    check_point_data[["ID", "raw_input", "raw_output", "answer"]].to_csv(
        checkpoint_file,
        index=False,
        encoding="utf-8-sig",
    )
    print(f"✅{idx}/{total_rows} 저장. ({elapsed_minutes:.1f}분)")
    # Restart the timer for the next checkpoint interval.
    start_time = time.time()

In [None]:
# Write the final result file (ID plus the three inference columns).
submission_columns = ["ID", "raw_input", "raw_output", "answer"]
submission = check_point_data[submission_columns]
submission.to_csv(
    join_path("submission.csv"),
    index=False,
    encoding="utf-8-sig",
)