In [None]:
### EN -> KO 번역 ###

import csv
import requests

# DeepL API configuration.
# NOTE(review): the key is a single blank space here, presumably a placeholder
# to be replaced with a real key before running -- confirm.
DEEPL_API_KEY = " "
# Translate endpoint on the api-free.deepl.com host.
DEEPL_API_URL = "https://api-free.deepl.com/v2/translate"

def translate_text_deepl(text, source_lang="EN", target_lang="KO", timeout=30):
    """Translate ``text`` with the DeepL REST API, falling back to the input.

    Translation is best-effort: any network, HTTP, or response-format problem
    is reported on stdout and the original text is returned, so a batch job
    keeps going when a single request fails.

    Args:
        text: The text to translate. Empty or ``None`` input is returned
            unchanged without contacting the API.
        source_lang: DeepL source language code.
        target_lang: DeepL target language code.
        timeout: Seconds to wait for DeepL before giving up. Without a
            timeout, ``requests`` can block forever on a stalled connection.

    Returns:
        The translated string, or ``text`` itself when there is nothing to
        translate or the request fails.
    """
    # Nothing to translate: skip the network round trip entirely.
    if not text:
        return text

    try:
        response = requests.post(
            DEEPL_API_URL,
            # DeepL documents header-based authentication; sending the key as
            # an ``auth_key`` form field is the legacy method.
            headers={"Authorization": f"DeepL-Auth-Key {DEEPL_API_KEY}"},
            data={
                "text": text,
                "source_lang": source_lang,
                "target_lang": target_lang,
            },
            timeout=timeout,
        )
        response.raise_for_status()
        return response.json()["translations"][0]["text"]
    except (
        requests.RequestException,  # connection errors, timeouts, HTTP 4xx/5xx
        ValueError,  # response body is not valid JSON
        KeyError,  # JSON lacks the expected keys
        IndexError,  # empty "translations" list
        TypeError,  # JSON has an unexpected shape
    ) as e:
        print(f"번역 실패: {e}")
        return text  # fall back to the untranslated text

# Columns translated by default. ``question_plus`` is not translated unless
# the caller adds it to ``fields``.
_DEFAULT_TRANSLATE_FIELDS = (
    "paragraph",
    "question",
    "choice1",
    "choice2",
    "choice3",
    "choice4",
    "choice5",
)


def process_csv(input_file, output_file, fields=_DEFAULT_TRANSLATE_FIELDS):
    """Copy a CSV file, translating the text columns named in ``fields``.

    Every row of ``input_file`` is written to ``output_file`` with the same
    header. For each column listed in ``fields`` that exists in the row and
    holds a non-empty value, the value is replaced by the result of
    ``translate_text_deepl``. All other columns are copied unchanged.

    Args:
        input_file: Path of the UTF-8 CSV file to read.
        output_file: Path of the UTF-8 CSV file to write (overwritten).
        fields: Names of the columns to translate. Defaults to the paragraph,
            the question and the five choice columns.
    """
    # newline='' on both files lets the csv module handle line endings itself,
    # so line breaks embedded in quoted fields survive the round trip.
    with open(input_file, 'r', encoding='utf-8', newline='') as infile, \
            open(output_file, 'w', encoding='utf-8', newline='') as outfile:
        reader = csv.DictReader(infile)
        fieldnames = reader.fieldnames
        if fieldnames is None:
            # Empty input has no header row; leave the output empty instead of
            # failing inside writeheader().
            return

        writer = csv.DictWriter(outfile, fieldnames=fieldnames)
        writer.writeheader()

        for row in reader:
            for name in fields:
                value = row.get(name)
                # Missing columns and blank cells are copied through as-is.
                if value:
                    row[name] = translate_text_deepl(value)

            # Write the (possibly translated) row to the new file.
            writer.writerow(row)

# Paths of the source CSV and of the translated CSV to produce.
input_csv, output_csv = 'khan_raw.csv', 'khan_trans.csv'

# Run the translation over the whole file.
process_csv(input_csv, output_csv)


In [None]:
### 형식 전환 코드 ###

import pandas as pd

# Load the translated data. read_csv already returns a DataFrame, so no
# re-wrapping is needed; both names are kept for later cells.
data = pd.read_csv("khan_trans.csv")
df = data

# One output record per input row.
transformed_data = []

for idx, row in df.iterrows():

    # Collect the answer choices, skipping blank cells. pandas reads an empty
    # CSV cell as NaN, and a NaN inside the list would be written out as a bare
    # `nan` token when the dict below is stringified by to_csv, which neither
    # ast.literal_eval nor a JSON parser can read back.
    choices = [
        row[column]
        for column in ("choice1", "choice2", "choice3", "choice4", "choice5")
        if pd.notna(row[column])
    ]

    # Question, its choices and the answer, grouped into one dict.
    problem = {
        "question": row["question"],
        "choices": choices,
        "answer": row["answer"]
    }

    # Final record layout; question_plus is always written as an empty string.
    transformed_data.append({
        "id": row["id"],
        "paragraph": row["paragraph"],
        "problems": problem,
        "question_plus": ""
    })

# Build the output DataFrame from the collected records.
transformed_df = pd.DataFrame(transformed_data)

output_file = "khan_final.csv"
transformed_df.to_csv(output_file, index=False)
