In [None]:
import os
import json
from openai import OpenAI
import re

# Configure the OpenAI client used by process_text.
# NOTE(review): OPENAI_API_KEY is not defined in any of the visible cells —
# presumably it is set in an earlier cell or session; confirm, or load it from
# the environment so the notebook survives Restart & Run All.
client = OpenAI(api_key=OPENAI_API_KEY)

def chunk_text(text, chunk_size=2000):
    """Split ``text`` into consecutive pieces of at most ``chunk_size`` items.

    The pieces are returned in their original order; the last one may be
    shorter than ``chunk_size`` and an empty ``text`` yields an empty list.
    """
    pieces = []
    for start in range(0, len(text), chunk_size):
        stop = start + chunk_size
        pieces.append(text[start:stop])
    return pieces

def create_qa_pairs(input_folder, output_file, game_name="메이플스토리"):
    """Build question-answer pairs from every ``.json`` file in a folder and save them as JSONL.

    Each JSON file may contain either a single entry (a dict) or a list of
    entries. Every entry must provide the ``title``, ``date`` and ``content``
    keys; a missing key raises ``KeyError``. The three fields are joined into
    one text and handed to ``process_text``, which appends the generated pairs
    to a shared list.

    Args:
        input_folder: Directory that is scanned (non-recursively) for ``.json`` files.
        output_file: Path of the JSONL file to write, one pair per line.
        game_name: Game name forwarded to ``process_text``.
    """
    qa_pairs = []

    # os.listdir() returns names in arbitrary order; sorting makes the order of
    # the output lines reproducible between runs.
    for filename in sorted(os.listdir(input_folder)):
        if not filename.endswith('.json'):
            continue

        file_path = os.path.join(input_folder, filename)
        # Read the whole file first so the handle is closed before the
        # per-entry processing starts.
        with open(file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)

        # Treat a single entry like a one-element list so both shapes share one code path.
        entries = data if isinstance(data, list) else [data]
        for entry in entries:
            full_text = f"제목: {entry['title']}\n날짜: {entry['date']}\n내용: {entry['content']}"
            process_text(full_text, qa_pairs, game_name)

    with open(output_file, 'w', encoding='utf-8') as outfile:
        for item in qa_pairs:
            outfile.write(json.dumps(item, ensure_ascii=False) + '\n')

    print(f"Question-answer pairs have been saved to {output_file}")

def _parse_qa_pairs(qa_content):
    """Return ``(question, answer)`` tuples found in a 'Q: ...' / 'A: ...' formatted reply."""
    if not qa_content:
        # The API may return no text content at all; treat that as "no pairs".
        return []
    # DOTALL lets a question or answer span several lines. MULTILINE must NOT
    # be set: with it, `$` matches at the end of every line and the lazy answer
    # group stops after its first line, truncating multi-line answers. `\Z`
    # anchors the last answer to the real end of the reply instead.
    return re.findall(r'Q: (.*?)\nA: (.*?)(?=\nQ:|\Z)', qa_content, re.DOTALL)


def process_text(full_text, qa_pairs, game_name):
    """Generate question-answer pairs for ``full_text`` and append them to ``qa_pairs``.

    The text is split with ``chunk_text`` and one chat-completion request is
    sent per chunk through the module-level ``client``. Every 'Q:'/'A:' pair
    found in a reply is appended to ``qa_pairs`` as a dict with ``prompt`` and
    ``completion`` keys.

    Args:
        full_text: Source text the questions should be based on.
        qa_pairs: List that is extended in place with the generated pairs.
        game_name: Game name interpolated into the system prompt.
    """
    chunks = chunk_text(full_text)

    for i, chunk in enumerate(chunks):
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": f"You are an assistant that generates question-answer pairs about the game {game_name} based on the given text. Generate 2 relevant question-answer pairs. Each question should start with 'Q: ' and each answer should start with 'A: '. Make sure all content is in Korean and directly related to {game_name}. This is part {i+1} of the full text."},
                {"role": "user", "content": f"Generate question-answer pairs based on this text: {chunk}"}
            ]
        )

        qa_content = response.choices[0].message.content

        for question, answer in _parse_qa_pairs(qa_content):
            qa_pairs.append({
                "prompt": f"Q: {question.strip()}",
                "completion": f"A: {answer.strip()}"
            })

# Run: build the Q&A dataset from the JSON files in the 'script' folder
# and write it to 'game_qa_data.jsonl'.
input_folder = 'script'
output_file = 'game_qa_data.jsonl'
create_qa_pairs(input_folder, output_file, game_name="메이플스토리")

In [None]:
import json
import re

def convert_to_child_speech(text):
    """Rewrite common Korean verb endings into the character's '-담' / '-람' speech style.

    Each known ending (for example '합니다', '있습니다', '없다') is replaced by
    its '-담' counterpart ('한담', '있담', '없담'), and the imperative endings
    '주라' / '해라' become '주람' / '해람'. Text without any of the known
    endings is returned unchanged.

    Args:
        text: The string to convert.

    Returns:
        The converted string.
    """
    # (ending, replacement) pairs, applied in order.
    # '합니다' maps to '한담'. It previously mapped to '해야 한담', which changed
    # the meaning ("does" -> "must do") and turned '해야 합니다' into
    # '해야 해야 한담'.
    # NOTE(review): the endings are matched anywhere in the text, not only at
    # the end of a sentence, so they also fire mid-word (e.g. inside '없다면').
    replacements = (
        ('합니다', '한담'),
        ('있습니다', '있담'),
        ('없습니다', '없담'),
        ('없다', '없담'),
        ('입니다', '이담'),
        ('됩니다', '된담'),
        ('있어요', '있담'),
        ('하다', '한담'),
        ('했습니다', '했담'),
        ('많다', '많담'),
        ('많아요', '많담'),
        ('주라', '주람'),
        ('해라', '해람'),
        ('됐다', '됐담'),
        ('좋습니다', '좋담'),
    )

    for ending, replacement in replacements:
        text = text.replace(ending, replacement)

    return text

def process_jsonl_file(input_file, output_file):
    """Copy a JSONL file, converting each record's ``completion`` to the character's speech style.

    Every non-blank line of ``input_file`` is parsed as JSON. If the record has
    a ``completion`` field it is passed through ``convert_to_child_speech``;
    records without that field are copied unchanged. The result is written to
    ``output_file``, one JSON document per line.

    Args:
        input_file: Path of the JSONL file to read.
        output_file: Path of the JSONL file to write (overwritten if it exists).
    """
    with open(input_file, 'r', encoding='utf-8') as infile, \
            open(output_file, 'w', encoding='utf-8') as outfile:
        for line in infile:
            # Blank lines (e.g. a trailing newline left by manual editing) are
            # not valid JSON; skip them instead of crashing in json.loads.
            if not line.strip():
                continue

            data = json.loads(line)

            # Convert the 'completion' field to the character's speech style.
            if 'completion' in data:
                data['completion'] = convert_to_child_speech(data['completion'])

            # Write the converted record to the new file.
            outfile.write(json.dumps(data, ensure_ascii=False) + '\n')

    print(f"처리가 완료되었습니다. 결과가 {output_file}에 저장되었습니다.")

# Example usage: convert the generated Q&A file into the character's speech
# style and write the result to a separate file.
input_file = 'game_qa_data.jsonl'
output_file = 'game_qa_data_child_speech.jsonl'
process_jsonl_file(input_file, output_file)

In [None]:
import json

def convert_to_chat_format(input_file, output_file):
    """Convert a prompt/completion JSONL file into chat-style ``messages`` records.

    Every non-blank line of ``input_file`` must be a JSON object with
    ``prompt`` and ``completion`` keys (a missing key raises ``KeyError``).
    Each record becomes ``{"messages": [user message, assistant message]}``
    and is written to ``output_file`` as one JSON document per line.

    Args:
        input_file: Path of the prompt/completion JSONL file to read.
        output_file: Path of the chat-format JSONL file to write (overwritten if it exists).
    """
    with open(input_file, 'r', encoding='utf-8') as infile, \
            open(output_file, 'w', encoding='utf-8') as outfile:
        for line in infile:
            # Blank lines are not valid JSON; skip them instead of crashing in
            # json.loads (hand-edited files often end with an empty line).
            if not line.strip():
                continue

            data = json.loads(line)
            chat_format = {
                "messages": [
                    {"role": "user", "content": data['prompt']},
                    {"role": "assistant", "content": data['completion']}
                ]
            }
            outfile.write(json.dumps(chat_format, ensure_ascii=False) + '\n')

# Example usage.
# NOTE(review): no earlier visible cell writes 'cleaned_game_qa_data_final_1.jsonl'
# (they write 'game_qa_data.jsonl' and 'game_qa_data_child_speech.jsonl'); the
# editor cell further down reads and rewrites it. Presumably it is a manually
# cleaned copy — confirm where it comes from.
convert_to_chat_format('cleaned_game_qa_data_final_1.jsonl', 'chat_formatted_game_qa_data.jsonl')

In [None]:
import json
import tkinter as tk
from tkinter import scrolledtext, messagebox, simpledialog
import os

class JSONLEditor:
    """Tkinter window for browsing and editing a JSONL file of prompt/completion records.

    The records are loaded into memory on start-up. The window shows one record
    at a time and offers buttons to navigate, edit, delete, add and save. The
    index of the record being shown is persisted to ``last_index_file`` so the
    next session resumes at the same position. Closing the window saves the
    data automatically.

    Every record is expected to be a JSON object with ``prompt`` and
    ``completion`` keys.
    """

    # Default dataset path; __init__ may override it per instance.
    data_file = 'cleaned_game_qa_data_final_1.jsonl'

    def __init__(self, master, data_file=None):
        """Load the data, build the widgets and show the last viewed record.

        Args:
            master: The Tk root (or toplevel) window that hosts the editor.
            data_file: Optional path of the JSONL file to edit; defaults to the
                class-level ``data_file``.
        """
        self.master = master
        self.master.title("JSONL 파일 편집기")
        self.master.geometry("1200x900")

        if data_file is not None:
            self.data_file = data_file

        self.data = []
        self.current_index = -1
        self.last_index_file = 'last_index.txt'

        # The data must be loaded first: load_last_index clamps against its length.
        self.load_data()
        self.load_last_index()

        self.create_widgets()
        self.show_current_item()

        # Save automatically when the window is closed.
        self.master.protocol("WM_DELETE_WINDOW", self.on_closing)

    def load_data(self):
        """Read all records from ``data_file`` into ``self.data``, skipping blank lines."""
        with open(self.data_file, 'r', encoding='utf-8') as file:
            # Blank lines are not valid JSON and would make json.loads raise.
            self.data = [json.loads(line) for line in file if line.strip()]

    def load_last_index(self):
        """Restore ``current_index`` from ``last_index_file``.

        Falls back to 0 when the file is missing or does not contain an
        integer, and clamps the stored value into the valid range so a stale
        index (for example after records were removed) still shows a record.
        """
        index = 0
        if os.path.exists(self.last_index_file):
            with open(self.last_index_file, 'r') as file:
                try:
                    index = int(file.read().strip())
                except ValueError:
                    index = 0
        # With no data, min(...) gives -1 and max(...) brings it back to 0.
        self.current_index = max(0, min(index, len(self.data) - 1))

    def save_last_index(self):
        """Write ``current_index`` to ``last_index_file``."""
        with open(self.last_index_file, 'w') as file:
            file.write(str(self.current_index))

    def create_widgets(self):
        """Create the position label, the two text areas and the button row."""
        self.line_number_label = tk.Label(self.master, text="줄 번호: 0/0", font=("Arial", 20))
        self.line_number_label.pack(pady=5)

        tk.Label(self.master, text="질문:", font=("Arial", 20)).pack(pady=5)
        self.prompt_text = scrolledtext.ScrolledText(self.master, height=6, width=100, font=("Arial", 20))
        self.prompt_text.pack(pady=10)

        tk.Label(self.master, text="답변:", font=("Arial", 20)).pack(pady=5)
        self.completion_text = scrolledtext.ScrolledText(self.master, height=10, width=100, font=("Arial", 20))
        self.completion_text.pack(pady=10)

        button_frame = tk.Frame(self.master)
        button_frame.pack(pady=10)

        tk.Button(button_frame, text="이전", command=self.previous_item, width=10).pack(side=tk.LEFT, padx=5)
        tk.Button(button_frame, text="다음", command=self.next_item, width=10).pack(side=tk.LEFT, padx=5)
        tk.Button(button_frame, text="수정", command=self.edit_item, width=10).pack(side=tk.LEFT, padx=5)
        tk.Button(button_frame, text="삭제", command=self.delete_item, width=10).pack(side=tk.LEFT, padx=5)
        tk.Button(button_frame, text="저장", command=self.save_data, width=10).pack(side=tk.LEFT, padx=5)
        tk.Button(button_frame, text="줄 이동", command=self.goto_line, width=10).pack(side=tk.LEFT, padx=5)
        tk.Button(button_frame, text="새 항목 추가", command=self.add_new_item, width=10).pack(side=tk.LEFT, padx=5)

    def show_current_item(self):
        """Display the record at ``current_index`` and persist the index.

        When the index is out of range (for example when there is no data) both
        text areas are cleared and the label shows position 0.
        """
        if 0 <= self.current_index < len(self.data):
            item = self.data[self.current_index]
            self.prompt_text.delete('1.0', tk.END)
            self.prompt_text.insert(tk.END, item['prompt'])
            self.completion_text.delete('1.0', tk.END)
            self.completion_text.insert(tk.END, item['completion'])
            self.line_number_label.config(text=f"줄 번호: {self.current_index + 1}/{len(self.data)}")
        else:
            self.prompt_text.delete('1.0', tk.END)
            self.completion_text.delete('1.0', tk.END)
            self.line_number_label.config(text=f"줄 번호: 0/{len(self.data)}")
        self.save_last_index()

    def next_item(self):
        """Move to the next record, if there is one."""
        if self.current_index < len(self.data) - 1:
            self.current_index += 1
            self.show_current_item()

    def previous_item(self):
        """Move to the previous record, if there is one."""
        if self.current_index > 0:
            self.current_index -= 1
            self.show_current_item()

    def edit_item(self):
        """Copy the text-area contents into the current record (in memory only)."""
        if 0 <= self.current_index < len(self.data):
            self.data[self.current_index]['prompt'] = self.prompt_text.get('1.0', tk.END).strip()
            self.data[self.current_index]['completion'] = self.completion_text.get('1.0', tk.END).strip()
            messagebox.showinfo("알림", "수정되었습니다.")

    def delete_item(self):
        """Remove the current record (in memory only) and show its successor."""
        if 0 <= self.current_index < len(self.data):
            del self.data[self.current_index]
            messagebox.showinfo("알림", "삭제되었습니다.")
            # After deleting the last record, step back to the new last one.
            if self.current_index >= len(self.data):
                self.current_index = len(self.data) - 1
            self.show_current_item()

    def save_data(self):
        """Write all records back to ``data_file`` and persist the current index."""
        with open(self.data_file, 'w', encoding='utf-8') as file:
            for item in self.data:
                json.dump(item, file, ensure_ascii=False)
                file.write('\n')
        self.save_last_index()
        messagebox.showinfo("알림", "파일이 저장되었습니다.")

    def goto_line(self):
        """Ask for a 1-based line number and jump to that record."""
        if not self.data:
            # With no data the dialog would get minvalue=1 and maxvalue=0,
            # which no input can satisfy.
            messagebox.showinfo("알림", "이동할 항목이 없습니다.")
            return
        line_number = simpledialog.askinteger("줄 이동", "이동할 줄 번호를 입력하세요:", minvalue=1, maxvalue=len(self.data))
        if line_number:
            self.current_index = line_number - 1
            self.show_current_item()

    def add_new_item(self):
        """Ask for a new question and answer and append them as the last record."""
        new_prompt = simpledialog.askstring("새 항목 추가", "새로운 질문을 입력하세요:")
        if new_prompt:
            new_completion = simpledialog.askstring("새 항목 추가", "새로운 답변을 입력하세요:")
            if new_completion:
                new_item = {"prompt": new_prompt, "completion": new_completion}
                self.data.append(new_item)
                self.current_index = len(self.data) - 1
                self.show_current_item()
                messagebox.showinfo("알림", "새 항목이 추가되었습니다.")

    def on_closing(self):
        """Handle the window-close event: save the data, then destroy the window."""
        self.save_data()  # save automatically when the window is closed
        self.master.destroy()

# Create the Tk root window, attach the editor to it and start the Tk event loop.
if __name__ == "__main__":
    root = tk.Tk()
    editor = JSONLEditor(root)
    root.mainloop()
