<a href="https://colab.research.google.com/github/hail-members/llm-based-services/blob/main/Chapter_5_%EC%8B%A4%EC%8A%B5%EC%BD%94%EB%93%9C.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Train 데이터 다운로드
!wget https://korquad.github.io/dataset/KorQuAD_v1.0_train.json -O KorQuAD_v1.0_train.json

# Dev 데이터 다운로드
!wget https://korquad.github.io/dataset/KorQuAD_v1.0_dev.json -O KorQuAD_v1.0_dev.json

import json

# Train 데이터 로드
with open("KorQuAD_v1.0_train.json", "r", encoding="utf-8") as f:
    train_data = json.load(f)

# Dev 데이터 로드
with open("KorQuAD_v1.0_dev.json", "r", encoding="utf-8") as f:
    dev_data = json.load(f)

# 데이터 구조 확인
print("Train Data Keys:", train_data.keys())
print("Example Data:", train_data["data"][0])  # 첫 번째 문단 출력

--2025-04-04 10:52:23--  https://korquad.github.io/dataset/KorQuAD_v1.0_train.json
Resolving korquad.github.io (korquad.github.io)... 185.199.108.153, 185.199.111.153, 185.199.110.153, ...
Connecting to korquad.github.io (korquad.github.io)|185.199.108.153|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 38527475 (37M) [application/json]
Saving to: ‘KorQuAD_v1.0_train.json’


2025-04-04 10:52:28 (11.1 MB/s) - ‘KorQuAD_v1.0_train.json’ saved [38527475/38527475]

--2025-04-04 10:52:29--  https://korquad.github.io/dataset/KorQuAD_v1.0_dev.json
Resolving korquad.github.io (korquad.github.io)... 185.199.110.153, 185.199.111.153, 185.199.108.153, ...
Connecting to korquad.github.io (korquad.github.io)|185.199.110.153|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3881058 (3.7M) [application/json]
Saving to: ‘KorQuAD_v1.0_dev.json’


2025-04-04 10:52:29 (11.2 MB/s) - ‘KorQuAD_v1.0_dev.json’ saved [3881058/3881058]

Train Data Keys: dict_keys

In [2]:
# 라이브러리 임포트
import torch
from transformers import GPT2LMHeadModel, AutoTokenizer
from torch.utils.data import Dataset, DataLoader


# KoGPT2 모델과 토크나이저 로드
model_name = "skt/kogpt2-base-v2"
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    bos_token='</s>',
    eos_token='</s>',
    unk_token='<unk>',
    pad_token='<pad>',
    mask_token='<mask>'
)
tokenizer.add_special_tokens({'additional_special_tokens': ['<usr>', '<sys>']})
model = GPT2LMHeadModel.from_pretrained(model_name)

In [3]:
# 데이터셋 정의 (질문과 답변을 GPT2 입력 형식으로 변환)
class KorQuADDataset(Dataset):
    def __init__(self, data, tokenizer, max_length=50):
        self.data = data["data"]
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.total_qas = []  # 모든 질문-답변 쌍을 저장

        # 전체 질문-답변 쌍을 리스트로 저장
        for article in self.data:
            for paragraph in article["paragraphs"]:
                for qa in paragraph["qas"]:
                    self.total_qas.append((paragraph["context"], qa))

    def __len__(self):
        return len(self.total_qas)

    # def __getitem__(self, idx):
    #     # 질문과 답변 추출
    #     context, qa = self.total_qas[idx]
    #     question = qa["question"]
    #     answer_text = qa["answers"][0]["text"]

    #     # GPT2 입력 텍스트 생성 (질문 + 컨텍스트 + 답변)
    #     # input_text = f"<usr> {question} <sys> {context} </s>"
    #     # GPT2 입력 텍스트 생성 (질문+ 답변)
    #     input_text = f"<usr> {question} </s>"
    #     target_text = f"<sys> {answer_text} </s>"

    #     # 토큰화 및 인코딩
    #     input_ids = self.tokenizer.encode(input_text, max_length=self.max_length, truncation=True, padding="max_length")
    #     target_ids = self.tokenizer.encode(target_text, max_length=self.max_length, truncation=True, padding="max_length")

    #     return {
    #         "input_ids": torch.tensor(input_ids),
    #         "labels": torch.tensor(target_ids)
    #     }
    def __getitem__(self, idx):
        context, qa = self.total_qas[idx]
        question = qa["question"]
        answer = qa["answers"][0]["text"]

        # 전체 시퀀스 생성 (질문+답변)
        full_text = f"<usr> {question} </s> <sys> {answer} </s>"
        
        # 통합 토큰화
        encoding = self.tokenizer(
            full_text,
            max_length=self.max_length,
            truncation=True,
            padding="max_length",
            return_tensors="pt"
        )

        # 레이블 마스킹 처리
        input_ids = encoding.input_ids.squeeze()
        labels = input_ids.clone()

        # 질문 부분 마스킹 (</s> 토큰 직전까지)
        sep_token = self.tokenizer.convert_tokens_to_ids("</s>")
        sep_positions = (input_ids == sep_token).nonzero(as_tuple=True)[0]
        if len(sep_positions) >= 1:
            labels[:sep_positions[0]+1] = -100  # 질문 부분 무시

        return {
            "input_ids": input_ids,
            "labels": labels
        }


train_dataset = KorQuADDataset(train_data, tokenizer)
dev_dataset = KorQuADDataset(dev_data, tokenizer)

train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True)
dev_dataloader = DataLoader(dev_dataset, batch_size=4)


In [4]:
# 저장된 데이터 보기
for batch in train_dataloader:
    print(batch)
    print(tokenizer.decode(batch["input_ids"][0], skip_special_tokens=False))
    break

{'input_ids': tensor([[    2, 32341,  7426,  9489,  9060, 17823, 19344, 24204,  9717, 22375,
         12619,  9057,  7055,   406,   739,     1,   739,     4,  9097, 12287,
         15215,   739,     1,     3,     3,     3,     3,     3,     3,     3,
             3,     3,     3,     3,     3,     3,     3,     3,     3,     3,
             3,     3,     3,     3,     3,     3,     3,     3,     3,     3],
        [    2, 40391,  8042,  9123, 10583, 13400,  9023,  9981,  7689,  8470,
          8137, 10648, 17161,   406,   739,     1,   739,     4, 10592,   409,
          9563,   739,     1,     3,     3,     3,     3,     3,     3,     3,
             3,     3,     3,     3,     3,     3,     3,     3,     3,     3,
             3,     3,     3,     3,     3,     3,     3,     3,     3,     3],
        [    2, 10364,  9382,  9439, 23113, 39560,   406,   739,     1,   739,
             4, 10364, 11696, 10352,   739,     1,     3,     3,     3,     3,
             3,     3,     3,     3,

In [5]:
# GPU 사용 설정
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# 문장 생성 함수 정의
def generate_sentence(model, seed_text, max_length=50):
    input_ids = tokenizer.encode(seed_text, return_tensors="pt").to(device)
    gen_ids = model.generate(
        input_ids,
        max_length=max_length,
        repetition_penalty=2.0,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
        bos_token_id=tokenizer.bos_token_id,
        use_cache=True,
    )
    generated_text = tokenizer.decode(gen_ids[0], skip_special_tokens=True)
    return generated_text

question = "인공지능이란?"
context = ""

# 입력 텍스트 생성
input_text = f"<usr> {question} <sys> {context} </s>"
input_ids = tokenizer.encode(
    input_text,
    max_length=100,
    truncation=True,
    padding="max_length",
    return_tensors="pt"
).to(model.device)

# KoGPT2로 문장 생성
model.eval()
with torch.no_grad():
    output_ids = model.generate(
        input_ids=input_ids,
        max_length=150,
        repetition_penalty=2.0,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
        bos_token_id=tokenizer.bos_token_id,
    )

generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

# 결과 출력
print(f"질문: {question}")
print(f"컨텍스트: {context}")
print(f"생성된 답변: {generated_text}")


질문: 인공지능이란?
컨텍스트: 
생성된 답변: 인공지능이란?   
, , . (중략) 이번에 출시된 신제품은 '스마트폰용 스마트패드'다.
이 제품은 스마트폰을 통해 다양한 콘텐츠를 즐길 수 있는 것이 특징이다.
특히 기존 제품보다 최대 2배 이상 빠른 속도로 데이터를 전송


In [None]:

from torch.optim import AdamW
from tqdm import tqdm

# 옵티마이저 설정
optimizer = AdamW(model.parameters(), lr=5e-3)

# 학습 루프 정의 (tqdm으로 Progress Bar 추가)
epochs = 10
model.train()

# 조기 종료 조건 설정
# max_batches_per_epoch = 1000  # 한 에포크에서 최대 실행할 배치 수

for epoch in range(epochs):
    epoch_loss = 0
    progress_bar = tqdm(enumerate(train_dataloader), desc=f"Epoch {epoch + 1}", total=len(train_dataloader))

    for batch_idx, batch in progress_bar:
        # if batch_idx >= max_batches_per_epoch:  # 조기 종료 조건
        #     print(f"Stopping early at batch {batch_idx} in epoch {epoch + 1}")
        #     break

        input_ids = batch["input_ids"].to(device)
        labels = batch["labels"].to(device)

        optimizer.zero_grad()
        outputs = model(input_ids=input_ids, labels=labels)
        loss = outputs.loss
        print(f"질문: {tokenizer.decode(input_ids[0], skip_special_tokens=False)}")

        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        progress_bar.set_postfix({"Batch Loss": loss.item()})

    print(f"Epoch {epoch + 1} completed. Average Loss: {epoch_loss / (batch_idx + 1)}")

# 모델 저장
model.save_pretrained("./kogpt2-korquad-finetuned")
tokenizer.save_pretrained("./kogpt2-korquad-finetuned")

Epoch 1:   0%|          | 0/15102 [00:00<?, ?it/s]

질문: <usr> 독립선언 이전의 영국 식민지들은 영국의 왕권에 대하여 어떤 의무를 실행해야 했는가? </s> <sys> 충성의 의무 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>


Epoch 1:   0%|          | 3/15102 [00:00<33:43,  7.46it/s, Batch Loss=1.28]   

질문: <usr> 김정은의 성형수술설의 보도경위와 근거를 추적한 매체는? </s> <sys> 신화통신 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 멩겔레가 유태인의 인종적 차이점에 대한 논문을 써서 인류학 박사 학위를 받은 년도는? </s> <sys> 1935 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 러스티의 논문은 어떤 사람의 이론의 기반이 되었나? </s> <sys> 파스퇴르 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 기원전 1세기 중반에 율리우스 카이사르와 폼페이우스 마그누스, 크라수스 세 사람이 공화국을 농단했던 정치는? </s> <sys> 삼두정치 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> Ac와 섞여서 뼈에 축적되는 악티늄을 줄이는데 효과가 있는 물질은? </s> <sys> 시트르산염, 에틸렌다이아민테트라아세트산(EDTA), 다이에틸렌트라이아민펜타아


Epoch 1:   0%|          | 9/15102 [00:00<15:50, 15.87it/s, Batch Loss=1.18] 

질문: <usr> 오스트레일리아 북부에서 오스트레일리아까치의 번식기는 언제인가? </s> <sys> 6월 ~ 9월 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 프로파일러들은 피해망상 조현병에 의한 무슨 범죄라고 결론을 내렸나? </s> <sys> 묻지 마 범죄 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 시대 변혁의 추진 역할을 한 무사들의 이야기를 쓴 책의 이름은? </s> <sys> 군기모노가타리 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 하우스 4피트 존을 막아서려면 센터 라인 가드가 어느 쪽 센터 라인에 걸쳐지도록 투구해야 합니까? </s> <sys> 하우스 앞 쪽 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 15U시즌에서는 트루퍼를 잡으면 아군 5명에게 무엇을 지급하는가? </s> <sys> 글로벌 코인 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>


Epoch 1:   0%|          | 15/15102 [00:01<13:18, 18.90it/s, Batch Loss=0.787]

질문: <usr> 1967년 아이스하키 구단은 총 몇개였는가? </s> <sys> 12개 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 한국의 정부부처는 총 몇개로 이루어져 있는가? </s> <sys> 44개 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 2012년 당시 새누리당 대변인 이름은 무엇인가? </s> <sys> 박선규 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 대한민국 임시의정원이 설립된 것은 몇 년의 일인가? </s> <sys> 1919년 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 국공합작이 결렬된 후에 공산당원들이 국민당을 피해 몰려든 지역은? </s> <sys> 장시 소비에트 지역 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><p

Epoch 1:   0%|          | 18/15102 [00:01<12:38, 19.88it/s, Batch Loss=0.932]

질문: <usr> 새민중정당과 민중연합당이 합당하여 어떠한 당을 새롭게 만들었나? </s> <sys> 민중당 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 소련에서 스탈린 격하 운동이 일어난건 언제인가? </s> <sys> 1956년 3월 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 조선민주주주의인민공화국의 주축은? </s> <sys> 김일성 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> BPD처럼 특정한 정신과적 질환에서, 성별의 차이는 몇 가지 요인 사이의 상호작용으로부터 기원한다고 할 수 있는가? </s> <sys> 세 요인 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 첫 에피소드에 대해 명작이라는 호평을 한 IGN의 작가는 누구인가? </s> <sys> 매트 파울러 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>


Epoch 1:   0%|          | 24/15102 [00:01<12:20, 20.36it/s, Batch Loss=0.903]

질문: <usr> 판문점 도끼 살인 사건이 벌어지자  워싱턴에서는 공동성명을 당일에 발표하였는데 이때 미국 대통령의 이름은? </s> <sys> 제럴드 포드 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 이전에 문학에서 유일하게 연구대상으로 삼은 것은? </s> <sys> 순수문학 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 말러의 친구이자 동료로 최종판 악보의 포켓 스코어를 인쇄해야 한다고 힌리센을 들볶은 사람은 누구입니까? </s> <sys> 멩겔베르크 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 2004년 더 매트릭스와 녹음해뒀던 데뷔 앨범이 아이튠스를 통해 발매된 날은? </s> <sys> 2009년 1월 27일 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 1894년 유진 데브스가 참여한 파업은? </s> <sys> 풀먼사 파업 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>


Epoch 1:   0%|          | 30/15102 [00:01<11:46, 21.34it/s, Batch Loss=0.705]

질문: <usr>  스위프트가 처음 방문한 아시아 국가는 어디인가? </s> <sys> 싱가포르 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 더불어민주당원 댓글 조작 사건으로 드루킹 일당의 연간 운영비는? </s> <sys> 11억원 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 판문점 도끼 살인 사건이 일어난 후 스틸웰 주한미군 사령관에 따라 인민군이 설치한 불법 방벽을 제거하는 작전이 무엇인가? </s> <sys> 폴 버니언 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 곡 Better가 들어가있는 부활의 전 보컬이었던 정단의 앨범의 제목은 무엇인가? </s> <sys> 내 마음이 그래 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 김정은은 어느 매체를 통해 평창올림픽의 성공적 개최기원 입장을 보였는가? </s> <sys> 조선중앙텔레비죤 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>


Epoch 1:   0%|          | 36/15102 [00:01<11:08, 22.53it/s, Batch Loss=0.903]

질문: <usr> 라그나로트의 과정에 대한 상세한 설명이 위해 인용된 것은 무엇인가? </s> <sys> 무녀의 예언 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 쿰스가 1902년 입학한 학교는? </s> <sys> 콜빈 컬리지 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 장면은 누구인가? </s> <sys> 국무총리 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 무한도전 3기는 언제 봄개편을 했나? </s> <sys> 2006년 5월 6일 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> Telephone에 피처링으로 참여한 미국의 알앤비 가수는 누구인가? </s> <sys> 비욘세 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad

Epoch 1:   0%|          | 39/15102 [00:02<11:24, 21.99it/s, Batch Loss=1.11] 

질문: <usr> 2001년 4월 18일 이명박이 김경준과 결별하고 사임했다고 주장한 자리는?  </s> <sys> LKe뱅크 대표이사직 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 1645년 한인들이 변발을 극렬히 반대하며 든 효경의 구절을 들어 극렬히 반대한 것은 무엇인가? </s> <sys> 변발 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 소수 남은 시위대들도 자진 해산하며 모든 집회가 마무리 되었던 시간은 대략 몇시인가? </s> <sys> 오전 7시 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 천도 책임자로 오토모 씨에게 암살당한 인물은? </s> <sys> 후지와라노 다네쓰구 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 아사미 유마가 응모한 오디션은? </s> <sys> 모닝구 무스메 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>


Epoch 1:   0%|          | 45/15102 [00:02<11:41, 21.46it/s, Batch Loss=0.753]

질문: <usr> 대통령 선거에서 전두환이 2위와 몇 표 차이가 났는가? </s> <sys> 2500표 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 신경영양요소들의 방출을 통해 뉴런과 시냅스의 발달을 증가시키는 활동은?  </s> <sys> 운동 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 민변이 사법절차에 회부된 표창원 의원에 법률적 지원을 아끼지 않겠다고 발표를 한 날짜는? </s> <sys> 12월 4일 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 매카트니가 머무르던 농장은? </s> <sys> 하이드 파크의 농장 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 현재 게임 버전에서 궤도 연산은 무엇을 대신하여 Patched conic approximation을 사용하고 있는가? </s> <sys> 완전한 N체 시뮬레이션 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>


Epoch 1:   0%|          | 48/15102 [00:02<11:42, 21.44it/s, Batch Loss=1.2]  

질문: <usr> 인천 상륙 작전 당시 유엔군 총사령관은? </s> <sys> 더글러스 맥아더 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 외계 행성을 발견했다는 주장은 언제부터 있었는가? </s> <sys> 19세기 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> '일본에서 대지진이 발생하지 않을 것'이라는 예측에 대한 반증이 되었던 사건은? </s> <sys> 2004년 남아시아 대지진 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 푸리에가 저술한 책의 이름은? </s> <sys> 여성의 종속 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 소나타 형식은 반복을 포함하지 않고 최대 몇 마디 까지 있는가? </s> <sys> 468마디 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>

Epoch 1:   0%|          | 54/15102 [00:02<11:25, 21.96it/s, Batch Loss=0.561]

질문: <usr> 학생운동 중에 검거된 학생들이 받은 형량은? </s> <sys> 최고 29일, 최하 15일 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 뉴캐슬의 현재 엠블럼에는 타인사이드의 바다를 상징하는 어떤 동물이 등장하는가? </s> <sys> 해마 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 조건부 세비 반납 약속을 한 새누리당 소속 후보자는 몇 명인가? </s> <sys> 40명 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 제3의 위치 이론가들은 정치적으로는 어떠한 것을 원하는가? </s> <sys> 급진적인 것 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 올림픽이 다시 성공하기 시작한 1906년 올림픽 개최장소는? </s> <sys> 아테네 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><

Epoch 1:   0%|          | 60/15102 [00:03<11:21, 22.07it/s, Batch Loss=0.854]

질문: <usr> 싸이의 강남스타일은 빌보드 싱글 메인 차트 "핫 100"에서 몇 주 연속 2위를 기록하였나? </s> <sys> 7주 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 오버워치에서 경기가 끝나기도 전에 일부러 게임에서 나가는 행위를 무엇이라고 하는가? </s> <sys> 탈주 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 최초의 교회 분열을 일으키고 파문당한 사람은? </s> <sys> 몬타누스 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> NPC를 창설한 여성회를 주도한 사람은? </s> <sys> 노마 엘리자베스 보이드 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 비틀즈가 제작한 영화 데뷔작은? </s> <sys> 하드 데이즈 나이트 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>


Epoch 1:   0%|          | 66/15102 [00:03<11:09, 22.46it/s, Batch Loss=0.531]

질문: <usr> 현대백화점 베트남 법인은 몇년도에 설립이 되었나? </s> <sys> 2014년 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 2011년 신규 국가사업으로 공모를 거쳐 실시설계를 완료하고 2014년에 완공된 순천대학교의 건축물은? </s> <sys> 국제문화컨벤션관 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 자유여성주의자들이 주장하는 사회와 국가가 여성에게 강요하는 관습은? </s> <sys> 모성애 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 로마의 두 집정관이 잠정적으로 화해하며 기원전 40년에 이루어진 조약은 무엇인가? </s> <sys> 브룬디시움 조약 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 칼리스토의 다중 대야 중 가장  큰 것은? </s> <sys> 발할라 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>


Epoch 1:   0%|          | 69/15102 [00:03<10:55, 22.94it/s, Batch Loss=0.826]

질문: <usr> 정종의 동생으로 후손들이 연이어 왕위에 오른 조선의 왕은? </s> <sys> 태종 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 프로토스가 테란에게 우세한 맵은? </s> <sys> 아즈텍 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 2008년 조 조나스와 사귀고 헤어진 후에 작곡한 곡은 무엇인가요? </s> <sys> Forever & Always </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 2008년 광물로 분류돼 왔던 것이 식품으로 인정된 것은?  </s> <sys> 천일염 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 빅히트는 2012년 누구와 합작하여 걸그룹을 제작하였는가? </s> <sys> 쏘스 뮤직 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>


Epoch 1:   0%|          | 75/15102 [00:03<11:05, 22.59it/s, Batch Loss=0.977]

질문: <usr> 검은등오스트레일리아까치는 어디에서만 발견되는가? </s> <sys> 호크스베이 지역 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 제 2차 취리히 전투 이후 마세나는 수보로프가 이끄는 러시아군을 격파하기 위해 몇 명의 병사를 이끌고 진군하는가? </s> <sys> 80,000명 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 아데만 여명 사건 전에 앰바고를 지키지 않은 언론사들에게 징계를 내리던 주체는 무엇인가? </s> <sys> 기자단 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 조직적 댓글 조작으로 구속된 사람 중 친문재인 성향의 파워블로그는 누구인가? </s> <sys> 드루킹 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 테일러 스위프트의 6집 앨범이 정식 발매된 것은 언제인가? </s> <sys> 2017년 11월 10일 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>


Epoch 1:   1%|          | 78/15102 [00:03<10:50, 23.09it/s, Batch Loss=0.83] 

질문: <usr> 박격포소대장 중위의 이름은 무엇입니까? </s> <sys> 잭 허드스페스 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 공수부대가 방어군 본부에게 몇시에 공격이 시작될거라고 했나? </s> <sys> 2시 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 토르를 의심하는 스<unk>에게 변명을 한 인물의 이름은? </s> <sys> 로키 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 오픈 액세스를 자유로이 이용할 수 있는 장소는 어디인가? </s> <sys> 공중 인터넷 망 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 탈 집중화 이론이 나온 년도는? </s> <sys> 1990년대 초반 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad

Epoch 1:   1%|          | 84/15102 [00:04<10:48, 23.17it/s, Batch Loss=0.876]

질문: <usr> 브이 포 벤데타를 준비하면서 맥테이그가 주요한 영향을 받은 영화는? </s> <sys> 알제리 전투 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 조선인을 강제 징용하여 석탄 노동을 시킨 하시마 섬의 우리나라 명칭은? </s> <sys> 군함도 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 영빈관에 머물라는 박정희 대통령의 말을 무시하고 주한 미군 내에 숙소를 정하는 등 박정희 대통령과 대립각을 세웠던 인물은? </s> <sys> 카터 대통령 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 이준용이 일본 공사 오토리 공사를 설득하기 위해 두 차례나 일본공사관을 방문한 달은 몇 월인가? </s> <sys> 6월 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 이집트 신전에서 가장 신성한 장소는 어디인가? </s> <sys> 사원 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>


Epoch 1:   1%|          | 90/15102 [00:04<11:13, 22.28it/s, Batch Loss=1.21] 

질문: <usr> 안산 반월신도시의 하수도 처리방식은? </s> <sys> 우수와 오수의 분리식 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 두툽상어는 몇 센티미터까지 자랄 수 있는가? </s> <sys> 50 센티미터까지 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 목탁수구리는 몇 킬로그램까지 자라는가? </s> <sys> 170 킬로그램 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 블랙홀로부터의 탈출이 불가능해지는 경계를 무엇이라고 하는가? </s> <sys> 사건의 지평선 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 3월 10일 에스비에스에서 방송된 알파고 대 이세돌의 바둑경기 제2국의 최고 시청률은? </s> <sys> 10.87% </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>


Epoch 1:   1%|          | 93/15102 [00:04<11:13, 22.27it/s, Batch Loss=0.982]

질문: <usr> 모치히토 왕의 동생이자 천황이었던 사람은? </s> <sys> 다카쿠라 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 람보르기니 컨셉트 S가 공개된 행사는? </s> <sys> 제네바 모터쇼 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 행성의 공전은 어떠한 과학 이론에 따라 발생하는 현상인가? </s> <sys> 뉴턴의 법칙 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 강희제가 티베트를 합병한 해는? </s> <sys> 1721년 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 한 대의 트리옹팡급 잠수함에 탑재되어 있는 핵 탄두는 총 발인가? </s> <sys> 96발 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><p

Epoch 1:   1%|          | 99/15102 [00:04<11:22, 21.99it/s, Batch Loss=1.59] 

질문: <usr> 신공항 후보지인 밀양 부동산의 소유주 중 절반은 누구인가? </s> <sys> 외지인 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 오스트레일리아까치 암수가 자기 텃세를 각인하거나 지키기 위해 내는 울음소리는? </s> <sys> 캐럴 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 2010년 발매된 소녀시대의 두 번째 정규앨범 제목은? </s> <sys> Oh </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 일본이 화해 치유재단에 출연한 금액은? </s> <sys> 10억 엔 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 믹스테이프 May 25에서 B.o.B의 데뷔 싱글 Nothin' on You를 피처링한 가수는? </s> <sys> 브루노 마스 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad>


Epoch 1:   1%|          | 105/15102 [00:05<11:35, 21.55it/s, Batch Loss=0.723]

질문: <usr> 예수가 배드로에게 나를 사랑하느냐고 물은 횟수는? </s> <sys> 세 번 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 일베저장소에서 노무현 대통령과 코알라를 합성한 이미지를 무엇이라고 하는가? </s> <sys> 노알라 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 구록에서 영허스밴드가 티베트군에 요구한 것은 무엇인가? </s> <sys> 무장해제 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 일본은 조현병을 어떻게 개명하여 부르고있는가? </s> <sys> 통합실조증 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>


Epoch 1:   1%|          | 108/15102 [00:05<11:31, 21.68it/s, Batch Loss=1.08] 

질문: <usr> 미국 육군 주일 미 제8군제24사단 21연대 제1대대가 7월 1일 상륙한 곳은 어디인가? </s> <sys> 부산 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 큰개미핥기는 항문 주위에 있는 분비샘으로부터 나오는 분비물로 무엇을 하는가? </s> <sys> 의사소통 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 2009년 소녀시대의 Gee가 엠넷 차트에서 1위를 차지한 기간은? </s> <sys> 8주 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 743년 대불 조헝 교지를 내려 주조하기 시작해 752년에 완성된 이 대불의 이름은 무엇인가? </s> <sys> 도다이지 대불 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 평창 올림픽 북한 예술단으로 청봉악단 가수들의 도움으로 사랑이란 노래를 부른 북한 가수는? </s> <sys> 리혁철 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 하현우가 군제대를 한 년도는? </s> <sys> 2006년 </s><pad><pad><pad><pad

Epoch 1:   1%|          | 114/15102 [00:05<10:47, 23.13it/s, Batch Loss=0.758]

질문: <usr> 비교적 가까운 장기기억은 뇌의 어느 부분에서 이루어지는가? </s> <sys> 내측 측두엽 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 레알 마드리드로 이적한 그의 이적료는 얼마였는가? </s> <sys> 100.8M (<unk>85.1M) </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 개신교도들의 중요한 동맹으로서 개신교도들에 자금을 지원해주고 라인란트 지역에 군대 주둔을 허용한 공국은? </s> <sys> 사보이 공국 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 2000년부터 이어오던 테런스 롱의 연속 출장 기록은 몇경기까지 이루어졌나? </s> <sys> 456경기 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 대머리 카를이 이혼을 강요했지만 거부했던 사람은? </s> <sys> 루이 2세 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 인류전반에 대해 깊고 폭 넓은 사상을 전개한 책은? </s> <sys> 일반 은총 </

Epoch 1:   1%|          | 120/15102 [00:05<10:48, 23.10it/s, Batch Loss=0.811]

질문: <usr> 철구는 어떤 장면을 방송에 내보내 서비스 정지 처분을 받았나? </s> <sys> 10대 청소년에게 유해한 장면 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 가상 조직의 핵심 부분은 어떻게 구성되어 있는가? </s> <sys> 소수의 경영진 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 최희서의 데뷔 영화 제목은? </s> <sys> 킹콩을 들다 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 펜들턴 워드와 리베카 슈거가 직접 부른 곡명은 무엇인가? </s> <sys> Fry Song </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 여러 도로들과 서울을 잇는 도로로 전략적으로 매우 중요한 이 도로의 이름은? </s> <sys> 29번 도로 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad

Epoch 1:   1%|          | 126/15102 [00:05<11:35, 21.53it/s, Batch Loss=0.91] 

질문: <usr> 변희재의 선거운동원으로 항의하는 남성 시민에게 욕을 한 인물은? </s> <sys> 성호스님 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 1923년 김규식은 푸단 대학 상하이 캠퍼스에 어떤 과목의 교수로 초빙되었나? </s> <sys> 영문학 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 산란원반 천체들의 궤도는 어느 행성의 중력에 영향을 받는가? </s> <sys> 해왕성 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 조성민이 최진실의 자택을 찾아가 폭행한 때는? </s> <sys> 2004년 8월 1일 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 인간에 대한 오해의 저자는? </s> <sys> 스티븐 제이 굴드 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>


Epoch 1:   1%|          | 129/15102 [00:06<11:25, 21.84it/s, Batch Loss=0.633]

질문: <usr> 교황이 권력의 중심이 되어 교회 영역을 초월하는 권위를 발휘하기 시작한 계기는? </s> <sys> 서로마 제국이 멸망 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 2002년 8월 현재 일본에는 총 몇 기 전투기가 조달되었는가? </s> <sys> 98기 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 선거운동에 필요한 국민성금을 모으기 위해 벌였던 '희망돼지' 저금통 사업에서 추미애가 얻은 별명은? </s> <sys> 돼지엄마 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 오늘날 아프리카에 존재하는 독립 주권 국가의 수는? </s> <sys> 54개 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 의상과 분장 등으로 자신의 겉모습을 꾸미기도 하는 이들은? </s> <sys> 무대 예술가 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>


Epoch 1:   1%|          | 135/15102 [00:06<11:51, 21.05it/s, Batch Loss=1.26] 

질문: <usr> 1963년 이탈리아 정부에서 포도주 산업 관리를 위해 만든 법의 약자는 무엇인가? </s> <sys> DOC </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> <영원>의 PV는 언제 촬영되었는가? </s> <sys> 1997년 3월 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 2008년 UEFA 유로 예선전 당시 잉글랜드와 에스토니아 간의 경기에서 잉글랜드의 성적은 무엇인가? </s> <sys> 3-0 완승 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 소녀시대가 2011년 컴백하여 각종 음원 사이트에 1위로 오르고, 가온 디지털 종합 차트 및 빌보드 K-Pop 핫 100에서 1위에 오른 노래는 무엇인가? </s> <sys> "The Boys </s><pad><pad><pad><pad>
질문: <usr> 기독교 전승에서 사탄의 유혹에 굴복한 여자는 누구인가? </s> <sys> 하와 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>


Epoch 1:   1%|          | 141/15102 [00:06<11:09, 22.34it/s, Batch Loss=0.794]

질문: <usr> 반월지구의 명칭이 안산시로 확정되어 승격된 해는? </s> <sys> 1986년 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 칙프렌드가 제공하고 있던 서비스는? </s> <sys> 해충 통제 서비스 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 1882년 7월 23일 구식 군인들에 의해 일어난 전쟁은? </s> <sys> 임오군란 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 북한이 한번에 동시 발사할 수 있는 미사일 예상 수량은? </s> <sys> 200 개 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 하드모드에서는 최대 체력이 메인에 비해 어느 정도인가? </s> <sys> 절반 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><p

Epoch 1:   1%|          | 147/15102 [00:06<10:47, 23.08it/s, Batch Loss=0.572]

질문: <usr> 랄프와 노턴이라는 고래상어가 삶을 마친 장소는? </s> <sys> 조지아 수족관 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 북한의 남침을 정당화하는 주장에서 대한민국 국군이 침입한 횟수는 얼마인가? </s> <sys> 432회 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 허리 부상을 입고도 김연아가 이코노미 석에서 비행했던 시간은? </s> <sys> 13시간 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 2002년 4월 10일 데포르티보와의 경기에서 베컴이 부상당한 곳은 어디인가? </s> <sys> 왼발 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 나카모리 아키나의 데뷔전 소속사 이름은? </s> <sys> 켄온 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>


Epoch 1:   1%|          | 150/15102 [00:07<10:54, 22.84it/s, Batch Loss=0.781]

질문: <usr> 전두환이 육군소위에 임관하여 보임된 곳은? </s> <sys> 제 25보병사단 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 오버워치는 어떤 타이틀의 개발이 취소되면서 만들어지게 되었는가? </s> <sys> 타이탄 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 로마 원로원이 율리우스 카이사르를 신으로 선포한 날짜는? </s> <sys> 기원전 42년 1월 1일 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 두 번째 시즌에서 비디오게임 형식의 3D를 도입한 에피소드는 몇 번째 에피소드인가? </s> <sys> 열여섯 번째 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 이휘소가 페르미 연구소의 이론물리학 부장에 취임하면서 겸임한 것은 무엇인가? </s> <sys> 시카고 대학교의 교수 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>


Epoch 1:   1%|          | 156/15102 [00:07<10:27, 23.80it/s, Batch Loss=0.767]

질문: <usr> 헤이우드는 누구의 방대한 저술들의 핵심을 단순한 명제로 증류시켰는가? </s> <sys> 카를 마르크스 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 폴 로다는 어느나라 사람인가? </s> <sys> 영국 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 국회의사당이 폭파 될 때 불꽃놀이의 모양은 무엇인가? </s> <sys> V 모양 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 대한민국에 민주주의가 들어서기 시작한 년도는? </s> <sys> 1987년 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 고려시대 광종이 혜거국사를 왕사로 모시러 갔다가 마시고 속병이 나았다고 하는 어정수가  나오는 절 이름은? </s> <sys> 칠성암 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pa

Epoch 1:   1%|          | 162/15102 [00:07<10:34, 23.54it/s, Batch Loss=0.845]

질문: <usr> 전리수소영역에서 태어난 별들 중 가장 무거운 것들이 일으키는 것은? </s> <sys> 초신성 폭발 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 전시 국무 차관이자 미육군 공병사령부의 지휘권을 갖고 있던 사람은 누구인가? </s> <sys> 로버트 페터슨 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 태풍 매미로 우리나라에서 측정된 최대순간풍속에서 상위를 기록한 대부분의 지방이 위치한 곳은? </s> <sys> 남부 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 폴링은 벤젠의 구조를 무엇에 기초하여 설명하였는가 </s> <sys> 양자역학 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
질문: <usr> 맥그리거의 딸은 몇 명인가? </s> <sys> 4명 </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>


Epoch 1:   1%|          | 162/15102 [00:07<10:34, 23.54it/s, Batch Loss=0.557]

In [7]:

question = "인공지능이란?"

# 입력 텍스트 생성
input_text = f"<usr> {question} </s>"
input_ids = tokenizer.encode(
    input_text,
    max_length=100,
    truncation=True,
    padding="max_length",
    return_tensors="pt"
).to(model.device)

# KoGPT2로 문장 생성
model.eval()
with torch.no_grad():
    output_ids = model.generate(
        input_ids=input_ids,
        max_length=400,
        repetition_penalty=2.0,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
        bos_token_id=tokenizer.bos_token_id,
    )

generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=False)

# 결과 출력
print(f"질문: {question}")
print(f"생성된 답변: {generated_text}")

질문: 인공지능이란?
생성된 답변: <usr> 인공지능이란? </s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <sys> </s>


In [8]:
model.eval()
for epoch in range(epochs):
    epoch_loss = 0
    progress_bar = tqdm(enumerate(train_dataloader), desc=f"Epoch {epoch + 1}", total=len(train_dataloader))

    for batch_idx, batch in progress_bar:
        # if batch_idx >= max_batches_per_epoch:  # 조기 종료 조건
        #     print(f"Stopping early at batch {batch_idx} in epoch {epoch + 1}")
        #     break

        input_ids = batch["input_ids"].to(device)
        # cut off where the question endss
        # find where the question ends
        sep_token = tokenizer.convert_tokens_to_ids("<sys>")
        sep_positions = (input_ids == sep_token).nonzero(as_tuple=True)[1]
        if len(sep_positions) >= 1:
            input_ids = input_ids[:,:sep_positions[0]+1]
        
        
        labels = batch["labels"].to(device)

        with torch.no_grad():
            output_ids = model.generate(
                input_ids=input_ids,
                max_length=513,
                repetition_penalty=2.0,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id,
                bos_token_id=tokenizer.bos_token_id,
            )
        
        print(f"질문: {tokenizer.decode(input_ids[0], skip_special_tokens=False)}")
        print(f"생성된 답변: {tokenizer.decode(output_ids[0], skip_special_tokens=False)}")
        break

Epoch 1:   0%|          | 0/15102 [00:04<?, ?it/s]

질문: <usr> 세 가지 형태의 환경교육 방법들이 별 의미가 없다고 하여 통합하여 부르는 용어는? </s> <sys>
생성된 답변: <usr> 세 가지 형태의 환경교육 방법들이 별 의미가 없다고 하여 통합하여 부르는 용어는? </s> <sys> <pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><p


