In [1]:
from torch.utils.data import DataLoader, Dataset
from tokenizers import SentencePieceBPETokenizer
import json
import pandas as pd
import numpy as np
import logging
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import PreTrainedTokenizerFast, GPT2LMHeadModel
import math
import os
from argparse import ArgumentParser
import torch
from tokenizers import SentencePieceBPETokenizer
from torch.optim import Adam
from tqdm import tqdm_notebook
from transformers import GPT2LMHeadModel, get_linear_schedule_with_warmup
from korquad_qg.config import QGConfig
from korquad_qg.utils import TqdmLoggingHandler
from typing import List, NamedTuple, Optional, Tuple

# 코쿼드 데이터셋을 이용한 GPT2 데이터셋 생성

In [None]:
with open('korquad.json', 'r') as f:

    json_data = json.load(f)

context_list = []
answers_list = []
question_list = []

for data in json_data['data']:
    for sub_data in data['paragraphs']:
        context = sub_data['context']
        for qa in sub_data['qas']:
            context_list.append(context)
            answers_list.append(qa['answers'][0]['text'])
            question_list.append(qa['question'])
            if len(qa['answers']) > 1:
                print(qa['answers'])
                
data = {
        'context':context_list,
        'answers': answers_list,
        'question':question_list
    }
df = pd.DataFrame(data)
df.to_csv('KorQuad_train_V1.csv',index=False)

# 토크나이저 생성

In [None]:
txt_f = open("sentences.txt", 'r')
all_dict = []
max_len = 0
count = 0
for line in tqdm_notebook(txt_f.readlines()):
    if '\n' in line:
        line = line.replace('\n', '')
    if '##' in line:
        line = line.replace('##', '')
    if len(line) > max_len:
        max_len = len(line)
    all_dict.append(line)
    count += 1
print(max_len)


f = open("data_save_test.txt", 'w')
for i in all_dict:
    f.write(i+'\n')
f.close()

# Initialize a tokenizer
tokenizer = SentencePieceBPETokenizer()
# Then train it!

tokenizer.train_from_iterator([all_dict], vocab_size=300000, min_frequency=1, limit_alphabet=100000)
tokenizer.save("./history_tokenizer.json",pretty=True)
tokenizer.save_model(directory='./')

In [None]:
txt_f = open("sentences.txt", 'r')
max_len = 512
count = 0
tok = PreTrainedTokenizerFast(tokenizer_file='Tokenizer/History/history_tokenizer.json')
for line in tqdm_notebook(txt_f.readlines()):
    if '\n' in line:
        line = line.replace('\n', '')
    if '##' in line:
        line = line.replace('##', '')
    if len(tok.tokenize(line)) + 7 > max_len:
        max_len = len(line)
        count += 1
print(max_len, count)

# 데이터셋 생성

In [2]:
Q_TKN = '<q>'
A_TKN = '<a>'
BOS = '<s>'
EOS = '</s>'
MASK = '<mask>'
C_TKN = '<c>'
PAD = '<pad>'
TOKENIZER = PreTrainedTokenizerFast(tokenizer_file='Tokenizer/tokenizer.json',
            bos_token=BOS, eos_token=EOS, unk_token='<unk>', 
            pad_token=PAD, mask_token=MASK)

## 데이터 셋 클래스

In [3]:
GPTDecodingInputType = Tuple[torch.Tensor, torch.Tensor]
GPTInputsType = Tuple[torch.Tensor, torch.Tensor, torch.Tensor]
GPTFeaturesType = Tuple[List[int], List[float], List[int]]

class QAExample(NamedTuple):
    context: str
    answer: str
    question: Optional[str] = None

def load_korquad_dataset(dataset_path: str) -> List[QAExample]:
    korquad = [pd.read_csv(dataset_path)]
    max_len = 512
    examples = []
    for document in korquad:
        for i in tqdm_notebook(range(len(document))):
            if len(TOKENIZER.tokenize(document["context"][i])) + 10 <= max_len:
                example = QAExample(document["context"][i], document["answers"][i], document["question"][i])
                examples.append(example)
        
    return examples
    
def dynamic_padding_collate_fn(features: List[GPTFeaturesType]) -> GPTInputsType:
    max_seq_len = max([len(feature[0]) for feature in features])
    input_ids, attention_mask, labels = [], [], []

    for feature in features:
        padded_input_ids = feature[0] + [0] * (max_seq_len - len(feature[0]))
        padded_attention_mask = feature[1] + [0.0] * (max_seq_len - len(feature[1]))
        padded_labels = feature[2] + [-100] * (max_seq_len - len(feature[2]))

        input_ids.append(padded_input_ids)
        attention_mask.append(padded_attention_mask)
        labels.append(padded_labels)

    return torch.tensor(input_ids), torch.tensor(attention_mask), torch.tensor(labels)

In [4]:
class HistoryQGDataset(Dataset):
    def __init__(
        self,
        examples: List[QAExample],
        tokenizer: SentencePieceBPETokenizer,
        max_sequence_length: int,
        is_train: bool = True,
    ) -> None:
        self.examples = examples
        self.tokenizer = tokenizer
        self.max_sequence_length = max_sequence_length

        self.sos_token = tokenizer.convert_tokens_to_ids("<s>")
        self.eos_token = tokenizer.convert_tokens_to_ids("</s>")
        self.question_prefix_tokens = tokenizer.convert_tokens_to_ids('<q>')

        self.is_train = is_train

    def __getitem__(self, index: int) -> GPTFeaturesType:
        example = self.examples[index]

        context_tokens = self.tokenizer.convert_tokens_to_ids(self.tokenizer.tokenize(f"<c>{example.context}"))
        answer_tokens = self.tokenizer.convert_tokens_to_ids(self.tokenizer.tokenize(f"<a>{example.answer}"))
        question_tokens = self.tokenizer.convert_tokens_to_ids(self.tokenizer.tokenize(f"{example.question}"))
        
        # [SOS] + 문맥:CONTEXT + 정답:ANSWER + 질문:
        conditional_tokens_len = 1 + len(context_tokens) + len(answer_tokens) + 1
        # QUESTION + [EOS]
        post_tokens_len = len(question_tokens) + 1

        if conditional_tokens_len + post_tokens_len > self.max_sequence_length:
            available_seq_len = (
                self.max_sequence_length - conditional_tokens_len - post_tokens_len + len(context_tokens)
            )
            context_tokens = context_tokens[:available_seq_len]

        conditional_tokens = [self.sos_token] + context_tokens + answer_tokens + [self.question_prefix_tokens]
        post_tokens = question_tokens + [self.eos_token]
        input_ids = conditional_tokens + post_tokens

        labels = input_ids if self.is_train else ([-100] * len(conditional_tokens)) + post_tokens
        attention_mask = [1.0] * len(input_ids)

        assert len(input_ids) <= self.max_sequence_length

        return input_ids, attention_mask, labels
        #return torch.tensor(input_ids), torch.tensor(attention_mask), torch.tensor(labels)

    def __len__(self) -> int:
        return len(self.examples)


In [5]:
def _create_logger(output_dir: str):
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter("[%(asctime)s] %(message)s")

    file_handler = logging.FileHandler(os.path.join(output_dir, "train.log"))
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    handler = TqdmLoggingHandler()
    handler.setFormatter(logging.Formatter("%(asctime)s - %(message)s"))
    logger.addHandler(handler)
    return logger

In [6]:
def _validate(
    model: GPT2LMHeadModel,
    dev_dataloader: DataLoader,
    device: torch.device,
    logger: logging.Logger,
    global_step: int,
):
    model.eval()
    loss_list = []
    for batch_data in tqdm_notebook(dev_dataloader, desc="[EVAL]"):
        with torch.no_grad():
            input_ids, attention_mask, labels = tuple(value.to(device) for value in batch_data)
            model_outputs = model.forward(input_ids, attention_mask=attention_mask, labels=labels, return_dict=True)
            loss_list.append(model_outputs.loss.item())

    mean_loss = np.mean(loss_list)
    logger.info(f"[EVAL] global_step:{global_step} loss:{mean_loss:.4f} perplexity:{math.exp(mean_loss):.4f}")
    model.train()

# 모델 학습

In [7]:
config = QGConfig()

tokenizer = PreTrainedTokenizerFast(tokenizer_file='Tokenizer/tokenizer.json',
            bos_token=BOS, eos_token=EOS, unk_token='<unk>', 
            pad_token=PAD, mask_token=MASK)

logger = _create_logger(output_dir=config.output_dir)
logger.info("============================")
for key, value in config._asdict().items():
    logger.info(f"{key:30}:{value}")
logger.info("============================")
torch.manual_seed(config.random_seed)

logger.info("loading train dataset")
train_examples = load_korquad_dataset(config.train_dataset)
train_dataset = HistoryQGDataset(train_examples, tokenizer, config.max_sequence_length)
train_dataloader = torch.utils.data.DataLoader(train_dataset, 16, shuffle=True, collate_fn=dynamic_padding_collate_fn)
#train_dataloader = torch.utils.data.DataLoader(train_dataset, 1, shuffle=True)
logger.info("loading dev dataset")
dev_examples = load_korquad_dataset(config.dev_dataset)
dev_dataset = HistoryQGDataset(dev_examples, tokenizer, config.max_sequence_length, is_train=False)
dev_dataloader = DataLoader(dev_dataset, 16, collate_fn=dynamic_padding_collate_fn)
#dev_dataloader = DataLoader(dev_dataset, 1)

#model 생성
model = GPT2LMHeadModel.from_pretrained('skt/kogpt2-base-v2')
model.load_state_dict(torch.load('outputs/After_History_Finetuning/gpt2_step_18000.pth'))
#model.load_state_dict(torch.load('outputs/After_History_Finetuning/gpt2_step_18000.pth', map_location="cpu"))
print(model.transformer.wte.weight.shape[0], len(tokenizer.vocab))

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

optimizer = Adam(model.parameters(), lr=config.lr)
total_steps = len(train_dataloader) * config.epochs
warmup_steps = int(total_steps * config.warmup_ratio)
scheduler = get_linear_schedule_with_warmup(optimizer, warmup_steps, total_steps)

loss_list_between_log_interval = []
for epoch_id in range(config.epochs):
    for step_index, batch_data in tqdm_notebook(
            enumerate(train_dataloader), f"[TRAIN] EP:{epoch_id}", total=len(train_dataloader)
    ):
        global_step = len(train_dataloader) * epoch_id + step_index + 1
        optimizer.zero_grad()

        token_ids, attention_mask, labels = tuple(value.to(device) for value in batch_data)
        model_outputs = model.forward(token_ids, attention_mask=attention_mask, labels=labels, return_dict=True)
        model_outputs.loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), config.grad_clip)
        optimizer.step()
        scheduler.step()

        # for logging
        loss_list_between_log_interval.append(model_outputs.loss.item())

        if global_step % config.train_log_interval == 0:
            mean_loss = np.mean(loss_list_between_log_interval)
            logger.info(
                    f"EP:{epoch_id} global_step:{global_step} "
                    f"loss:{mean_loss:.4f} perplexity:{math.exp(mean_loss):.4f}"
                )
            loss_list_between_log_interval.clear()
            
        if global_step % config.validation_interval == 0:
                _validate(model, dev_dataloader, device, logger, global_step)
                
        if global_step % config.save_interval == 0:
            state_dict = model.state_dict()
            model_path = os.path.join(config.output_dir, f"gpt2_step_{global_step}.pth")
            torch.save(state_dict, model_path)

2021-11-14 12:48:42,440 - train_dataset                 :data/History_train_After_FT_V3.csv
2021-11-14 12:48:42,440 - dev_dataset                   :data/History_train_After_FT_V1.csv
2021-11-14 12:48:42,441 - max_sequence_length           :512
2021-11-14 12:48:42,441 - epochs                        :100
2021-11-14 12:48:42,441 - lr                            :5e-05
2021-11-14 12:48:42,441 - train_batch_size              :2
2021-11-14 12:48:42,441 - dev_batch_size                :2
2021-11-14 12:48:42,442 - output_dir                    :outputs/
2021-11-14 12:48:42,442 - grad_clip                     :1.0
2021-11-14 12:48:42,442 - warmup_ratio                  :0.1
2021-11-14 12:48:42,442 - train_log_interval            :100
2021-11-14 12:48:42,442 - validation_interval           :1000
2021-11-14 12:48:42,442 - save_interval                 :1000
2021-11-14 12:48:42,442 - random_seed                   :0
2021-11-14 12:48:42,444 - loading train dataset


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  from ipykernel import kernelapp as app


HBox(children=(FloatProgress(value=0.0, max=9490.0), HTML(value='')))


2021-11-14 12:48:46,129 - loading dev dataset


HBox(children=(FloatProgress(value=0.0, max=6275.0), HTML(value='')))


51200 51200


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:0', max=593.0, style=ProgressStyle(description…

2021-11-14 12:49:42,763 - EP:0 global_step:100 loss:3.4378 perplexity:31.1193
2021-11-14 12:50:28,169 - EP:0 global_step:200 loss:3.1450 perplexity:23.2200
2021-11-14 12:51:12,875 - EP:0 global_step:300 loss:2.9008 perplexity:18.1882
2021-11-14 12:51:58,648 - EP:0 global_step:400 loss:2.7945 perplexity:16.3541
2021-11-14 12:52:44,388 - EP:0 global_step:500 loss:2.6676 perplexity:14.4058



HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:1', max=593.0, style=ProgressStyle(description…

2021-11-14 12:53:28,070 - EP:1 global_step:600 loss:2.5233 perplexity:12.4692
2021-11-14 12:54:14,289 - EP:1 global_step:700 loss:2.3799 perplexity:10.8040
2021-11-14 12:54:58,997 - EP:1 global_step:800 loss:2.2670 perplexity:9.6504
2021-11-14 12:55:43,637 - EP:1 global_step:900 loss:2.2224 perplexity:9.2293
2021-11-14 12:56:29,264 - EP:1 global_step:1000 loss:2.1383 perplexity:8.4852


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  # Remove the CWD from sys.path while we load stuff.


HBox(children=(FloatProgress(value=0.0, description='[EVAL]', max=392.0, style=ProgressStyle(description_width…


2021-11-14 12:57:27,119 - [EVAL] global_step:1000 loss:0.5868 perplexity:1.7982
2021-11-14 12:58:13,703 - EP:1 global_step:1100 loss:2.8872 perplexity:17.9431



HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:2', max=593.0, style=ProgressStyle(description…

2021-11-14 12:58:59,997 - EP:2 global_step:1200 loss:2.7987 perplexity:16.4237
2021-11-14 12:59:44,815 - EP:2 global_step:1300 loss:2.6224 perplexity:13.7686
2021-11-14 13:00:30,352 - EP:2 global_step:1400 loss:2.5986 perplexity:13.4445
2021-11-14 13:01:17,466 - EP:2 global_step:1500 loss:2.5933 perplexity:13.3738
2021-11-14 13:02:05,790 - EP:2 global_step:1600 loss:2.6024 perplexity:13.4958
2021-11-14 13:02:51,038 - EP:2 global_step:1700 loss:2.5178 perplexity:12.4012



HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:3', max=593.0, style=ProgressStyle(description…

2021-11-14 13:03:37,556 - EP:3 global_step:1800 loss:2.4951 perplexity:12.1234
2021-11-14 13:04:23,345 - EP:3 global_step:1900 loss:2.3355 perplexity:10.3346
2021-11-14 13:05:08,057 - EP:3 global_step:2000 loss:2.2941 perplexity:9.9151


HBox(children=(FloatProgress(value=0.0, description='[EVAL]', max=392.0, style=ProgressStyle(description_width…


2021-11-14 13:06:05,645 - [EVAL] global_step:2000 loss:0.5290 perplexity:1.6972
2021-11-14 13:06:51,417 - EP:3 global_step:2100 loss:2.2699 perplexity:9.6783
2021-11-14 13:07:36,639 - EP:3 global_step:2200 loss:2.3094 perplexity:10.0684
2021-11-14 13:08:23,270 - EP:3 global_step:2300 loss:2.3127 perplexity:10.1021



HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:4', max=593.0, style=ProgressStyle(description…

2021-11-14 13:09:09,623 - EP:4 global_step:2400 loss:2.2251 perplexity:9.2548
2021-11-14 13:09:54,593 - EP:4 global_step:2500 loss:2.0812 perplexity:8.0139
2021-11-14 13:10:39,391 - EP:4 global_step:2600 loss:2.0430 perplexity:7.7136
2021-11-14 13:11:24,824 - EP:4 global_step:2700 loss:2.0647 perplexity:7.8827
2021-11-14 13:12:11,112 - EP:4 global_step:2800 loss:2.0985 perplexity:8.1543
2021-11-14 13:12:57,688 - EP:4 global_step:2900 loss:2.0959 perplexity:8.1327



HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:5', max=593.0, style=ProgressStyle(description…

2021-11-14 13:13:43,914 - EP:5 global_step:3000 loss:1.9795 perplexity:7.2391


HBox(children=(FloatProgress(value=0.0, description='[EVAL]', max=392.0, style=ProgressStyle(description_width…


2021-11-14 13:14:41,600 - [EVAL] global_step:3000 loss:0.4592 perplexity:1.5827
2021-11-14 13:15:27,335 - EP:5 global_step:3100 loss:1.8452 perplexity:6.3296
2021-11-14 13:16:12,866 - EP:5 global_step:3200 loss:1.8777 perplexity:6.5382
2021-11-14 13:16:59,744 - EP:5 global_step:3300 loss:1.8851 perplexity:6.5870
2021-11-14 13:17:46,664 - EP:5 global_step:3400 loss:1.8738 perplexity:6.5128
2021-11-14 13:18:31,949 - EP:5 global_step:3500 loss:1.8834 perplexity:6.5757



HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:6', max=593.0, style=ProgressStyle(description…

2021-11-14 13:19:17,174 - EP:6 global_step:3600 loss:1.7439 perplexity:5.7195
2021-11-14 13:20:04,905 - EP:6 global_step:3700 loss:1.6886 perplexity:5.4120
2021-11-14 13:20:50,119 - EP:6 global_step:3800 loss:1.6819 perplexity:5.3759
2021-11-14 13:21:36,108 - EP:6 global_step:3900 loss:1.6835 perplexity:5.3844
2021-11-14 13:22:22,039 - EP:6 global_step:4000 loss:1.7126 perplexity:5.5434


HBox(children=(FloatProgress(value=0.0, description='[EVAL]', max=392.0, style=ProgressStyle(description_width…


2021-11-14 13:23:19,698 - [EVAL] global_step:4000 loss:0.3952 perplexity:1.4847
2021-11-14 13:24:06,134 - EP:6 global_step:4100 loss:1.6836 perplexity:5.3847



HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:7', max=593.0, style=ProgressStyle(description…

2021-11-14 13:24:50,052 - EP:7 global_step:4200 loss:1.5715 perplexity:4.8139
2021-11-14 13:25:35,110 - EP:7 global_step:4300 loss:1.4667 perplexity:4.3348
2021-11-14 13:26:21,280 - EP:7 global_step:4400 loss:1.5141 perplexity:4.5451
2021-11-14 13:27:06,952 - EP:7 global_step:4500 loss:1.5599 perplexity:4.7581
2021-11-14 13:27:53,467 - EP:7 global_step:4600 loss:1.5586 perplexity:4.7523
2021-11-14 13:28:39,092 - EP:7 global_step:4700 loss:1.5662 perplexity:4.7885



HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:8', max=593.0, style=ProgressStyle(description…

2021-11-14 13:29:23,572 - EP:8 global_step:4800 loss:1.4122 perplexity:4.1050
2021-11-14 13:30:09,885 - EP:8 global_step:4900 loss:1.3560 perplexity:3.8808
2021-11-14 13:30:52,984 - EP:8 global_step:5000 loss:1.3483 perplexity:3.8510


HBox(children=(FloatProgress(value=0.0, description='[EVAL]', max=392.0, style=ProgressStyle(description_width…


2021-11-14 13:31:50,733 - [EVAL] global_step:5000 loss:0.3252 perplexity:1.3843
2021-11-14 13:32:37,862 - EP:8 global_step:5100 loss:1.3891 perplexity:4.0113
2021-11-14 13:33:23,890 - EP:8 global_step:5200 loss:1.4359 perplexity:4.2035
2021-11-14 13:34:10,462 - EP:8 global_step:5300 loss:1.4439 perplexity:4.2373



HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:9', max=593.0, style=ProgressStyle(description…

2021-11-14 13:34:54,714 - EP:9 global_step:5400 loss:1.2373 perplexity:3.4464
2021-11-14 13:35:40,212 - EP:9 global_step:5500 loss:1.2229 perplexity:3.3969
2021-11-14 13:36:26,296 - EP:9 global_step:5600 loss:1.2493 perplexity:3.4878
2021-11-14 13:37:13,844 - EP:9 global_step:5700 loss:1.2993 perplexity:3.6668
2021-11-14 13:38:00,925 - EP:9 global_step:5800 loss:1.2697 perplexity:3.5599
2021-11-14 13:38:47,179 - EP:9 global_step:5900 loss:1.3099 perplexity:3.7058



HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:10', max=593.0, style=ProgressStyle(descriptio…

2021-11-14 13:39:32,344 - EP:10 global_step:6000 loss:1.1288 perplexity:3.0919


HBox(children=(FloatProgress(value=0.0, description='[EVAL]', max=392.0, style=ProgressStyle(description_width…


2021-11-14 13:40:30,020 - [EVAL] global_step:6000 loss:0.2649 perplexity:1.3033
2021-11-14 13:41:16,745 - EP:10 global_step:6100 loss:1.0651 perplexity:2.9013
2021-11-14 13:42:02,804 - EP:10 global_step:6200 loss:1.1314 perplexity:3.1000
2021-11-14 13:42:48,141 - EP:10 global_step:6300 loss:1.1383 perplexity:3.1213
2021-11-14 13:43:34,702 - EP:10 global_step:6400 loss:1.1785 perplexity:3.2494
2021-11-14 13:44:21,365 - EP:10 global_step:6500 loss:1.1960 perplexity:3.3068



HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:11', max=593.0, style=ProgressStyle(descriptio…

2021-11-14 13:45:06,804 - EP:11 global_step:6600 loss:0.9677 perplexity:2.6319
2021-11-14 13:45:53,342 - EP:11 global_step:6700 loss:0.9758 perplexity:2.6534
2021-11-14 13:46:39,621 - EP:11 global_step:6800 loss:0.9703 perplexity:2.6387
2021-11-14 13:47:25,723 - EP:11 global_step:6900 loss:1.0168 perplexity:2.7644
2021-11-14 13:48:13,212 - EP:11 global_step:7000 loss:1.0617 perplexity:2.8911


HBox(children=(FloatProgress(value=0.0, description='[EVAL]', max=392.0, style=ProgressStyle(description_width…


2021-11-14 13:49:10,953 - [EVAL] global_step:7000 loss:0.2119 perplexity:1.2361
2021-11-14 13:49:56,013 - EP:11 global_step:7100 loss:1.0263 perplexity:2.7907



HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:12', max=593.0, style=ProgressStyle(descriptio…

2021-11-14 13:50:42,551 - EP:12 global_step:7200 loss:0.8449 perplexity:2.3277
2021-11-14 13:51:27,397 - EP:12 global_step:7300 loss:0.8198 perplexity:2.2701
2021-11-14 13:52:12,657 - EP:12 global_step:7400 loss:0.8889 perplexity:2.4326
2021-11-14 13:52:58,010 - EP:12 global_step:7500 loss:0.8921 perplexity:2.4402
2021-11-14 13:53:44,198 - EP:12 global_step:7600 loss:0.8977 perplexity:2.4539
2021-11-14 13:54:29,560 - EP:12 global_step:7700 loss:0.9428 perplexity:2.5671



HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:13', max=593.0, style=ProgressStyle(descriptio…

2021-11-14 13:55:16,090 - EP:13 global_step:7800 loss:0.7427 perplexity:2.1017
2021-11-14 13:56:02,780 - EP:13 global_step:7900 loss:0.7605 perplexity:2.1392
2021-11-14 13:56:46,454 - EP:13 global_step:8000 loss:0.7542 perplexity:2.1260


HBox(children=(FloatProgress(value=0.0, description='[EVAL]', max=392.0, style=ProgressStyle(description_width…


2021-11-14 13:57:44,188 - [EVAL] global_step:8000 loss:0.1775 perplexity:1.1942
2021-11-14 13:58:32,185 - EP:13 global_step:8100 loss:0.8073 perplexity:2.2419
2021-11-14 13:59:17,527 - EP:13 global_step:8200 loss:0.7900 perplexity:2.2033
2021-11-14 14:00:03,127 - EP:13 global_step:8300 loss:0.8113 perplexity:2.2508



HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:14', max=593.0, style=ProgressStyle(descriptio…

2021-11-14 14:00:48,460 - EP:14 global_step:8400 loss:0.6472 perplexity:1.9102
2021-11-14 14:01:34,880 - EP:14 global_step:8500 loss:0.6572 perplexity:1.9294
2021-11-14 14:02:21,532 - EP:14 global_step:8600 loss:0.6837 perplexity:1.9813
2021-11-14 14:03:07,935 - EP:14 global_step:8700 loss:0.6897 perplexity:1.9931
2021-11-14 14:03:52,913 - EP:14 global_step:8800 loss:0.7281 perplexity:2.0712



HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:15', max=593.0, style=ProgressStyle(descriptio…

2021-11-14 14:04:37,844 - EP:15 global_step:8900 loss:0.7271 perplexity:2.0690
2021-11-14 14:05:24,433 - EP:15 global_step:9000 loss:0.5705 perplexity:1.7691


HBox(children=(FloatProgress(value=0.0, description='[EVAL]', max=392.0, style=ProgressStyle(description_width…


2021-11-14 14:06:22,198 - [EVAL] global_step:9000 loss:0.1384 perplexity:1.1485
2021-11-14 14:07:09,262 - EP:15 global_step:9100 loss:0.5821 perplexity:1.7897
2021-11-14 14:07:54,037 - EP:15 global_step:9200 loss:0.6120 perplexity:1.8441
2021-11-14 14:08:41,006 - EP:15 global_step:9300 loss:0.6259 perplexity:1.8699
2021-11-14 14:09:26,822 - EP:15 global_step:9400 loss:0.6446 perplexity:1.9052



HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:16', max=593.0, style=ProgressStyle(descriptio…

2021-11-14 14:10:11,288 - EP:16 global_step:9500 loss:0.6337 perplexity:1.8846
2021-11-14 14:10:56,446 - EP:16 global_step:9600 loss:0.5006 perplexity:1.6497
2021-11-14 14:11:42,833 - EP:16 global_step:9700 loss:0.5310 perplexity:1.7006
2021-11-14 14:12:29,299 - EP:16 global_step:9800 loss:0.5595 perplexity:1.7499
2021-11-14 14:13:14,158 - EP:16 global_step:9900 loss:0.5709 perplexity:1.7698
2021-11-14 14:13:59,205 - EP:16 global_step:10000 loss:0.5639 perplexity:1.7575


HBox(children=(FloatProgress(value=0.0, description='[EVAL]', max=392.0, style=ProgressStyle(description_width…


2021-11-14 14:14:56,939 - [EVAL] global_step:10000 loss:0.1111 perplexity:1.1175



HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:17', max=593.0, style=ProgressStyle(descriptio…

2021-11-14 14:15:43,306 - EP:17 global_step:10100 loss:0.5582 perplexity:1.7476
2021-11-14 14:16:29,117 - EP:17 global_step:10200 loss:0.4602 perplexity:1.5844
2021-11-14 14:17:13,647 - EP:17 global_step:10300 loss:0.4791 perplexity:1.6145
2021-11-14 14:18:00,810 - EP:17 global_step:10400 loss:0.5007 perplexity:1.6499
2021-11-14 14:18:45,516 - EP:17 global_step:10500 loss:0.4980 perplexity:1.6454
2021-11-14 14:19:32,436 - EP:17 global_step:10600 loss:0.5199 perplexity:1.6818



HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:18', max=593.0, style=ProgressStyle(descriptio…

2021-11-14 14:20:18,199 - EP:18 global_step:10700 loss:0.4894 perplexity:1.6313
2021-11-14 14:21:05,584 - EP:18 global_step:10800 loss:0.4295 perplexity:1.5364
2021-11-14 14:21:50,983 - EP:18 global_step:10900 loss:0.4281 perplexity:1.5344
2021-11-14 14:22:37,953 - EP:18 global_step:11000 loss:0.4421 perplexity:1.5559


HBox(children=(FloatProgress(value=0.0, description='[EVAL]', max=392.0, style=ProgressStyle(description_width…


2021-11-14 14:23:35,768 - [EVAL] global_step:11000 loss:0.0965 perplexity:1.1013
2021-11-14 14:24:22,526 - EP:18 global_step:11100 loss:0.4549 perplexity:1.5760
2021-11-14 14:25:07,190 - EP:18 global_step:11200 loss:0.4640 perplexity:1.5904



HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:19', max=593.0, style=ProgressStyle(descriptio…

2021-11-14 14:25:52,260 - EP:19 global_step:11300 loss:0.4471 perplexity:1.5638
2021-11-14 14:26:39,033 - EP:19 global_step:11400 loss:0.3772 perplexity:1.4581
2021-11-14 14:27:26,136 - EP:19 global_step:11500 loss:0.3917 perplexity:1.4796
2021-11-14 14:28:11,362 - EP:19 global_step:11600 loss:0.4032 perplexity:1.4967
2021-11-14 14:28:58,102 - EP:19 global_step:11700 loss:0.4207 perplexity:1.5231
2021-11-14 14:29:42,760 - EP:19 global_step:11800 loss:0.4252 perplexity:1.5299



HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:20', max=593.0, style=ProgressStyle(descriptio…

2021-11-14 14:30:27,911 - EP:20 global_step:11900 loss:0.3916 perplexity:1.4793
2021-11-14 14:31:15,291 - EP:20 global_step:12000 loss:0.3469 perplexity:1.4146


HBox(children=(FloatProgress(value=0.0, description='[EVAL]', max=392.0, style=ProgressStyle(description_width…


2021-11-14 14:32:13,118 - [EVAL] global_step:12000 loss:0.0819 perplexity:1.0854
2021-11-14 14:32:59,765 - EP:20 global_step:12100 loss:0.3672 perplexity:1.4438
2021-11-14 14:33:42,552 - EP:20 global_step:12200 loss:0.3665 perplexity:1.4427
2021-11-14 14:34:30,155 - EP:20 global_step:12300 loss:0.3831 perplexity:1.4668
2021-11-14 14:35:15,631 - EP:20 global_step:12400 loss:0.3900 perplexity:1.4770



HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:21', max=593.0, style=ProgressStyle(descriptio…

2021-11-14 14:36:00,157 - EP:21 global_step:12500 loss:0.3606 perplexity:1.4342
2021-11-14 14:36:46,643 - EP:21 global_step:12600 loss:0.3254 perplexity:1.3846
2021-11-14 14:37:31,725 - EP:21 global_step:12700 loss:0.3251 perplexity:1.3842
2021-11-14 14:38:16,541 - EP:21 global_step:12800 loss:0.3475 perplexity:1.4156
2021-11-14 14:39:03,050 - EP:21 global_step:12900 loss:0.3535 perplexity:1.4241
2021-11-14 14:39:50,090 - EP:21 global_step:13000 loss:0.3659 perplexity:1.4418


HBox(children=(FloatProgress(value=0.0, description='[EVAL]', max=392.0, style=ProgressStyle(description_width…


2021-11-14 14:40:47,952 - [EVAL] global_step:13000 loss:0.0751 perplexity:1.0779



HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:22', max=593.0, style=ProgressStyle(descriptio…

2021-11-14 14:41:35,937 - EP:22 global_step:13100 loss:0.3295 perplexity:1.3902
2021-11-14 14:42:21,082 - EP:22 global_step:13200 loss:0.3070 perplexity:1.3594
2021-11-14 14:43:07,024 - EP:22 global_step:13300 loss:0.3124 perplexity:1.3667
2021-11-14 14:43:53,460 - EP:22 global_step:13400 loss:0.3190 perplexity:1.3758
2021-11-14 14:44:37,562 - EP:22 global_step:13500 loss:0.3272 perplexity:1.3871
2021-11-14 14:45:24,127 - EP:22 global_step:13600 loss:0.3321 perplexity:1.3939



HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:23', max=593.0, style=ProgressStyle(descriptio…

2021-11-14 14:46:08,202 - EP:23 global_step:13700 loss:0.2931 perplexity:1.3406
2021-11-14 14:46:52,226 - EP:23 global_step:13800 loss:0.2862 perplexity:1.3314
2021-11-14 14:47:37,392 - EP:23 global_step:13900 loss:0.2942 perplexity:1.3421
2021-11-14 14:48:23,374 - EP:23 global_step:14000 loss:0.2986 perplexity:1.3479


HBox(children=(FloatProgress(value=0.0, description='[EVAL]', max=392.0, style=ProgressStyle(description_width…


2021-11-14 14:49:21,321 - [EVAL] global_step:14000 loss:0.0683 perplexity:1.0707
2021-11-14 14:50:08,892 - EP:23 global_step:14100 loss:0.3018 perplexity:1.3523
2021-11-14 14:50:56,777 - EP:23 global_step:14200 loss:0.3145 perplexity:1.3696



HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:24', max=593.0, style=ProgressStyle(descriptio…

2021-11-14 14:51:42,440 - EP:24 global_step:14300 loss:0.2732 perplexity:1.3141
2021-11-14 14:52:27,706 - EP:24 global_step:14400 loss:0.2589 perplexity:1.2955
2021-11-14 14:53:14,233 - EP:24 global_step:14500 loss:0.2698 perplexity:1.3097
2021-11-14 14:54:01,042 - EP:24 global_step:14600 loss:0.2770 perplexity:1.3192
2021-11-14 14:54:47,988 - EP:24 global_step:14700 loss:0.2878 perplexity:1.3335
2021-11-14 14:55:31,660 - EP:24 global_step:14800 loss:0.2961 perplexity:1.3447



HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:25', max=593.0, style=ProgressStyle(descriptio…

2021-11-14 14:56:16,926 - EP:25 global_step:14900 loss:0.2547 perplexity:1.2901
2021-11-14 14:57:02,303 - EP:25 global_step:15000 loss:0.2527 perplexity:1.2875


HBox(children=(FloatProgress(value=0.0, description='[EVAL]', max=392.0, style=ProgressStyle(description_width…


2021-11-14 14:58:00,201 - [EVAL] global_step:15000 loss:0.0660 perplexity:1.0683
2021-11-14 14:58:47,210 - EP:25 global_step:15100 loss:0.2599 perplexity:1.2968
2021-11-14 14:59:34,647 - EP:25 global_step:15200 loss:0.2620 perplexity:1.2995
2021-11-14 15:00:20,470 - EP:25 global_step:15300 loss:0.2682 perplexity:1.3076
2021-11-14 15:01:05,137 - EP:25 global_step:15400 loss:0.2742 perplexity:1.3154



HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:26', max=593.0, style=ProgressStyle(descriptio…

2021-11-14 15:01:51,826 - EP:26 global_step:15500 loss:0.2345 perplexity:1.2643
2021-11-14 15:02:37,644 - EP:26 global_step:15600 loss:0.2358 perplexity:1.2659
2021-11-14 15:03:24,496 - EP:26 global_step:15700 loss:0.2445 perplexity:1.2770
2021-11-14 15:04:11,558 - EP:26 global_step:15800 loss:0.2483 perplexity:1.2819
2021-11-14 15:04:55,962 - EP:26 global_step:15900 loss:0.2520 perplexity:1.2866
2021-11-14 15:05:43,080 - EP:26 global_step:16000 loss:0.2623 perplexity:1.3000


HBox(children=(FloatProgress(value=0.0, description='[EVAL]', max=392.0, style=ProgressStyle(description_width…


2021-11-14 15:06:40,978 - [EVAL] global_step:16000 loss:0.0620 perplexity:1.0640



HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:27', max=593.0, style=ProgressStyle(descriptio…

2021-11-14 15:07:27,635 - EP:27 global_step:16100 loss:0.2171 perplexity:1.2425
2021-11-14 15:08:13,594 - EP:27 global_step:16200 loss:0.2242 perplexity:1.2513
2021-11-14 15:08:59,367 - EP:27 global_step:16300 loss:0.2300 perplexity:1.2586
2021-11-14 15:09:45,687 - EP:27 global_step:16400 loss:0.2354 perplexity:1.2654
2021-11-14 15:10:32,178 - EP:27 global_step:16500 loss:0.2462 perplexity:1.2792
2021-11-14 15:11:18,137 - EP:27 global_step:16600 loss:0.2425 perplexity:1.2744



HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:28', max=593.0, style=ProgressStyle(descriptio…

2021-11-14 15:12:04,387 - EP:28 global_step:16700 loss:0.2100 perplexity:1.2337
2021-11-14 15:12:49,547 - EP:28 global_step:16800 loss:0.2161 perplexity:1.2413
2021-11-14 15:13:36,127 - EP:28 global_step:16900 loss:0.2207 perplexity:1.2470
2021-11-14 15:14:20,794 - EP:28 global_step:17000 loss:0.2247 perplexity:1.2520


HBox(children=(FloatProgress(value=0.0, description='[EVAL]', max=392.0, style=ProgressStyle(description_width…


2021-11-14 15:15:18,638 - [EVAL] global_step:17000 loss:0.0607 perplexity:1.0626
2021-11-14 15:16:06,436 - EP:28 global_step:17100 loss:0.2305 perplexity:1.2593



HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:29', max=593.0, style=ProgressStyle(descriptio…

2021-11-14 15:16:51,676 - EP:29 global_step:17200 loss:0.2297 perplexity:1.2582
2021-11-14 15:17:38,320 - EP:29 global_step:17300 loss:0.1972 perplexity:1.2180
2021-11-14 15:18:22,671 - EP:29 global_step:17400 loss:0.2086 perplexity:1.2319
2021-11-14 15:19:07,962 - EP:29 global_step:17500 loss:0.2113 perplexity:1.2352
2021-11-14 15:19:53,480 - EP:29 global_step:17600 loss:0.2143 perplexity:1.2389
2021-11-14 15:20:38,841 - EP:29 global_step:17700 loss:0.2178 perplexity:1.2433



HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:30', max=593.0, style=ProgressStyle(descriptio…

2021-11-14 15:21:26,926 - EP:30 global_step:17800 loss:0.2181 perplexity:1.2437
2021-11-14 15:22:13,124 - EP:30 global_step:17900 loss:0.1872 perplexity:1.2059
2021-11-14 15:22:57,572 - EP:30 global_step:18000 loss:0.1991 perplexity:1.2203


HBox(children=(FloatProgress(value=0.0, description='[EVAL]', max=392.0, style=ProgressStyle(description_width…


2021-11-14 15:23:55,360 - [EVAL] global_step:18000 loss:0.0588 perplexity:1.0606
2021-11-14 15:24:41,105 - EP:30 global_step:18100 loss:0.2022 perplexity:1.2241
2021-11-14 15:25:26,763 - EP:30 global_step:18200 loss:0.2064 perplexity:1.2293
2021-11-14 15:26:14,079 - EP:30 global_step:18300 loss:0.2114 perplexity:1.2355



HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:31', max=593.0, style=ProgressStyle(descriptio…

2021-11-14 15:26:59,913 - EP:31 global_step:18400 loss:0.2066 perplexity:1.2295
2021-11-14 15:27:45,413 - EP:31 global_step:18500 loss:0.1846 perplexity:1.2027
2021-11-14 15:28:31,367 - EP:31 global_step:18600 loss:0.1919 perplexity:1.2116
2021-11-14 15:29:18,434 - EP:31 global_step:18700 loss:0.1928 perplexity:1.2126
2021-11-14 15:30:02,027 - EP:31 global_step:18800 loss:0.1980 perplexity:1.2189
2021-11-14 15:30:49,502 - EP:31 global_step:18900 loss:0.2002 perplexity:1.2216



HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:32', max=593.0, style=ProgressStyle(descriptio…

2021-11-14 15:31:35,303 - EP:32 global_step:19000 loss:0.1961 perplexity:1.2167


HBox(children=(FloatProgress(value=0.0, description='[EVAL]', max=392.0, style=ProgressStyle(description_width…


2021-11-14 15:32:33,088 - [EVAL] global_step:19000 loss:0.0590 perplexity:1.0608
2021-11-14 15:33:19,839 - EP:32 global_step:19100 loss:0.1783 perplexity:1.1952
2021-11-14 15:34:05,802 - EP:32 global_step:19200 loss:0.1848 perplexity:1.2029
2021-11-14 15:34:51,922 - EP:32 global_step:19300 loss:0.1869 perplexity:1.2055
2021-11-14 15:35:38,473 - EP:32 global_step:19400 loss:0.1896 perplexity:1.2087
2021-11-14 15:36:23,750 - EP:32 global_step:19500 loss:0.1961 perplexity:1.2167



HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:33', max=593.0, style=ProgressStyle(descriptio…

2021-11-14 15:37:07,844 - EP:33 global_step:19600 loss:0.1895 perplexity:1.2087
2021-11-14 15:37:54,473 - EP:33 global_step:19700 loss:0.1709 perplexity:1.1864
2021-11-14 15:38:40,815 - EP:33 global_step:19800 loss:0.1781 perplexity:1.1949
2021-11-14 15:39:27,382 - EP:33 global_step:19900 loss:0.1814 perplexity:1.1989
2021-11-14 15:40:10,322 - EP:33 global_step:20000 loss:0.1893 perplexity:1.2085


HBox(children=(FloatProgress(value=0.0, description='[EVAL]', max=392.0, style=ProgressStyle(description_width…


2021-11-14 15:41:08,033 - [EVAL] global_step:20000 loss:0.0584 perplexity:1.0601
2021-11-14 15:41:54,798 - EP:33 global_step:20100 loss:0.1886 perplexity:1.2076



HBox(children=(FloatProgress(value=0.0, description='[TRAIN] EP:34', max=593.0, style=ProgressStyle(descriptio…

2021-11-14 15:42:39,719 - EP:34 global_step:20200 loss:0.1797 perplexity:1.1969



KeyboardInterrupt: 