# Description


# Modules and Global Variables

In [1]:
from transformers import (
    AutoConfig, AutoTokenizer, AutoModelForSequenceClassification, 
    DefaultDataCollator, DataCollatorWithPadding, 
    TrainingArguments, Trainer,
)

from transformers.optimization import (
    AdamW, get_linear_schedule_with_warmup,
    Adafactor, AdafactorSchedule,
)

import torch
import wandb

import datasets
import evaluate

from sklearn.metrics import accuracy_score, f1_score

import numpy as np
import pandas as pd

import os
import re
import random

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
print(f'torch.__version__: {torch.__version__}')
print(f'torch.cuda.is_available(): {torch.cuda.is_available()}')
NGPU = torch.cuda.device_count()
print(f'NGPU: {NGPU}')
# NGPU = torch.cuda.device_count()
# if NGPU > 1:
#     model = torch.nn.DataParallel(model, device_ids=list(range(NGPU)))

torch.__version__: 1.12.1
torch.cuda.is_available(): True
NGPU: 4


In [3]:
### labels

ce_labels = ['True', 'False']
pc_labels = ['positive', 'negative', 'neutral']
pc_binary_labels = ['True', 'False']

labels = ce_labels

label2id = {k: i for i, k in enumerate(labels)}
id2label = {i: k for i, k in enumerate(labels)}
num_labels = len(labels)

print(label2id)
print(id2label)

{'True': 0, 'False': 1}
{0: 'True', 1: 'False'}


In [4]:
### paths and names

PROJECT_NAME = 'aspect_category_detection'
RUN_ID = 'cleaned_v2_total_v1'

DATA_V = 'cleaned_v2_total_v1'
DATA_T = 'ce' # ce or pc or pc_binary
AUGMENTATION = False
AUG_NAME = 'balanced'

model_checkpoint = 'snunlp/KR-ELECTRA-discriminator'

notebook_name = 'acd_binary_trainer.ipynb'

### fixed

model_name = re.sub(r'[/-]', r'_', model_checkpoint).lower()
run_name = f'{model_name}_{RUN_ID}'

ROOT_PATH = './'
SAVE_PATH = os.path.join(ROOT_PATH, 'training_results', run_name, 'acd')
NOTEBOOK_PATH = os.path.join('./', notebook_name)

augornot = f'_{AUG_NAME}' if AUGMENTATION is True else ''
TRAIN_DATA_PATH = os.path.join(ROOT_PATH, 'dataset', DATA_V, f'{DATA_T}_train{augornot}.csv')
EVAL_DATA_PATH = os.path.join(ROOT_PATH, 'dataset', DATA_V, f'{DATA_T}_dev.csv')

!mkdir -p {SAVE_PATH}

In [5]:
if os.path.exists(SAVE_PATH):
    print(f'{SAVE_PATH} exists.')
else:
    print(f'{SAVE_PATH} does not exist.')
if os.path.exists(NOTEBOOK_PATH):
    print(f'{NOTEBOOK_PATH} exists.')
else:
    print(f'{NOTEBOOK_PATH} does not exist.')
if os.path.exists(TRAIN_DATA_PATH):
    print(f'{TRAIN_DATA_PATH} exists.')
else:
    print(f'{TRAIN_DATA_PATH} does not exist.')
if os.path.exists(EVAL_DATA_PATH):
    print(f'{EVAL_DATA_PATH} exists.')
else:
    print(f'{EVAL_DATA_PATH} does not exist.')

./training_results/snunlp_kr_electra_discriminator_cleaned_v2_total_v1/acd exists.
./acd_binary_trainer.ipynb exists.
./dataset/cleaned_v2_total_v1/ce_train.csv exists.
./dataset/cleaned_v2_total_v1/ce_dev.csv exists.


In [6]:
### rest of training args

report_to="wandb"

fp16 = False

num_train_epochs = 15
batch_size = 32
gradient_accumulation_steps = 1

optim = 'adamw_torch' # 'adamw_hf'

learning_rate = 5e-5 # 3e-6
weight_decay = 0.01 # 0
adam_epsilon = 1e-8

lr_scheduler_type = 'cosine'
warmup_ratio = 0

save_total_limit = 1

load_best_model_at_end = True
metric_for_best_model ='f1_macro'

save_strategy = "epoch"
evaluation_strategy = "epoch"

logging_strategy = "steps"
logging_first_step = True 
logging_steps = 500

# WandB Configuration

In [7]:
%env WANDB_PROJECT={PROJECT_NAME}
%env WANDB_NOTEBOOK_NAME={NOTEBOOK_PATH}
%env WANDB_LOG_MODEL=true
%env WANDB_WATCH=all
wandb.login()

env: WANDB_PROJECT=aspect_category_detection
env: WANDB_NOTEBOOK_NAME=./acd_binary_trainer.ipynb
env: WANDB_LOG_MODEL=true
env: WANDB_WATCH=all


[34m[1mwandb[0m: Currently logged in as: [33mdotsnangles[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

# Load Model, Tokenizer, and Collator

In [8]:
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint, label2id=label2id, id2label=id2label, num_labels=num_labels
)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Some weights of the model checkpoint at snunlp/KR-ELECTRA-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at snunlp/KR-ELECTRA-discriminator and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
train_path = './dataset/cleaned_v2_total_v1/raw_train.csv'
dev_path = './dataset/cleaned_v2_total_v1/raw_dev.csv'
test_path = './dataset/cleaned_v2_total_v1/raw_test.csv'
train = pd.read_csv(train_path)
dev = pd.read_csv(dev_path)
test = pd.read_csv(test_path)

In [10]:
### new
entity_property_pair = [
    '본품#가격', '본품#다양성', '본품#디자인', '본품#인지도', '본품#일반', '본품#편의성', '본품#품질',
    '브랜드#가격', '브랜드#디자인', '브랜드#인지도', '브랜드#일반', '브랜드#품질',
    '제품 전체#가격', '제품 전체#다양성', '제품 전체#디자인', '제품 전체#인지도', '제품 전체#일반', '제품 전체#편의성', '제품 전체#품질',
    '패키지/구성품#가격', '패키지/구성품#다양성', '패키지/구성품#디자인', '패키지/구성품#일반', '패키지/구성품#편의성', '패키지/구성품#품질'
]


# more_tokens = ['&name&', '&affiliation&', '&social-security-num&', '&tel-num&', '&card-num&', '&bank-account&', '&num&', '&online-account&']

# emojis = pd.concat([train.sentence_form, dev.sentence_form, test.sentence_form], ignore_index=True, verify_integrity=True).to_frame()
# emojis = list(set(demoji.findall(' '.join(emojis.sentence_form.to_list())).keys()))

# tokensToAdd = more_tokens + emojis
ep_labels = pd.Series(entity_property_pair, name='sentence_form', copy=True)

In [11]:
data = pd.concat([train.sentence_form, dev.sentence_form, test.sentence_form], ignore_index=True, verify_integrity=True).to_frame()
print(len(data))
data = data.drop_duplicates()
print(len(data.drop_duplicates()))

7920
7894


In [12]:
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
print(len(tokenizer))

tokenizerTrainData = data.sentence_form.to_list()
newTokenizer = tokenizer.train_new_from_iterator(tokenizerTrainData, vocab_size=1)

new_tokens = set(list(newTokenizer.vocab.keys())) - set(tokenizer.vocab.keys())
tokenizer.add_tokens(list(new_tokens))
print(len(newTokenizer))
print(len(tokenizer))

model.resize_token_embeddings(len(tokenizer))

30000



2615
30013


Embedding(30013, 768)

In [13]:
new_tokens

{'##뜌', '##읒', '##죱', '##쨕', '##쫜', '뜌', '뿤', '쓩', '읒', '죱', '쨕', '쫜', '챳'}

In [14]:
model.config.label2id, model.config.id2label, model.num_labels

({'True': 0, 'False': 1}, {0: 'True', 1: 'False'}, 2)

In [15]:
entity_property_pair = [
    '본품#가격', '본품#다양성', '본품#디자인', '본품#인지도', '본품#일반', '본품#편의성', '본품#품질',
    '브랜드#가격', '브랜드#디자인', '브랜드#인지도', '브랜드#일반', '브랜드#품질',
    '제품 전체#가격', '제품 전체#다양성', '제품 전체#디자인', '제품 전체#인지도', '제품 전체#일반', '제품 전체#편의성', '제품 전체#품질',
    '패키지/구성품#가격', '패키지/구성품#다양성', '패키지/구성품#디자인', '패키지/구성품#일반', '패키지/구성품#편의성', '패키지/구성품#품질'
]

polarity_id_to_name = ['positive', 'negative', 'neutral']

tokenizer_tester = []
for pair in entity_property_pair:
    for polarity in polarity_id_to_name:
        tokenizer_tester.append('#'.join([pair, polarity]))

for e in tokenizer_tester:
    print(tokenizer.decode(tokenizer.encode(e)))

for e in tokenizer_tester:
    print(tokenizer.encode(e))

[CLS] 본품 # 가격 # positive [SEP]
[CLS] 본품 # 가격 # negative [SEP]
[CLS] 본품 # 가격 # neutral [SEP]
[CLS] 본품 # 다양성 # positive [SEP]
[CLS] 본품 # 다양성 # negative [SEP]
[CLS] 본품 # 다양성 # neutral [SEP]
[CLS] 본품 # 디자인 # positive [SEP]
[CLS] 본품 # 디자인 # negative [SEP]
[CLS] 본품 # 디자인 # neutral [SEP]
[CLS] 본품 # 인지도 # positive [SEP]
[CLS] 본품 # 인지도 # negative [SEP]
[CLS] 본품 # 인지도 # neutral [SEP]
[CLS] 본품 # 일반 # positive [SEP]
[CLS] 본품 # 일반 # negative [SEP]
[CLS] 본품 # 일반 # neutral [SEP]
[CLS] 본품 # 편의성 # positive [SEP]
[CLS] 본품 # 편의성 # negative [SEP]
[CLS] 본품 # 편의성 # neutral [SEP]
[CLS] 본품 # 품질 # positive [SEP]
[CLS] 본품 # 품질 # negative [SEP]
[CLS] 본품 # 품질 # neutral [SEP]
[CLS] 브랜드 # 가격 # positive [SEP]
[CLS] 브랜드 # 가격 # negative [SEP]
[CLS] 브랜드 # 가격 # neutral [SEP]
[CLS] 브랜드 # 디자인 # positive [SEP]
[CLS] 브랜드 # 디자인 # negative [SEP]
[CLS] 브랜드 # 디자인 # neutral [SEP]
[CLS] 브랜드 # 인지도 # positive [SEP]
[CLS] 브랜드 # 인지도 # negative [SEP]
[CLS] 브랜드 # 인지도 # neutral [SEP]
[CLS] 브랜드 # 일반 # positive [SEP]
[CLS] 브랜드 # 일반 # nega

# Define Metric

In [16]:
accuracy_metric = evaluate.load('accuracy')
f1_metric = evaluate.load('f1')

In [17]:
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    
    accuracy = accuracy_metric.compute(references=labels, predictions=predictions)['accuracy']
    f1_true, f1_false = tuple(f1_metric.compute(references=labels, predictions=predictions, average=None, labels=[0,1])['f1'])
    f1_macro = f1_metric.compute(references=labels, predictions=predictions, average='macro')['f1']
    f1_micro = f1_metric.compute(references=labels, predictions=predictions, average='micro')['f1']
    
    return {'accuracy': accuracy, 'f1_true': f1_true, 'f1_false': f1_false, 'f1_macro': f1_macro, 'f1_micro': f1_micro}

# Load Data

In [18]:
def preprocess_function(examples):
    return tokenizer(examples["sentence_form"], examples["entity_property"], truncation=True)

In [19]:
train_dataset = pd.read_csv(TRAIN_DATA_PATH)
eval_dataset = pd.read_csv(EVAL_DATA_PATH)
# train_dataset = pd.concat([train_dataset, eval_dataset])
train_dataset = datasets.Dataset.from_pandas(train_dataset).shuffle(seed=42)
eval_dataset = datasets.Dataset.from_pandas(eval_dataset).shuffle(seed=42)
train_dataset = train_dataset.map(preprocess_function, batched=False)
eval_dataset = eval_dataset.map(preprocess_function, batched=False)

  0%|          | 0/144825 [00:00<?, ?ex/s]

  0%|          | 424/144825 [00:00<00:34, 4232.90ex/s]

  1%|          | 859/144825 [00:00<00:33, 4298.68ex/s]

  1%|          | 1289/144825 [00:00<00:36, 3905.58ex/s]

  1%|          | 1721/144825 [00:00<00:35, 4059.07ex/s]

  1%|▏         | 2131/144825 [00:00<00:35, 3975.25ex/s]

  2%|▏         | 2569/144825 [00:00<00:34, 4105.47ex/s]

  2%|▏         | 2997/144825 [00:00<00:34, 4160.54ex/s]

  2%|▏         | 3415/144825 [00:00<00:35, 4037.32ex/s]

  3%|▎         | 3850/144825 [00:00<00:34, 4131.33ex/s]

  3%|▎         | 4265/144825 [00:01<00:35, 4006.54ex/s]

  3%|▎         | 4698/144825 [00:01<00:34, 4100.60ex/s]

  4%|▎         | 5110/144825 [00:01<00:34, 4009.19ex/s]

  4%|▍         | 5528/144825 [00:01<00:34, 4058.36ex/s]

  4%|▍         | 5966/144825 [00:01<00:33, 4151.55ex/s]

  4%|▍         | 6383/144825 [00:01<00:40, 3406.15ex/s]

  5%|▍         | 6807/144825 [00:01<00:38, 3619.69ex/s]

  5%|▍         | 7189/144825 [00:01<00:37, 3644.82ex/s]

  5%|▌         | 7629/144825 [00:01<00:35, 3852.59ex/s]

  6%|▌         | 8026/144825 [00:02<00:35, 3868.22ex/s]

  6%|▌         | 8466/144825 [00:02<00:33, 4019.07ex/s]

  6%|▌         | 8899/144825 [00:02<00:33, 4109.13ex/s]

  6%|▋         | 9315/144825 [00:02<00:33, 4022.39ex/s]

  7%|▋         | 9754/144825 [00:02<00:32, 4128.84ex/s]

  7%|▋         | 10170/144825 [00:02<00:33, 4046.39ex/s]

  7%|▋         | 10610/144825 [00:02<00:32, 4147.99ex/s]

  8%|▊         | 11027/144825 [00:02<00:32, 4060.85ex/s]

  8%|▊         | 11473/144825 [00:02<00:31, 4176.23ex/s]

  8%|▊         | 11906/144825 [00:02<00:31, 4220.20ex/s]

  9%|▊         | 12330/144825 [00:03<00:32, 4112.01ex/s]

  9%|▉         | 12768/144825 [00:03<00:31, 4187.83ex/s]

  9%|▉         | 13188/144825 [00:03<00:32, 4086.11ex/s]

  9%|▉         | 13628/144825 [00:03<00:31, 4176.84ex/s]

 10%|▉         | 14047/144825 [00:03<00:32, 4048.45ex/s]

 10%|▉         | 14481/144825 [00:03<00:31, 4130.95ex/s]

 10%|█         | 14914/144825 [00:03<00:31, 4188.91ex/s]

 11%|█         | 15334/144825 [00:03<00:31, 4090.44ex/s]

 11%|█         | 15769/144825 [00:03<00:30, 4163.22ex/s]

 11%|█         | 16187/144825 [00:04<00:31, 4060.43ex/s]

 11%|█▏        | 16621/144825 [00:04<00:30, 4138.38ex/s]

 12%|█▏        | 17036/144825 [00:04<00:31, 4033.92ex/s]

 12%|█▏        | 17486/144825 [00:04<00:30, 4166.38ex/s]

 12%|█▏        | 17923/144825 [00:04<00:30, 4223.01ex/s]

 13%|█▎        | 18347/144825 [00:04<00:30, 4099.03ex/s]

 13%|█▎        | 18787/144825 [00:04<00:30, 4185.37ex/s]

 13%|█▎        | 19207/144825 [00:04<00:31, 4038.28ex/s]

 14%|█▎        | 19645/144825 [00:04<00:30, 4134.33ex/s]

 14%|█▍        | 20061/144825 [00:04<00:30, 4038.96ex/s]

 14%|█▍        | 20490/144825 [00:05<00:30, 4109.92ex/s]

 14%|█▍        | 20926/144825 [00:05<00:29, 4181.46ex/s]

 15%|█▍        | 21346/144825 [00:05<00:30, 4074.82ex/s]

 15%|█▌        | 21784/144825 [00:05<00:29, 4162.73ex/s]

 15%|█▌        | 22202/144825 [00:05<00:30, 4056.90ex/s]

 16%|█▌        | 22641/144825 [00:05<00:29, 4151.54ex/s]

 16%|█▌        | 23058/144825 [00:05<00:30, 4056.49ex/s]

 16%|█▌        | 23496/144825 [00:05<00:29, 4149.76ex/s]

 17%|█▋        | 23923/144825 [00:05<00:28, 4183.85ex/s]

 17%|█▋        | 24343/144825 [00:05<00:29, 4077.20ex/s]

 17%|█▋        | 24780/144825 [00:06<00:28, 4160.06ex/s]

 17%|█▋        | 25198/144825 [00:06<00:29, 4064.24ex/s]

 18%|█▊        | 25636/144825 [00:06<00:28, 4154.82ex/s]

 18%|█▊        | 26053/144825 [00:06<00:29, 4071.03ex/s]

 18%|█▊        | 26486/144825 [00:06<00:28, 4144.41ex/s]

 19%|█▊        | 26926/144825 [00:06<00:27, 4218.71ex/s]

 19%|█▉        | 27349/144825 [00:06<00:28, 4123.06ex/s]

 19%|█▉        | 27792/144825 [00:06<00:27, 4210.74ex/s]

 19%|█▉        | 28214/144825 [00:06<00:28, 4028.45ex/s]

 20%|█▉        | 28648/144825 [00:07<00:28, 4116.25ex/s]

 20%|██        | 29062/144825 [00:07<00:28, 4047.88ex/s]

 20%|██        | 29507/144825 [00:07<00:27, 4162.62ex/s]

 21%|██        | 29957/144825 [00:07<00:26, 4259.34ex/s]

 21%|██        | 30385/144825 [00:07<00:27, 4113.22ex/s]

 21%|██▏       | 30806/144825 [00:07<00:27, 4138.82ex/s]

 22%|██▏       | 31222/144825 [00:07<00:28, 4038.32ex/s]

 22%|██▏       | 31664/144825 [00:07<00:27, 4146.84ex/s]

 22%|██▏       | 32080/144825 [00:07<00:27, 4062.39ex/s]

 22%|██▏       | 32513/144825 [00:07<00:27, 4139.36ex/s]

 23%|██▎       | 32952/144825 [00:08<00:26, 4212.14ex/s]

 23%|██▎       | 33375/144825 [00:08<00:27, 4101.84ex/s]

 23%|██▎       | 33819/144825 [00:08<00:26, 4198.29ex/s]

 24%|██▎       | 34240/144825 [00:08<00:26, 4102.43ex/s]

 24%|██▍       | 34677/144825 [00:08<00:26, 4178.68ex/s]

 24%|██▍       | 35096/144825 [00:08<00:26, 4074.78ex/s]

 25%|██▍       | 35527/144825 [00:08<00:26, 4142.23ex/s]

 25%|██▍       | 35943/144825 [00:08<00:36, 2986.79ex/s]

 25%|██▌       | 36323/144825 [00:09<00:34, 3174.48ex/s]

 25%|██▌       | 36759/144825 [00:09<00:31, 3468.24ex/s]

 26%|██▌       | 37149/144825 [00:09<00:30, 3579.49ex/s]

 26%|██▌       | 37588/144825 [00:09<00:28, 3799.15ex/s]

 26%|██▌       | 38000/144825 [00:09<00:27, 3823.37ex/s]

 27%|██▋       | 38439/144825 [00:09<00:26, 3982.72ex/s]

 27%|██▋       | 38875/144825 [00:09<00:25, 4090.46ex/s]

 27%|██▋       | 39292/144825 [00:09<00:26, 4030.97ex/s]

 27%|██▋       | 39732/144825 [00:09<00:25, 4137.30ex/s]

 28%|██▊       | 40150/144825 [00:09<00:25, 4070.09ex/s]

 28%|██▊       | 40582/144825 [00:10<00:25, 4140.42ex/s]

 28%|██▊       | 41000/144825 [00:10<00:25, 4061.95ex/s]

 29%|██▊       | 41432/144825 [00:10<00:25, 4134.26ex/s]

 29%|██▉       | 41870/144825 [00:10<00:24, 4204.61ex/s]

 29%|██▉       | 42292/144825 [00:10<00:25, 4099.04ex/s]

 30%|██▉       | 42733/144825 [00:10<00:24, 4189.15ex/s]

 30%|██▉       | 43154/144825 [00:10<00:24, 4096.68ex/s]

 30%|███       | 43596/144825 [00:10<00:24, 4188.58ex/s]

 30%|███       | 44016/144825 [00:10<00:24, 4098.25ex/s]

 31%|███       | 44454/144825 [00:10<00:24, 4177.19ex/s]

 31%|███       | 44901/144825 [00:11<00:23, 4262.50ex/s]

 31%|███▏      | 45329/144825 [00:11<00:23, 4167.61ex/s]

 32%|███▏      | 45768/144825 [00:11<00:23, 4231.69ex/s]

 32%|███▏      | 46193/144825 [00:11<00:23, 4121.22ex/s]

 32%|███▏      | 46636/144825 [00:11<00:23, 4208.72ex/s]

 32%|███▏      | 47058/144825 [00:11<00:23, 4133.39ex/s]

 33%|███▎      | 47502/144825 [00:11<00:23, 4220.80ex/s]

 33%|███▎      | 47942/144825 [00:11<00:22, 4273.26ex/s]

 33%|███▎      | 48371/144825 [00:11<00:23, 4145.30ex/s]

 34%|███▎      | 48811/144825 [00:12<00:22, 4218.15ex/s]

 34%|███▍      | 49234/144825 [00:12<00:23, 4125.47ex/s]

 34%|███▍      | 49671/144825 [00:12<00:22, 4194.26ex/s]

 35%|███▍      | 50092/144825 [00:12<00:23, 4082.40ex/s]

 35%|███▍      | 50530/144825 [00:12<00:22, 4164.71ex/s]

 35%|███▌      | 50975/144825 [00:12<00:22, 4247.24ex/s]

 35%|███▌      | 51401/144825 [00:12<00:22, 4152.17ex/s]

 36%|███▌      | 51841/144825 [00:12<00:22, 4223.08ex/s]

 36%|███▌      | 52265/144825 [00:12<00:22, 4103.76ex/s]

 36%|███▋      | 52700/144825 [00:12<00:22, 4173.80ex/s]

 37%|███▋      | 53119/144825 [00:13<00:22, 4080.50ex/s]

 37%|███▋      | 53566/144825 [00:13<00:21, 4190.87ex/s]

 37%|███▋      | 54000/144825 [00:13<00:22, 4115.40ex/s]

 38%|███▊      | 54434/144825 [00:13<00:21, 4177.21ex/s]

 38%|███▊      | 54871/144825 [00:13<00:21, 4230.47ex/s]

 38%|███▊      | 55295/144825 [00:13<00:22, 3963.09ex/s]

 38%|███▊      | 55728/144825 [00:13<00:21, 4065.03ex/s]

 39%|███▉      | 56138/144825 [00:13<00:22, 3985.11ex/s]

 39%|███▉      | 56571/144825 [00:13<00:21, 4081.86ex/s]

 39%|███▉      | 57000/144825 [00:14<00:21, 4017.45ex/s]

 40%|███▉      | 57436/144825 [00:14<00:21, 4113.89ex/s]

 40%|███▉      | 57880/144825 [00:14<00:20, 4206.88ex/s]

 40%|████      | 58303/144825 [00:14<00:21, 4101.48ex/s]

 41%|████      | 58737/144825 [00:14<00:20, 4168.99ex/s]

 41%|████      | 59156/144825 [00:14<00:20, 4091.93ex/s]

 41%|████      | 59589/144825 [00:14<00:20, 4160.69ex/s]

 41%|████▏     | 60007/144825 [00:14<00:20, 4095.13ex/s]

 42%|████▏     | 60446/144825 [00:14<00:20, 4177.88ex/s]

 42%|████▏     | 60883/144825 [00:14<00:19, 4232.41ex/s]

 42%|████▏     | 61307/144825 [00:15<00:20, 4095.63ex/s]

 43%|████▎     | 61746/144825 [00:15<00:19, 4180.47ex/s]

 43%|████▎     | 62166/144825 [00:15<00:20, 4097.22ex/s]

 43%|████▎     | 62599/144825 [00:15<00:19, 4162.71ex/s]

 44%|████▎     | 63017/144825 [00:15<00:20, 4069.86ex/s]

 44%|████▍     | 63452/144825 [00:15<00:19, 4150.41ex/s]

 44%|████▍     | 63894/144825 [00:15<00:19, 4227.01ex/s]

 44%|████▍     | 64318/144825 [00:15<00:19, 4130.57ex/s]

 45%|████▍     | 64754/144825 [00:15<00:19, 4195.75ex/s]

 45%|████▌     | 65175/144825 [00:15<00:19, 4097.88ex/s]

 45%|████▌     | 65607/144825 [00:16<00:19, 4160.25ex/s]

 46%|████▌     | 66024/144825 [00:16<00:19, 4062.24ex/s]

 46%|████▌     | 66462/144825 [00:16<00:18, 4153.40ex/s]

 46%|████▌     | 66884/144825 [00:16<00:18, 4170.59ex/s]

 46%|████▋     | 67302/144825 [00:16<00:19, 4060.84ex/s]

 47%|████▋     | 67739/144825 [00:16<00:18, 4149.04ex/s]

 47%|████▋     | 68155/144825 [00:16<00:18, 4048.58ex/s]

 47%|████▋     | 68591/144825 [00:16<00:18, 4137.38ex/s]

 48%|████▊     | 69006/144825 [00:16<00:18, 4080.81ex/s]

 48%|████▊     | 69439/144825 [00:17<00:18, 4152.17ex/s]

 48%|████▊     | 69886/144825 [00:17<00:17, 4244.64ex/s]

 49%|████▊     | 70312/144825 [00:17<00:18, 4112.05ex/s]

 49%|████▉     | 70750/144825 [00:17<00:17, 4187.04ex/s]

 49%|████▉     | 71170/144825 [00:17<00:18, 4084.38ex/s]

 49%|████▉     | 71607/144825 [00:17<00:17, 4165.74ex/s]

 50%|████▉     | 72025/144825 [00:17<00:17, 4048.92ex/s]

 50%|█████     | 72463/144825 [00:17<00:17, 4144.30ex/s]

 50%|█████     | 72899/144825 [00:17<00:17, 4205.99ex/s]

 51%|█████     | 73321/144825 [00:17<00:17, 4122.62ex/s]

 51%|█████     | 73762/144825 [00:18<00:16, 4204.54ex/s]

 51%|█████     | 74184/144825 [00:18<00:17, 4123.97ex/s]

 52%|█████▏    | 74630/144825 [00:18<00:16, 4221.48ex/s]

 52%|█████▏    | 75054/144825 [00:18<00:16, 4119.49ex/s]

 52%|█████▏    | 75490/144825 [00:18<00:16, 4187.00ex/s]

 52%|█████▏    | 75938/144825 [00:18<00:16, 4270.96ex/s]

 53%|█████▎    | 76366/144825 [00:18<00:16, 4190.76ex/s]

 53%|█████▎    | 76797/144825 [00:18<00:16, 4224.82ex/s]

 53%|█████▎    | 77221/144825 [00:18<00:16, 4106.97ex/s]

 54%|█████▎    | 77662/144825 [00:18<00:16, 4194.30ex/s]

 54%|█████▍    | 78083/144825 [00:19<00:16, 4079.91ex/s]

 54%|█████▍    | 78517/144825 [00:19<00:15, 4153.63ex/s]

 55%|█████▍    | 78946/144825 [00:19<00:15, 4193.32ex/s]

 55%|█████▍    | 79367/144825 [00:19<00:16, 4080.70ex/s]

 55%|█████▌    | 79803/144825 [00:19<00:15, 4160.83ex/s]

 55%|█████▌    | 80221/144825 [00:19<00:15, 4095.69ex/s]

 56%|█████▌    | 80658/144825 [00:19<00:15, 4172.37ex/s]

 56%|█████▌    | 81077/144825 [00:19<00:15, 4008.23ex/s]

 56%|█████▋    | 81512/144825 [00:19<00:15, 4105.84ex/s]

 57%|█████▋    | 81955/144825 [00:20<00:14, 4199.53ex/s]

 57%|█████▋    | 82377/144825 [00:20<00:15, 4143.30ex/s]

 57%|█████▋    | 82824/144825 [00:20<00:14, 4237.91ex/s]

 57%|█████▋    | 83249/144825 [00:20<00:14, 4116.83ex/s]

 58%|█████▊    | 83681/144825 [00:20<00:14, 4163.94ex/s]

 58%|█████▊    | 84099/144825 [00:20<00:14, 4076.59ex/s]

 58%|█████▊    | 84532/144825 [00:20<00:14, 4149.23ex/s]

 59%|█████▊    | 84975/144825 [00:20<00:14, 4231.22ex/s]

 59%|█████▉    | 85399/144825 [00:20<00:14, 4139.36ex/s]

 59%|█████▉    | 85839/144825 [00:20<00:13, 4214.02ex/s]

 60%|█████▉    | 86262/144825 [00:21<00:14, 4123.99ex/s]

 60%|█████▉    | 86708/144825 [00:21<00:13, 4219.90ex/s]

 60%|██████    | 87131/144825 [00:21<00:13, 4162.14ex/s]

 60%|██████    | 87579/144825 [00:21<00:13, 4254.21ex/s]

 61%|██████    | 88006/144825 [00:21<00:13, 4161.89ex/s]

 61%|██████    | 88440/144825 [00:21<00:13, 4212.35ex/s]

 61%|██████▏   | 88882/144825 [00:21<00:13, 4272.07ex/s]

 62%|██████▏   | 89310/144825 [00:21<00:13, 4177.91ex/s]

 62%|██████▏   | 89741/144825 [00:21<00:13, 4216.05ex/s]

 62%|██████▏   | 90164/144825 [00:21<00:13, 4104.69ex/s]

 63%|██████▎   | 90609/144825 [00:22<00:12, 4204.11ex/s]

 63%|██████▎   | 91031/144825 [00:22<00:13, 4119.32ex/s]

 63%|██████▎   | 91478/144825 [00:22<00:12, 4219.43ex/s]

 63%|██████▎   | 91918/144825 [00:22<00:12, 4270.46ex/s]

 64%|██████▍   | 92346/144825 [00:22<00:12, 4131.32ex/s]

 64%|██████▍   | 92780/144825 [00:22<00:12, 4189.89ex/s]

 64%|██████▍   | 93201/144825 [00:22<00:12, 4076.53ex/s]

 65%|██████▍   | 93638/144825 [00:22<00:12, 4161.04ex/s]

 65%|██████▍   | 94056/144825 [00:22<00:12, 4033.46ex/s]

 65%|██████▌   | 94492/144825 [00:23<00:12, 4124.52ex/s]

 66%|██████▌   | 94939/144825 [00:23<00:11, 4222.70ex/s]

 66%|██████▌   | 95363/144825 [00:23<00:11, 4145.95ex/s]

 66%|██████▌   | 95811/144825 [00:23<00:11, 4242.80ex/s]

 66%|██████▋   | 96237/144825 [00:23<00:11, 4151.11ex/s]

 67%|██████▋   | 96677/144825 [00:23<00:11, 4222.11ex/s]

 67%|██████▋   | 97101/144825 [00:23<00:11, 4160.00ex/s]

 67%|██████▋   | 97547/144825 [00:23<00:11, 4245.96ex/s]

 68%|██████▊   | 97997/144825 [00:23<00:10, 4318.28ex/s]

 68%|██████▊   | 98430/144825 [00:23<00:10, 4223.03ex/s]

 68%|██████▊   | 98882/144825 [00:24<00:10, 4306.93ex/s]

 69%|██████▊   | 99314/144825 [00:24<00:10, 4192.66ex/s]

 69%|██████▉   | 99749/144825 [00:24<00:10, 4237.20ex/s]

 69%|██████▉   | 100174/144825 [00:24<00:10, 4107.92ex/s]

 69%|██████▉   | 100617/144825 [00:24<00:10, 4198.94ex/s]

 70%|██████▉   | 101039/144825 [00:24<00:10, 4124.41ex/s]

 70%|███████   | 101487/144825 [00:24<00:10, 4226.41ex/s]

 70%|███████   | 101934/144825 [00:24<00:09, 4296.29ex/s]

 71%|███████   | 102365/144825 [00:24<00:10, 4203.28ex/s]

 71%|███████   | 102816/144825 [00:25<00:09, 4290.33ex/s]

 71%|███████▏  | 103246/144825 [00:25<00:13, 3080.48ex/s]

 72%|███████▏  | 103689/144825 [00:25<00:12, 3391.65ex/s]

 72%|███████▏  | 104072/144825 [00:25<00:11, 3499.40ex/s]

 72%|███████▏  | 104516/144825 [00:25<00:10, 3745.16ex/s]

 72%|███████▏  | 104953/144825 [00:25<00:10, 3915.14ex/s]

 73%|███████▎  | 105365/144825 [00:25<00:10, 3895.83ex/s]

 73%|███████▎  | 105801/144825 [00:25<00:09, 4024.83ex/s]

 73%|███████▎  | 106215/144825 [00:25<00:09, 3977.98ex/s]

 74%|███████▎  | 106653/144825 [00:26<00:09, 4091.39ex/s]

 74%|███████▍  | 107068/144825 [00:26<00:09, 4013.05ex/s]

 74%|███████▍  | 107504/144825 [00:26<00:09, 4111.53ex/s]

 75%|███████▍  | 107931/144825 [00:26<00:08, 4156.13ex/s]

 75%|███████▍  | 108350/144825 [00:26<00:08, 4058.91ex/s]

 75%|███████▌  | 108789/144825 [00:26<00:08, 4152.11ex/s]

 75%|███████▌  | 109206/144825 [00:26<00:09, 3676.78ex/s]

 76%|███████▌  | 109640/144825 [00:26<00:09, 3853.59ex/s]

 76%|███████▌  | 110035/144825 [00:26<00:09, 3819.12ex/s]

 76%|███████▋  | 110477/144825 [00:27<00:08, 3985.10ex/s]

 77%|███████▋  | 110913/144825 [00:27<00:08, 4091.22ex/s]

 77%|███████▋  | 111327/144825 [00:27<00:08, 4048.90ex/s]

 77%|███████▋  | 111761/144825 [00:27<00:08, 4131.00ex/s]

 77%|███████▋  | 112177/144825 [00:27<00:08, 4034.65ex/s]

 78%|███████▊  | 112613/144825 [00:27<00:07, 4128.68ex/s]

 78%|███████▊  | 113028/144825 [00:27<00:07, 4045.88ex/s]

 78%|███████▊  | 113459/144825 [00:27<00:07, 4121.18ex/s]

 79%|███████▊  | 113891/144825 [00:27<00:07, 4178.82ex/s]

 79%|███████▉  | 114310/144825 [00:27<00:07, 4083.77ex/s]

 79%|███████▉  | 114744/144825 [00:28<00:07, 4157.97ex/s]

 80%|███████▉  | 115161/144825 [00:28<00:07, 4049.98ex/s]

 80%|███████▉  | 115597/144825 [00:28<00:07, 4136.80ex/s]

 80%|████████  | 116012/144825 [00:28<00:07, 4045.51ex/s]

 80%|████████  | 116448/144825 [00:28<00:06, 4135.98ex/s]

 81%|████████  | 116892/144825 [00:28<00:06, 4222.37ex/s]

 81%|████████  | 117316/144825 [00:28<00:06, 4099.59ex/s]

 81%|████████▏ | 117754/144825 [00:28<00:06, 4180.19ex/s]

 82%|████████▏ | 118174/144825 [00:28<00:06, 4068.64ex/s]

 82%|████████▏ | 118619/144825 [00:28<00:06, 4178.36ex/s]

 82%|████████▏ | 119039/144825 [00:29<00:06, 4086.62ex/s]

 83%|████████▎ | 119483/144825 [00:29<00:06, 4185.99ex/s]

 83%|████████▎ | 119914/144825 [00:29<00:05, 4221.07ex/s]

 83%|████████▎ | 120338/144825 [00:29<00:05, 4115.86ex/s]

 83%|████████▎ | 120777/144825 [00:29<00:05, 4193.99ex/s]

 84%|████████▎ | 121198/144825 [00:29<00:05, 4089.48ex/s]

 84%|████████▍ | 121632/144825 [00:29<00:05, 4160.25ex/s]

 84%|████████▍ | 122050/144825 [00:29<00:05, 4044.63ex/s]

 85%|████████▍ | 122485/144825 [00:29<00:05, 4129.64ex/s]

 85%|████████▍ | 122921/144825 [00:30<00:05, 4196.45ex/s]

 85%|████████▌ | 123342/144825 [00:30<00:05, 4102.00ex/s]

 85%|████████▌ | 123787/144825 [00:30<00:05, 4202.09ex/s]

 86%|████████▌ | 124209/144825 [00:30<00:05, 4076.23ex/s]

 86%|████████▌ | 124647/144825 [00:30<00:04, 4163.89ex/s]

 86%|████████▋ | 125065/144825 [00:30<00:04, 4074.33ex/s]

 87%|████████▋ | 125514/144825 [00:30<00:04, 4192.34ex/s]

 87%|████████▋ | 125962/144825 [00:30<00:04, 4273.65ex/s]

 87%|████████▋ | 126391/144825 [00:30<00:04, 4150.53ex/s]

 88%|████████▊ | 126822/144825 [00:30<00:04, 4195.40ex/s]

 88%|████████▊ | 127243/144825 [00:31<00:04, 4106.19ex/s]

 88%|████████▊ | 127681/144825 [00:31<00:04, 4184.94ex/s]

 88%|████████▊ | 128101/144825 [00:31<00:04, 4090.63ex/s]

 89%|████████▉ | 128533/144825 [00:31<00:03, 4157.11ex/s]

 89%|████████▉ | 128973/144825 [00:31<00:03, 4225.01ex/s]

 89%|████████▉ | 129397/144825 [00:31<00:03, 4106.25ex/s]

 90%|████████▉ | 129841/144825 [00:31<00:03, 4201.95ex/s]

 90%|████████▉ | 130263/144825 [00:31<00:03, 4113.06ex/s]

 90%|█████████ | 130681/144825 [00:31<00:03, 4130.61ex/s]

 91%|█████████ | 131095/144825 [00:32<00:03, 4031.29ex/s]

 91%|█████████ | 131530/144825 [00:32<00:03, 4120.96ex/s]

 91%|█████████ | 131976/144825 [00:32<00:03, 4218.10ex/s]

 91%|█████████▏| 132399/144825 [00:32<00:03, 4114.08ex/s]

 92%|█████████▏| 132812/144825 [00:32<00:02, 4113.55ex/s]

 92%|█████████▏| 133225/144825 [00:32<00:02, 3991.15ex/s]

 92%|█████████▏| 133662/144825 [00:32<00:02, 4099.47ex/s]

 93%|█████████▎| 134074/144825 [00:32<00:02, 4006.78ex/s]

 93%|█████████▎| 134511/144825 [00:32<00:02, 4108.18ex/s]

 93%|█████████▎| 134951/144825 [00:32<00:02, 4191.16ex/s]

 93%|█████████▎| 135372/144825 [00:33<00:02, 4103.70ex/s]

 94%|█████████▍| 135809/144825 [00:33<00:02, 4180.20ex/s]

 94%|█████████▍| 136228/144825 [00:33<00:02, 4093.45ex/s]

 94%|█████████▍| 136661/144825 [00:33<00:01, 4160.19ex/s]

 95%|█████████▍| 137078/144825 [00:33<00:01, 4062.90ex/s]

 95%|█████████▍| 137525/144825 [00:33<00:01, 4180.32ex/s]

 95%|█████████▌| 137974/144825 [00:33<00:01, 4269.90ex/s]

 96%|█████████▌| 138402/144825 [00:33<00:01, 4188.56ex/s]

 96%|█████████▌| 138843/144825 [00:33<00:01, 4252.43ex/s]

 96%|█████████▌| 139270/144825 [00:33<00:01, 4117.19ex/s]

 96%|█████████▋| 139698/144825 [00:34<00:01, 4161.62ex/s]

 97%|█████████▋| 140116/144825 [00:34<00:01, 4073.07ex/s]

 97%|█████████▋| 140559/144825 [00:34<00:01, 4176.05ex/s]

 97%|█████████▋| 140996/144825 [00:34<00:00, 4230.31ex/s]

 98%|█████████▊| 141420/144825 [00:34<00:00, 4149.39ex/s]

 98%|█████████▊| 141869/144825 [00:34<00:00, 4248.39ex/s]

 98%|█████████▊| 142295/144825 [00:34<00:00, 4174.31ex/s]

 99%|█████████▊| 142744/144825 [00:34<00:00, 4264.97ex/s]

 99%|█████████▉| 143172/144825 [00:34<00:00, 4163.01ex/s]

 99%|█████████▉| 143598/144825 [00:35<00:00, 4190.52ex/s]

 99%|█████████▉| 144018/144825 [00:35<00:00, 4093.77ex/s]

100%|█████████▉| 144448/144825 [00:35<00:00, 4152.72ex/s]

100%|██████████| 144825/144825 [00:35<00:00, 4102.35ex/s]




  0%|          | 0/69825 [00:00<?, ?ex/s]

  1%|          | 430/69825 [00:00<00:16, 4292.38ex/s]

  1%|          | 860/69825 [00:00<00:16, 4279.85ex/s]

  2%|▏         | 1288/69825 [00:00<00:17, 3978.08ex/s]

  2%|▏         | 1715/69825 [00:00<00:16, 4086.39ex/s]

  3%|▎         | 2126/69825 [00:00<00:16, 4007.33ex/s]

  4%|▎         | 2550/69825 [00:00<00:16, 4083.42ex/s]

  4%|▍         | 2978/69825 [00:00<00:16, 4143.95ex/s]

  5%|▍         | 3394/69825 [00:00<00:16, 4001.85ex/s]

  5%|▌         | 3824/69825 [00:00<00:16, 4091.27ex/s]

  6%|▌         | 4235/69825 [00:01<00:16, 3997.71ex/s]

  7%|▋         | 4663/69825 [00:01<00:15, 4078.68ex/s]

  7%|▋         | 5072/69825 [00:01<00:16, 3975.14ex/s]

  8%|▊         | 5500/69825 [00:01<00:15, 4063.65ex/s]

  8%|▊         | 5928/69825 [00:01<00:15, 4126.68ex/s]

  9%|▉         | 6342/69825 [00:01<00:15, 4011.75ex/s]

 10%|▉         | 6777/69825 [00:01<00:15, 4109.88ex/s]

 10%|█         | 7190/69825 [00:01<00:15, 3982.31ex/s]

 11%|█         | 7609/69825 [00:01<00:15, 4040.18ex/s]

 11%|█▏        | 8015/69825 [00:01<00:15, 3969.74ex/s]

 12%|█▏        | 8441/69825 [00:02<00:15, 4053.37ex/s]

 13%|█▎        | 8871/69825 [00:02<00:14, 4123.89ex/s]

 13%|█▎        | 9285/69825 [00:02<00:15, 4008.64ex/s]

 14%|█▍        | 9702/69825 [00:02<00:14, 4053.47ex/s]

 14%|█▍        | 10109/69825 [00:02<00:15, 3964.88ex/s]

 15%|█▌        | 10528/69825 [00:02<00:14, 4027.49ex/s]

 16%|█▌        | 10957/69825 [00:02<00:14, 4103.87ex/s]

 16%|█▋        | 11369/69825 [00:02<00:14, 3993.76ex/s]

 17%|█▋        | 11801/69825 [00:02<00:14, 4088.24ex/s]

 17%|█▋        | 12211/69825 [00:03<00:14, 4002.25ex/s]

 18%|█▊        | 12641/69825 [00:03<00:13, 4086.74ex/s]

 19%|█▊        | 13051/69825 [00:03<00:14, 3990.25ex/s]

 19%|█▉        | 13476/69825 [00:03<00:13, 4063.46ex/s]

 20%|█▉        | 13903/69825 [00:03<00:13, 4122.36ex/s]

 21%|██        | 14317/69825 [00:03<00:13, 3974.58ex/s]

 21%|██        | 14736/69825 [00:03<00:13, 4034.60ex/s]

 22%|██▏       | 15141/69825 [00:03<00:13, 3964.85ex/s]

 22%|██▏       | 15562/69825 [00:03<00:13, 4033.73ex/s]

 23%|██▎       | 15990/69825 [00:03<00:13, 4105.76ex/s]

 23%|██▎       | 16402/69825 [00:04<00:13, 3995.03ex/s]

 24%|██▍       | 16831/69825 [00:04<00:12, 4080.41ex/s]

 25%|██▍       | 17241/69825 [00:04<00:13, 3974.52ex/s]

 25%|██▌       | 17664/69825 [00:04<00:12, 4045.81ex/s]

 26%|██▌       | 18070/69825 [00:04<00:13, 3948.93ex/s]

 26%|██▋       | 18498/69825 [00:04<00:12, 4043.89ex/s]

 27%|██▋       | 18917/69825 [00:04<00:12, 4084.83ex/s]

 28%|██▊       | 19327/69825 [00:04<00:12, 3980.64ex/s]

 28%|██▊       | 19755/69825 [00:04<00:12, 4065.50ex/s]

 29%|██▉       | 20163/69825 [00:04<00:12, 3975.81ex/s]

 29%|██▉       | 20590/69825 [00:05<00:12, 4059.45ex/s]

 30%|███       | 21000/69825 [00:05<00:12, 3965.65ex/s]

 31%|███       | 21425/69825 [00:05<00:11, 4046.76ex/s]

 31%|███▏      | 21856/69825 [00:05<00:11, 4123.61ex/s]

 32%|███▏      | 22270/69825 [00:05<00:11, 4031.28ex/s]

 32%|███▏      | 22687/69825 [00:05<00:11, 4069.50ex/s]

 33%|███▎      | 23095/69825 [00:05<00:11, 3981.12ex/s]

 34%|███▎      | 23520/69825 [00:05<00:11, 4056.94ex/s]

 34%|███▍      | 23950/69825 [00:05<00:11, 4126.21ex/s]

 35%|███▍      | 24364/69825 [00:06<00:11, 3992.92ex/s]

 36%|███▌      | 24795/69825 [00:06<00:11, 4082.78ex/s]

 36%|███▌      | 25205/69825 [00:06<00:14, 3022.49ex/s]

 37%|███▋      | 25628/69825 [00:06<00:13, 3307.29ex/s]

 37%|███▋      | 26000/69825 [00:06<00:12, 3392.88ex/s]

 38%|███▊      | 26413/69825 [00:06<00:12, 3585.10ex/s]

 38%|███▊      | 26837/69825 [00:06<00:11, 3761.22ex/s]

 39%|███▉      | 27230/69825 [00:06<00:11, 3721.84ex/s]

 40%|███▉      | 27656/69825 [00:06<00:10, 3872.04ex/s]

 40%|████      | 28053/69825 [00:07<00:11, 3741.60ex/s]

 41%|████      | 28477/69825 [00:07<00:10, 3882.07ex/s]

 41%|████▏     | 28907/69825 [00:07<00:10, 4001.17ex/s]

 42%|████▏     | 29312/69825 [00:07<00:10, 3938.56ex/s]

 43%|████▎     | 29737/69825 [00:07<00:09, 4027.89ex/s]

 43%|████▎     | 30143/69825 [00:07<00:10, 3897.11ex/s]

 44%|████▍     | 30572/69825 [00:07<00:09, 4008.34ex/s]

 44%|████▍     | 31000/69825 [00:07<00:09, 3961.64ex/s]

 45%|████▌     | 31427/69825 [00:07<00:09, 4048.05ex/s]

 46%|████▌     | 31853/69825 [00:08<00:09, 4108.35ex/s]

 46%|████▌     | 32266/69825 [00:08<00:09, 3993.59ex/s]

 47%|████▋     | 32696/69825 [00:08<00:09, 4081.89ex/s]

 47%|████▋     | 33106/69825 [00:08<00:09, 4007.13ex/s]

 48%|████▊     | 33534/69825 [00:08<00:08, 4084.67ex/s]

 49%|████▊     | 33959/69825 [00:08<00:08, 4132.22ex/s]

 49%|████▉     | 34374/69825 [00:08<00:08, 4015.91ex/s]

 50%|████▉     | 34810/69825 [00:08<00:08, 4113.13ex/s]

 50%|█████     | 35223/69825 [00:08<00:08, 4026.73ex/s]

 51%|█████     | 35652/69825 [00:08<00:08, 4102.56ex/s]

 52%|█████▏    | 36064/69825 [00:09<00:08, 3978.60ex/s]

 52%|█████▏    | 36491/69825 [00:09<00:08, 4062.50ex/s]

 53%|█████▎    | 36925/69825 [00:09<00:07, 4141.02ex/s]

 53%|█████▎    | 37341/69825 [00:09<00:08, 4038.20ex/s]

 54%|█████▍    | 37762/69825 [00:09<00:07, 4086.01ex/s]

 55%|█████▍    | 38172/69825 [00:09<00:07, 3969.65ex/s]

 55%|█████▌    | 38601/69825 [00:09<00:07, 4060.20ex/s]

 56%|█████▌    | 39009/69825 [00:09<00:07, 3975.93ex/s]

 56%|█████▋    | 39435/69825 [00:09<00:07, 4056.76ex/s]

 57%|█████▋    | 39862/69825 [00:09<00:07, 4118.80ex/s]

 58%|█████▊    | 40275/69825 [00:10<00:07, 4022.14ex/s]

 58%|█████▊    | 40718/69825 [00:10<00:07, 4139.81ex/s]

 59%|█████▉    | 41134/69825 [00:10<00:07, 4056.84ex/s]

 60%|█████▉    | 41563/69825 [00:10<00:06, 4122.36ex/s]

 60%|██████    | 41996/69825 [00:10<00:06, 4180.37ex/s]

 61%|██████    | 42415/69825 [00:10<00:06, 4082.08ex/s]

 61%|██████▏   | 42848/69825 [00:10<00:06, 4151.99ex/s]

 62%|██████▏   | 43265/69825 [00:10<00:06, 4061.16ex/s]

 63%|██████▎   | 43695/69825 [00:10<00:06, 4129.25ex/s]

 63%|██████▎   | 44109/69825 [00:11<00:06, 4017.63ex/s]

 64%|██████▍   | 44538/69825 [00:11<00:06, 4096.30ex/s]

 64%|██████▍   | 44977/69825 [00:11<00:05, 4179.50ex/s]

 65%|██████▌   | 45396/69825 [00:11<00:06, 4047.67ex/s]

 66%|██████▌   | 45820/69825 [00:11<00:05, 4101.24ex/s]

 66%|██████▌   | 46232/69825 [00:11<00:05, 4026.43ex/s]

 67%|██████▋   | 46656/69825 [00:11<00:05, 4087.63ex/s]

 67%|██████▋   | 47066/69825 [00:11<00:05, 3989.40ex/s]

 68%|██████▊   | 47490/69825 [00:11<00:05, 4060.07ex/s]

 69%|██████▊   | 47918/69825 [00:11<00:05, 4124.34ex/s]

 69%|██████▉   | 48332/69825 [00:12<00:05, 4030.17ex/s]

 70%|██████▉   | 48755/69825 [00:12<00:05, 4088.00ex/s]

 70%|███████   | 49165/69825 [00:12<00:05, 3999.38ex/s]

 71%|███████   | 49592/69825 [00:12<00:04, 4075.68ex/s]

 72%|███████▏  | 50001/69825 [00:12<00:04, 3972.92ex/s]

 72%|███████▏  | 50424/69825 [00:12<00:04, 4047.20ex/s]

 73%|███████▎  | 50849/69825 [00:12<00:04, 4103.41ex/s]

 73%|███████▎  | 51261/69825 [00:12<00:04, 4017.55ex/s]

 74%|███████▍  | 51689/69825 [00:12<00:04, 4091.96ex/s]

 75%|███████▍  | 52099/69825 [00:12<00:04, 3993.83ex/s]

 75%|███████▌  | 52529/69825 [00:13<00:04, 4082.63ex/s]

 76%|███████▌  | 52964/69825 [00:13<00:04, 4160.84ex/s]

 76%|███████▋  | 53381/69825 [00:13<00:04, 4071.41ex/s]

 77%|███████▋  | 53807/69825 [00:13<00:03, 4125.80ex/s]

 78%|███████▊  | 54221/69825 [00:13<00:03, 4020.91ex/s]

 78%|███████▊  | 54654/69825 [00:13<00:03, 4109.44ex/s]

 79%|███████▉  | 55066/69825 [00:13<00:03, 4012.01ex/s]

 79%|███████▉  | 55495/69825 [00:13<00:03, 4092.55ex/s]

 80%|████████  | 55919/69825 [00:13<00:03, 4134.86ex/s]

 81%|████████  | 56334/69825 [00:14<00:03, 4040.98ex/s]

 81%|████████▏ | 56769/69825 [00:14<00:03, 4129.65ex/s]

 82%|████████▏ | 57183/69825 [00:14<00:03, 4018.72ex/s]

 83%|████████▎ | 57614/69825 [00:14<00:02, 4102.46ex/s]

 83%|████████▎ | 58026/69825 [00:14<00:02, 4009.23ex/s]

 84%|████████▎ | 58456/69825 [00:14<00:02, 4093.12ex/s]

 84%|████████▍ | 58880/69825 [00:14<00:02, 4133.98ex/s]

 85%|████████▍ | 59295/69825 [00:14<00:02, 4011.69ex/s]

 86%|████████▌ | 59722/69825 [00:14<00:02, 4085.33ex/s]

 86%|████████▌ | 60132/69825 [00:14<00:02, 4004.74ex/s]

 87%|████████▋ | 60562/69825 [00:15<00:02, 4090.44ex/s]

 87%|████████▋ | 60980/69825 [00:15<00:02, 4115.27ex/s]

 88%|████████▊ | 61393/69825 [00:15<00:02, 3975.76ex/s]

 89%|████████▊ | 61814/69825 [00:15<00:01, 4040.95ex/s]

 89%|████████▉ | 62220/69825 [00:15<00:01, 3966.43ex/s]

 90%|████████▉ | 62657/69825 [00:15<00:01, 4081.84ex/s]

 90%|█████████ | 63067/69825 [00:15<00:01, 3979.49ex/s]

 91%|█████████ | 63497/69825 [00:15<00:01, 4070.86ex/s]

 92%|█████████▏| 63925/69825 [00:15<00:01, 4130.90ex/s]

 92%|█████████▏| 64340/69825 [00:16<00:01, 4045.22ex/s]

 93%|█████████▎| 64758/69825 [00:16<00:01, 4083.80ex/s]

 93%|█████████▎| 65168/69825 [00:16<00:01, 3996.35ex/s]

 94%|█████████▍| 65586/69825 [00:16<00:01, 4049.65ex/s]

 95%|█████████▍| 66000/69825 [00:16<00:00, 3981.90ex/s]

 95%|█████████▌| 66432/69825 [00:16<00:00, 4077.94ex/s]

 96%|█████████▌| 66858/69825 [00:16<00:00, 4129.10ex/s]

 96%|█████████▋| 67272/69825 [00:16<00:00, 4013.60ex/s]

 97%|█████████▋| 67700/69825 [00:16<00:00, 4090.92ex/s]

 98%|█████████▊| 68111/69825 [00:16<00:00, 3997.96ex/s]

 98%|█████████▊| 68536/69825 [00:17<00:00, 4070.85ex/s]

 99%|█████████▉| 68958/69825 [00:17<00:00, 4111.85ex/s]

 99%|█████████▉| 69370/69825 [00:17<00:00, 3993.88ex/s]

100%|█████████▉| 69778/69825 [00:17<00:00, 4017.22ex/s]

100%|██████████| 69825/69825 [00:17<00:00, 4022.09ex/s]




In [20]:
len(train_dataset), len(eval_dataset)

(144825, 69825)

In [21]:
k = random.randrange(len(train_dataset))
tokenizer.decode(train_dataset['input_ids'][k]), train_dataset['labels'][k]

('[CLS] 등받이와 엉덩이부분이 미끄럼방지 처리되어 있어 혼자 앉히고 씻기기 편하고 물받는 표시선까지 있어 물낭비도 줄여준다능 [SEP] 본품 # 가격 [SEP]',
 1)

# Load Trainer

In [22]:
args = TrainingArguments(
    output_dir=run_name,
    run_name=run_name,
    report_to=report_to,

    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,

    optim=optim,

    learning_rate=learning_rate,
    weight_decay=weight_decay,
    adam_epsilon=adam_epsilon,

    lr_scheduler_type=lr_scheduler_type,
    warmup_ratio=warmup_ratio,

    save_total_limit=save_total_limit,

    load_best_model_at_end=load_best_model_at_end,
    metric_for_best_model=metric_for_best_model,
    
    save_strategy=save_strategy,
    evaluation_strategy=evaluation_strategy,

    logging_strategy=logging_strategy,
    logging_first_step=logging_first_step, 
    logging_steps=logging_steps,
    
    fp16=fp16,
)

In [23]:
# es = EarlyStoppingCallback(early_stopping_patience=early_stopping_patience)

In [24]:
trainer = Trainer(
    model,
    args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    data_collator=data_collator,
    # callbacks=[es],
)

# Run Trainer

In [25]:
trainer.train()
wandb.finish()

The following columns in the training set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, sentence_form, entity_property. If id, sentence_form, entity_property are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running training *****


  Num examples = 144825


  Num Epochs = 15


  Instantaneous batch size per device = 32


  Total train batch size (w. parallel, distributed & accumulation) = 128


  Gradient Accumulation steps = 1


  Total optimization steps = 16980


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


You're using a ElectraTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.




Epoch,Training Loss,Validation Loss


In [None]:
keep = [
    'added_tokens.json',
    'config.json',
    'pytorch_model.bin',
    'special_tokens_map.json',
    'tokenizer.json',
    'tokenizer_config.json',
    'vocab.txt'
]

ckpts = os.listdir(run_name)
for ckpt in ckpts:
    ckpt = os.path.join(run_name, ckpt)
    for item in os.listdir(ckpt):
        if item not in keep:
            os.remove(os.path.join(ckpt, item))

!mv wandb {run_name} {SAVE_PATH}/