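# Description

Sets up fine-tuning of `snunlp/KR-ELECTRA-discriminator` as a binary (`True`/`False`) sequence classifier for aspect category detection. Each input pairs a sentence (`sentence_form`) with a candidate category (`entity_property`), and run metrics are reported to Weights & Biases.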


# Modules and Global Variables

In [1]:
from transformers import (
    AutoConfig, AutoTokenizer, AutoModelForSequenceClassification, 
    DefaultDataCollator, DataCollatorWithPadding, 
    TrainingArguments, Trainer,
)

from transformers.optimization import (
    AdamW, get_linear_schedule_with_warmup,
    Adafactor, AdafactorSchedule,
)

import torch
import wandb

import datasets
import evaluate

from sklearn.metrics import accuracy_score, f1_score

import numpy as np
import pandas as pd

import os
import re
import random

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Report the runtime environment: PyTorch version, CUDA availability, GPU count.
NGPU = torch.cuda.device_count()
for _label, _value in (
    ('torch.__version__', torch.__version__),
    ('torch.cuda.is_available()', torch.cuda.is_available()),
    ('NGPU', NGPU),
):
    print(f'{_label}: {_value}')

# Disabled (kept for reference): manual multi-GPU wrapping of the model.
# if NGPU > 1:
#     model = torch.nn.DataParallel(model, device_ids=list(range(NGPU)))

torch.__version__: 1.12.1
torch.cuda.is_available(): True
NGPU: 4


In [3]:
### labels

# Candidate label vocabularies; `labels` selects the one used by this run.
ce_labels = ['True', 'False']
pc_labels = ['positive', 'negative', 'neutral']
pc_binary_labels = ['True', 'False']

labels = ce_labels

# Forward and reverse lookups: a label's id is its position in `labels`.
num_labels = len(labels)
id2label = dict(enumerate(labels))
label2id = {name: idx for idx, name in id2label.items()}

print(label2id)
print(id2label)

{'True': 0, 'False': 1}
{0: 'True', 1: 'False'}


In [4]:
### paths and names

# Run identity: project name (exported as WANDB_PROJECT further down) and the
# run id that is appended to the model name.
PROJECT_NAME = 'aspect_category_detection'
RUN_ID = 'cleaned_v2_total_v1'

# Dataset selection: version directory, task type, and optional augmented train file.
DATA_V = 'cleaned_v2_total_v1'
DATA_T = 'ce' # ce or pc or pc_binary
AUGMENTATION = False
AUG_NAME = 'balanced'

model_checkpoint = 'snunlp/KR-ELECTRA-discriminator'

notebook_name = 'acd_binary_trainer.ipynb'

### fixed

# Filesystem-friendly model name: '/' and '-' become '_', then lower-cased.
model_name = re.sub(r'[/-]', r'_', model_checkpoint).lower()
run_name = f'{model_name}_{RUN_ID}'

ROOT_PATH = './'
SAVE_PATH = os.path.join(ROOT_PATH, 'training_results', run_name, 'acd')
NOTEBOOK_PATH = os.path.join('./', notebook_name)

# The train file name gets an `_{AUG_NAME}` suffix only when AUGMENTATION is exactly True.
augornot = f'_{AUG_NAME}' if AUGMENTATION is True else ''
TRAIN_DATA_PATH = os.path.join(ROOT_PATH, 'dataset', DATA_V, f'{DATA_T}_train{augornot}.csv')
EVAL_DATA_PATH = os.path.join(ROOT_PATH, 'dataset', DATA_V, f'{DATA_T}_dev.csv')

# Create the output directory in pure Python (previously `!mkdir -p {SAVE_PATH}`),
# so the cell needs no POSIX shell and the path is never re-parsed by a shell.
os.makedirs(SAVE_PATH, exist_ok=True)

In [5]:
# Report, one line per path, whether each configured location is present on disk.
for _path in (SAVE_PATH, NOTEBOOK_PATH, TRAIN_DATA_PATH, EVAL_DATA_PATH):
    _status = 'exists' if os.path.exists(_path) else 'does not exist'
    print(f'{_path} {_status}.')

./training_results/snunlp_kr_electra_discriminator_cleaned_v2_total_v1/acd exists.
./acd_binary_trainer.ipynb exists.
./dataset/cleaned_v2_total_v1/ce_train.csv exists.
./dataset/cleaned_v2_total_v1/ce_dev.csv exists.


In [6]:
### rest of training args

report_to="wandb"

fp16 = False

num_train_epochs = 15
batch_size = 32
gradient_accumulation_steps = 1

optim = 'adamw_torch' # 'adamw_hf'

learning_rate = 5e-5 # 3e-6
weight_decay = 0.01 # 0
adam_epsilon = 1e-8

lr_scheduler_type = 'cosine'
warmup_ratio = 0

save_total_limit = 1

load_best_model_at_end = True
metric_for_best_model ='f1_macro'

save_strategy = "epoch"
evaluation_strategy = "epoch"

logging_strategy = "steps"
logging_first_step = True 
logging_steps = 500

# WandB Configuration

In [7]:
# Export the W&B settings as environment variables, then authenticate.
# `{NAME}` is expanded by IPython from the notebook variable of that name, so
# PROJECT_NAME and NOTEBOOK_PATH must already be defined when this cell runs.
# Keep comments on their own lines: text after `%env VAR=value` would become
# part of the value.
# NOTE(review): what WANDB_LOG_MODEL=true and WANDB_WATCH=all actually enable is
# defined by the wandb / transformers integration, not by this notebook —
# confirm against their documentation.
%env WANDB_PROJECT={PROJECT_NAME}
%env WANDB_NOTEBOOK_NAME={NOTEBOOK_PATH}
%env WANDB_LOG_MODEL=true
%env WANDB_WATCH=all
wandb.login()

env: WANDB_PROJECT=aspect_category_detection
env: WANDB_NOTEBOOK_NAME=./acd_binary_trainer.ipynb
env: WANDB_LOG_MODEL=true
env: WANDB_WATCH=all


[34m[1mwandb[0m: Currently logged in as: [33mdotsnangles[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

# Load Model, Tokenizer, and Collator

In [8]:
# Tokenizer for the chosen checkpoint, plus a collator built around it.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Sequence-classification model configured with this run's label maps.
_classifier_kwargs = dict(label2id=label2id, id2label=id2label, num_labels=num_labels)
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, **_classifier_kwargs)

Some weights of the model checkpoint at snunlp/KR-ELECTRA-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense_prediction.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at snunlp/KR-ELECTRA-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
# Locate the raw_* CSV files through the shared path configuration instead of a
# hard-coded './dataset/cleaned_v2_total_v1/...' literal, so that changing
# DATA_V or ROOT_PATH cannot leave this cell reading a different dataset version
# than the training/eval files. With the current configuration the resulting
# paths are identical to the previous literals.
raw_data_dir = os.path.join(ROOT_PATH, 'dataset', DATA_V)
train_path = os.path.join(raw_data_dir, 'raw_train.csv')
dev_path = os.path.join(raw_data_dir, 'raw_dev.csv')
test_path = os.path.join(raw_data_dir, 'raw_test.csv')

# One DataFrame per raw split.
train = pd.read_csv(train_path)
dev = pd.read_csv(dev_path)
test = pd.read_csv(test_path)

In [10]:
### new
entity_property_pair = [
    '본품#가격', '본품#다양성', '본품#디자인', '본품#인지도', '본품#일반', '본품#편의성', '본품#품질',
    '브랜드#가격', '브랜드#디자인', '브랜드#인지도', '브랜드#일반', '브랜드#품질',
    '제품 전체#가격', '제품 전체#다양성', '제품 전체#디자인', '제품 전체#인지도', '제품 전체#일반', '제품 전체#편의성', '제품 전체#품질',
    '패키지/구성품#가격', '패키지/구성품#다양성', '패키지/구성품#디자인', '패키지/구성품#일반', '패키지/구성품#편의성', '패키지/구성품#품질'
]


# more_tokens = ['&name&', '&affiliation&', '&social-security-num&', '&tel-num&', '&card-num&', '&bank-account&', '&num&', '&online-account&']

# emojis = pd.concat([train.sentence_form, dev.sentence_form, test.sentence_form], ignore_index=True, verify_integrity=True).to_frame()
# emojis = list(set(demoji.findall(' '.join(emojis.sentence_form.to_list())).keys()))

# tokensToAdd = more_tokens + emojis
ep_labels = pd.Series(entity_property_pair, name='sentence_form', copy=True)

In [11]:
# Pool the sentences of all three raw splits into one single-column frame.
_all_sentences = [split.sentence_form for split in (train, dev, test)]
data = pd.concat(_all_sentences, ignore_index=True, verify_integrity=True).to_frame()
print(len(data))

# Keep one row per distinct sentence; the second number printed is the
# de-duplicated row count.
data = data.drop_duplicates()
print(len(data))

7920
7894


In [12]:
# Start again from the checkpoint's stock tokenizer, so every run of this cell
# begins from the same base vocabulary.
# NOTE(review): `data_collator` was built earlier around the previous tokenizer
# object and is not rebuilt here — confirm that is intended.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
print(len(tokenizer))

# Train a throw-away tokenizer on the task sentences, used only to find
# vocabulary entries that the pretrained tokenizer does not have.
# NOTE(review): vocab_size=1 presumably keeps only the base alphabet and special
# tokens — confirm against the tokenizers trainer behaviour.
tokenizerTrainData = data.sentence_form.to_list()
newTokenizer = tokenizer.train_new_from_iterator(tokenizerTrainData, vocab_size=1)

new_tokens = set(newTokenizer.vocab.keys()) - set(tokenizer.vocab.keys())
# Add the tokens in sorted order. Iterating a set of str is not stable across
# interpreter runs (string hash randomization), and the tokens are passed to
# `add_tokens` as an ordered list. Without sorting, the same token could get a
# different id, and so a different embedding row, in another session.
tokenizer.add_tokens(sorted(new_tokens))
print(len(newTokenizer))
print(len(tokenizer))

# Grow the model's embedding matrix to cover the newly added ids.
model.resize_token_embeddings(len(tokenizer))

30000





2615
30013


Embedding(30013, 768)

In [13]:
# Display the set of vocabulary entries that were missing from the pretrained tokenizer.
new_tokens

{'##뜌', '##읒', '##죱', '##쨕', '##쫜', '뜌', '뿤', '쓩', '읒', '죱', '쨕', '쫜', '챳'}

In [14]:
# Sanity check: show the label maps and label count stored on the loaded model.
model.config.label2id, model.config.id2label, model.num_labels

({'True': 0, 'False': 1}, {0: 'True', 1: 'False'}, 2)

In [15]:
# Aspect categories, written '<entity>#<property>'.
entity_property_pair = [
    '본품#가격', '본품#다양성', '본품#디자인', '본품#인지도', '본품#일반', '본품#편의성', '본품#품질',
    '브랜드#가격', '브랜드#디자인', '브랜드#인지도', '브랜드#일반', '브랜드#품질',
    '제품 전체#가격', '제품 전체#다양성', '제품 전체#디자인', '제품 전체#인지도', '제품 전체#일반', '제품 전체#편의성', '제품 전체#품질',
    '패키지/구성품#가격', '패키지/구성품#다양성', '패키지/구성품#디자인', '패키지/구성품#일반', '패키지/구성품#편의성', '패키지/구성품#품질'
]

polarity_id_to_name = ['positive', 'negative', 'neutral']

# Every '<entity>#<property>#<polarity>' combination, category-major.
tokenizer_tester = [
    f'{pair}#{polarity}'
    for pair in entity_property_pair
    for polarity in polarity_id_to_name
]

# Encode each string once, then print all decoded round-trips followed by all
# raw id lists, to inspect how the tokenizer splits the label strings.
_encoded_samples = [tokenizer.encode(text) for text in tokenizer_tester]

for _ids in _encoded_samples:
    print(tokenizer.decode(_ids))

for _ids in _encoded_samples:
    print(_ids)

[CLS] 본품 # 가격 # positive [SEP]
[CLS] 본품 # 가격 # negative [SEP]
[CLS] 본품 # 가격 # neutral [SEP]
[CLS] 본품 # 다양성 # positive [SEP]
[CLS] 본품 # 다양성 # negative [SEP]
[CLS] 본품 # 다양성 # neutral [SEP]
[CLS] 본품 # 디자인 # positive [SEP]
[CLS] 본품 # 디자인 # negative [SEP]
[CLS] 본품 # 디자인 # neutral [SEP]
[CLS] 본품 # 인지도 # positive [SEP]
[CLS] 본품 # 인지도 # negative [SEP]
[CLS] 본품 # 인지도 # neutral [SEP]
[CLS] 본품 # 일반 # positive [SEP]
[CLS] 본품 # 일반 # negative [SEP]
[CLS] 본품 # 일반 # neutral [SEP]
[CLS] 본품 # 편의성 # positive [SEP]
[CLS] 본품 # 편의성 # negative [SEP]
[CLS] 본품 # 편의성 # neutral [SEP]
[CLS] 본품 # 품질 # positive [SEP]
[CLS] 본품 # 품질 # negative [SEP]
[CLS] 본품 # 품질 # neutral [SEP]
[CLS] 브랜드 # 가격 # positive [SEP]
[CLS] 브랜드 # 가격 # negative [SEP]
[CLS] 브랜드 # 가격 # neutral [SEP]
[CLS] 브랜드 # 디자인 # positive [SEP]
[CLS] 브랜드 # 디자인 # negative [SEP]
[CLS] 브랜드 # 디자인 # neutral [SEP]
[CLS] 브랜드 # 인지도 # positive [SEP]
[CLS] 브랜드 # 인지도 # negative [SEP]
[CLS] 브랜드 # 인지도 # neutral [SEP]
[CLS] 브랜드 # 일반 # positive [SEP]
[CLS] 브랜드 # 일반 # nega

# Define Metric

In [16]:
# Metric objects used by `compute_metrics`, loaded by name through `evaluate`.
accuracy_metric, f1_metric = (evaluate.load(metric_id) for metric_id in ('accuracy', 'f1'))

In [17]:
def compute_metrics(eval_pred):
    """Compute accuracy and F1 scores for one evaluation pass.

    Args:
        eval_pred: Object that unpacks into ``(predictions, labels)``. The
            arg-max of ``predictions`` along axis 1 is used as the predicted
            class id (assumes a 2-D per-class score array — TODO confirm).

    Returns:
        dict with keys ``accuracy``, ``f1_true``, ``f1_false``, ``f1_macro``
        and ``f1_micro``. ``f1_true`` / ``f1_false`` are the per-class F1
        scores for class ids 0 and 1 respectively.
    """
    scores, references = eval_pred
    predicted_ids = np.argmax(scores, axis=1)

    def _f1(**options):
        # All F1 variants share the same references/predictions; only the
        # averaging options differ.
        return f1_metric.compute(references=references, predictions=predicted_ids, **options)['f1']

    results = {
        'accuracy': accuracy_metric.compute(references=references, predictions=predicted_ids)['accuracy'],
    }
    results['f1_true'], results['f1_false'] = tuple(_f1(average=None, labels=[0, 1]))
    results['f1_macro'] = _f1(average='macro')
    results['f1_micro'] = _f1(average='micro')
    return results

# Load Data

In [18]:
def preprocess_function(examples):
    """Tokenize a sentence together with its entity/property string as a text pair.

    Args:
        examples: Mapping that provides the ``"sentence_form"`` and
            ``"entity_property"`` fields.

    Returns:
        Whatever the module-level ``tokenizer`` returns when called with the
        two fields as first and second text and ``truncation=True``.
    """
    sentence = examples["sentence_form"]
    category = examples["entity_property"]
    return tokenizer(sentence, category, truncation=True)

In [19]:
# Read the train / eval CSV files selected by the path configuration.
train_frame = pd.read_csv(TRAIN_DATA_PATH)
eval_frame = pd.read_csv(EVAL_DATA_PATH)
# Disabled option (kept for reference): fold the dev split into the training data.
# train_frame = pd.concat([train_frame, eval_frame])

# Convert each frame to a `datasets.Dataset`, shuffle it with a fixed seed, and
# tokenize it one example at a time.
train_dataset = (
    datasets.Dataset.from_pandas(train_frame)
    .shuffle(seed=42)
    .map(preprocess_function, batched=False)
)
eval_dataset = (
    datasets.Dataset.from_pandas(eval_frame)
    .shuffle(seed=42)
    .map(preprocess_function, batched=False)
)

  0%|          | 0/144825 [00:00<?, ?ex/s]

  0%|          | 389/144825 [00:00<00:37, 3883.43ex/s]

  1%|          | 832/144825 [00:00<00:34, 4199.56ex/s]

  1%|          | 1252/144825 [00:00<00:36, 3946.85ex/s]

  1%|          | 1693/144825 [00:00<00:34, 4122.28ex/s]

  1%|▏         | 2107/144825 [00:00<00:35, 4072.82ex/s]

  2%|▏         | 2557/144825 [00:00<00:33, 4214.24ex/s]

  2%|▏         | 2991/144825 [00:00<00:33, 4253.96ex/s]

  2%|▏         | 3418/144825 [00:00<00:34, 4127.69ex/s]

  3%|▎         | 3867/144825 [00:00<00:33, 4235.98ex/s]

  3%|▎         | 4292/144825 [00:01<00:34, 4125.90ex/s]

  3%|▎         | 4736/144825 [00:01<00:33, 4217.22ex/s]

  4%|▎         | 5159/144825 [00:01<00:33, 4117.64ex/s]

  4%|▍         | 5608/144825 [00:01<00:32, 4224.28ex/s]

  4%|▍         | 6032/144825 [00:01<00:33, 4149.48ex/s]

  4%|▍         | 6481/144825 [00:01<00:32, 4246.11ex/s]

  5%|▍         | 6921/144825 [00:01<00:32, 4290.59ex/s]

  5%|▌         | 7351/144825 [00:01<00:32, 4176.64ex/s]

  5%|▌         | 7797/144825 [00:01<00:32, 4258.51ex/s]

  6%|▌         | 8224/144825 [00:01<00:32, 4165.31ex/s]

  6%|▌         | 8667/144825 [00:02<00:32, 4239.92ex/s]

  6%|▋         | 9092/144825 [00:02<00:32, 4136.37ex/s]

  7%|▋         | 9528/144825 [00:02<00:32, 4201.18ex/s]

  7%|▋         | 9972/144825 [00:02<00:31, 4269.48ex/s]

  7%|▋         | 10400/144825 [00:02<00:32, 4173.77ex/s]

  7%|▋         | 10849/144825 [00:02<00:31, 4263.86ex/s]

  8%|▊         | 11277/144825 [00:02<00:32, 4156.74ex/s]

  8%|▊         | 11716/144825 [00:02<00:31, 4223.51ex/s]

  8%|▊         | 12140/144825 [00:02<00:32, 4082.95ex/s]

  9%|▊         | 12590/144825 [00:03<00:31, 4202.91ex/s]

  9%|▉         | 13012/144825 [00:03<00:31, 4127.93ex/s]

  9%|▉         | 13451/144825 [00:03<00:31, 4201.76ex/s]

 10%|▉         | 13888/144825 [00:03<00:30, 4248.80ex/s]

 10%|▉         | 14314/144825 [00:03<00:31, 4109.41ex/s]

 10%|█         | 14754/144825 [00:03<00:31, 4192.03ex/s]

 10%|█         | 15175/144825 [00:03<00:31, 4116.81ex/s]

 11%|█         | 15619/144825 [00:03<00:30, 4209.91ex/s]

 11%|█         | 16042/144825 [00:03<00:31, 4096.07ex/s]

 11%|█▏        | 16475/144825 [00:03<00:30, 4161.77ex/s]

 12%|█▏        | 16913/144825 [00:04<00:30, 4223.21ex/s]

 12%|█▏        | 17337/144825 [00:04<00:30, 4124.23ex/s]

 12%|█▏        | 17775/144825 [00:04<00:30, 4197.61ex/s]

 13%|█▎        | 18196/144825 [00:04<00:30, 4102.43ex/s]

 13%|█▎        | 18639/144825 [00:04<00:30, 4196.82ex/s]

 13%|█▎        | 19060/144825 [00:04<00:30, 4077.86ex/s]

 13%|█▎        | 19504/144825 [00:04<00:29, 4180.03ex/s]

 14%|█▍        | 19950/144825 [00:04<00:29, 4259.33ex/s]

 14%|█▍        | 20377/144825 [00:04<00:30, 4145.43ex/s]

 14%|█▍        | 20821/144825 [00:04<00:29, 4230.83ex/s]

 15%|█▍        | 21246/144825 [00:05<00:29, 4125.21ex/s]

 15%|█▍        | 21692/144825 [00:05<00:29, 4220.50ex/s]

 15%|█▌        | 22116/144825 [00:05<00:29, 4129.86ex/s]

 16%|█▌        | 22559/144825 [00:05<00:29, 4214.74ex/s]

 16%|█▌        | 23000/144825 [00:05<00:29, 4122.14ex/s]

 16%|█▌        | 23437/144825 [00:05<00:28, 4192.76ex/s]

 16%|█▋        | 23876/144825 [00:05<00:28, 4247.37ex/s]

 17%|█▋        | 24302/144825 [00:05<00:28, 4156.93ex/s]

 17%|█▋        | 24750/144825 [00:05<00:28, 4250.68ex/s]

 17%|█▋        | 25177/144825 [00:06<00:28, 4149.46ex/s]

 18%|█▊        | 25615/144825 [00:06<00:28, 4215.57ex/s]

 18%|█▊        | 26038/144825 [00:06<00:28, 4123.53ex/s]

 18%|█▊        | 26483/144825 [00:06<00:28, 4216.84ex/s]

 19%|█▊        | 26935/144825 [00:06<00:27, 4303.85ex/s]

 19%|█▉        | 27367/144825 [00:06<00:28, 4174.74ex/s]

 19%|█▉        | 27810/144825 [00:06<00:27, 4246.55ex/s]

 19%|█▉        | 28236/144825 [00:06<00:28, 4078.98ex/s]

 20%|█▉        | 28674/144825 [00:06<00:27, 4162.63ex/s]

 20%|██        | 29092/144825 [00:06<00:28, 4088.53ex/s]

 20%|██        | 29534/144825 [00:07<00:27, 4182.33ex/s]

 21%|██        | 29984/144825 [00:07<00:26, 4273.55ex/s]

 21%|██        | 30413/144825 [00:07<00:27, 4150.27ex/s]

 21%|██▏       | 30856/144825 [00:07<00:26, 4228.68ex/s]

 22%|██▏       | 31281/144825 [00:07<00:27, 4148.22ex/s]

 22%|██▏       | 31733/144825 [00:07<00:26, 4255.79ex/s]

 22%|██▏       | 32160/144825 [00:07<00:27, 4162.34ex/s]

 23%|██▎       | 32594/144825 [00:07<00:26, 4211.37ex/s]

 23%|██▎       | 33017/144825 [00:07<00:27, 4123.09ex/s]

 23%|██▎       | 33461/144825 [00:08<00:26, 4212.60ex/s]

 23%|██▎       | 33911/144825 [00:08<00:25, 4294.83ex/s]

 24%|██▎       | 34342/144825 [00:08<00:26, 4181.51ex/s]

 24%|██▍       | 34779/144825 [00:08<00:25, 4235.52ex/s]

 24%|██▍       | 35204/144825 [00:08<00:26, 4117.27ex/s]

 25%|██▍       | 35649/144825 [00:08<00:25, 4213.15ex/s]

 25%|██▍       | 36072/144825 [00:08<00:36, 2994.40ex/s]

 25%|██▌       | 36519/144825 [00:08<00:32, 3331.74ex/s]

 26%|██▌       | 36970/144825 [00:08<00:29, 3619.91ex/s]

 26%|██▌       | 37379/144825 [00:09<00:28, 3742.12ex/s]

 26%|██▌       | 37826/144825 [00:09<00:27, 3937.51ex/s]

 26%|██▋       | 38242/144825 [00:09<00:27, 3944.37ex/s]

 27%|██▋       | 38690/144825 [00:09<00:25, 4093.60ex/s]

 27%|██▋       | 39112/144825 [00:09<00:25, 4073.36ex/s]

 27%|██▋       | 39567/144825 [00:09<00:25, 4208.69ex/s]

 28%|██▊       | 40000/144825 [00:09<00:25, 4179.31ex/s]

 28%|██▊       | 40451/144825 [00:09<00:24, 4275.06ex/s]

 28%|██▊       | 40899/144825 [00:09<00:23, 4332.82ex/s]

 29%|██▊       | 41335/144825 [00:09<00:24, 4242.25ex/s]

 29%|██▉       | 41781/144825 [00:10<00:23, 4303.05ex/s]

 29%|██▉       | 42213/144825 [00:10<00:24, 4211.12ex/s]

 29%|██▉       | 42671/144825 [00:10<00:23, 4317.95ex/s]

 30%|██▉       | 43105/144825 [00:10<00:23, 4243.95ex/s]

 30%|███       | 43557/144825 [00:10<00:23, 4322.00ex/s]

 30%|███       | 44000/144825 [00:10<00:23, 4202.12ex/s]

 31%|███       | 44437/144825 [00:10<00:23, 4248.39ex/s]

 31%|███       | 44880/144825 [00:10<00:23, 4299.95ex/s]

 31%|███▏      | 45311/144825 [00:10<00:23, 4203.68ex/s]

 32%|███▏      | 45759/144825 [00:11<00:23, 4281.83ex/s]

 32%|███▏      | 46189/144825 [00:11<00:23, 4199.82ex/s]

 32%|███▏      | 46644/144825 [00:11<00:22, 4301.45ex/s]

 33%|███▎      | 47076/144825 [00:11<00:23, 4240.55ex/s]

 33%|███▎      | 47529/144825 [00:11<00:22, 4322.17ex/s]

 33%|███▎      | 47980/144825 [00:11<00:22, 4377.34ex/s]

 33%|███▎      | 48419/144825 [00:11<00:22, 4267.39ex/s]

 34%|███▎      | 48867/144825 [00:11<00:22, 4327.23ex/s]

 34%|███▍      | 49301/144825 [00:11<00:22, 4200.36ex/s]

 34%|███▍      | 49751/144825 [00:11<00:22, 4284.74ex/s]

 35%|███▍      | 50181/144825 [00:12<00:22, 4206.07ex/s]

 35%|███▍      | 50635/144825 [00:12<00:21, 4302.42ex/s]

 35%|███▌      | 51067/144825 [00:12<00:22, 4229.20ex/s]

 36%|███▌      | 51523/144825 [00:12<00:21, 4323.92ex/s]

 36%|███▌      | 51969/144825 [00:12<00:21, 4361.91ex/s]

 36%|███▌      | 52406/144825 [00:12<00:21, 4282.62ex/s]

 36%|███▋      | 52861/144825 [00:12<00:21, 4360.12ex/s]

 37%|███▋      | 53298/144825 [00:12<00:21, 4277.48ex/s]

 37%|███▋      | 53751/144825 [00:12<00:20, 4350.64ex/s]

 37%|███▋      | 54187/144825 [00:12<00:21, 4266.80ex/s]

 38%|███▊      | 54639/144825 [00:13<00:20, 4340.47ex/s]

 38%|███▊      | 55074/144825 [00:13<00:21, 4106.46ex/s]

 38%|███▊      | 55518/144825 [00:13<00:21, 4200.11ex/s]

 39%|███▊      | 55960/144825 [00:13<00:20, 4262.96ex/s]

 39%|███▉      | 56389/144825 [00:13<00:21, 4152.28ex/s]

 39%|███▉      | 56834/144825 [00:13<00:20, 4237.95ex/s]

 40%|███▉      | 57260/144825 [00:13<00:21, 4127.47ex/s]

 40%|███▉      | 57708/144825 [00:13<00:20, 4227.90ex/s]

 40%|████      | 58133/144825 [00:13<00:20, 4137.63ex/s]

 40%|████      | 58575/144825 [00:14<00:20, 4217.18ex/s]

 41%|████      | 59000/144825 [00:14<00:20, 4118.50ex/s]

 41%|████      | 59452/144825 [00:14<00:20, 4233.16ex/s]

 41%|████▏     | 59904/144825 [00:14<00:19, 4315.59ex/s]

 42%|████▏     | 60337/144825 [00:14<00:20, 4191.89ex/s]

 42%|████▏     | 60779/144825 [00:14<00:19, 4257.16ex/s]

 42%|████▏     | 61206/144825 [00:14<00:20, 4148.49ex/s]

 43%|████▎     | 61657/144825 [00:14<00:19, 4251.48ex/s]

 43%|████▎     | 62084/144825 [00:14<00:19, 4152.13ex/s]

 43%|████▎     | 62520/144825 [00:14<00:19, 4211.30ex/s]

 43%|████▎     | 62968/144825 [00:15<00:19, 4286.64ex/s]

 44%|████▍     | 63398/144825 [00:15<00:19, 4164.67ex/s]

 44%|████▍     | 63847/144825 [00:15<00:19, 4258.95ex/s]

 44%|████▍     | 64275/144825 [00:15<00:19, 4163.99ex/s]

 45%|████▍     | 64713/144825 [00:15<00:18, 4226.54ex/s]

 45%|████▍     | 65137/144825 [00:15<00:19, 4121.24ex/s]

 45%|████▌     | 65582/144825 [00:15<00:18, 4214.44ex/s]

 46%|████▌     | 66005/144825 [00:15<00:19, 4134.40ex/s]

 46%|████▌     | 66447/144825 [00:15<00:18, 4215.44ex/s]

 46%|████▌     | 66884/144825 [00:16<00:18, 4260.10ex/s]

 46%|████▋     | 67311/144825 [00:16<00:18, 4145.76ex/s]

 47%|████▋     | 67756/144825 [00:16<00:18, 4232.38ex/s]

 47%|████▋     | 68181/144825 [00:16<00:18, 4144.01ex/s]

 47%|████▋     | 68623/144825 [00:16<00:18, 4221.98ex/s]

 48%|████▊     | 69047/144825 [00:16<00:18, 4146.23ex/s]

 48%|████▊     | 69487/144825 [00:16<00:17, 4219.10ex/s]

 48%|████▊     | 69941/144825 [00:16<00:17, 4311.77ex/s]

 49%|████▊     | 70373/144825 [00:16<00:17, 4201.93ex/s]

 49%|████▉     | 70818/144825 [00:16<00:17, 4271.82ex/s]

 49%|████▉     | 71247/144825 [00:17<00:17, 4152.60ex/s]

 49%|████▉     | 71684/144825 [00:17<00:17, 4215.40ex/s]

 50%|████▉     | 72107/144825 [00:17<00:17, 4138.46ex/s]

 50%|█████     | 72552/144825 [00:17<00:17, 4228.60ex/s]

 50%|█████     | 72989/144825 [00:17<00:16, 4267.44ex/s]

 51%|█████     | 73417/144825 [00:17<00:17, 4141.59ex/s]

 51%|█████     | 73858/144825 [00:17<00:16, 4217.57ex/s]

 51%|█████▏    | 74281/144825 [00:17<00:17, 4133.50ex/s]

 52%|█████▏    | 74724/144825 [00:17<00:16, 4218.36ex/s]

 52%|█████▏    | 75147/144825 [00:17<00:16, 4127.11ex/s]

 52%|█████▏    | 75589/144825 [00:18<00:16, 4209.33ex/s]

 52%|█████▏    | 76011/144825 [00:18<00:16, 4128.22ex/s]

 53%|█████▎    | 76460/144825 [00:18<00:16, 4231.75ex/s]

 53%|█████▎    | 76897/144825 [00:18<00:15, 4271.61ex/s]

 53%|█████▎    | 77325/144825 [00:18<00:16, 4164.57ex/s]

 54%|█████▎    | 77762/144825 [00:18<00:15, 4224.07ex/s]

 54%|█████▍    | 78186/144825 [00:18<00:16, 4121.31ex/s]

 54%|█████▍    | 78634/144825 [00:18<00:15, 4223.44ex/s]

 55%|█████▍    | 79058/144825 [00:18<00:15, 4111.87ex/s]

 55%|█████▍    | 79497/144825 [00:19<00:15, 4190.97ex/s]

 55%|█████▌    | 79935/144825 [00:19<00:15, 4244.44ex/s]

 55%|█████▌    | 80361/144825 [00:19<00:15, 4149.01ex/s]

 56%|█████▌    | 80797/144825 [00:19<00:15, 4210.04ex/s]

 56%|█████▌    | 81219/144825 [00:19<00:15, 4087.87ex/s]

 56%|█████▋    | 81661/144825 [00:19<00:15, 4181.74ex/s]

 57%|█████▋    | 82081/144825 [00:19<00:15, 4112.94ex/s]

 57%|█████▋    | 82531/144825 [00:19<00:14, 4225.13ex/s]

 57%|█████▋    | 82978/144825 [00:19<00:14, 4296.21ex/s]

 58%|█████▊    | 83409/144825 [00:19<00:14, 4206.79ex/s]

 58%|█████▊    | 83852/144825 [00:20<00:14, 4270.47ex/s]

 58%|█████▊    | 84280/144825 [00:20<00:14, 4171.32ex/s]

 58%|█████▊    | 84716/144825 [00:20<00:14, 4224.53ex/s]

 59%|█████▉    | 85140/144825 [00:20<00:14, 4137.94ex/s]

 59%|█████▉    | 85589/144825 [00:20<00:13, 4239.44ex/s]

 59%|█████▉    | 86014/144825 [00:20<00:14, 4111.79ex/s]

 60%|█████▉    | 86455/144825 [00:20<00:13, 4197.36ex/s]

 60%|█████▉    | 86894/144825 [00:20<00:13, 4251.46ex/s]

 60%|██████    | 87321/144825 [00:20<00:13, 4159.42ex/s]

 61%|██████    | 87765/144825 [00:20<00:13, 4239.79ex/s]

 61%|██████    | 88190/144825 [00:21<00:13, 4116.56ex/s]

 61%|██████    | 88628/144825 [00:21<00:13, 4192.55ex/s]

 61%|██████▏   | 89049/144825 [00:21<00:13, 4095.01ex/s]

 62%|██████▏   | 89493/144825 [00:21<00:13, 4194.21ex/s]

 62%|██████▏   | 89940/144825 [00:21<00:12, 4274.29ex/s]

 62%|██████▏   | 90369/144825 [00:21<00:13, 4153.12ex/s]

 63%|██████▎   | 90812/144825 [00:21<00:12, 4230.99ex/s]

 63%|██████▎   | 91237/144825 [00:21<00:13, 4121.25ex/s]

 63%|██████▎   | 91681/144825 [00:21<00:12, 4209.69ex/s]

 64%|██████▎   | 92104/144825 [00:22<00:12, 4127.11ex/s]

 64%|██████▍   | 92544/144825 [00:22<00:12, 4203.78ex/s]

 64%|██████▍   | 92981/144825 [00:22<00:12, 4251.77ex/s]

 64%|██████▍   | 93407/144825 [00:22<00:12, 4148.51ex/s]

 65%|██████▍   | 93848/144825 [00:22<00:12, 4224.33ex/s]

 65%|██████▌   | 94272/144825 [00:22<00:12, 4115.84ex/s]

 65%|██████▌   | 94714/144825 [00:22<00:11, 4202.53ex/s]

 66%|██████▌   | 95136/144825 [00:22<00:12, 4100.03ex/s]

 66%|██████▌   | 95590/144825 [00:22<00:11, 4225.37ex/s]

 66%|██████▋   | 96014/144825 [00:22<00:11, 4129.80ex/s]

 67%|██████▋   | 96447/144825 [00:23<00:11, 4186.91ex/s]

 67%|██████▋   | 96897/144825 [00:23<00:11, 4276.80ex/s]

 67%|██████▋   | 97326/144825 [00:23<00:11, 4154.12ex/s]

 68%|██████▊   | 97765/144825 [00:23<00:11, 4220.69ex/s]

 68%|██████▊   | 98189/144825 [00:23<00:11, 4137.97ex/s]

 68%|██████▊   | 98640/144825 [00:23<00:10, 4243.47ex/s]

 68%|██████▊   | 99066/144825 [00:23<00:11, 4142.01ex/s]

 69%|██████▊   | 99502/144825 [00:23<00:10, 4202.69ex/s]

 69%|██████▉   | 99947/144825 [00:23<00:10, 4273.27ex/s]

 69%|██████▉   | 100376/144825 [00:24<00:10, 4162.95ex/s]

 70%|██████▉   | 100819/144825 [00:24<00:10, 4238.69ex/s]

 70%|██████▉   | 101244/144825 [00:24<00:10, 4126.12ex/s]

 70%|███████   | 101683/144825 [00:24<00:10, 4200.17ex/s]

 71%|███████   | 102105/144825 [00:24<00:10, 4085.59ex/s]

 71%|███████   | 102552/144825 [00:24<00:10, 4194.79ex/s]

 71%|███████   | 102990/144825 [00:24<00:09, 4247.78ex/s]

 71%|███████▏  | 103416/144825 [00:24<00:13, 3054.38ex/s]

 72%|███████▏  | 103862/144825 [00:24<00:12, 3378.83ex/s]

 72%|███████▏  | 104250/144825 [00:25<00:11, 3503.43ex/s]

 72%|███████▏  | 104689/144825 [00:25<00:10, 3735.23ex/s]

 73%|███████▎  | 105089/144825 [00:25<00:10, 3770.75ex/s]

 73%|███████▎  | 105522/144825 [00:25<00:10, 3924.01ex/s]

 73%|███████▎  | 105971/144825 [00:25<00:09, 4084.41ex/s]

 73%|███████▎  | 106391/144825 [00:25<00:09, 4058.85ex/s]

 74%|███████▍  | 106835/144825 [00:25<00:09, 4167.08ex/s]

 74%|███████▍  | 107258/144825 [00:25<00:09, 4072.00ex/s]

 74%|███████▍  | 107689/144825 [00:25<00:08, 4139.80ex/s]

 75%|███████▍  | 108107/144825 [00:25<00:09, 4062.85ex/s]

 75%|███████▍  | 108555/144825 [00:26<00:08, 4181.37ex/s]

 75%|███████▌  | 109000/144825 [00:26<00:09, 3862.71ex/s]

 76%|███████▌  | 109448/144825 [00:26<00:08, 4031.10ex/s]

 76%|███████▌  | 109887/144825 [00:26<00:08, 4130.80ex/s]

 76%|███████▌  | 110306/144825 [00:26<00:08, 4104.04ex/s]

 76%|███████▋  | 110756/144825 [00:26<00:08, 4216.42ex/s]

 77%|███████▋  | 111181/144825 [00:26<00:08, 4124.74ex/s]

 77%|███████▋  | 111623/144825 [00:26<00:07, 4207.60ex/s]

 77%|███████▋  | 112046/144825 [00:26<00:08, 4091.40ex/s]

 78%|███████▊  | 112484/144825 [00:27<00:07, 4173.94ex/s]

 78%|███████▊  | 112934/144825 [00:27<00:07, 4267.16ex/s]

 78%|███████▊  | 113363/144825 [00:27<00:07, 4147.42ex/s]

 79%|███████▊  | 113793/144825 [00:27<00:07, 4189.88ex/s]

 79%|███████▉  | 114214/144825 [00:27<00:07, 4082.77ex/s]

 79%|███████▉  | 114655/144825 [00:27<00:07, 4176.55ex/s]

 79%|███████▉  | 115074/144825 [00:27<00:07, 4099.06ex/s]

 80%|███████▉  | 115512/144825 [00:27<00:07, 4178.02ex/s]

 80%|████████  | 115950/144825 [00:27<00:06, 4235.36ex/s]

 80%|████████  | 116375/144825 [00:27<00:06, 4087.74ex/s]

 81%|████████  | 116820/144825 [00:28<00:06, 4191.87ex/s]

 81%|████████  | 117241/144825 [00:28<00:06, 4123.42ex/s]

 81%|████████▏ | 117686/144825 [00:28<00:06, 4218.04ex/s]

 82%|████████▏ | 118109/144825 [00:28<00:06, 4116.97ex/s]

 82%|████████▏ | 118554/144825 [00:28<00:06, 4210.54ex/s]

 82%|████████▏ | 119000/144825 [00:28<00:06, 4145.92ex/s]

 82%|████████▏ | 119450/144825 [00:28<00:05, 4247.66ex/s]

 83%|████████▎ | 119882/144825 [00:28<00:05, 4268.05ex/s]

 83%|████████▎ | 120310/144825 [00:28<00:05, 4165.77ex/s]

 83%|████████▎ | 120745/144825 [00:29<00:05, 4218.76ex/s]

 84%|████████▎ | 121168/144825 [00:29<00:05, 4116.06ex/s]

 84%|████████▍ | 121612/144825 [00:29<00:05, 4208.19ex/s]

 84%|████████▍ | 122034/144825 [00:29<00:05, 4123.41ex/s]

 85%|████████▍ | 122475/144825 [00:29<00:05, 4204.33ex/s]

 85%|████████▍ | 122910/144825 [00:29<00:05, 4245.12ex/s]

 85%|████████▌ | 123336/144825 [00:29<00:05, 4124.14ex/s]

 85%|████████▌ | 123787/144825 [00:29<00:04, 4233.51ex/s]

 86%|████████▌ | 124212/144825 [00:29<00:05, 4119.19ex/s]

 86%|████████▌ | 124654/144825 [00:29<00:04, 4203.79ex/s]

 86%|████████▋ | 125076/144825 [00:30<00:04, 4116.65ex/s]

 87%|████████▋ | 125532/144825 [00:30<00:04, 4243.13ex/s]

 87%|████████▋ | 125990/144825 [00:30<00:04, 4341.15ex/s]

 87%|████████▋ | 126426/144825 [00:30<00:04, 4256.94ex/s]

 88%|████████▊ | 126877/144825 [00:30<00:04, 4329.68ex/s]

 88%|████████▊ | 127311/144825 [00:30<00:04, 4257.17ex/s]

 88%|████████▊ | 127764/144825 [00:30<00:03, 4333.84ex/s]

 89%|████████▊ | 128199/144825 [00:30<00:03, 4251.42ex/s]

 89%|████████▉ | 128652/144825 [00:30<00:03, 4330.72ex/s]

 89%|████████▉ | 129086/144825 [00:30<00:03, 4244.45ex/s]

 89%|████████▉ | 129537/144825 [00:31<00:03, 4318.77ex/s]

 90%|████████▉ | 129975/144825 [00:31<00:03, 4334.89ex/s]

 90%|█████████ | 130410/144825 [00:31<00:03, 4181.89ex/s]

 90%|█████████ | 130847/144825 [00:31<00:03, 4233.55ex/s]

 91%|█████████ | 131272/144825 [00:31<00:03, 4135.21ex/s]

 91%|█████████ | 131711/144825 [00:31<00:03, 4206.93ex/s]

 91%|█████████ | 132133/144825 [00:31<00:03, 4106.34ex/s]

 92%|█████████▏| 132569/144825 [00:31<00:02, 4178.45ex/s]

 92%|█████████▏| 133000/144825 [00:31<00:02, 4066.41ex/s]

 92%|█████████▏| 133441/144825 [00:32<00:02, 4162.78ex/s]

 92%|█████████▏| 133871/144825 [00:32<00:02, 4201.95ex/s]

 93%|█████████▎| 134293/144825 [00:32<00:02, 4102.09ex/s]

 93%|█████████▎| 134738/144825 [00:32<00:02, 4201.53ex/s]

 93%|█████████▎| 135160/144825 [00:32<00:02, 4114.34ex/s]

 94%|█████████▎| 135601/144825 [00:32<00:02, 4199.93ex/s]

 94%|█████████▍| 136022/144825 [00:32<00:02, 4096.78ex/s]

 94%|█████████▍| 136465/144825 [00:32<00:01, 4191.06ex/s]

 95%|█████████▍| 136899/144825 [00:32<00:01, 4233.92ex/s]

 95%|█████████▍| 137324/144825 [00:32<00:01, 4114.71ex/s]

 95%|█████████▌| 137762/144825 [00:33<00:01, 4190.15ex/s]

 95%|█████████▌| 138183/144825 [00:33<00:01, 4092.96ex/s]

 96%|█████████▌| 138625/144825 [00:33<00:01, 4187.35ex/s]

 96%|█████████▌| 139045/144825 [00:33<00:01, 4061.70ex/s]

 96%|█████████▋| 139482/144825 [00:33<00:01, 4149.33ex/s]

 97%|█████████▋| 139919/144825 [00:33<00:01, 4210.92ex/s]

 97%|█████████▋| 140342/144825 [00:33<00:01, 4142.29ex/s]

 97%|█████████▋| 140785/144825 [00:33<00:00, 4223.37ex/s]

 98%|█████████▊| 141209/144825 [00:33<00:00, 4121.91ex/s]

 98%|█████████▊| 141655/144825 [00:34<00:00, 4219.67ex/s]

 98%|█████████▊| 142078/144825 [00:34<00:00, 4119.82ex/s]

 98%|█████████▊| 142525/144825 [00:34<00:00, 4220.62ex/s]

 99%|█████████▊| 142972/144825 [00:34<00:00, 4292.67ex/s]

 99%|█████████▉| 143403/144825 [00:34<00:00, 4195.31ex/s]

 99%|█████████▉| 143858/144825 [00:34<00:00, 4296.67ex/s]

100%|█████████▉| 144289/144825 [00:34<00:00, 4219.94ex/s]

100%|█████████▉| 144738/144825 [00:34<00:00, 4297.95ex/s]

100%|██████████| 144825/144825 [00:34<00:00, 4167.07ex/s]




  0%|          | 0/69825 [00:00<?, ?ex/s]

  1%|          | 444/69825 [00:00<00:15, 4435.68ex/s]

  1%|▏         | 888/69825 [00:00<00:15, 4429.29ex/s]

  2%|▏         | 1331/69825 [00:00<00:16, 4193.30ex/s]

  3%|▎         | 1773/69825 [00:00<00:15, 4279.01ex/s]

  3%|▎         | 2203/69825 [00:00<00:16, 4166.09ex/s]

  4%|▍         | 2642/69825 [00:00<00:15, 4239.16ex/s]

  4%|▍         | 3067/69825 [00:00<00:16, 4130.06ex/s]

  5%|▌         | 3499/69825 [00:00<00:15, 4186.38ex/s]

  6%|▌         | 3932/69825 [00:00<00:15, 4228.11ex/s]

  6%|▌         | 4356/69825 [00:01<00:15, 4111.60ex/s]

  7%|▋         | 4790/69825 [00:01<00:15, 4177.71ex/s]

  7%|▋         | 5209/69825 [00:01<00:15, 4099.92ex/s]

  8%|▊         | 5653/69825 [00:01<00:15, 4198.33ex/s]

  9%|▊         | 6074/69825 [00:01<00:15, 4087.12ex/s]

  9%|▉         | 6505/69825 [00:01<00:15, 4151.77ex/s]

 10%|▉         | 6939/69825 [00:01<00:14, 4205.76ex/s]

 11%|█         | 7361/69825 [00:01<00:15, 4063.54ex/s]

 11%|█         | 7793/69825 [00:01<00:14, 4137.42ex/s]

 12%|█▏        | 8209/69825 [00:01<00:15, 4051.28ex/s]

 12%|█▏        | 8645/69825 [00:02<00:14, 4138.11ex/s]

 13%|█▎        | 9060/69825 [00:02<00:15, 4028.59ex/s]

 14%|█▎        | 9465/69825 [00:02<00:15, 3957.58ex/s]

 14%|█▍        | 9862/69825 [00:02<00:15, 3793.40ex/s]

 15%|█▍        | 10243/69825 [00:02<00:17, 3428.45ex/s]

 15%|█▌        | 10593/69825 [00:02<00:17, 3440.63ex/s]

 16%|█▌        | 10942/69825 [00:02<00:17, 3424.79ex/s]

 16%|█▌        | 11318/69825 [00:02<00:16, 3518.76ex/s]

 17%|█▋        | 11748/69825 [00:02<00:15, 3740.89ex/s]

 17%|█▋        | 12128/69825 [00:03<00:15, 3756.25ex/s]

 18%|█▊        | 12564/69825 [00:03<00:14, 3932.25ex/s]

 19%|█▊        | 13000/69825 [00:03<00:14, 3942.66ex/s]

 19%|█▉        | 13440/69825 [00:03<00:13, 4074.80ex/s]

 20%|█▉        | 13873/69825 [00:03<00:13, 4146.95ex/s]

 20%|██        | 14289/69825 [00:03<00:13, 3998.60ex/s]

 21%|██        | 14721/69825 [00:03<00:13, 4090.15ex/s]

 22%|██▏       | 15132/69825 [00:03<00:13, 4032.28ex/s]

 22%|██▏       | 15564/69825 [00:03<00:13, 4115.18ex/s]

 23%|██▎       | 15994/69825 [00:03<00:12, 4168.21ex/s]

 24%|██▎       | 16412/69825 [00:04<00:13, 4058.63ex/s]

 24%|██▍       | 16843/69825 [00:04<00:12, 4128.85ex/s]

 25%|██▍       | 17257/69825 [00:04<00:12, 4062.10ex/s]

 25%|██▌       | 17686/69825 [00:04<00:12, 4127.38ex/s]

 26%|██▌       | 18100/69825 [00:04<00:12, 4026.39ex/s]

 27%|██▋       | 18530/69825 [00:04<00:12, 4103.41ex/s]

 27%|██▋       | 18961/69825 [00:04<00:12, 4161.65ex/s]

 28%|██▊       | 19378/69825 [00:04<00:12, 4073.40ex/s]

 28%|██▊       | 19810/69825 [00:04<00:12, 4144.75ex/s]

 29%|██▉       | 20226/69825 [00:05<00:12, 4021.91ex/s]

 30%|██▉       | 20662/69825 [00:05<00:11, 4119.47ex/s]

 30%|███       | 21076/69825 [00:05<00:12, 4030.65ex/s]

 31%|███       | 21512/69825 [00:05<00:11, 4126.07ex/s]

 31%|███▏      | 21950/69825 [00:05<00:11, 4197.35ex/s]

 32%|███▏      | 22371/69825 [00:05<00:11, 4076.54ex/s]

 33%|███▎      | 22798/69825 [00:05<00:11, 4130.38ex/s]

 33%|███▎      | 23213/69825 [00:05<00:11, 4041.17ex/s]

 34%|███▍      | 23653/69825 [00:05<00:11, 4144.28ex/s]

 34%|███▍      | 24069/69825 [00:05<00:11, 4041.37ex/s]

 35%|███▌      | 24495/69825 [00:06<00:11, 4102.15ex/s]

 36%|███▌      | 24928/69825 [00:06<00:10, 4168.24ex/s]

 36%|███▋      | 25346/69825 [00:06<00:15, 2804.13ex/s]

 37%|███▋      | 25686/69825 [00:06<00:15, 2833.17ex/s]

 37%|███▋      | 26012/69825 [00:06<00:16, 2699.23ex/s]

 38%|███▊      | 26340/69825 [00:06<00:15, 2834.34ex/s]

 38%|███▊      | 26708/69825 [00:06<00:14, 3045.00ex/s]

 39%|███▊      | 27034/69825 [00:06<00:14, 2995.54ex/s]

 39%|███▉      | 27349/69825 [00:07<00:15, 2829.99ex/s]

 40%|███▉      | 27687/69825 [00:07<00:14, 2973.77ex/s]

 40%|████      | 28000/69825 [00:07<00:14, 2842.51ex/s]

 41%|████      | 28336/69825 [00:07<00:13, 2980.40ex/s]

 41%|████      | 28680/69825 [00:07<00:13, 3105.69ex/s]

 42%|████▏     | 29000/69825 [00:07<00:13, 3099.38ex/s]

 42%|████▏     | 29373/69825 [00:07<00:12, 3278.62ex/s]

 43%|████▎     | 29726/69825 [00:07<00:11, 3349.24ex/s]

 43%|████▎     | 30064/69825 [00:07<00:12, 3124.44ex/s]

 44%|████▎     | 30408/69825 [00:08<00:12, 3211.63ex/s]

 44%|████▍     | 30745/69825 [00:08<00:12, 3255.13ex/s]

 45%|████▍     | 31074/69825 [00:08<00:12, 3124.08ex/s]

 45%|████▌     | 31468/69825 [00:08<00:11, 3354.02ex/s]

 46%|████▌     | 31855/69825 [00:08<00:10, 3500.04ex/s]

 46%|████▌     | 32208/69825 [00:08<00:11, 3290.56ex/s]

 47%|████▋     | 32554/69825 [00:08<00:11, 3337.00ex/s]

 47%|████▋     | 32899/69825 [00:08<00:10, 3368.93ex/s]

 48%|████▊     | 33239/69825 [00:08<00:11, 3160.44ex/s]

 48%|████▊     | 33601/69825 [00:09<00:11, 3287.33ex/s]

 49%|████▊     | 33984/69825 [00:09<00:10, 3439.47ex/s]

 49%|████▉     | 34332/69825 [00:09<00:10, 3406.22ex/s]

 50%|████▉     | 34683/69825 [00:09<00:10, 3432.37ex/s]

 50%|█████     | 35028/69825 [00:09<00:10, 3261.77ex/s]

 51%|█████     | 35369/69825 [00:09<00:10, 3303.61ex/s]

 51%|█████     | 35709/69825 [00:09<00:10, 3330.62ex/s]

 52%|█████▏    | 36044/69825 [00:09<00:10, 3245.86ex/s]

 52%|█████▏    | 36370/69825 [00:09<00:10, 3162.65ex/s]

 53%|█████▎    | 36791/69825 [00:09<00:09, 3460.47ex/s]

 53%|█████▎    | 37161/69825 [00:10<00:09, 3529.24ex/s]

 54%|█████▍    | 37585/69825 [00:10<00:08, 3735.86ex/s]

 54%|█████▍    | 38000/69825 [00:10<00:08, 3740.67ex/s]

 55%|█████▌    | 38412/69825 [00:10<00:08, 3850.14ex/s]

 56%|█████▌    | 38841/69825 [00:10<00:07, 3976.67ex/s]

 56%|█████▌    | 39240/69825 [00:10<00:07, 3897.01ex/s]

 57%|█████▋    | 39661/69825 [00:10<00:07, 3987.98ex/s]

 57%|█████▋    | 40061/69825 [00:10<00:07, 3923.97ex/s]

 58%|█████▊    | 40473/69825 [00:10<00:07, 3979.61ex/s]

 59%|█████▊    | 40910/69825 [00:11<00:07, 4093.94ex/s]

 59%|█████▉    | 41321/69825 [00:11<00:07, 4025.07ex/s]

 60%|█████▉    | 41749/69825 [00:11<00:06, 4098.48ex/s]

 60%|██████    | 42160/69825 [00:11<00:06, 3978.08ex/s]

 61%|██████    | 42581/69825 [00:11<00:06, 4045.10ex/s]

 62%|██████▏   | 42987/69825 [00:11<00:06, 4023.42ex/s]

 62%|██████▏   | 43391/69825 [00:11<00:07, 3659.51ex/s]

 63%|██████▎   | 43764/69825 [00:11<00:08, 3195.80ex/s]

 63%|██████▎   | 44100/69825 [00:11<00:07, 3236.45ex/s]

 64%|██████▍   | 44531/69825 [00:12<00:07, 3521.91ex/s]

 64%|██████▍   | 44969/69825 [00:12<00:06, 3757.74ex/s]

 65%|██████▍   | 45355/69825 [00:12<00:06, 3753.33ex/s]

 66%|██████▌   | 45780/69825 [00:12<00:06, 3893.92ex/s]

 66%|██████▌   | 46175/69825 [00:12<00:06, 3847.83ex/s]

 67%|██████▋   | 46603/69825 [00:12<00:05, 3971.28ex/s]

 67%|██████▋   | 47004/69825 [00:12<00:05, 3931.68ex/s]

 68%|██████▊   | 47432/69825 [00:12<00:05, 4031.75ex/s]

 69%|██████▊   | 47865/69825 [00:12<00:05, 4118.46ex/s]

 69%|██████▉   | 48279/69825 [00:12<00:05, 4006.71ex/s]

 70%|██████▉   | 48713/69825 [00:13<00:05, 4101.02ex/s]

 70%|███████   | 49125/69825 [00:13<00:05, 3967.43ex/s]

 71%|███████   | 49553/69825 [00:13<00:04, 4056.80ex/s]

 72%|███████▏  | 49988/69825 [00:13<00:04, 4141.47ex/s]

 72%|███████▏  | 50404/69825 [00:13<00:04, 4059.76ex/s]

 73%|███████▎  | 50838/69825 [00:13<00:04, 4141.07ex/s]

 73%|███████▎  | 51254/69825 [00:13<00:04, 4062.55ex/s]

 74%|███████▍  | 51699/69825 [00:13<00:04, 4173.03ex/s]

 75%|███████▍  | 52118/69825 [00:13<00:04, 4089.15ex/s]

 75%|███████▌  | 52559/69825 [00:13<00:04, 4182.53ex/s]

 76%|███████▌  | 53000/69825 [00:14<00:04, 4120.79ex/s]

 77%|███████▋  | 53440/69825 [00:14<00:03, 4200.92ex/s]

 77%|███████▋  | 53880/69825 [00:14<00:03, 4258.64ex/s]

 78%|███████▊  | 54307/69825 [00:14<00:03, 4155.79ex/s]

 78%|███████▊  | 54746/69825 [00:14<00:03, 4222.83ex/s]

 79%|███████▉  | 55170/69825 [00:14<00:03, 4130.18ex/s]

 80%|███████▉  | 55601/69825 [00:14<00:03, 4181.82ex/s]

 80%|████████  | 56020/69825 [00:14<00:03, 4083.76ex/s]

 81%|████████  | 56453/69825 [00:14<00:03, 4154.60ex/s]

 81%|████████▏ | 56879/69825 [00:15<00:03, 4185.12ex/s]

 82%|████████▏ | 57299/69825 [00:15<00:03, 4068.10ex/s]

 83%|████████▎ | 57734/69825 [00:15<00:02, 4149.49ex/s]

 83%|████████▎ | 58150/69825 [00:15<00:02, 4068.03ex/s]

 84%|████████▍ | 58587/69825 [00:15<00:02, 4155.59ex/s]

 85%|████████▍ | 59004/69825 [00:15<00:02, 4082.52ex/s]

 85%|████████▌ | 59444/69825 [00:15<00:02, 4173.68ex/s]

 86%|████████▌ | 59882/69825 [00:15<00:02, 4231.33ex/s]

 86%|████████▋ | 60306/69825 [00:15<00:02, 4130.78ex/s]

 87%|████████▋ | 60743/69825 [00:15<00:02, 4198.62ex/s]

 88%|████████▊ | 61164/69825 [00:16<00:02, 4096.62ex/s]

 88%|████████▊ | 61600/69825 [00:16<00:01, 4172.05ex/s]

 89%|████████▉ | 62019/69825 [00:16<00:01, 4091.25ex/s]

 89%|████████▉ | 62456/69825 [00:16<00:01, 4170.14ex/s]

 90%|█████████ | 62897/69825 [00:16<00:01, 4239.14ex/s]

 91%|█████████ | 63322/69825 [00:16<00:01, 4146.11ex/s]

 91%|█████████▏| 63761/69825 [00:16<00:01, 4214.25ex/s]

 92%|█████████▏| 64184/69825 [00:16<00:01, 4117.62ex/s]

 93%|█████████▎| 64625/69825 [00:16<00:01, 4201.19ex/s]

 93%|█████████▎| 65046/69825 [00:16<00:01, 4136.97ex/s]

 94%|█████████▍| 65478/69825 [00:17<00:01, 4189.99ex/s]

 94%|█████████▍| 65901/69825 [00:17<00:00, 4200.81ex/s]

 95%|█████████▍| 66322/69825 [00:17<00:00, 4079.48ex/s]

 96%|█████████▌| 66759/69825 [00:17<00:00, 4161.30ex/s]

 96%|█████████▌| 67177/69825 [00:17<00:00, 4071.03ex/s]

 97%|█████████▋| 67608/69825 [00:17<00:00, 4138.87ex/s]

 97%|█████████▋| 68023/69825 [00:17<00:00, 4012.41ex/s]

 98%|█████████▊| 68454/69825 [00:17<00:00, 4098.20ex/s]

 99%|█████████▊| 68884/69825 [00:17<00:00, 4155.41ex/s]

 99%|█████████▉| 69301/69825 [00:18<00:00, 4064.00ex/s]

100%|█████████▉| 69738/69825 [00:18<00:00, 4149.94ex/s]

100%|██████████| 69825/69825 [00:18<00:00, 3850.10ex/s]




In [20]:
# Sanity check: number of examples in the training and evaluation splits.
tuple(len(split) for split in (train_dataset, eval_dataset))

(144825, 69825)

In [21]:
# Spot-check one random training example: decode its token ids back to text
# and display the label next to it.
k = random.randrange(len(train_dataset))
# Fetch the row first and then its fields. Assuming train_dataset is a
# `datasets.Dataset`, train_dataset['input_ids'][k] would read the whole
# column just to pick a single element out of it.
example = train_dataset[k]
tokenizer.decode(example['input_ids']), example['labels']

('[CLS] 신기하네요오홋 [SEP] 제품 전체 # 인지도 [SEP]', 1)

# Load Trainer

In [22]:
# Bundle every training hyper-parameter into a single TrainingArguments object.
# All values are notebook-level configuration variables.
args = TrainingArguments(
    # --- run identity and experiment tracking ---
    run_name=run_name,
    output_dir=run_name,  # checkpoints are written to a folder named after the run
    report_to=report_to,

    # --- schedule length and batching ---
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,  # same per-device batch size for eval
    gradient_accumulation_steps=gradient_accumulation_steps,
    fp16=fp16,

    # --- optimizer ---
    optim=optim,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    adam_epsilon=adam_epsilon,

    # --- learning-rate schedule ---
    lr_scheduler_type=lr_scheduler_type,
    warmup_ratio=warmup_ratio,

    # --- evaluation, checkpointing and best-model selection ---
    evaluation_strategy=evaluation_strategy,
    save_strategy=save_strategy,
    save_total_limit=save_total_limit,
    load_best_model_at_end=load_best_model_at_end,
    metric_for_best_model=metric_for_best_model,

    # --- logging ---
    logging_strategy=logging_strategy,
    logging_steps=logging_steps,
    logging_first_step=logging_first_step,
)

In [23]:
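# Early stopping is disabled for this run. To enable it, uncomment the line below
# together with the `callbacks=[es]` argument of the Trainer call.
# NOTE: EarlyStoppingCallback is not among the names imported from `transformers`
# in the imports cell, so it has to be imported first.
# es = EarlyStoppingCallback(early_stopping_patience=early_stopping_patience)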

In [24]:
# Wire the model, training arguments, datasets, tokenizer, collator and
# metric function into a Trainer instance.
trainer = Trainer(
    model=model,
    args=args,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    # callbacks=[es],
)

# Run Trainer

In [25]:
# Fine-tune the model, then close the Weights & Biases run.
try:
    trainer.train()
except BaseException:
    # BaseException also covers KeyboardInterrupt (manual kernel interrupt).
    # Close the W&B run and flag it as failed instead of leaving it open,
    # then let the original error propagate.
    wandb.finish(exit_code=1)
    raise
else:
    wandb.finish()

The following columns in the training set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: entity_property, sentence_form, id. If entity_property, sentence_form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running training *****


  Num examples = 144825


  Num Epochs = 15


  Instantaneous batch size per device = 32


  Total train batch size (w. parallel, distributed & accumulation) = 128


  Gradient Accumulation steps = 1


  Total optimization steps = 16980


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


You're using a ElectraTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.




Epoch,Training Loss,Validation Loss,Accuracy,F1 True,F1 False,F1 Macro,F1 Micro
1,0.0894,0.069027,0.97647,0.691571,0.987768,0.83967,0.97647
2,0.0682,0.047744,0.982413,0.760996,0.990871,0.875934,0.982413
3,0.0511,0.031315,0.988557,0.861597,0.994032,0.927814,0.988557
4,0.0386,0.022881,0.991651,0.899465,0.995644,0.947555,0.991651
5,0.0283,0.014987,0.995446,0.946338,0.997622,0.97198,0.995446
6,0.0217,0.012261,0.996205,0.954569,0.99802,0.976294,0.996205
7,0.0164,0.006307,0.998067,0.977466,0.99899,0.988228,0.998067
8,0.0119,0.004425,0.998697,0.984785,0.999319,0.992052,0.998697
9,0.008,0.002809,0.999141,0.98997,0.999551,0.994761,0.999141
10,0.0059,0.001517,0.999513,0.994326,0.999746,0.997036,0.999513


The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: entity_property, sentence_form, id. If entity_property, sentence_form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1132


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1132/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1132/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1132/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1132/special_tokens_map.json




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: entity_property, sentence_form, id. If entity_property, sentence_form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2264


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2264/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2264/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2264/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2264/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1132] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: entity_property, sentence_form, id. If entity_property, sentence_form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3396


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3396/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3396/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3396/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3396/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2264] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: entity_property, sentence_form, id. If entity_property, sentence_form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-4528


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-4528/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-4528/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-4528/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-4528/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3396] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: entity_property, sentence_form, id. If entity_property, sentence_form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-5660


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-5660/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-5660/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-5660/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-5660/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-4528] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: entity_property, sentence_form, id. If entity_property, sentence_form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-6792


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-6792/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-6792/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-6792/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-6792/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-5660] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: entity_property, sentence_form, id. If entity_property, sentence_form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-7924


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-7924/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-7924/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-7924/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-7924/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-6792] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: entity_property, sentence_form, id. If entity_property, sentence_form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-9056


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-9056/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-9056/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-9056/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-9056/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-7924] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: entity_property, sentence_form, id. If entity_property, sentence_form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-10188


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-10188/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-10188/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-10188/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-10188/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-9056] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: entity_property, sentence_form, id. If entity_property, sentence_form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-11320


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-11320/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-11320/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-11320/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-11320/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-10188] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: entity_property, sentence_form, id. If entity_property, sentence_form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-12452


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-12452/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-12452/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-12452/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-12452/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-11320] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: entity_property, sentence_form, id. If entity_property, sentence_form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-13584


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-13584/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-13584/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-13584/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-13584/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-12452] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: entity_property, sentence_form, id. If entity_property, sentence_form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-14716


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-14716/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-14716/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-14716/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-14716/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-13584] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: entity_property, sentence_form, id. If entity_property, sentence_form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-15848


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-15848/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-15848/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-15848/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-15848/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-14716] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: entity_property, sentence_form, id. If entity_property, sentence_form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-16980


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-16980/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-16980/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-16980/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-16980/special_tokens_map.json




Training completed. Do not forget to share your model on huggingface.co/models =)




Loading best model from snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-15848 (score: 0.9992164546935713).


Saving model checkpoint to /tmp/tmptfubyr5h


Configuration saved in /tmp/tmptfubyr5h/config.json


Model weights saved in /tmp/tmptfubyr5h/pytorch_model.bin


tokenizer config file saved in /tmp/tmptfubyr5h/tokenizer_config.json


Special tokens file saved in /tmp/tmptfubyr5h/special_tokens_map.json


0,1
eval/accuracy,▁▃▅▆▇▇▇████████
eval/f1_false,▁▃▅▆▇▇▇████████
eval/f1_macro,▁▃▅▆▇▇█████████
eval/f1_micro,▁▃▅▆▇▇▇████████
eval/f1_true,▁▃▅▆▇▇█████████
eval/loss,█▆▄▃▂▂▂▁▁▁▁▁▁▁▁
eval/runtime,▄█▅▆▄▆▅▁▅▇▆▅▆██
eval/samples_per_second,▅▁▃▃▅▃▄█▄▂▃▄▃▁▁
eval/steps_per_second,▅▁▄▃▅▃▄█▄▂▃▄▃▁▁
train/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████

0,1
eval/accuracy,0.99987
eval/f1_false,0.99993
eval/f1_macro,0.99922
eval/f1_micro,0.99987
eval/f1_true,0.9985
eval/loss,0.00051
eval/runtime,210.0723
eval/samples_per_second,332.385
eval/steps_per_second,2.599
train/epoch,15.0


In [26]:
# Imported here because the notebook's imports cell does not provide it.
import shutil

# Files to keep inside each checkpoint folder; every other regular file found
# there is deleted.
keep = [
    'added_tokens.json',
    'config.json',
    'pytorch_model.bin',
    'special_tokens_map.json',
    'tokenizer.json',
    'tokenizer_config.json',
    'vocab.txt'
]

ckpts = os.listdir(run_name)
for ckpt in ckpts:
    ckpt = os.path.join(run_name, ckpt)
    if not os.path.isdir(ckpt):
        # A plain file sitting next to the checkpoint folders: listing it
        # would raise NotADirectoryError, so leave it alone.
        continue
    for item in os.listdir(ckpt):
        item_path = os.path.join(ckpt, item)
        # Only delete regular files; os.remove() fails on directories.
        if item not in keep and os.path.isfile(item_path):
            os.remove(item_path)

# Move the W&B logs and the pruned run folder into SAVE_PATH. Doing this in
# Python instead of `!mv` avoids forking a shell (which triggers the
# huggingface/tokenizers parallelism warning) and is safe for paths that
# contain spaces or shell metacharacters.
os.makedirs(SAVE_PATH, exist_ok=True)
for src in ('wandb', run_name):
    if os.path.exists(src):  # e.g. no local `wandb` folder was created
        shutil.move(src, SAVE_PATH)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
