# Description


# Modules and Global Variables

In [1]:
from transformers import (
    AutoConfig, AutoTokenizer, AutoModelForSequenceClassification, 
    DefaultDataCollator, DataCollatorWithPadding, 
    TrainingArguments, Trainer,
)

from transformers.optimization import (
    AdamW, get_linear_schedule_with_warmup,
    Adafactor, AdafactorSchedule,
)

import torch
import wandb

import datasets
import evaluate

from sklearn.metrics import accuracy_score, f1_score

import numpy as np
import pandas as pd

import os
import re
import random

import demoji

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
print(f'torch.__version__: {torch.__version__}')
print(f'torch.cuda.is_available(): {torch.cuda.is_available()}')
NGPU = torch.cuda.device_count()
print(f'NGPU: {NGPU}')
# NGPU = torch.cuda.device_count()
# if NGPU > 1:
#     model = torch.nn.DataParallel(model, device_ids=list(range(NGPU)))

torch.__version__: 1.12.1
torch.cuda.is_available(): True
NGPU: 4


In [3]:
### labels

ce_labels = ['True', 'False']
pc_labels = ['positive', 'negative', 'neutral']
pc_binary_labels = ['True', 'False']

labels = ce_labels

label2id = {k: i for i, k in enumerate(labels)}
id2label = {i: k for i, k in enumerate(labels)}
num_labels = len(labels)

print(label2id)
print(id2label)

{'True': 0, 'False': 1}
{0: 'True', 1: 'False'}


In [4]:
### paths and names

PROJECT_NAME = 'aspect_category_detection'
RUN_ID = 'uncleaned_v5'

DATA_V = 'uncleaned_v5'
DATA_T = 'ce' # ce or pc or pc_binary
AUGMENTATION = False
AUG_NAME = 'balanced'

model_checkpoint = 'snunlp/KR-ELECTRA-discriminator'

notebook_name = 'acd_binary_trainer.ipynb'

### fixed

model_name = re.sub(r'[/-]', r'_', model_checkpoint).lower()
run_name = f'{model_name}_{RUN_ID}'

ROOT_PATH = './'
SAVE_PATH = os.path.join(ROOT_PATH, 'training_results', run_name, 'acd')
NOTEBOOK_PATH = os.path.join('./', notebook_name)

augornot = f'_{AUG_NAME}' if AUGMENTATION is True else ''
TRAIN_DATA_PATH = os.path.join(ROOT_PATH, 'dataset', DATA_V, f'{DATA_T}_train{augornot}.csv')
EVAL_DATA_PATH = os.path.join(ROOT_PATH, 'dataset', DATA_V, f'{DATA_T}_dev.csv')

!mkdir -p {SAVE_PATH}

In [5]:
if os.path.exists(SAVE_PATH):
    print(f'{SAVE_PATH} exists.')
else:
    print(f'{SAVE_PATH} does not exist.')
if os.path.exists(NOTEBOOK_PATH):
    print(f'{NOTEBOOK_PATH} exists.')
else:
    print(f'{NOTEBOOK_PATH} does not exist.')
if os.path.exists(TRAIN_DATA_PATH):
    print(f'{TRAIN_DATA_PATH} exists.')
else:
    print(f'{TRAIN_DATA_PATH} does not exist.')
if os.path.exists(EVAL_DATA_PATH):
    print(f'{EVAL_DATA_PATH} exists.')
else:
    print(f'{EVAL_DATA_PATH} does not exist.')

./training_results/snunlp_kr_electra_discriminator_uncleaned_v5/acd exists.
./acd_binary_trainer.ipynb exists.
./dataset/uncleaned_v5/ce_train.csv exists.
./dataset/uncleaned_v5/ce_dev.csv exists.


In [6]:
### rest of training args

report_to="wandb"

fp16 = False

num_train_epochs = 10
batch_size = 25 * 2
gradient_accumulation_steps = 1

optim = 'adamw_torch' # 'adamw_hf'

learning_rate = 3e-6 / 8 * batch_size * 4 # 5e-5
weight_decay = 0.01 # 0
adam_epsilon = 1e-8

lr_scheduler_type = 'cosine'
warmup_ratio = 0

save_total_limit = 2

load_best_model_at_end = True
metric_for_best_model ='eval_loss'

save_strategy = "epoch"
evaluation_strategy = "epoch"

logging_strategy = "steps"
logging_first_step = True 
logging_steps = 5

# WandB Configuration

In [7]:
%env WANDB_PROJECT={PROJECT_NAME}
%env WANDB_NOTEBOOK_NAME={NOTEBOOK_PATH}
%env WANDB_LOG_MODEL=true
%env WANDB_WATCH=all
wandb.login()

env: WANDB_PROJECT=aspect_category_detection
env: WANDB_NOTEBOOK_NAME=./acd_binary_trainer.ipynb
env: WANDB_LOG_MODEL=true
env: WANDB_WATCH=all


[34m[1mwandb[0m: Currently logged in as: [33mdotsnangles[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

# Load Model, Tokenizer, and Collator

In [8]:
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint, label2id=label2id, id2label=id2label, num_labels=num_labels
)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Some weights of the model checkpoint at snunlp/KR-ELECTRA-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at snunlp/KR-ELECTRA-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
train_path = f'./dataset/{DATA_V}/raw_train.csv'
dev_path = f'./dataset/{DATA_V}/raw_dev.csv'
test_path = f'./dataset/{DATA_V}/raw_test.csv'
train = pd.read_csv(train_path)
dev = pd.read_csv(dev_path)
test = pd.read_csv(test_path)

### new
entity_property_pair = [
    '본품#가격', '본품#다양성', '본품#디자인', '본품#인지도', '본품#일반', '본품#편의성', '본품#품질',
    '브랜드#가격', '브랜드#디자인', '브랜드#인지도', '브랜드#일반', '브랜드#품질',
    '제품 전체#가격', '제품 전체#다양성', '제품 전체#디자인', '제품 전체#인지도', '제품 전체#일반', '제품 전체#편의성', '제품 전체#품질',
    '패키지/구성품#가격', '패키지/구성품#다양성', '패키지/구성품#디자인', '패키지/구성품#일반', '패키지/구성품#편의성', '패키지/구성품#품질'
]
special_tokens = ['&name&', '&affiliation&', '&social-security-num&', '&tel-num&', '&card-num&', '&bank-account&', '&num&', '&online-account&']
emojis = pd.concat([train.sentence_form, dev.sentence_form, test.sentence_form], ignore_index=True, verify_integrity=True).to_frame()
emojis = list(set(demoji.findall(' '.join(emojis.sentence_form.to_list())).keys()))
ep_labels = pd.Series(entity_property_pair, name='sentence_form', copy=True)

tokens2add = special_tokens + emojis

tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
print(len(tokenizer))
tokenizer_train_data = pd.concat([train.sentence_form, dev.sentence_form, test.sentence_form], ignore_index=True, verify_integrity=True).to_frame().drop_duplicates()
tokenizer_train_data = tokenizer_train_data.sentence_form.to_list()
new_tokenizer = tokenizer.train_new_from_iterator(tokenizer_train_data, vocab_size=1)
new_tokens = set(list(new_tokenizer.vocab.keys()) + tokens2add) - set(tokenizer.vocab.keys())
tokenizer.add_tokens(list(new_tokens))
print(len(new_tokenizer))
print(len(tokenizer))
model.resize_token_embeddings(len(tokenizer))

30000





3060
30117


Embedding(30117, 768)

In [10]:
print(len(new_tokens))
print(new_tokens)

117
{'🍼', '&tel-num&', '⁉️', '##ᵕ', 'ᴠ', '☺️', '##ɴ', 'ᴛ', '##💇', '##🤡', 'ᴜ', '##❔', '💆\u200d♀️', '👌🏻', 'ʜ', '##쨕', '##ꈍ', '👋🏻', '##ɢ', '##👠', '&name&', '❣️', '🧚\u200d♀️', 'ᴘ', '##➕', '🤡', '##ᴠ', 'ɪ', '##ᴍ', '뿤', 'ɢ', '☝️', '##ɪ', '👨\u200d👧', '##🚗', '읒', '✌️', '💇🏼\u200d♀️', '##🕸', '🙌🏻', 'ꈍ', '‼️', '##뜌', '##🥤', '🙋🏻', '##ᴜ', '&card-num&', '🕺', 'ˇ', '쫜', '〰️', 'ʀ', '💇', '&num&', '😯', '🕷', '챳', '🍷', '✔️', '㉦', '🙋\u200d♀️', 'ᴍ', '👉🏻', '👩\u200d👦', '😺', '##ˇ', '뜌', 'ғ', '❤️', '💬', '🙆\u200d♂️', '🥤', '##ʀ', '🙏🏻', '##쫜', '##읒', '💆', '##㉦', 'ᵕ', '☝🏻', '죱', '💆🏻\u200d♀️', '♥️', '&online-account&', '🚗', '##ᴛ', '🐄', '❔', '&social-security-num&', '🏃\u200d♀️', '🙆🏻', '➕', 'ᴡ', '💄', '◍', '💪🏻', '쨕', '🤘🏻', '##죱', '##🕷', '##◍', '✌🏻', '&bank-account&', '👏🏻', '🙋🏻\u200d♀️', '&affiliation&', '⏰', '쓩', '🕸', '##💄', '👠', '##💆', '💡', '##ᴡ', '👦🏼', '##ᴘ', 'ɴ'}


In [11]:
model.config.label2id, model.config.id2label, model.num_labels

({'True': 0, 'False': 1}, {0: 'True', 1: 'False'}, 2)

In [12]:
# entity_property_pair = [
#     '본품#가격', '본품#다양성', '본품#디자인', '본품#인지도', '본품#일반', '본품#편의성', '본품#품질',
#     '브랜드#가격', '브랜드#디자인', '브랜드#인지도', '브랜드#일반', '브랜드#품질',
#     '제품 전체#가격', '제품 전체#다양성', '제품 전체#디자인', '제품 전체#인지도', '제품 전체#일반', '제품 전체#편의성', '제품 전체#품질',
#     '패키지/구성품#가격', '패키지/구성품#다양성', '패키지/구성품#디자인', '패키지/구성품#일반', '패키지/구성품#편의성', '패키지/구성품#품질'
# ]
# polarity_id_to_name = ['positive', 'negative', 'neutral']
# tokenizer_tester = []
# for pair in entity_property_pair:
#     for polarity in polarity_id_to_name:
#         tokenizer_tester.append('#'.join([pair, polarity]))
# for e in tokenizer_tester:
#     print(tokenizer.decode(tokenizer.encode(e)))
# for e in tokenizer_tester:
#     print(tokenizer.encode(e))

# Define Metric

In [13]:
accuracy_metric = evaluate.load('accuracy')
f1_metric = evaluate.load('f1')

In [14]:
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    
    accuracy = accuracy_metric.compute(references=labels, predictions=predictions)['accuracy']
    f1_true, f1_false = tuple(f1_metric.compute(references=labels, predictions=predictions, average=None, labels=[0,1])['f1'])
    f1_macro = f1_metric.compute(references=labels, predictions=predictions, average='macro')['f1']
    f1_micro = f1_metric.compute(references=labels, predictions=predictions, average='micro')['f1']
    
    return {'accuracy': accuracy, 'f1_true': f1_true, 'f1_false': f1_false, 'f1_macro': f1_macro, 'f1_micro': f1_micro}

# Load Data

In [15]:
def preprocess_function(examples):
    return tokenizer(examples["form"], examples["pair"], truncation=True)

In [16]:
train_dataset = pd.read_csv(TRAIN_DATA_PATH)
eval_dataset = pd.read_csv(EVAL_DATA_PATH)
# train_dataset = pd.concat([train_dataset, eval_dataset])
train_dataset = datasets.Dataset.from_pandas(train_dataset) #.shuffle(seed=42)
eval_dataset = datasets.Dataset.from_pandas(eval_dataset) #.shuffle(seed=42)
train_dataset = train_dataset.map(preprocess_function, batched=False)
eval_dataset = eval_dataset.map(preprocess_function, batched=False)

  0%|          | 0/75000 [00:00<?, ?ex/s]

  0%|          | 204/75000 [00:00<00:36, 2035.63ex/s]

  1%|          | 559/75000 [00:00<00:25, 2924.76ex/s]

  1%|          | 870/75000 [00:00<00:24, 3008.21ex/s]

  2%|▏         | 1171/75000 [00:00<00:47, 1547.51ex/s]

  2%|▏         | 1498/75000 [00:00<00:38, 1933.44ex/s]

  2%|▏         | 1834/75000 [00:00<00:32, 2278.74ex/s]

  3%|▎         | 2118/75000 [00:00<00:30, 2390.28ex/s]

  3%|▎         | 2416/75000 [00:01<00:28, 2544.97ex/s]

  4%|▎         | 2728/75000 [00:01<00:26, 2703.18ex/s]

  4%|▍         | 3022/75000 [00:01<00:28, 2497.53ex/s]

  4%|▍         | 3341/75000 [00:01<00:26, 2679.61ex/s]

  5%|▍         | 3682/75000 [00:01<00:24, 2880.06ex/s]

  5%|▌         | 4000/75000 [00:01<00:25, 2811.39ex/s]

  6%|▌         | 4308/75000 [00:01<00:24, 2883.26ex/s]

  6%|▌         | 4616/75000 [00:01<00:23, 2938.81ex/s]

  7%|▋         | 4934/75000 [00:01<00:23, 3006.86ex/s]

  7%|▋         | 5239/75000 [00:02<00:24, 2803.76ex/s]

  7%|▋         | 5577/75000 [00:02<00:23, 2963.79ex/s]

  8%|▊         | 5894/75000 [00:02<00:22, 3020.19ex/s]

  8%|▊         | 6200/75000 [00:02<00:23, 2929.85ex/s]

  9%|▊         | 6533/75000 [00:02<00:22, 3041.92ex/s]

  9%|▉         | 6850/75000 [00:02<00:22, 3076.12ex/s]

 10%|▉         | 7160/75000 [00:02<00:24, 2806.73ex/s]

 10%|▉         | 7468/75000 [00:02<00:23, 2880.99ex/s]

 10%|█         | 7814/75000 [00:02<00:22, 3043.99ex/s]

 11%|█         | 8123/75000 [00:02<00:23, 2882.38ex/s]

 11%|█         | 8433/75000 [00:03<00:22, 2941.89ex/s]

 12%|█▏        | 8760/75000 [00:03<00:21, 3033.48ex/s]

 12%|█▏        | 9067/75000 [00:03<00:23, 2835.02ex/s]

 12%|█▏        | 9356/75000 [00:03<00:23, 2849.43ex/s]

 13%|█▎        | 9676/75000 [00:03<00:22, 2947.56ex/s]

 13%|█▎        | 10000/75000 [00:03<00:22, 2895.68ex/s]

 14%|█▍        | 10332/75000 [00:03<00:21, 3014.15ex/s]

 14%|█▍        | 10684/75000 [00:03<00:20, 3159.26ex/s]

 15%|█▍        | 11003/75000 [00:03<00:21, 2978.11ex/s]

 15%|█▌        | 11306/75000 [00:04<00:21, 2991.33ex/s]

 15%|█▌        | 11608/75000 [00:04<00:24, 2639.86ex/s]

 16%|█▌        | 11893/75000 [00:04<00:23, 2694.93ex/s]

 16%|█▌        | 12170/75000 [00:04<00:23, 2636.70ex/s]

 17%|█▋        | 12458/75000 [00:04<00:23, 2702.35ex/s]

 17%|█▋        | 12738/75000 [00:04<00:22, 2729.26ex/s]

 17%|█▋        | 13014/75000 [00:04<00:24, 2548.10ex/s]

 18%|█▊        | 13313/75000 [00:04<00:23, 2670.09ex/s]

 18%|█▊        | 13621/75000 [00:04<00:22, 2783.29ex/s]

 19%|█▊        | 13960/75000 [00:05<00:20, 2956.10ex/s]

 19%|█▉        | 14259/75000 [00:05<00:22, 2733.28ex/s]

 19%|█▉        | 14580/75000 [00:05<00:21, 2864.80ex/s]

 20%|█▉        | 14936/75000 [00:05<00:19, 3059.53ex/s]

 20%|██        | 15247/75000 [00:05<00:22, 2704.16ex/s]

 21%|██        | 15541/75000 [00:05<00:21, 2765.56ex/s]

 21%|██        | 15866/75000 [00:05<00:20, 2898.05ex/s]

 22%|██▏       | 16163/75000 [00:05<00:21, 2780.15ex/s]

 22%|██▏       | 16451/75000 [00:05<00:20, 2806.83ex/s]

 22%|██▏       | 16752/75000 [00:06<00:20, 2863.62ex/s]

 23%|██▎       | 17042/75000 [00:06<00:21, 2642.88ex/s]

 23%|██▎       | 17329/75000 [00:06<00:21, 2703.77ex/s]

 24%|██▎       | 17632/75000 [00:06<00:20, 2793.39ex/s]

 24%|██▍       | 17958/75000 [00:06<00:19, 2925.35ex/s]

 24%|██▍       | 18254/75000 [00:06<00:20, 2747.57ex/s]

 25%|██▍       | 18535/75000 [00:06<00:20, 2763.05ex/s]

 25%|██▌       | 18847/75000 [00:06<00:19, 2863.69ex/s]

 26%|██▌       | 19136/75000 [00:06<00:20, 2693.65ex/s]

 26%|██▌       | 19433/75000 [00:07<00:20, 2768.94ex/s]

 26%|██▋       | 19774/75000 [00:07<00:18, 2950.01ex/s]

 27%|██▋       | 20072/75000 [00:07<00:19, 2798.06ex/s]

 27%|██▋       | 20401/75000 [00:07<00:18, 2935.48ex/s]

 28%|██▊       | 20712/75000 [00:07<00:18, 2983.44ex/s]

 28%|██▊       | 21013/75000 [00:07<00:19, 2789.28ex/s]

 28%|██▊       | 21340/75000 [00:07<00:18, 2919.25ex/s]

 29%|██▉       | 21685/75000 [00:07<00:17, 3069.01ex/s]

 29%|██▉       | 21996/75000 [00:07<00:17, 3061.26ex/s]

 30%|██▉       | 22305/75000 [00:08<00:18, 2786.09ex/s]

 30%|███       | 22612/75000 [00:08<00:18, 2863.35ex/s]

 31%|███       | 22944/75000 [00:08<00:17, 2989.87ex/s]

 31%|███       | 23248/75000 [00:08<00:18, 2814.30ex/s]

 31%|███▏      | 23540/75000 [00:08<00:18, 2841.53ex/s]

 32%|███▏      | 23887/75000 [00:08<00:16, 3017.94ex/s]

 32%|███▏      | 24193/75000 [00:08<00:17, 2867.87ex/s]

 33%|███▎      | 24511/75000 [00:08<00:17, 2950.29ex/s]

 33%|███▎      | 24824/75000 [00:08<00:16, 2999.08ex/s]

 34%|███▎      | 25127/75000 [00:08<00:18, 2750.16ex/s]

 34%|███▍      | 25454/75000 [00:09<00:17, 2892.12ex/s]

 34%|███▍      | 25801/75000 [00:09<00:16, 3053.20ex/s]

 35%|███▍      | 26111/75000 [00:09<00:17, 2825.18ex/s]

 35%|███▌      | 26419/75000 [00:09<00:16, 2894.28ex/s]

 36%|███▌      | 26725/75000 [00:09<00:16, 2940.45ex/s]

 36%|███▌      | 27023/75000 [00:09<00:18, 2611.23ex/s]

 36%|███▋      | 27358/75000 [00:09<00:16, 2805.96ex/s]

 37%|███▋      | 27675/75000 [00:09<00:16, 2904.66ex/s]

 37%|███▋      | 28000/75000 [00:09<00:16, 2821.71ex/s]

 38%|███▊      | 28324/75000 [00:10<00:15, 2935.18ex/s]

 38%|███▊      | 28649/75000 [00:10<00:15, 3023.29ex/s]

 39%|███▊      | 28984/75000 [00:10<00:14, 3116.97ex/s]

 39%|███▉      | 29299/75000 [00:10<00:15, 2888.51ex/s]

 40%|███▉      | 29628/75000 [00:10<00:15, 2997.48ex/s]

 40%|███▉      | 29979/75000 [00:10<00:14, 3142.70ex/s]

 40%|████      | 30298/75000 [00:10<00:15, 2874.08ex/s]

 41%|████      | 30620/75000 [00:10<00:14, 2967.07ex/s]

 41%|████      | 30929/75000 [00:10<00:14, 3000.31ex/s]

 42%|████▏     | 31234/75000 [00:11<00:15, 2807.29ex/s]

 42%|████▏     | 31576/75000 [00:11<00:14, 2974.03ex/s]

 43%|████▎     | 31907/75000 [00:11<00:14, 3068.55ex/s]

 43%|████▎     | 32218/75000 [00:11<00:15, 2802.98ex/s]

 43%|████▎     | 32506/75000 [00:11<00:15, 2776.04ex/s]

 44%|████▎     | 32806/75000 [00:11<00:14, 2837.73ex/s]

 44%|████▍     | 33094/75000 [00:11<00:16, 2609.11ex/s]

 45%|████▍     | 33392/75000 [00:11<00:15, 2708.40ex/s]

 45%|████▍     | 33730/75000 [00:11<00:14, 2894.42ex/s]

 45%|████▌     | 34025/75000 [00:12<00:15, 2725.73ex/s]

 46%|████▌     | 34338/75000 [00:12<00:14, 2834.49ex/s]

 46%|████▌     | 34645/75000 [00:12<00:13, 2900.32ex/s]

 47%|████▋     | 34988/75000 [00:12<00:13, 3052.08ex/s]

 47%|████▋     | 35297/75000 [00:12<00:14, 2823.75ex/s]

 48%|████▊     | 35648/75000 [00:12<00:13, 3013.28ex/s]

 48%|████▊     | 35991/75000 [00:12<00:12, 3130.11ex/s]

 48%|████▊     | 36309/75000 [00:12<00:13, 2804.81ex/s]

 49%|████▉     | 36619/75000 [00:12<00:13, 2882.50ex/s]

 49%|████▉     | 36923/75000 [00:13<00:13, 2925.13ex/s]

 50%|████▉     | 37221/75000 [00:13<00:13, 2786.28ex/s]

 50%|█████     | 37505/75000 [00:13<00:14, 2658.76ex/s]

 50%|█████     | 37814/75000 [00:13<00:13, 2775.94ex/s]

 51%|█████     | 38096/75000 [00:13<00:13, 2743.81ex/s]

 51%|█████     | 38423/75000 [00:13<00:12, 2890.50ex/s]

 52%|█████▏    | 38719/75000 [00:13<00:12, 2909.08ex/s]

 52%|█████▏    | 39012/75000 [00:13<00:13, 2668.37ex/s]

 52%|█████▏    | 39323/75000 [00:13<00:12, 2788.07ex/s]

 53%|█████▎    | 39645/75000 [00:14<00:12, 2908.41ex/s]

 53%|█████▎    | 39983/75000 [00:14<00:11, 3043.26ex/s]

 54%|█████▎    | 40291/75000 [00:14<00:12, 2823.22ex/s]

 54%|█████▍    | 40582/75000 [00:14<00:12, 2846.04ex/s]

 55%|█████▍    | 40900/75000 [00:14<00:11, 2939.40ex/s]

 55%|█████▍    | 41198/75000 [00:14<00:12, 2764.19ex/s]

 55%|█████▌    | 41489/75000 [00:14<00:11, 2804.43ex/s]

 56%|█████▌    | 41828/75000 [00:14<00:11, 2969.26ex/s]

 56%|█████▌    | 42128/75000 [00:14<00:11, 2810.46ex/s]

 57%|█████▋    | 42448/75000 [00:14<00:11, 2918.91ex/s]

 57%|█████▋    | 42765/75000 [00:15<00:10, 2989.64ex/s]

 57%|█████▋    | 43067/75000 [00:15<00:12, 2645.24ex/s]

 58%|█████▊    | 43410/75000 [00:15<00:11, 2853.66ex/s]

 58%|█████▊    | 43769/75000 [00:15<00:10, 3055.27ex/s]

 59%|█████▉    | 44083/75000 [00:15<00:10, 2829.24ex/s]

 59%|█████▉    | 44393/75000 [00:15<00:10, 2900.65ex/s]

 60%|█████▉    | 44709/75000 [00:15<00:10, 2970.31ex/s]

 60%|██████    | 45012/75000 [00:15<00:11, 2645.47ex/s]

 60%|██████    | 45331/75000 [00:16<00:10, 2789.12ex/s]

 61%|██████    | 45659/75000 [00:16<00:10, 2922.15ex/s]

 61%|██████▏   | 45997/75000 [00:16<00:09, 3050.25ex/s]

 62%|██████▏   | 46308/75000 [00:16<00:09, 2880.63ex/s]

 62%|██████▏   | 46630/75000 [00:16<00:09, 2974.10ex/s]

 63%|██████▎   | 46959/75000 [00:16<00:09, 3062.05ex/s]

 63%|██████▎   | 47269/75000 [00:16<00:09, 2885.40ex/s]

 63%|██████▎   | 47616/75000 [00:16<00:08, 3048.34ex/s]

 64%|██████▍   | 47961/75000 [00:16<00:08, 3162.59ex/s]

 64%|██████▍   | 48281/75000 [00:16<00:09, 2932.96ex/s]

 65%|██████▍   | 48603/75000 [00:17<00:08, 3010.33ex/s]

 65%|██████▌   | 48919/75000 [00:17<00:08, 3051.41ex/s]

 66%|██████▌   | 49228/75000 [00:17<00:09, 2831.35ex/s]

 66%|██████▌   | 49574/75000 [00:17<00:08, 3003.26ex/s]

 67%|██████▋   | 49914/75000 [00:17<00:08, 3114.74ex/s]

 67%|██████▋   | 50230/75000 [00:17<00:08, 2920.99ex/s]

 67%|██████▋   | 50541/75000 [00:17<00:08, 2971.80ex/s]

 68%|██████▊   | 50901/75000 [00:17<00:07, 3149.84ex/s]

 68%|██████▊   | 51220/75000 [00:17<00:08, 2963.60ex/s]

 69%|██████▉   | 51568/75000 [00:18<00:07, 3106.57ex/s]

 69%|██████▉   | 51925/75000 [00:18<00:07, 3236.29ex/s]

 70%|██████▉   | 52253/75000 [00:18<00:07, 3048.92ex/s]

 70%|███████   | 52573/75000 [00:18<00:07, 3088.80ex/s]

 71%|███████   | 52886/75000 [00:18<00:07, 3084.93ex/s]

 71%|███████   | 53197/75000 [00:18<00:07, 2873.32ex/s]

 71%|███████▏  | 53514/75000 [00:18<00:07, 2954.58ex/s]

 72%|███████▏  | 53854/75000 [00:18<00:06, 3080.71ex/s]

 72%|███████▏  | 54166/75000 [00:18<00:07, 2658.13ex/s]

 73%|███████▎  | 54493/75000 [00:19<00:07, 2817.32ex/s]

 73%|███████▎  | 54810/75000 [00:19<00:06, 2910.63ex/s]

 73%|███████▎  | 55110/75000 [00:19<00:07, 2723.21ex/s]

 74%|███████▍  | 55433/75000 [00:19<00:06, 2858.57ex/s]

 74%|███████▍  | 55777/75000 [00:19<00:06, 3018.65ex/s]

 75%|███████▍  | 56085/75000 [00:19<00:06, 2877.54ex/s]

 75%|███████▌  | 56427/75000 [00:19<00:06, 3027.20ex/s]

 76%|███████▌  | 56735/75000 [00:19<00:06, 2979.19ex/s]

 76%|███████▌  | 57037/75000 [00:19<00:06, 2626.47ex/s]

 76%|███████▋  | 57348/75000 [00:20<00:06, 2752.71ex/s]

 77%|███████▋  | 57679/75000 [00:20<00:05, 2900.90ex/s]

 77%|███████▋  | 58000/75000 [00:20<00:06, 2799.48ex/s]

 78%|███████▊  | 58286/75000 [00:20<00:06, 2715.74ex/s]

 78%|███████▊  | 58591/75000 [00:20<00:05, 2805.64ex/s]

 79%|███████▊  | 58911/75000 [00:20<00:05, 2914.96ex/s]

 79%|███████▉  | 59206/75000 [00:20<00:05, 2723.71ex/s]

 79%|███████▉  | 59524/75000 [00:20<00:05, 2848.81ex/s]

 80%|███████▉  | 59814/75000 [00:20<00:05, 2615.13ex/s]

 80%|████████  | 60082/75000 [00:21<00:06, 2391.04ex/s]

 80%|████████  | 60371/75000 [00:21<00:05, 2519.38ex/s]

 81%|████████  | 60664/75000 [00:21<00:05, 2627.92ex/s]

 81%|████████▏ | 60960/75000 [00:21<00:05, 2719.16ex/s]

 82%|████████▏ | 61237/75000 [00:21<00:06, 2238.85ex/s]

 82%|████████▏ | 61573/75000 [00:21<00:05, 2518.60ex/s]

 82%|████████▏ | 61863/75000 [00:21<00:05, 2618.77ex/s]

 83%|████████▎ | 62139/75000 [00:21<00:05, 2490.46ex/s]

 83%|████████▎ | 62428/75000 [00:22<00:04, 2596.18ex/s]

 84%|████████▎ | 62705/75000 [00:22<00:04, 2641.87ex/s]

 84%|████████▍ | 62983/75000 [00:22<00:04, 2679.65ex/s]

 84%|████████▍ | 63256/75000 [00:22<00:04, 2449.28ex/s]

 85%|████████▍ | 63552/75000 [00:22<00:04, 2587.72ex/s]

 85%|████████▌ | 63846/75000 [00:22<00:04, 2683.58ex/s]

 85%|████████▌ | 64120/75000 [00:22<00:04, 2446.09ex/s]

 86%|████████▌ | 64402/75000 [00:22<00:04, 2544.27ex/s]

 86%|████████▋ | 64694/75000 [00:22<00:03, 2647.54ex/s]

 87%|████████▋ | 64989/75000 [00:22<00:03, 2730.86ex/s]

 87%|████████▋ | 65267/75000 [00:23<00:03, 2624.82ex/s]

 87%|████████▋ | 65597/75000 [00:23<00:03, 2813.14ex/s]

 88%|████████▊ | 65926/75000 [00:23<00:03, 2949.23ex/s]

 88%|████████▊ | 66224/75000 [00:23<00:03, 2773.13ex/s]

 89%|████████▊ | 66531/75000 [00:23<00:02, 2854.39ex/s]

 89%|████████▉ | 66844/75000 [00:23<00:02, 2931.36ex/s]

 90%|████████▉ | 67140/75000 [00:23<00:02, 2727.26ex/s]

 90%|████████▉ | 67489/75000 [00:23<00:02, 2938.62ex/s]

 90%|█████████ | 67833/75000 [00:23<00:02, 3080.79ex/s]

 91%|█████████ | 68146/75000 [00:24<00:02, 2903.62ex/s]

 91%|█████████▏| 68441/75000 [00:24<00:03, 1911.87ex/s]

 92%|█████████▏| 68748/75000 [00:24<00:02, 2150.15ex/s]

 92%|█████████▏| 69006/75000 [00:24<00:02, 2225.11ex/s]

 92%|█████████▏| 69337/75000 [00:24<00:02, 2489.59ex/s]

 93%|█████████▎| 69662/75000 [00:24<00:01, 2684.40ex/s]

 93%|█████████▎| 69981/75000 [00:24<00:01, 2818.33ex/s]

 94%|█████████▎| 70281/75000 [00:25<00:01, 2634.10ex/s]

 94%|█████████▍| 70580/75000 [00:25<00:01, 2728.86ex/s]

 95%|█████████▍| 70903/75000 [00:25<00:01, 2867.11ex/s]

 95%|█████████▍| 71199/75000 [00:25<00:01, 2820.85ex/s]

 95%|█████████▌| 71519/75000 [00:25<00:01, 2926.73ex/s]

 96%|█████████▌| 71844/75000 [00:25<00:01, 3019.43ex/s]

 96%|█████████▌| 72150/75000 [00:25<00:01, 2659.80ex/s]

 97%|█████████▋| 72446/75000 [00:25<00:00, 2737.74ex/s]

 97%|█████████▋| 72728/75000 [00:25<00:00, 2700.67ex/s]

 97%|█████████▋| 73004/75000 [00:26<00:00, 2518.89ex/s]

 98%|█████████▊| 73307/75000 [00:26<00:00, 2654.28ex/s]

 98%|█████████▊| 73597/75000 [00:26<00:00, 2721.33ex/s]

 99%|█████████▊| 73941/75000 [00:26<00:00, 2922.96ex/s]

 99%|█████████▉| 74238/75000 [00:26<00:00, 2673.84ex/s]

 99%|█████████▉| 74569/75000 [00:26<00:00, 2844.55ex/s]

100%|█████████▉| 74889/75000 [00:26<00:00, 2943.93ex/s]

100%|██████████| 75000/75000 [00:26<00:00, 2809.00ex/s]




  0%|          | 0/69825 [00:00<?, ?ex/s]

  0%|          | 284/69825 [00:00<00:24, 2825.12ex/s]

  1%|          | 607/69825 [00:00<00:22, 3059.87ex/s]

  1%|▏         | 928/69825 [00:00<00:22, 3127.44ex/s]

  2%|▏         | 1241/69825 [00:00<00:24, 2755.66ex/s]

  2%|▏         | 1551/69825 [00:00<00:23, 2867.77ex/s]

  3%|▎         | 1865/69825 [00:00<00:23, 2952.19ex/s]

  3%|▎         | 2164/69825 [00:00<00:24, 2807.94ex/s]

  4%|▎         | 2452/69825 [00:00<00:23, 2826.84ex/s]

  4%|▍         | 2752/69825 [00:00<00:23, 2877.52ex/s]

  4%|▍         | 3042/69825 [00:01<00:24, 2678.43ex/s]

  5%|▍         | 3339/69825 [00:01<00:24, 2761.13ex/s]

  5%|▌         | 3671/69825 [00:01<00:22, 2919.76ex/s]

  6%|▌         | 3991/69825 [00:01<00:21, 2999.62ex/s]

  6%|▌         | 4294/69825 [00:01<00:22, 2885.38ex/s]

  7%|▋         | 4629/69825 [00:01<00:21, 3016.18ex/s]

  7%|▋         | 4945/69825 [00:01<00:21, 3055.68ex/s]

  8%|▊         | 5253/69825 [00:01<00:23, 2784.25ex/s]

  8%|▊         | 5557/69825 [00:01<00:22, 2854.02ex/s]

  8%|▊         | 5857/69825 [00:02<00:22, 2892.45ex/s]

  9%|▉         | 6150/69825 [00:02<00:23, 2742.20ex/s]

  9%|▉         | 6486/69825 [00:02<00:21, 2912.97ex/s]

 10%|▉         | 6812/69825 [00:02<00:20, 3010.21ex/s]

 10%|█         | 7117/69825 [00:02<00:23, 2623.11ex/s]

 11%|█         | 7462/69825 [00:02<00:21, 2840.45ex/s]

 11%|█         | 7818/69825 [00:02<00:20, 3037.23ex/s]

 12%|█▏        | 8131/69825 [00:02<00:20, 2988.73ex/s]

 12%|█▏        | 8502/69825 [00:02<00:19, 3190.00ex/s]

 13%|█▎        | 8878/69825 [00:03<00:18, 3351.06ex/s]

 13%|█▎        | 9218/69825 [00:03<00:19, 3163.96ex/s]

 14%|█▎        | 9548/69825 [00:03<00:18, 3200.79ex/s]

 14%|█▍        | 9875/69825 [00:03<00:18, 3220.49ex/s]

 15%|█▍        | 10200/69825 [00:03<00:20, 2918.52ex/s]

 15%|█▌        | 10499/69825 [00:03<00:20, 2920.02ex/s]

 15%|█▌        | 10814/69825 [00:03<00:19, 2983.43ex/s]

 16%|█▌        | 11117/69825 [00:03<00:22, 2655.40ex/s]

 16%|█▋        | 11411/69825 [00:03<00:21, 2730.63ex/s]

 17%|█▋        | 11704/69825 [00:04<00:20, 2785.01ex/s]

 17%|█▋        | 12000/69825 [00:04<00:21, 2635.04ex/s]

 18%|█▊        | 12299/69825 [00:04<00:21, 2729.77ex/s]

 18%|█▊        | 12596/69825 [00:04<00:20, 2796.51ex/s]

 19%|█▊        | 12929/69825 [00:04<00:19, 2946.54ex/s]

 19%|█▉        | 13227/69825 [00:04<00:20, 2726.13ex/s]

 19%|█▉        | 13527/69825 [00:04<00:20, 2800.59ex/s]

 20%|█▉        | 13818/69825 [00:04<00:19, 2829.69ex/s]

 20%|██        | 14105/69825 [00:04<00:21, 2638.48ex/s]

 21%|██        | 14403/69825 [00:05<00:20, 2731.81ex/s]

 21%|██        | 14725/69825 [00:05<00:19, 2867.57ex/s]

 22%|██▏       | 15016/69825 [00:05<00:19, 2796.58ex/s]

 22%|██▏       | 15363/69825 [00:05<00:18, 2987.27ex/s]

 22%|██▏       | 15665/69825 [00:05<00:18, 2900.84ex/s]

 23%|██▎       | 15968/69825 [00:05<00:18, 2936.93ex/s]

 23%|██▎       | 16264/69825 [00:05<00:19, 2723.82ex/s]

 24%|██▍       | 16596/69825 [00:05<00:18, 2887.83ex/s]

 24%|██▍       | 16917/69825 [00:05<00:17, 2978.26ex/s]

 25%|██▍       | 17219/69825 [00:05<00:18, 2785.29ex/s]

 25%|██▌       | 17550/69825 [00:06<00:17, 2930.67ex/s]

 26%|██▌       | 17878/69825 [00:06<00:17, 3029.07ex/s]

 26%|██▌       | 18185/69825 [00:06<00:18, 2829.61ex/s]

 27%|██▋       | 18524/69825 [00:06<00:17, 2984.42ex/s]

 27%|██▋       | 18850/69825 [00:06<00:16, 3055.74ex/s]

 27%|██▋       | 19160/69825 [00:06<00:17, 2831.74ex/s]

 28%|██▊       | 19478/69825 [00:06<00:17, 2925.76ex/s]

 28%|██▊       | 19811/69825 [00:06<00:16, 3037.36ex/s]

 29%|██▉       | 20119/69825 [00:06<00:17, 2788.93ex/s]

 29%|██▉       | 20426/69825 [00:07<00:17, 2864.39ex/s]

 30%|██▉       | 20744/69825 [00:07<00:16, 2951.31ex/s]

 30%|███       | 21044/69825 [00:07<00:17, 2856.17ex/s]

 31%|███       | 21350/69825 [00:07<00:16, 2911.89ex/s]

 31%|███       | 21660/69825 [00:07<00:16, 2963.97ex/s]

 31%|███▏      | 21975/69825 [00:07<00:15, 3015.75ex/s]

 32%|███▏      | 22279/69825 [00:07<00:17, 2698.29ex/s]

 32%|███▏      | 22591/69825 [00:07<00:16, 2812.27ex/s]

 33%|███▎      | 22936/69825 [00:07<00:15, 2990.59ex/s]

 33%|███▎      | 23241/69825 [00:08<00:16, 2741.51ex/s]

 34%|███▎      | 23538/69825 [00:08<00:16, 2803.32ex/s]

 34%|███▍      | 23833/69825 [00:08<00:16, 2842.49ex/s]

 35%|███▍      | 24122/69825 [00:08<00:17, 2587.51ex/s]

 35%|███▍      | 24422/69825 [00:08<00:16, 2697.97ex/s]

 35%|███▌      | 24766/69825 [00:08<00:15, 2901.73ex/s]

 36%|███▌      | 25062/69825 [00:08<00:16, 2704.66ex/s]

 36%|███▋      | 25362/69825 [00:08<00:15, 2782.74ex/s]

 37%|███▋      | 25655/69825 [00:08<00:15, 2822.52ex/s]

 37%|███▋      | 25962/69825 [00:09<00:15, 2891.87ex/s]

 38%|███▊      | 26255/69825 [00:09<00:16, 2637.37ex/s]

 38%|███▊      | 26570/69825 [00:09<00:15, 2775.37ex/s]

 39%|███▊      | 26892/69825 [00:09<00:14, 2899.93ex/s]

 39%|███▉      | 27187/69825 [00:09<00:16, 2532.11ex/s]

 39%|███▉      | 27476/69825 [00:09<00:16, 2625.78ex/s]

 40%|███▉      | 27765/69825 [00:09<00:15, 2696.42ex/s]

 40%|████      | 28042/69825 [00:09<00:16, 2511.71ex/s]

 41%|████      | 28301/69825 [00:09<00:17, 2430.83ex/s]

 41%|████      | 28549/69825 [00:10<00:16, 2430.15ex/s]

 41%|████▏     | 28859/69825 [00:10<00:15, 2614.12ex/s]

 42%|████▏     | 29125/69825 [00:10<00:16, 2454.11ex/s]

 42%|████▏     | 29375/69825 [00:10<00:17, 2310.74ex/s]

 42%|████▏     | 29660/69825 [00:10<00:16, 2454.20ex/s]

 43%|████▎     | 29986/69825 [00:10<00:14, 2676.06ex/s]

 43%|████▎     | 30259/69825 [00:10<00:15, 2480.48ex/s]

 44%|████▍     | 30566/69825 [00:10<00:14, 2639.46ex/s]

 44%|████▍     | 30885/69825 [00:10<00:13, 2791.32ex/s]

 45%|████▍     | 31170/69825 [00:11<00:14, 2619.58ex/s]

 45%|████▌     | 31487/69825 [00:11<00:13, 2768.84ex/s]

 46%|████▌     | 31796/69825 [00:11<00:13, 2857.69ex/s]

 46%|████▌     | 32086/69825 [00:11<00:13, 2777.80ex/s]

 46%|████▋     | 32408/69825 [00:11<00:12, 2901.62ex/s]

 47%|████▋     | 32717/69825 [00:11<00:12, 2954.60ex/s]

 47%|████▋     | 33015/69825 [00:11<00:13, 2754.77ex/s]

 48%|████▊     | 33330/69825 [00:11<00:12, 2864.31ex/s]

 48%|████▊     | 33649/69825 [00:11<00:12, 2956.13ex/s]

 49%|████▊     | 33976/69825 [00:12<00:11, 3046.51ex/s]

 49%|████▉     | 34284/69825 [00:12<00:12, 2896.64ex/s]

 50%|████▉     | 34594/69825 [00:12<00:11, 2952.43ex/s]

 50%|████▉     | 34895/69825 [00:12<00:11, 2968.07ex/s]

 50%|█████     | 35194/69825 [00:12<00:12, 2711.37ex/s]

 51%|█████     | 35502/69825 [00:12<00:12, 2811.67ex/s]

 51%|█████▏    | 35788/69825 [00:12<00:12, 2650.06ex/s]

 52%|█████▏    | 36058/69825 [00:12<00:13, 2432.53ex/s]

 52%|█████▏    | 36351/69825 [00:12<00:13, 2563.17ex/s]

 53%|█████▎    | 36675/69825 [00:13<00:12, 2747.66ex/s]

 53%|█████▎    | 36998/69825 [00:13<00:11, 2882.03ex/s]

 53%|█████▎    | 37292/69825 [00:13<00:12, 2706.75ex/s]

 54%|█████▍    | 37628/69825 [00:13<00:11, 2886.88ex/s]

 54%|█████▍    | 37981/69825 [00:13<00:10, 3068.69ex/s]

 55%|█████▍    | 38293/69825 [00:13<00:10, 2931.45ex/s]

 55%|█████▌    | 38620/69825 [00:13<00:10, 3026.22ex/s]

 56%|█████▌    | 38946/69825 [00:13<00:09, 3091.02ex/s]

 56%|█████▌    | 39258/69825 [00:13<00:10, 2890.15ex/s]

 57%|█████▋    | 39563/69825 [00:13<00:10, 2931.45ex/s]

 57%|█████▋    | 39876/69825 [00:14<00:10, 2986.93ex/s]

 58%|█████▊    | 40178/69825 [00:14<00:10, 2884.78ex/s]

 58%|█████▊    | 40546/69825 [00:14<00:09, 3109.10ex/s]

 59%|█████▊    | 40913/69825 [00:14<00:08, 3270.31ex/s]

 59%|█████▉    | 41243/69825 [00:14<00:09, 3105.27ex/s]

 60%|█████▉    | 41567/69825 [00:14<00:08, 3142.94ex/s]

 60%|█████▉    | 41890/69825 [00:14<00:08, 3166.28ex/s]

 60%|██████    | 42209/69825 [00:14<00:09, 3004.40ex/s]

 61%|██████    | 42590/69825 [00:14<00:08, 3230.87ex/s]

 62%|██████▏   | 42971/69825 [00:15<00:07, 3397.08ex/s]

 62%|██████▏   | 43314/69825 [00:15<00:08, 3093.37ex/s]

 63%|██████▎   | 43643/69825 [00:15<00:08, 3145.25ex/s]

 63%|██████▎   | 43963/69825 [00:15<00:08, 3114.60ex/s]

 63%|██████▎   | 44279/69825 [00:15<00:08, 2984.31ex/s]

 64%|██████▍   | 44616/69825 [00:15<00:08, 3089.01ex/s]

 64%|██████▍   | 44963/69825 [00:15<00:07, 3195.05ex/s]

 65%|██████▍   | 45286/69825 [00:15<00:08, 2902.19ex/s]

 65%|██████▌   | 45598/69825 [00:15<00:08, 2960.70ex/s]

 66%|██████▌   | 45913/69825 [00:16<00:07, 3013.33ex/s]

 66%|██████▌   | 46219/69825 [00:16<00:08, 2825.39ex/s]

 67%|██████▋   | 46538/69825 [00:16<00:07, 2925.11ex/s]

 67%|██████▋   | 46872/69825 [00:16<00:07, 3041.56ex/s]

 68%|██████▊   | 47180/69825 [00:16<00:08, 2799.42ex/s]

 68%|██████▊   | 47510/69825 [00:16<00:07, 2934.52ex/s]

 68%|██████▊   | 47809/69825 [00:16<00:07, 2935.38ex/s]

 69%|██████▉   | 48107/69825 [00:16<00:07, 2721.21ex/s]

 69%|██████▉   | 48438/69825 [00:16<00:07, 2880.57ex/s]

 70%|██████▉   | 48795/69825 [00:17<00:06, 3072.55ex/s]

 70%|███████   | 49108/69825 [00:17<00:07, 2597.62ex/s]

 71%|███████   | 49404/69825 [00:17<00:07, 2689.38ex/s]

 71%|███████   | 49712/69825 [00:17<00:07, 2793.03ex/s]

 72%|███████▏  | 50002/69825 [00:17<00:07, 2673.41ex/s]

 72%|███████▏  | 50340/69825 [00:17<00:06, 2862.33ex/s]

 73%|███████▎  | 50709/69825 [00:17<00:06, 3091.97ex/s]

 73%|███████▎  | 51025/69825 [00:17<00:06, 2949.51ex/s]

 74%|███████▎  | 51348/69825 [00:17<00:06, 3027.37ex/s]

 74%|███████▍  | 51674/69825 [00:18<00:05, 3091.97ex/s]

 74%|███████▍  | 52000/69825 [00:18<00:06, 2938.92ex/s]

 75%|███████▍  | 52334/69825 [00:18<00:05, 3050.00ex/s]

 75%|███████▌  | 52663/69825 [00:18<00:05, 3117.85ex/s]

 76%|███████▌  | 53000/69825 [00:18<00:05, 2939.40ex/s]

 76%|███████▋  | 53311/69825 [00:18<00:05, 2983.30ex/s]

 77%|███████▋  | 53626/69825 [00:18<00:05, 3029.65ex/s]

 77%|███████▋  | 53965/69825 [00:18<00:05, 3131.36ex/s]

 78%|███████▊  | 54281/69825 [00:18<00:05, 2666.08ex/s]

 78%|███████▊  | 54628/69825 [00:19<00:05, 2874.02ex/s]

 79%|███████▊  | 54978/69825 [00:19<00:04, 3043.69ex/s]

 79%|███████▉  | 55293/69825 [00:19<00:05, 2871.66ex/s]

 80%|███████▉  | 55631/69825 [00:19<00:04, 3006.83ex/s]

 80%|████████  | 55940/69825 [00:19<00:04, 2969.82ex/s]

 81%|████████  | 56243/69825 [00:19<00:04, 2905.05ex/s]

 81%|████████  | 56597/69825 [00:19<00:04, 3081.40ex/s]

 82%|████████▏ | 56909/69825 [00:19<00:04, 3045.85ex/s]

 82%|████████▏ | 57217/69825 [00:19<00:04, 2841.60ex/s]

 82%|████████▏ | 57511/69825 [00:20<00:04, 2868.61ex/s]

 83%|████████▎ | 57821/69825 [00:20<00:04, 2933.40ex/s]

 83%|████████▎ | 58117/69825 [00:20<00:04, 2734.54ex/s]

 84%|████████▎ | 58412/69825 [00:20<00:04, 2793.81ex/s]

 84%|████████▍ | 58748/69825 [00:20<00:03, 2951.89ex/s]

 85%|████████▍ | 59047/69825 [00:20<00:03, 2788.86ex/s]

 85%|████████▍ | 59344/69825 [00:20<00:03, 2838.29ex/s]

 85%|████████▌ | 59634/69825 [00:20<00:03, 2854.56ex/s]

 86%|████████▌ | 59922/69825 [00:21<00:05, 1861.86ex/s]

 86%|████████▌ | 60164/69825 [00:21<00:04, 1979.97ex/s]

 87%|████████▋ | 60399/69825 [00:21<00:04, 1984.09ex/s]

 87%|████████▋ | 60712/69825 [00:21<00:04, 2260.97ex/s]

 87%|████████▋ | 61000/69825 [00:21<00:03, 2303.79ex/s]

 88%|████████▊ | 61306/69825 [00:21<00:03, 2497.03ex/s]

 88%|████████▊ | 61639/69825 [00:21<00:03, 2721.26ex/s]

 89%|████████▉ | 62000/69825 [00:21<00:02, 2805.85ex/s]

 89%|████████▉ | 62316/69825 [00:21<00:02, 2902.26ex/s]

 90%|████████▉ | 62643/69825 [00:22<00:02, 3002.87ex/s]

 90%|█████████ | 62991/69825 [00:22<00:02, 3138.69ex/s]

 91%|█████████ | 63310/69825 [00:22<00:02, 2832.30ex/s]

 91%|█████████ | 63602/69825 [00:22<00:02, 2597.12ex/s]

 92%|█████████▏| 63955/69825 [00:22<00:02, 2839.81ex/s]

 92%|█████████▏| 64249/69825 [00:22<00:02, 2712.11ex/s]

 92%|█████████▏| 64533/69825 [00:22<00:01, 2744.31ex/s]

 93%|█████████▎| 64854/69825 [00:22<00:01, 2871.86ex/s]

 93%|█████████▎| 65147/69825 [00:22<00:01, 2714.40ex/s]

 94%|█████████▍| 65472/69825 [00:23<00:01, 2859.46ex/s]

 94%|█████████▍| 65803/69825 [00:23<00:01, 2985.05ex/s]

 95%|█████████▍| 66106/69825 [00:23<00:01, 2857.76ex/s]

 95%|█████████▌| 66406/69825 [00:23<00:01, 2895.97ex/s]

 96%|█████████▌| 66708/69825 [00:23<00:01, 2930.83ex/s]

 96%|█████████▌| 67004/69825 [00:23<00:01, 2622.04ex/s]

 96%|█████████▋| 67317/69825 [00:23<00:00, 2757.09ex/s]

 97%|█████████▋| 67653/69825 [00:23<00:00, 2923.57ex/s]

 97%|█████████▋| 67992/69825 [00:23<00:00, 3054.10ex/s]

 98%|█████████▊| 68303/69825 [00:24<00:00, 2846.10ex/s]

 98%|█████████▊| 68661/69825 [00:24<00:00, 3047.41ex/s]

 99%|█████████▉| 69000/69825 [00:24<00:00, 2955.71ex/s]

 99%|█████████▉| 69336/69825 [00:24<00:00, 3064.72ex/s]

100%|█████████▉| 69647/69825 [00:24<00:00, 3035.67ex/s]

100%|██████████| 69825/69825 [00:24<00:00, 2850.33ex/s]




In [17]:
len(train_dataset), len(eval_dataset)

(75000, 69825)

In [18]:
k = random.randrange(len(train_dataset))
print(tokenizer.decode(train_dataset['input_ids'][k]), train_dataset['labels'][k])
k = random.randrange(len(eval_dataset))
print(tokenizer.decode(eval_dataset['input_ids'][k]), eval_dataset['labels'][k])

[CLS] 상품평 문장 : < < 한겨울에도 피부속부터 촉촉한 피부를 만들어주는 # 키엘 # 울트라훼이셜크림 > > [SEP] 상품평 문장의 대범주 유형은 < < 제품 전체 > > 이고 소범주 유형은 < < 다양성 > > 이다. [SEP] 1


[CLS] 상품평 문장 : < < 애정하며 # 홈케어 로 사용중인 # 플라베네 😍 > > [SEP] 상품평 문장의 대범주 유형은 < < 제품 전체 > > 이고 소범주 유형은 < < 가격 > > 이다. [SEP] 1


# Load Trainer

In [19]:
args = TrainingArguments(
    output_dir=run_name,
    run_name=run_name,
    report_to=report_to,

    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,

    optim=optim,

    learning_rate=learning_rate,
    weight_decay=weight_decay,
    adam_epsilon=adam_epsilon,

    lr_scheduler_type=lr_scheduler_type,
    warmup_ratio=warmup_ratio,

    save_total_limit=save_total_limit,

    load_best_model_at_end=load_best_model_at_end,
    metric_for_best_model=metric_for_best_model,
    
    save_strategy=save_strategy,
    evaluation_strategy=evaluation_strategy,

    logging_strategy=logging_strategy,
    logging_first_step=logging_first_step, 
    logging_steps=logging_steps,
    
    fp16=fp16,
)

In [20]:
# es = EarlyStoppingCallback(early_stopping_patience=early_stopping_patience)

In [21]:
trainer = Trainer(
    model,
    args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    data_collator=data_collator,
    # callbacks=[es],
)

# Run Trainer

In [22]:
trainer.train()
wandb.finish()

The following columns in the training set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, id, form. If pair, id, form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running training *****


  Num examples = 75000


  Num Epochs = 10


  Instantaneous batch size per device = 50


  Total train batch size (w. parallel, distributed & accumulation) = 200


  Gradient Accumulation steps = 1


  Total optimization steps = 3750


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


You're using a ElectraTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.




Epoch,Training Loss,Validation Loss,Accuracy,F1 True,F1 False,F1 Macro,F1 Micro
1,0.1279,0.112957,0.958195,0.435942,0.978293,0.707118,0.958195
2,0.0913,0.090364,0.965557,0.56861,0.982062,0.775336,0.965557
3,0.0629,0.09052,0.970999,0.605032,0.984947,0.794989,0.970999
4,0.084,0.078832,0.9713,0.635901,0.985061,0.810481,0.9713
5,0.0476,0.083763,0.974035,0.648439,0.98652,0.817479,0.974035
6,0.0487,0.088256,0.971214,0.647245,0.984995,0.81612,0.971214
7,0.0347,0.095637,0.970841,0.6601,0.984767,0.822434,0.970841
8,0.031,0.097916,0.972159,0.660377,0.985485,0.822931,0.972159
9,0.0465,0.104112,0.973104,0.673845,0.985974,0.829909,0.973104
10,0.0314,0.105328,0.972818,0.674107,0.985817,0.829962,0.972818


The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, id, form. If pair, id, form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 200


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-375


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-375/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-375/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-375/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-375/special_tokens_map.json




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, id, form. If pair, id, form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 200


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-750


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-750/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-750/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-750/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-750/special_tokens_map.json




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, id, form. If pair, id, form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 200


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-1125


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-1125/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-1125/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-1125/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-1125/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-375] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, id, form. If pair, id, form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 200


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-1500


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-1500/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-1500/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-1500/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-1500/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-750] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, id, form. If pair, id, form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 200


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-1875


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-1875/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-1875/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-1875/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-1875/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-1125] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, id, form. If pair, id, form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 200


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-2250


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-2250/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-2250/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-2250/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-2250/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-1875] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, id, form. If pair, id, form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 200


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-2625


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-2625/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-2625/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-2625/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-2625/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-2250] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, id, form. If pair, id, form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 200


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-3000


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-3000/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-3000/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-3000/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-3000/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-2625] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, id, form. If pair, id, form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 200


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-3375


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-3375/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-3375/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-3375/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-3375/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-3000] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, id, form. If pair, id, form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 200


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-3750


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-3750/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-3750/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-3750/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-3750/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-3375] due to args.save_total_limit




Training completed. Do not forget to share your model on huggingface.co/models =)




Loading best model from snunlp_kr_electra_discriminator_uncleaned_v5/checkpoint-1500 (score: 0.07883194833993912).


Saving model checkpoint to /tmp/tmpny1pqn7p


Configuration saved in /tmp/tmpny1pqn7p/config.json


Model weights saved in /tmp/tmpny1pqn7p/pytorch_model.bin


tokenizer config file saved in /tmp/tmpny1pqn7p/tokenizer_config.json


Special tokens file saved in /tmp/tmpny1pqn7p/special_tokens_map.json


0,1
eval/accuracy,▁▄▇▇█▇▇▇█▇
eval/f1_false,▁▄▇▇█▇▇▇█▇
eval/f1_macro,▁▅▆▇▇▇████
eval/f1_micro,▁▄▇▇█▇▇▇█▇
eval/f1_true,▁▅▆▇▇▇████
eval/loss,█▃▃▁▂▃▄▅▆▆
eval/runtime,█▃▁▅▅▄█▄▂▆
eval/samples_per_second,▁▆█▄▄▅▁▅▇▃
eval/steps_per_second,▁▆█▄▄▅▁▅▇▃
train/epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
eval/accuracy,0.97282
eval/f1_false,0.98582
eval/f1_macro,0.82996
eval/f1_micro,0.97282
eval/f1_true,0.67411
eval/loss,0.10533
eval/runtime,189.9684
eval/samples_per_second,367.561
eval/steps_per_second,1.842
train/epoch,10.0


In [23]:
keep = [
    'added_tokens.json',
    'config.json',
    'pytorch_model.bin',
    'special_tokens_map.json',
    'tokenizer.json',
    'tokenizer_config.json',
    'vocab.txt'
]

ckpts = os.listdir(run_name)
for ckpt in ckpts:
    ckpt = os.path.join(run_name, ckpt)
    for item in os.listdir(ckpt):
        if item not in keep:
            os.remove(os.path.join(ckpt, item))

!mv wandb {run_name} {SAVE_PATH}/

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
