# Description


# Modules and Global Variables

In [1]:
from transformers import (
    AutoConfig, AutoTokenizer, AutoModelForSequenceClassification, 
    ElectraTokenizer, ElectraForSequenceClassification, 
    DefaultDataCollator, DataCollatorWithPadding, 
    TrainingArguments, Trainer,
)

from transformers.optimization import (
    AdamW, get_linear_schedule_with_warmup,
    Adafactor, AdafactorSchedule,
)

import torch
import wandb

import datasets
import evaluate

from sklearn.metrics import accuracy_score, f1_score

import numpy as np
import pandas as pd

import os
import re
import random

import demoji

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
print(f'torch.__version__: {torch.__version__}')
print(f'torch.cuda.is_available(): {torch.cuda.is_available()}')
NGPU = torch.cuda.device_count()
print(f'NGPU: {NGPU}')
# NGPU = torch.cuda.device_count()
# if NGPU > 1:
#     model = torch.nn.DataParallel(model, device_ids=list(range(NGPU)))

torch.__version__: 1.12.1
torch.cuda.is_available(): True
NGPU: 4


In [3]:
### labels

ce_labels = ['True', 'False']
pc_labels = ['positive', 'negative', 'neutral']
pc_binary_labels = ['True', 'False']

labels = ce_labels

label2id = {k: i for i, k in enumerate(labels)}
id2label = {i: k for i, k in enumerate(labels)}
num_labels = len(labels)

print(label2id)
print(id2label)

{'True': 0, 'False': 1}
{0: 'True', 1: 'False'}


In [4]:
### paths and names

PROJECT_NAME = 'aspect_category_detection'
RUN_ID = 'uncleaned_v7'

DATA_V = 'uncleaned_v7'
DATA_T = 'ce' # ce or pc or pc_binary
AUGMENTATION = False
AUG_NAME = 'balanced'

model_checkpoint = 'monologg/koelectra-base-v3-discriminator'

notebook_name = 'acd_binary_trainer.ipynb'

### fixed

model_name = re.sub(r'[/-]', r'_', model_checkpoint).lower()
run_name = f'{model_name}_{RUN_ID}'

ROOT_PATH = './'
SAVE_PATH = os.path.join(ROOT_PATH, 'training_results', run_name, 'acd')
NOTEBOOK_PATH = os.path.join('./', notebook_name)

augornot = f'_{AUG_NAME}' if AUGMENTATION is True else ''
TRAIN_DATA_PATH = os.path.join(ROOT_PATH, 'dataset', DATA_V, f'{DATA_T}_train{augornot}.csv')
EVAL_DATA_PATH = os.path.join(ROOT_PATH, 'dataset', DATA_V, f'{DATA_T}_dev.csv')

!mkdir -p {SAVE_PATH}

In [5]:
if os.path.exists(SAVE_PATH):
    print(f'{SAVE_PATH} exists.')
else:
    print(f'{SAVE_PATH} does not exist.')
if os.path.exists(NOTEBOOK_PATH):
    print(f'{NOTEBOOK_PATH} exists.')
else:
    print(f'{NOTEBOOK_PATH} does not exist.')
if os.path.exists(TRAIN_DATA_PATH):
    print(f'{TRAIN_DATA_PATH} exists.')
else:
    print(f'{TRAIN_DATA_PATH} does not exist.')
if os.path.exists(EVAL_DATA_PATH):
    print(f'{EVAL_DATA_PATH} exists.')
else:
    print(f'{EVAL_DATA_PATH} does not exist.')

./training_results/monologg_koelectra_base_v3_discriminator_uncleaned_v7/acd exists.
./acd_binary_trainer.ipynb exists.
./dataset/uncleaned_v7/ce_train.csv exists.
./dataset/uncleaned_v7/ce_dev.csv exists.


In [6]:
### rest of training args

report_to="wandb"

fp16 = False

num_train_epochs = 10
batch_size = 32
gradient_accumulation_steps = 1

optim = 'adamw_hf' # 'adamw_torch'

learning_rate = 3e-6 / 8 * batch_size * 4 # 5e-5
weight_decay = 0.01 # 0
adam_epsilon = 1e-8

lr_scheduler_type = 'linear'
warmup_ratio = 0

save_total_limit = 2

load_best_model_at_end = True
metric_for_best_model ='eval_loss'

save_strategy = "epoch"
evaluation_strategy = "epoch"

logging_strategy = "steps"
logging_first_step = True 
logging_steps = 50

# WandB Configuration

In [7]:
%env WANDB_PROJECT={PROJECT_NAME}
%env WANDB_NOTEBOOK_NAME={NOTEBOOK_PATH}
%env WANDB_LOG_MODEL=true
%env WANDB_WATCH=all
wandb.login()

env: WANDB_PROJECT=aspect_category_detection
env: WANDB_NOTEBOOK_NAME=./acd_binary_trainer.ipynb
env: WANDB_LOG_MODEL=true
env: WANDB_WATCH=all


[34m[1mwandb[0m: Currently logged in as: [33mdotsnangles[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

# Load Model, Tokenizer, and Collator

In [8]:
tokenizer = ElectraTokenizer.from_pretrained(model_checkpoint)
model = ElectraForSequenceClassification.from_pretrained(
    model_checkpoint, label2id=label2id, id2label=id2label, num_labels=num_labels
)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Some weights of the model checkpoint at monologg/koelectra-base-v3-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-v3-discriminator and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
train_path = f'./dataset/{DATA_V}/raw_train.csv'
dev_path = f'./dataset/{DATA_V}/raw_dev.csv'
test_path = f'./dataset/{DATA_V}/raw_test.csv'
train = pd.read_csv(train_path)
dev = pd.read_csv(dev_path)
test = pd.read_csv(test_path)

### new
entity_property_pair = [
    '본품#가격', '본품#다양성', '본품#디자인', '본품#인지도', '본품#일반', '본품#편의성', '본품#품질',
    '브랜드#가격', '브랜드#디자인', '브랜드#인지도', '브랜드#일반', '브랜드#품질',
    '제품 전체#가격', '제품 전체#다양성', '제품 전체#디자인', '제품 전체#인지도', '제품 전체#일반', '제품 전체#편의성', '제품 전체#품질',
    '패키지/구성품#가격', '패키지/구성품#다양성', '패키지/구성품#디자인', '패키지/구성품#일반', '패키지/구성품#편의성', '패키지/구성품#품질'
]
special_tokens = ['&name&', '&affiliation&', '&social-security-num&', '&tel-num&', '&card-num&', '&bank-account&', '&num&', '&online-account&']
emojis = pd.concat([train.sentence_form, dev.sentence_form, test.sentence_form], ignore_index=True, verify_integrity=True).to_frame()
emojis = list(set(demoji.findall(' '.join(emojis.sentence_form.to_list())).keys()))
ep_labels = pd.Series(entity_property_pair, name='sentence_form', copy=True)

tokens2add = special_tokens + emojis

tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
print(len(tokenizer))
tokenizer_train_data = pd.concat([train.sentence_form, dev.sentence_form, test.sentence_form], ignore_index=True, verify_integrity=True).to_frame().drop_duplicates()
tokenizer_train_data = tokenizer_train_data.sentence_form.to_list()
new_tokenizer = tokenizer.train_new_from_iterator(tokenizer_train_data, vocab_size=1)
new_tokens = set(list(new_tokenizer.vocab.keys()) + tokens2add) - set(tokenizer.vocab.keys())
tokenizer.add_tokens(list(new_tokens))
print(len(new_tokenizer))
print(len(tokenizer))
model.resize_token_embeddings(len(tokenizer))

35000





3060
35254


Embedding(35254, 768)

In [10]:
print(len(new_tokens))
print(new_tokens)

254
{'💎', '✌️', '밪', '##💯', '##ᵕ', '😮', '🙋🏻', '##👦', '👨', '##ɢ', '👌', '🕺', '뿤', '##츌', '😴', '##♬', '♥️', '옄', '➰', '##👠', '👩\u200d👦', '##ʀ', '˚', '🎂', '##ᴘ', '̵', '쨕', '😯', 'ᴍ', '🥤', 'ɢ', '💪🏻', '😲', '##➰', '##ˇ', 'ㅖ', '🍷', '##👆', 'ᴛ', '##😮', '👌🏻', '##닼', '컄', '횽', '👠', '💯', '🤟', '🎀', '닠', '##귯', '💇', '##🍎', '🤘🏻', '☝', '##뜌', 'ɪ', '㉦', '⏰', '곘', '💆', '##곘', '👋🏻', '##🖒', '👦🏼', '💇🏼\u200d♀️', '➕', '##🚗', '##🌝', '##쨕', '##👌', '👍🏻', 'ʀ', '💡', '🔸', '##띡', '##챦', 'ᴘ', '💞', 'ෆ', '##ᴛ', '##🕷', '🏃\u200d♀️', '👨\u200d👧', '##➕', '웻', '##💞', '꺠', '앝', '🍼', '👏🏻', '❔', '😺', 'ɴ', '##💋', '💄', '##ᴠ', '💬', '&affiliation&', '##🧚', '💝', '##͈', '##̵', '##듕', '##ᴇ', '🙃', 'ᵕ', '귯', '😡', '&name&', '🕸', '‼️', '챳', '˃', '͈', '##💆', '💆\u200d♀️', '💆🏻\u200d♀️', '☝🏻', 'ғ', '🙋\u200d♀️', '츌', '❣️', '♩', '##☝', '##💧', '♪', '##🕸', '##💇', '〰️', '##♩', '젔', '##❔', '🤘', '##💄', '🍰', 'ˇ', 'ღ', '##😲', '##앝', '쵝', 'ʜ', '🐱', '🙌🏻', '##잍', '😶', '🎵', '##닠', '🍎', '##🤘', '잍', '&bank-account&', '##♪', '##㉦', '##죱', '🧚', 'ᴗ', '⁉', '🌻', 

In [11]:
model.config.label2id, model.config.id2label, model.num_labels

({'True': 0, 'False': 1}, {0: 'True', 1: 'False'}, 2)

In [12]:
# entity_property_pair = [
#     '본품#가격', '본품#다양성', '본품#디자인', '본품#인지도', '본품#일반', '본품#편의성', '본품#품질',
#     '브랜드#가격', '브랜드#디자인', '브랜드#인지도', '브랜드#일반', '브랜드#품질',
#     '제품 전체#가격', '제품 전체#다양성', '제품 전체#디자인', '제품 전체#인지도', '제품 전체#일반', '제품 전체#편의성', '제품 전체#품질',
#     '패키지/구성품#가격', '패키지/구성품#다양성', '패키지/구성품#디자인', '패키지/구성품#일반', '패키지/구성품#편의성', '패키지/구성품#품질'
# ]
# polarity_id_to_name = ['positive', 'negative', 'neutral']
# tokenizer_tester = []
# for pair in entity_property_pair:
#     for polarity in polarity_id_to_name:
#         tokenizer_tester.append('#'.join([pair, polarity]))
# for e in tokenizer_tester:
#     print(tokenizer.decode(tokenizer.encode(e)))
# for e in tokenizer_tester:
#     print(tokenizer.encode(e))

# Define Metric

In [13]:
accuracy_metric = evaluate.load('accuracy')
f1_metric = evaluate.load('f1')

In [14]:
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    
    accuracy = accuracy_metric.compute(references=labels, predictions=predictions)['accuracy']
    f1_true, f1_false = tuple(f1_metric.compute(references=labels, predictions=predictions, average=None, labels=[0,1])['f1'])
    f1_macro = f1_metric.compute(references=labels, predictions=predictions, average='macro')['f1']
    f1_micro = f1_metric.compute(references=labels, predictions=predictions, average='micro')['f1']
    
    return {'accuracy': accuracy, 'f1_true': f1_true, 'f1_false': f1_false, 'f1_macro': f1_macro, 'f1_micro': f1_micro}

# Load Data

In [15]:
def preprocess_function(examples):
    return tokenizer(examples["form"], examples["pair"], truncation=True)

In [16]:
train_dataset = pd.read_csv(TRAIN_DATA_PATH)
eval_dataset = pd.read_csv(EVAL_DATA_PATH)
# train_dataset = pd.concat([train_dataset, eval_dataset])
train_dataset = datasets.Dataset.from_pandas(train_dataset) #.shuffle(seed=42)
eval_dataset = datasets.Dataset.from_pandas(eval_dataset) #.shuffle(seed=42)
train_dataset = train_dataset.map(preprocess_function, batched=False)
eval_dataset = eval_dataset.map(preprocess_function, batched=False)

  0%|          | 0/75000 [00:00<?, ?ex/s]

  0%|          | 144/75000 [00:00<00:52, 1437.25ex/s]

  1%|          | 510/75000 [00:00<00:27, 2741.87ex/s]

  1%|          | 883/75000 [00:00<00:23, 3190.40ex/s]

  2%|▏         | 1203/75000 [00:00<00:26, 2831.19ex/s]

  2%|▏         | 1519/75000 [00:00<00:24, 2940.49ex/s]

  2%|▏         | 1863/75000 [00:00<00:23, 3100.23ex/s]

  3%|▎         | 2219/75000 [00:00<00:22, 3242.66ex/s]

  3%|▎         | 2547/75000 [00:00<00:24, 3012.48ex/s]

  4%|▍         | 2854/75000 [00:01<00:36, 2001.45ex/s]

  4%|▍         | 3100/75000 [00:01<00:36, 1987.46ex/s]

  5%|▍         | 3505/75000 [00:01<00:29, 2451.75ex/s]

  5%|▌         | 3862/75000 [00:01<00:26, 2724.11ex/s]

  6%|▌         | 4168/75000 [00:01<00:28, 2492.38ex/s]

  6%|▌         | 4490/75000 [00:01<00:26, 2670.51ex/s]

  6%|▋         | 4860/75000 [00:01<00:23, 2936.85ex/s]

  7%|▋         | 5174/75000 [00:01<00:24, 2890.05ex/s]

  7%|▋         | 5477/75000 [00:02<00:24, 2858.67ex/s]

  8%|▊         | 5773/75000 [00:02<00:24, 2874.14ex/s]

  8%|▊         | 6068/75000 [00:02<00:24, 2860.61ex/s]

  9%|▊         | 6483/75000 [00:02<00:21, 3226.88ex/s]

  9%|▉         | 6811/75000 [00:02<00:21, 3240.51ex/s]

 10%|▉         | 7139/75000 [00:02<00:22, 3012.18ex/s]

 10%|█         | 7511/75000 [00:02<00:21, 3206.58ex/s]

 11%|█         | 7951/75000 [00:02<00:18, 3543.65ex/s]

 11%|█         | 8311/75000 [00:02<00:20, 3229.87ex/s]

 12%|█▏        | 8690/75000 [00:03<00:19, 3380.00ex/s]

 12%|█▏        | 9036/75000 [00:03<00:19, 3355.18ex/s]

 13%|█▎        | 9377/75000 [00:03<00:19, 3292.03ex/s]

 13%|█▎        | 9721/75000 [00:03<00:19, 3331.84ex/s]

 13%|█▎        | 10057/75000 [00:03<00:19, 3297.99ex/s]

 14%|█▍        | 10507/75000 [00:03<00:17, 3644.18ex/s]

 14%|█▍        | 10875/75000 [00:03<00:17, 3634.65ex/s]

 15%|█▍        | 11241/75000 [00:03<00:18, 3369.03ex/s]

 15%|█▌        | 11583/75000 [00:03<00:19, 3333.00ex/s]

 16%|█▌        | 11935/75000 [00:03<00:18, 3384.13ex/s]

 16%|█▋        | 12277/75000 [00:04<00:19, 3214.54ex/s]

 17%|█▋        | 12608/75000 [00:04<00:19, 3239.97ex/s]

 17%|█▋        | 12935/75000 [00:04<00:19, 3243.50ex/s]

 18%|█▊        | 13262/75000 [00:04<00:20, 3018.39ex/s]

 18%|█▊        | 13607/75000 [00:04<00:19, 3137.43ex/s]

 19%|█▊        | 13931/75000 [00:04<00:19, 3165.46ex/s]

 19%|█▉        | 14251/75000 [00:04<00:20, 3017.34ex/s]

 20%|█▉        | 14643/75000 [00:04<00:18, 3269.12ex/s]

 20%|█▉        | 14974/75000 [00:04<00:18, 3234.83ex/s]

 20%|██        | 15301/75000 [00:05<00:22, 2676.28ex/s]

 21%|██        | 15588/75000 [00:05<00:21, 2723.08ex/s]

 21%|██        | 15922/75000 [00:05<00:20, 2885.09ex/s]

 22%|██▏       | 16222/75000 [00:05<00:21, 2764.83ex/s]

 22%|██▏       | 16564/75000 [00:05<00:19, 2941.18ex/s]

 23%|██▎       | 16908/75000 [00:05<00:18, 3079.02ex/s]

 23%|██▎       | 17223/75000 [00:05<00:19, 2889.59ex/s]

 23%|██▎       | 17528/75000 [00:05<00:19, 2932.64ex/s]

 24%|██▍       | 17840/75000 [00:05<00:19, 2982.62ex/s]

 24%|██▍       | 18142/75000 [00:06<00:19, 2924.41ex/s]

 25%|██▍       | 18500/75000 [00:06<00:18, 3109.92ex/s]

 25%|██▌       | 18921/75000 [00:06<00:16, 3427.53ex/s]

 26%|██▌       | 19267/75000 [00:06<00:17, 3240.49ex/s]

 26%|██▌       | 19616/75000 [00:06<00:16, 3310.61ex/s]

 27%|██▋       | 19985/75000 [00:06<00:16, 3419.67ex/s]

 27%|██▋       | 20330/75000 [00:06<00:18, 2880.40ex/s]

 28%|██▊       | 20693/75000 [00:06<00:17, 3073.26ex/s]

 28%|██▊       | 21015/75000 [00:06<00:19, 2711.48ex/s]

 29%|██▊       | 21411/75000 [00:07<00:17, 3024.09ex/s]

 29%|██▉       | 21786/75000 [00:07<00:16, 3212.53ex/s]

 29%|██▉       | 22122/75000 [00:07<00:18, 2905.60ex/s]

 30%|██▉       | 22450/75000 [00:07<00:17, 2999.41ex/s]

 30%|███       | 22807/75000 [00:07<00:16, 3151.51ex/s]

 31%|███       | 23133/75000 [00:07<00:18, 2841.04ex/s]

 31%|███▏      | 23503/75000 [00:07<00:16, 3064.43ex/s]

 32%|███▏      | 23904/75000 [00:07<00:15, 3319.77ex/s]

 32%|███▏      | 24292/75000 [00:07<00:14, 3474.97ex/s]

 33%|███▎      | 24689/75000 [00:08<00:13, 3614.04ex/s]

 33%|███▎      | 25058/75000 [00:08<00:14, 3424.91ex/s]

 34%|███▍      | 25460/75000 [00:08<00:13, 3589.80ex/s]

 34%|███▍      | 25866/75000 [00:08<00:13, 3722.69ex/s]

 35%|███▍      | 26243/75000 [00:08<00:15, 3216.44ex/s]

 35%|███▌      | 26580/75000 [00:08<00:16, 2923.98ex/s]

 36%|███▌      | 26949/75000 [00:08<00:15, 3116.84ex/s]

 36%|███▋      | 27274/75000 [00:08<00:15, 3064.52ex/s]

 37%|███▋      | 27611/75000 [00:09<00:15, 3145.33ex/s]

 37%|███▋      | 28000/75000 [00:09<00:14, 3232.95ex/s]

 38%|███▊      | 28433/75000 [00:09<00:13, 3535.97ex/s]

 39%|███▊      | 28884/75000 [00:09<00:12, 3810.56ex/s]

 39%|███▉      | 29271/75000 [00:09<00:12, 3681.42ex/s]

 40%|███▉      | 29661/75000 [00:09<00:12, 3742.59ex/s]

 40%|████      | 30039/75000 [00:09<00:13, 3443.28ex/s]

 41%|████      | 30425/75000 [00:09<00:12, 3555.64ex/s]

 41%|████      | 30819/75000 [00:09<00:12, 3659.60ex/s]

 42%|████▏     | 31190/75000 [00:10<00:14, 2967.49ex/s]

 42%|████▏     | 31596/75000 [00:10<00:13, 3236.84ex/s]

 43%|████▎     | 31964/75000 [00:10<00:12, 3352.63ex/s]

 43%|████▎     | 32317/75000 [00:10<00:13, 3117.90ex/s]

 44%|████▎     | 32652/75000 [00:10<00:13, 3178.76ex/s]

 44%|████▍     | 33000/75000 [00:10<00:13, 3050.27ex/s]

 44%|████▍     | 33314/75000 [00:10<00:14, 2972.38ex/s]

 45%|████▍     | 33628/75000 [00:10<00:13, 3016.91ex/s]

 45%|████▌     | 34000/75000 [00:10<00:13, 2951.89ex/s]

 46%|████▌     | 34300/75000 [00:11<00:14, 2735.96ex/s]

 46%|████▌     | 34685/75000 [00:11<00:13, 3025.86ex/s]

 47%|████▋     | 35066/75000 [00:11<00:12, 3236.36ex/s]

 47%|████▋     | 35410/75000 [00:11<00:12, 3293.13ex/s]

 48%|████▊     | 35745/75000 [00:11<00:12, 3080.85ex/s]

 48%|████▊     | 36060/75000 [00:11<00:13, 2953.00ex/s]

 49%|████▊     | 36421/75000 [00:11<00:12, 3129.67ex/s]

 49%|████▉     | 36742/75000 [00:11<00:12, 3150.01ex/s]

 49%|████▉     | 37071/75000 [00:11<00:11, 3188.39ex/s]

 50%|████▉     | 37472/75000 [00:12<00:10, 3425.28ex/s]

 50%|█████     | 37862/75000 [00:12<00:10, 3562.52ex/s]

 51%|█████     | 38221/75000 [00:12<00:11, 3271.32ex/s]

 51%|█████▏    | 38582/75000 [00:12<00:10, 3364.03ex/s]

 52%|█████▏    | 38924/75000 [00:12<00:11, 3218.72ex/s]

 52%|█████▏    | 39251/75000 [00:12<00:12, 2899.53ex/s]

 53%|█████▎    | 39605/75000 [00:12<00:11, 3066.92ex/s]

 53%|█████▎    | 40000/75000 [00:12<00:11, 3098.45ex/s]

 54%|█████▍    | 40396/75000 [00:12<00:10, 3327.52ex/s]

 54%|█████▍    | 40736/75000 [00:13<00:10, 3330.16ex/s]

 55%|█████▍    | 41074/75000 [00:13<00:10, 3306.60ex/s]

 55%|█████▌    | 41408/75000 [00:13<00:11, 2963.01ex/s]

 56%|█████▌    | 41806/75000 [00:13<00:10, 3233.81ex/s]

 56%|█████▌    | 42171/75000 [00:13<00:09, 3345.74ex/s]

 57%|█████▋    | 42565/75000 [00:13<00:09, 3510.99ex/s]

 57%|█████▋    | 42934/75000 [00:13<00:09, 3561.24ex/s]

 58%|█████▊    | 43295/75000 [00:13<00:09, 3317.02ex/s]

 58%|█████▊    | 43658/75000 [00:13<00:09, 3402.95ex/s]

 59%|█████▊    | 44004/75000 [00:14<00:09, 3341.20ex/s]

 59%|█████▉    | 44368/75000 [00:14<00:08, 3421.88ex/s]

 60%|█████▉    | 44726/75000 [00:14<00:08, 3465.96ex/s]

 60%|██████    | 45075/75000 [00:14<00:09, 3031.58ex/s]

 61%|██████    | 45389/75000 [00:14<00:10, 2957.08ex/s]

 61%|██████    | 45752/75000 [00:14<00:09, 3136.16ex/s]

 61%|██████▏   | 46099/75000 [00:14<00:08, 3228.14ex/s]

 62%|██████▏   | 46524/75000 [00:14<00:08, 3517.52ex/s]

 63%|██████▎   | 46891/75000 [00:14<00:07, 3560.57ex/s]

 63%|██████▎   | 47252/75000 [00:15<00:08, 3392.43ex/s]

 63%|██████▎   | 47596/75000 [00:15<00:08, 3258.58ex/s]

 64%|██████▍   | 47926/75000 [00:15<00:09, 2957.06ex/s]

 64%|██████▍   | 48229/75000 [00:15<00:09, 2904.12ex/s]

 65%|██████▍   | 48624/75000 [00:15<00:08, 3185.12ex/s]

 65%|██████▌   | 49000/75000 [00:15<00:07, 3255.42ex/s]

 66%|██████▌   | 49415/75000 [00:15<00:07, 3504.15ex/s]

 66%|██████▋   | 49785/75000 [00:15<00:07, 3557.95ex/s]

 67%|██████▋   | 50145/75000 [00:15<00:07, 3386.30ex/s]

 67%|██████▋   | 50488/75000 [00:16<00:07, 3370.13ex/s]

 68%|██████▊   | 50913/75000 [00:16<00:06, 3620.82ex/s]

 68%|██████▊   | 51279/75000 [00:16<00:07, 3066.47ex/s]

 69%|██████▉   | 51708/75000 [00:16<00:06, 3381.51ex/s]

 69%|██████▉   | 52063/75000 [00:16<00:06, 3341.51ex/s]

 70%|██████▉   | 52447/75000 [00:16<00:06, 3477.38ex/s]

 70%|███████   | 52804/75000 [00:16<00:06, 3459.50ex/s]

 71%|███████   | 53157/75000 [00:16<00:06, 3300.59ex/s]

 71%|███████▏  | 53520/75000 [00:16<00:06, 3389.45ex/s]

 72%|███████▏  | 53922/75000 [00:17<00:05, 3566.75ex/s]

 72%|███████▏  | 54283/75000 [00:17<00:05, 3475.95ex/s]

 73%|███████▎  | 54644/75000 [00:17<00:05, 3513.06ex/s]

 73%|███████▎  | 55000/75000 [00:17<00:05, 3424.96ex/s]

 74%|███████▍  | 55402/75000 [00:17<00:05, 3595.20ex/s]

 74%|███████▍  | 55814/75000 [00:17<00:05, 3746.17ex/s]

 75%|███████▍  | 56191/75000 [00:17<00:05, 3552.72ex/s]

 75%|███████▌  | 56602/75000 [00:17<00:04, 3708.26ex/s]

 76%|███████▌  | 56976/75000 [00:17<00:04, 3669.59ex/s]

 76%|███████▋  | 57346/75000 [00:17<00:05, 3523.48ex/s]

 77%|███████▋  | 57750/75000 [00:18<00:04, 3667.14ex/s]

 77%|███████▋  | 58120/75000 [00:18<00:04, 3560.69ex/s]

 78%|███████▊  | 58515/75000 [00:18<00:04, 3671.32ex/s]

 79%|███████▊  | 58921/75000 [00:18<00:04, 3781.68ex/s]

 79%|███████▉  | 59301/75000 [00:18<00:04, 3566.11ex/s]

 80%|███████▉  | 59662/75000 [00:18<00:04, 3439.84ex/s]

 80%|████████  | 60009/75000 [00:18<00:05, 2988.48ex/s]

 80%|████████  | 60352/75000 [00:18<00:04, 3101.68ex/s]

 81%|████████  | 60672/75000 [00:18<00:04, 3027.78ex/s]

 81%|████████▏ | 61000/75000 [00:19<00:04, 2911.67ex/s]

 82%|████████▏ | 61297/75000 [00:19<00:04, 2922.41ex/s]

 82%|████████▏ | 61593/75000 [00:19<00:04, 2864.53ex/s]

 83%|████████▎ | 61882/75000 [00:19<00:04, 2848.33ex/s]

 83%|████████▎ | 62169/75000 [00:19<00:04, 2767.48ex/s]

 83%|████████▎ | 62545/75000 [00:19<00:04, 3044.87ex/s]

 84%|████████▍ | 62853/75000 [00:19<00:03, 3049.53ex/s]

 84%|████████▍ | 63160/75000 [00:19<00:04, 2657.01ex/s]

 85%|████████▍ | 63463/75000 [00:19<00:04, 2755.47ex/s]

 85%|████████▍ | 63747/75000 [00:20<00:04, 2760.71ex/s]

 85%|████████▌ | 64029/75000 [00:20<00:04, 2580.74ex/s]

 86%|████████▌ | 64382/75000 [00:20<00:03, 2838.01ex/s]

 86%|████████▋ | 64738/75000 [00:20<00:03, 3038.88ex/s]

 87%|████████▋ | 65049/75000 [00:20<00:03, 2934.40ex/s]

 87%|████████▋ | 65404/75000 [00:20<00:03, 3107.03ex/s]

 88%|████████▊ | 65795/75000 [00:20<00:02, 3335.22ex/s]

 88%|████████▊ | 66133/75000 [00:20<00:02, 3266.70ex/s]

 89%|████████▊ | 66463/75000 [00:20<00:02, 2887.52ex/s]

 89%|████████▉ | 66799/75000 [00:21<00:02, 3012.98ex/s]

 90%|████████▉ | 67126/75000 [00:21<00:02, 3081.89ex/s]

 90%|████████▉ | 67497/75000 [00:21<00:02, 3257.00ex/s]

 90%|█████████ | 67829/75000 [00:21<00:03, 2332.20ex/s]

 91%|█████████ | 68126/75000 [00:21<00:02, 2475.16ex/s]

 91%|█████████▏| 68446/75000 [00:21<00:02, 2651.06ex/s]

 92%|█████████▏| 68804/75000 [00:21<00:02, 2890.47ex/s]

 92%|█████████▏| 69138/75000 [00:21<00:01, 3011.03ex/s]

 93%|█████████▎| 69559/75000 [00:22<00:01, 3343.98ex/s]

 93%|█████████▎| 69987/75000 [00:22<00:01, 3608.33ex/s]

 94%|█████████▍| 70360/75000 [00:22<00:01, 3389.48ex/s]

 94%|█████████▍| 70732/75000 [00:22<00:01, 3480.39ex/s]

 95%|█████████▍| 71089/75000 [00:22<00:01, 3351.98ex/s]

 95%|█████████▌| 71431/75000 [00:22<00:01, 3106.26ex/s]

 96%|█████████▌| 71829/75000 [00:22<00:00, 3337.87ex/s]

 96%|█████████▌| 72171/75000 [00:22<00:00, 3024.61ex/s]

 97%|█████████▋| 72526/75000 [00:22<00:00, 3159.44ex/s]

 97%|█████████▋| 72851/75000 [00:23<00:00, 3073.12ex/s]

 98%|█████████▊| 73165/75000 [00:23<00:00, 2902.10ex/s]

 98%|█████████▊| 73461/75000 [00:23<00:00, 2842.21ex/s]

 98%|█████████▊| 73800/75000 [00:23<00:00, 2990.85ex/s]

 99%|█████████▉| 74129/75000 [00:23<00:00, 3072.84ex/s]

 99%|█████████▉| 74440/75000 [00:23<00:00, 3036.94ex/s]

100%|█████████▉| 74913/75000 [00:23<00:00, 3521.14ex/s]

100%|██████████| 75000/75000 [00:23<00:00, 3159.92ex/s]




  0%|          | 0/69825 [00:00<?, ?ex/s]

  1%|          | 398/69825 [00:00<00:17, 3973.87ex/s]

  1%|          | 796/69825 [00:00<00:18, 3750.69ex/s]

  2%|▏         | 1172/69825 [00:00<00:19, 3559.16ex/s]

  2%|▏         | 1533/69825 [00:00<00:19, 3575.37ex/s]

  3%|▎         | 1944/69825 [00:00<00:18, 3760.86ex/s]

  3%|▎         | 2322/69825 [00:00<00:18, 3700.98ex/s]

  4%|▍         | 2694/69825 [00:00<00:18, 3705.57ex/s]

  4%|▍         | 3066/69825 [00:00<00:18, 3573.48ex/s]

  5%|▍         | 3425/69825 [00:00<00:18, 3502.04ex/s]

  5%|▌         | 3780/69825 [00:01<00:18, 3512.18ex/s]

  6%|▌         | 4132/69825 [00:01<00:26, 2478.51ex/s]

  6%|▋         | 4421/69825 [00:01<00:26, 2428.40ex/s]

  7%|▋         | 4692/69825 [00:01<00:33, 1953.68ex/s]

  7%|▋         | 4919/69825 [00:01<00:38, 1692.00ex/s]

  7%|▋         | 5113/69825 [00:01<00:40, 1592.47ex/s]

  8%|▊         | 5308/69825 [00:02<00:38, 1665.84ex/s]

  8%|▊         | 5684/69825 [00:02<00:29, 2144.01ex/s]

  9%|▊         | 6000/69825 [00:02<00:27, 2352.94ex/s]

  9%|▉         | 6398/69825 [00:02<00:22, 2769.65ex/s]

 10%|▉         | 6719/69825 [00:02<00:21, 2886.67ex/s]

 10%|█         | 7025/69825 [00:02<00:21, 2897.84ex/s]

 11%|█         | 7394/69825 [00:02<00:20, 3120.05ex/s]

 11%|█         | 7737/69825 [00:02<00:19, 3208.12ex/s]

 12%|█▏        | 8065/69825 [00:02<00:20, 3027.56ex/s]

 12%|█▏        | 8435/69825 [00:03<00:19, 3214.03ex/s]

 13%|█▎        | 8763/69825 [00:03<00:19, 3204.70ex/s]

 13%|█▎        | 9088/69825 [00:03<00:19, 3127.29ex/s]

 14%|█▎        | 9532/69825 [00:03<00:17, 3497.77ex/s]

 14%|█▍        | 9941/69825 [00:03<00:16, 3667.57ex/s]

 15%|█▍        | 10312/69825 [00:03<00:18, 3167.11ex/s]

 15%|█▌        | 10643/69825 [00:03<00:20, 2820.33ex/s]

 16%|█▌        | 10941/69825 [00:03<00:23, 2507.12ex/s]

 16%|█▌        | 11207/69825 [00:03<00:23, 2520.37ex/s]

 16%|█▋        | 11470/69825 [00:04<00:22, 2537.26ex/s]

 17%|█▋        | 11780/69825 [00:04<00:21, 2685.11ex/s]

 17%|█▋        | 12056/69825 [00:04<00:22, 2568.32ex/s]

 18%|█▊        | 12329/69825 [00:04<00:22, 2610.51ex/s]

 18%|█▊        | 12637/69825 [00:04<00:20, 2740.64ex/s]

 19%|█▊        | 12959/69825 [00:04<00:19, 2876.38ex/s]

 19%|█▉        | 13251/69825 [00:04<00:20, 2744.42ex/s]

 19%|█▉        | 13589/69825 [00:04<00:19, 2923.28ex/s]

 20%|█▉        | 13885/69825 [00:04<00:19, 2887.16ex/s]

 20%|██        | 14177/69825 [00:05<00:19, 2794.44ex/s]

 21%|██        | 14459/69825 [00:05<00:20, 2645.09ex/s]

 21%|██        | 14727/69825 [00:05<00:20, 2631.63ex/s]

 21%|██▏       | 15000/69825 [00:05<00:21, 2520.55ex/s]

 22%|██▏       | 15254/69825 [00:05<00:22, 2437.82ex/s]

 22%|██▏       | 15545/69825 [00:05<00:21, 2566.08ex/s]

 23%|██▎       | 15902/69825 [00:05<00:18, 2849.05ex/s]

 23%|██▎       | 16191/69825 [00:05<00:19, 2813.49ex/s]

 24%|██▎       | 16564/69825 [00:05<00:17, 3076.65ex/s]

 24%|██▍       | 16915/69825 [00:06<00:16, 3200.46ex/s]

 25%|██▍       | 17238/69825 [00:06<00:16, 3139.60ex/s]

 25%|██▌       | 17665/69825 [00:06<00:15, 3466.69ex/s]

 26%|██▌       | 18014/69825 [00:06<00:15, 3284.10ex/s]

 26%|██▋       | 18383/69825 [00:06<00:15, 3397.15ex/s]

 27%|██▋       | 18780/69825 [00:06<00:14, 3558.98ex/s]

 27%|██▋       | 19139/69825 [00:06<00:15, 3279.33ex/s]

 28%|██▊       | 19498/69825 [00:06<00:14, 3362.87ex/s]

 28%|██▊       | 19853/69825 [00:06<00:14, 3412.33ex/s]

 29%|██▉       | 20198/69825 [00:07<00:16, 3093.02ex/s]

 29%|██▉       | 20525/69825 [00:07<00:15, 3140.17ex/s]

 30%|██▉       | 20891/69825 [00:07<00:14, 3284.73ex/s]

 30%|███       | 21225/69825 [00:07<00:15, 3239.73ex/s]

 31%|███       | 21622/69825 [00:07<00:13, 3446.43ex/s]

 32%|███▏      | 22000/69825 [00:07<00:13, 3440.68ex/s]

 32%|███▏      | 22347/69825 [00:07<00:13, 3435.19ex/s]

 33%|███▎      | 22746/69825 [00:07<00:13, 3594.05ex/s]

 33%|███▎      | 23108/69825 [00:07<00:13, 3457.21ex/s]

 34%|███▎      | 23476/69825 [00:07<00:13, 3516.96ex/s]

 34%|███▍      | 23839/69825 [00:08<00:12, 3549.27ex/s]

 35%|███▍      | 24196/69825 [00:08<00:13, 3296.36ex/s]

 35%|███▌      | 24585/69825 [00:08<00:13, 3459.87ex/s]

 36%|███▌      | 24940/69825 [00:08<00:12, 3485.36ex/s]

 36%|███▌      | 25292/69825 [00:08<00:13, 3260.60ex/s]

 37%|███▋      | 25623/69825 [00:08<00:13, 3224.89ex/s]

 37%|███▋      | 25974/69825 [00:08<00:13, 3304.05ex/s]

 38%|███▊      | 26308/69825 [00:08<00:14, 3037.48ex/s]

 38%|███▊      | 26662/69825 [00:08<00:13, 3173.75ex/s]

 39%|███▊      | 27000/69825 [00:09<00:14, 3021.21ex/s]

 39%|███▉      | 27310/69825 [00:09<00:13, 3040.90ex/s]

 40%|███▉      | 27652/69825 [00:09<00:13, 3146.37ex/s]

 40%|████      | 27988/69825 [00:09<00:13, 3205.07ex/s]

 41%|████      | 28312/69825 [00:09<00:14, 2849.56ex/s]

 41%|████      | 28606/69825 [00:09<00:14, 2813.89ex/s]

 41%|████▏     | 28894/69825 [00:09<00:15, 2700.98ex/s]

 42%|████▏     | 29169/69825 [00:09<00:16, 2508.02ex/s]

 42%|████▏     | 29465/69825 [00:09<00:15, 2625.60ex/s]

 43%|████▎     | 29781/69825 [00:10<00:14, 2768.82ex/s]

 43%|████▎     | 30063/69825 [00:10<00:16, 2439.06ex/s]

 44%|████▎     | 30409/69825 [00:10<00:14, 2703.60ex/s]

 44%|████▍     | 30815/69825 [00:10<00:12, 3070.27ex/s]

 45%|████▍     | 31134/69825 [00:10<00:12, 3017.72ex/s]

 45%|████▌     | 31497/69825 [00:10<00:12, 3186.84ex/s]

 46%|████▌     | 31857/69825 [00:10<00:11, 3302.25ex/s]

 46%|████▌     | 32193/69825 [00:10<00:11, 3176.87ex/s]

 47%|████▋     | 32556/69825 [00:10<00:11, 3305.08ex/s]

 47%|████▋     | 32891/69825 [00:11<00:11, 3293.73ex/s]

 48%|████▊     | 33224/69825 [00:11<00:11, 3057.99ex/s]

 48%|████▊     | 33557/69825 [00:11<00:11, 3132.57ex/s]

 49%|████▊     | 33875/69825 [00:11<00:13, 2679.08ex/s]

 49%|████▉     | 34167/69825 [00:11<00:13, 2739.07ex/s]

 49%|████▉     | 34504/69825 [00:11<00:12, 2907.12ex/s]

 50%|████▉     | 34804/69825 [00:11<00:12, 2856.16ex/s]

 50%|█████     | 35096/69825 [00:11<00:12, 2834.33ex/s]

 51%|█████     | 35481/69825 [00:11<00:11, 3118.84ex/s]

 51%|█████▏    | 35837/69825 [00:12<00:10, 3244.62ex/s]

 52%|█████▏    | 36166/69825 [00:12<00:10, 3167.01ex/s]

 52%|█████▏    | 36561/69825 [00:12<00:09, 3391.50ex/s]

 53%|█████▎    | 36939/69825 [00:12<00:09, 3503.40ex/s]

 53%|█████▎    | 37292/69825 [00:12<00:10, 3225.56ex/s]

 54%|█████▍    | 37661/69825 [00:12<00:09, 3353.80ex/s]

 54%|█████▍    | 38002/69825 [00:12<00:09, 3240.46ex/s]

 55%|█████▌    | 38415/69825 [00:12<00:09, 3489.74ex/s]

 56%|█████▌    | 38831/69825 [00:12<00:08, 3680.09ex/s]

 56%|█████▌    | 39225/69825 [00:12<00:08, 3752.92ex/s]

 57%|█████▋    | 39604/69825 [00:13<00:08, 3663.65ex/s]

 57%|█████▋    | 39973/69825 [00:13<00:08, 3566.62ex/s]

 58%|█████▊    | 40332/69825 [00:13<00:08, 3365.12ex/s]

 58%|█████▊    | 40711/69825 [00:13<00:08, 3478.77ex/s]

 59%|█████▉    | 41062/69825 [00:13<00:09, 3166.93ex/s]

 59%|█████▉    | 41386/69825 [00:13<00:09, 3115.57ex/s]

 60%|█████▉    | 41702/69825 [00:13<00:09, 3098.63ex/s]

 60%|██████    | 42015/69825 [00:13<00:10, 2747.74ex/s]

 61%|██████    | 42385/69825 [00:14<00:09, 2994.44ex/s]

 61%|██████▏   | 42789/69825 [00:14<00:08, 3276.76ex/s]

 62%|██████▏   | 43127/69825 [00:14<00:08, 3253.66ex/s]

 62%|██████▏   | 43569/69825 [00:14<00:07, 3581.38ex/s]

 63%|██████▎   | 43950/69825 [00:14<00:07, 3645.39ex/s]

 63%|██████▎   | 44320/69825 [00:14<00:07, 3432.32ex/s]

 64%|██████▍   | 44694/69825 [00:14<00:07, 3517.17ex/s]

 65%|██████▍   | 45051/69825 [00:14<00:07, 3429.38ex/s]

 65%|██████▌   | 45473/69825 [00:14<00:06, 3653.05ex/s]

 66%|██████▌   | 45842/69825 [00:14<00:06, 3641.04ex/s]

 66%|██████▌   | 46209/69825 [00:15<00:06, 3593.26ex/s]

 67%|██████▋   | 46571/69825 [00:15<00:06, 3512.28ex/s]

 67%|██████▋   | 46924/69825 [00:15<00:07, 3036.62ex/s]

 68%|██████▊   | 47240/69825 [00:15<00:07, 3062.22ex/s]

 68%|██████▊   | 47625/69825 [00:15<00:06, 3274.43ex/s]

 69%|██████▊   | 48000/69825 [00:15<00:06, 3219.76ex/s]

 69%|██████▉   | 48406/69825 [00:15<00:06, 3449.90ex/s]

 70%|██████▉   | 48861/69825 [00:15<00:05, 3756.60ex/s]

 71%|███████   | 49244/69825 [00:15<00:05, 3503.62ex/s]

 71%|███████   | 49602/69825 [00:16<00:05, 3425.26ex/s]

 72%|███████▏  | 49950/69825 [00:16<00:05, 3418.54ex/s]

 72%|███████▏  | 50296/69825 [00:16<00:06, 3240.95ex/s]

 73%|███████▎  | 50649/69825 [00:16<00:05, 3319.65ex/s]

 73%|███████▎  | 51021/69825 [00:16<00:05, 3431.74ex/s]

 74%|███████▎  | 51384/69825 [00:16<00:05, 3484.92ex/s]

 74%|███████▍  | 51758/69825 [00:16<00:05, 3556.77ex/s]

 75%|███████▍  | 52116/69825 [00:16<00:05, 3460.13ex/s]

 75%|███████▌  | 52477/69825 [00:16<00:04, 3502.68ex/s]

 76%|███████▌  | 52829/69825 [00:17<00:04, 3465.87ex/s]

 76%|███████▌  | 53177/69825 [00:17<00:05, 3293.07ex/s]

 77%|███████▋  | 53509/69825 [00:17<00:05, 3180.72ex/s]

 77%|███████▋  | 53880/69825 [00:17<00:04, 3327.21ex/s]

 78%|███████▊  | 54215/69825 [00:17<00:05, 3090.61ex/s]

 78%|███████▊  | 54596/69825 [00:17<00:04, 3287.11ex/s]

 79%|███████▊  | 54930/69825 [00:17<00:04, 3175.90ex/s]

 79%|███████▉  | 55283/69825 [00:17<00:04, 3273.12ex/s]

 80%|███████▉  | 55722/69825 [00:17<00:03, 3589.97ex/s]

 80%|████████  | 56085/69825 [00:18<00:04, 3384.94ex/s]

 81%|████████  | 56473/69825 [00:18<00:03, 3522.31ex/s]

 81%|████████▏ | 56832/69825 [00:18<00:03, 3539.96ex/s]

 82%|████████▏ | 57190/69825 [00:18<00:05, 2319.84ex/s]

 82%|████████▏ | 57546/69825 [00:18<00:04, 2582.55ex/s]

 83%|████████▎ | 57931/69825 [00:18<00:04, 2875.85ex/s]

 83%|████████▎ | 58262/69825 [00:18<00:04, 2839.01ex/s]

 84%|████████▍ | 58618/69825 [00:18<00:03, 3020.99ex/s]

 84%|████████▍ | 58989/69825 [00:19<00:03, 3202.72ex/s]

 85%|████████▍ | 59329/69825 [00:19<00:03, 3027.22ex/s]

 85%|████████▌ | 59651/69825 [00:19<00:03, 3078.04ex/s]

 86%|████████▌ | 60000/69825 [00:19<00:03, 3025.08ex/s]

 87%|████████▋ | 60421/69825 [00:19<00:02, 3347.67ex/s]

 87%|████████▋ | 60817/69825 [00:19<00:02, 3517.50ex/s]

 88%|████████▊ | 61177/69825 [00:19<00:02, 3460.98ex/s]

 88%|████████▊ | 61593/69825 [00:19<00:02, 3659.96ex/s]

 89%|████████▉ | 62000/69825 [00:19<00:02, 3707.29ex/s]

 89%|████████▉ | 62414/69825 [00:20<00:01, 3830.09ex/s]

 90%|████████▉ | 62801/69825 [00:20<00:01, 3838.06ex/s]

 90%|█████████ | 63187/69825 [00:20<00:02, 3035.27ex/s]

 91%|█████████ | 63518/69825 [00:20<00:02, 2639.59ex/s]

 91%|█████████▏| 63889/69825 [00:20<00:02, 2887.18ex/s]

 92%|█████████▏| 64203/69825 [00:20<00:01, 2819.57ex/s]

 92%|█████████▏| 64551/69825 [00:20<00:01, 2986.11ex/s]

 93%|█████████▎| 64929/69825 [00:20<00:01, 3196.69ex/s]

 93%|█████████▎| 65262/69825 [00:20<00:01, 3110.77ex/s]

 94%|█████████▍| 65630/69825 [00:21<00:01, 3265.81ex/s]

 94%|█████████▍| 65977/69825 [00:21<00:01, 3321.71ex/s]

 95%|█████████▍| 66316/69825 [00:21<00:01, 3110.12ex/s]

 96%|█████████▌| 66686/69825 [00:21<00:00, 3272.49ex/s]

 96%|█████████▌| 67020/69825 [00:21<00:00, 2925.59ex/s]

 96%|█████████▋| 67361/69825 [00:21<00:00, 3053.44ex/s]

 97%|█████████▋| 67720/69825 [00:21<00:00, 3199.38ex/s]

 97%|█████████▋| 68048/69825 [00:21<00:00, 2947.18ex/s]

 98%|█████████▊| 68386/69825 [00:22<00:00, 3062.42ex/s]

 99%|█████████▊| 68796/69825 [00:22<00:00, 3349.86ex/s]

 99%|█████████▉| 69147/69825 [00:22<00:00, 3393.82ex/s]

100%|█████████▉| 69492/69825 [00:22<00:00, 3368.82ex/s]

100%|██████████| 69825/69825 [00:22<00:00, 3117.50ex/s]




In [17]:
len(train_dataset), len(eval_dataset)

(75000, 69825)

In [18]:
k = random.randrange(len(train_dataset))
print(tokenizer.decode(train_dataset['input_ids'][k]), train_dataset['labels'][k])
k = random.randrange(len(eval_dataset))
print(tokenizer.decode(eval_dataset['input_ids'][k]), eval_dataset['labels'][k])

[CLS] 부담없는 가격이라 이제는 고기능 앰플을 매일매일 바를수 있네요! [SEP] 부담없는 가격이라 이제는 고기능 앰플을 매일매일 바를수 있네요! # 제품 전체 # 다양성 [SEP] 1


[CLS] 딸꺼도 사길 잘했네 ^ ^ [SEP] 딸꺼도 사길 잘했네 ^ ^ # 패키지 / 구성품 # 품질 [SEP] 1


# Load Trainer

In [19]:
args = TrainingArguments(
    output_dir=run_name,
    run_name=run_name,
    report_to=report_to,

    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,

    optim=optim,

    learning_rate=learning_rate,
    weight_decay=weight_decay,
    adam_epsilon=adam_epsilon,

    lr_scheduler_type=lr_scheduler_type,
    warmup_ratio=warmup_ratio,

    save_total_limit=save_total_limit,

    load_best_model_at_end=load_best_model_at_end,
    metric_for_best_model=metric_for_best_model,
    
    save_strategy=save_strategy,
    evaluation_strategy=evaluation_strategy,

    logging_strategy=logging_strategy,
    logging_first_step=logging_first_step, 
    logging_steps=logging_steps,
    
    fp16=fp16,
)

In [20]:
# es = EarlyStoppingCallback(early_stopping_patience=early_stopping_patience)

In [21]:
trainer = Trainer(
    model,
    args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    data_collator=data_collator,
    # callbacks=[es],
)

# Run Trainer

In [22]:
trainer.train()
wandb.finish()

The following columns in the training set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: form, id, pair. If form, id, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running training *****


  Num examples = 75000


  Num Epochs = 10


  Instantaneous batch size per device = 32


  Total train batch size (w. parallel, distributed & accumulation) = 128


  Gradient Accumulation steps = 1


  Total optimization steps = 5860


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"




Epoch,Training Loss,Validation Loss,Accuracy,F1 True,F1 False,F1 Macro,F1 Micro
1,0.0828,0.082997,0.971242,0.61912,0.985057,0.802088,0.971242
2,0.0615,0.077769,0.972216,0.640341,0.98555,0.812946,0.972216
3,0.0447,0.083014,0.972932,0.670732,0.985886,0.828309,0.972932
4,0.0368,0.100825,0.973906,0.690872,0.986378,0.838625,0.973906
5,0.0268,0.134497,0.97445,0.664409,0.98672,0.825564,0.97445
6,0.0134,0.128236,0.973176,0.67679,0.986007,0.831399,0.973176
7,0.0083,0.150752,0.973806,0.68536,0.986334,0.835847,0.973806
8,0.0076,0.176244,0.974694,0.682023,0.986823,0.834423,0.974694
9,0.0043,0.189494,0.974436,0.68005,0.986686,0.833368,0.974436
10,0.0022,0.200503,0.974522,0.68663,0.986721,0.836676,0.974522


The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: form, id, pair. If form, id, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-586


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-586/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-586/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-586/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-586/special_tokens_map.json




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: form, id, pair. If form, id, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-1172


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-1172/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-1172/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-1172/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-1172/special_tokens_map.json




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: form, id, pair. If form, id, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-1758


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-1758/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-1758/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-1758/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-1758/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-586] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: form, id, pair. If form, id, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-2344


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-2344/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-2344/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-2344/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-2344/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-1758] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: form, id, pair. If form, id, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-2930


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-2930/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-2930/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-2930/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-2930/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-2344] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: form, id, pair. If form, id, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-3516


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-3516/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-3516/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-3516/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-3516/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-2930] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: form, id, pair. If form, id, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-4102


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-4102/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-4102/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-4102/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-4102/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-3516] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: form, id, pair. If form, id, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-4688


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-4688/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-4688/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-4688/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-4688/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-4102] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: form, id, pair. If form, id, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-5274


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-5274/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-5274/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-5274/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-5274/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-4688] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: form, id, pair. If form, id, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-5860


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-5860/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-5860/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-5860/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-5860/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-5274] due to args.save_total_limit




Training completed. Do not forget to share your model on huggingface.co/models =)




Loading best model from monologg_koelectra_base_v3_discriminator_uncleaned_v7/checkpoint-1172 (score: 0.07776901125907898).


Saving model checkpoint to /tmp/tmpvuysrlvn


Configuration saved in /tmp/tmpvuysrlvn/config.json


Model weights saved in /tmp/tmpvuysrlvn/pytorch_model.bin


tokenizer config file saved in /tmp/tmpvuysrlvn/tokenizer_config.json


Special tokens file saved in /tmp/tmpvuysrlvn/special_tokens_map.json


0,1
eval/accuracy,▁▃▄▆█▅▆█▇█
eval/f1_false,▁▃▄▆█▅▆█▇█
eval/f1_macro,▁▃▆█▅▇▇▇▇█
eval/f1_micro,▁▃▄▆█▅▆█▇█
eval/f1_true,▁▃▆█▅▇▇▇▇█
eval/loss,▁▁▁▂▄▄▅▇▇█
eval/runtime,█▇▁▆▂▆▅▄▄▄
eval/samples_per_second,▁▂█▃▇▃▃▅▅▅
eval/steps_per_second,▁▂█▃▇▃▃▅▅▅
train/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
eval/accuracy,0.97452
eval/f1_false,0.98672
eval/f1_macro,0.83668
eval/f1_micro,0.97452
eval/f1_true,0.68663
eval/loss,0.2005
eval/runtime,247.919
eval/samples_per_second,281.644
eval/steps_per_second,2.202
train/epoch,10.0


In [23]:
keep = [
    'added_tokens.json',
    'config.json',
    'pytorch_model.bin',
    'special_tokens_map.json',
    'tokenizer.json',
    'tokenizer_config.json',
    'vocab.txt'
]

ckpts = os.listdir(run_name)
for ckpt in ckpts:
    ckpt = os.path.join(run_name, ckpt)
    for item in os.listdir(ckpt):
        if item not in keep:
            os.remove(os.path.join(ckpt, item))

!mv wandb {run_name} {SAVE_PATH}/

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
