# Description


# Modules and Global Variables

In [1]:
from transformers import (
    AutoConfig, AutoTokenizer, AutoModelForSequenceClassification, 
    ElectraTokenizerFast, ElectraForSequenceClassification, 
    DefaultDataCollator, DataCollatorWithPadding, 
    TrainingArguments, Trainer,
)

from transformers.optimization import (
    AdamW, get_linear_schedule_with_warmup,
    Adafactor, AdafactorSchedule,
)

import torch
import wandb

import datasets
import evaluate

from sklearn.metrics import accuracy_score, f1_score

import numpy as np
import pandas as pd

import os
import re
import random

import demoji

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
print(f'torch.__version__: {torch.__version__}')
print(f'torch.cuda.is_available(): {torch.cuda.is_available()}')
NGPU = torch.cuda.device_count()
print(f'NGPU: {NGPU}')
# NGPU = torch.cuda.device_count()
# if NGPU > 1:
#     model = torch.nn.DataParallel(model, device_ids=list(range(NGPU)))

torch.__version__: 1.12.1
torch.cuda.is_available(): True
NGPU: 4


In [3]:
### labels

ce_labels = ['True', 'False']
pc_labels = ['positive', 'negative', 'neutral']
pc_binary_labels = ['True', 'False']

labels = ce_labels

label2id = {k: i for i, k in enumerate(labels)}
id2label = {i: k for i, k in enumerate(labels)}
num_labels = len(labels)

print(label2id)
print(id2label)

{'True': 0, 'False': 1}
{0: 'True', 1: 'False'}


In [4]:
### paths and names

PROJECT_NAME = 'aspect_category_detection'
RUN_ID = 'uncleaned_v11'

DATA_V = 'uncleaned_v11'
DATA_T = 'ce' # ce or pc or pc_binary
AUGMENTATION = False
AUG_NAME = 'balanced'

model_checkpoint = 'monologg/koelectra-base-v3-discriminator'

notebook_name = 'acd_binary_trainer.ipynb'

### fixed

model_name = re.sub(r'[/-]', r'_', model_checkpoint).lower()
run_name = f'{model_name}_{RUN_ID}'

ROOT_PATH = './'
SAVE_PATH = os.path.join(ROOT_PATH, 'training_results', run_name, 'acd')
NOTEBOOK_PATH = os.path.join('./', notebook_name)

augornot = f'_{AUG_NAME}' if AUGMENTATION is True else ''
TRAIN_DATA_PATH = os.path.join(ROOT_PATH, 'dataset', DATA_V, f'{DATA_T}_train{augornot}.csv')
EVAL_DATA_PATH = os.path.join(ROOT_PATH, 'dataset', DATA_V, f'{DATA_T}_dev.csv')

!mkdir -p {SAVE_PATH}

In [5]:
if os.path.exists(SAVE_PATH):
    print(f'{SAVE_PATH} exists.')
else:
    print(f'{SAVE_PATH} does not exist.')
if os.path.exists(NOTEBOOK_PATH):
    print(f'{NOTEBOOK_PATH} exists.')
else:
    print(f'{NOTEBOOK_PATH} does not exist.')
if os.path.exists(TRAIN_DATA_PATH):
    print(f'{TRAIN_DATA_PATH} exists.')
else:
    print(f'{TRAIN_DATA_PATH} does not exist.')
if os.path.exists(EVAL_DATA_PATH):
    print(f'{EVAL_DATA_PATH} exists.')
else:
    print(f'{EVAL_DATA_PATH} does not exist.')

./training_results/monologg_koelectra_base_v3_discriminator_uncleaned_v11/acd exists.
./acd_binary_trainer.ipynb exists.
./dataset/uncleaned_v11/ce_train.csv exists.
./dataset/uncleaned_v11/ce_dev.csv exists.


In [6]:
### rest of training args

report_to="wandb"

fp16 = False

num_train_epochs = 10
batch_size = 32
gradient_accumulation_steps = 1

optim = 'adamw_hf' # 'adamw_torch'

learning_rate = 3e-6 / 8 * batch_size * NGPU # 5e-5
weight_decay = 0.01 # 0
adam_epsilon = 1e-8

lr_scheduler_type = 'linear'
warmup_ratio = 0

save_total_limit = 2

load_best_model_at_end = True
metric_for_best_model ='f1_macro'

save_strategy = "epoch"
evaluation_strategy = "epoch"

logging_strategy = "steps"
logging_first_step = True 
logging_steps = 50

print(learning_rate)

4.8e-05


# WandB Configuration

In [7]:
%env WANDB_PROJECT={PROJECT_NAME}
%env WANDB_NOTEBOOK_NAME={NOTEBOOK_PATH}
%env WANDB_LOG_MODEL=true
%env WANDB_WATCH=all
wandb.login()

env: WANDB_PROJECT=aspect_category_detection
env: WANDB_NOTEBOOK_NAME=./acd_binary_trainer.ipynb
env: WANDB_LOG_MODEL=true
env: WANDB_WATCH=all


[34m[1mwandb[0m: Currently logged in as: [33mdotsnangles[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

# Load Model, Tokenizer, and Collator

In [8]:
tokenizer = ElectraTokenizerFast.from_pretrained(f'dataset/{DATA_V}/tokenizer')
model = ElectraForSequenceClassification.from_pretrained(
    model_checkpoint, label2id=label2id, id2label=id2label, num_labels=num_labels
)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
model.resize_token_embeddings(len(tokenizer))

Some weights of the model checkpoint at monologg/koelectra-base-v3-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-v3-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Embedding(35254, 768)

In [9]:
# train_path = f'./dataset/{DATA_V}/raw_train.csv'
# dev_path = f'./dataset/{DATA_V}/raw_dev.csv'
# test_path = f'./dataset/{DATA_V}/raw_test.csv'
# train = pd.read_csv(train_path)
# dev = pd.read_csv(dev_path)
# test = pd.read_csv(test_path)

# ### new
# entity_property_pair = [
#     '본품#가격', '본품#다양성', '본품#디자인', '본품#인지도', '본품#일반', '본품#편의성', '본품#품질',
#     '브랜드#가격', '브랜드#디자인', '브랜드#인지도', '브랜드#일반', '브랜드#품질',
#     '제품 전체#가격', '제품 전체#다양성', '제품 전체#디자인', '제품 전체#인지도', '제품 전체#일반', '제품 전체#편의성', '제품 전체#품질',
#     '패키지/구성품#가격', '패키지/구성품#다양성', '패키지/구성품#디자인', '패키지/구성품#일반', '패키지/구성품#편의성', '패키지/구성품#품질'
# ]
# sentiments = ['positive', 'negative', 'neutral']
# target = ['Target']
# special_tokens = ['&name&', '&affiliation&', '&social-security-num&', '&tel-num&', '&card-num&', '&bank-account&', '&num&', '&online-account&']
# emojis = pd.concat([train.sentence_form, dev.sentence_form, test.sentence_form], ignore_index=True, verify_integrity=True).to_frame()
# emojis = list(set(demoji.findall(' '.join(emojis.sentence_form.to_list())).keys()))
# ep_labels = pd.Series(entity_property_pair, name='sentence_form', copy=True)

# tokens2add = special_tokens + emojis + entity_property_pair + sentiments + target

# tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
# print(len(tokenizer))
# tokenizer_train_data = pd.concat([train.sentence_form, dev.sentence_form, test.sentence_form], ignore_index=True, verify_integrity=True).to_frame().drop_duplicates()
# tokenizer_train_data = tokenizer_train_data.sentence_form.to_list()
# new_tokenizer = tokenizer.train_new_from_iterator(tokenizer_train_data, vocab_size=1)
# new_tokens = set(list(new_tokenizer.vocab.keys()) + tokens2add) - set(tokenizer.vocab.keys())
# tokenizer.add_tokens(list(new_tokens))
# print(len(new_tokenizer))
# print(len(tokenizer))
# model.resize_token_embeddings(len(tokenizer))

In [10]:
# print(len(new_tokens))
# print(new_tokens)

In [11]:
model.config.label2id, model.config.id2label, model.num_labels

({'True': 0, 'False': 1}, {0: 'True', 1: 'False'}, 2)

# Define Metric

In [12]:
accuracy_metric = evaluate.load('accuracy')
f1_metric = evaluate.load('f1')

In [13]:
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    
    accuracy = accuracy_metric.compute(references=labels, predictions=predictions)['accuracy']
    f1_true, f1_false = tuple(f1_metric.compute(references=labels, predictions=predictions, average=None, labels=[0,1])['f1'])
    f1_macro = f1_metric.compute(references=labels, predictions=predictions, average='macro')['f1']
    f1_micro = f1_metric.compute(references=labels, predictions=predictions, average='micro')['f1']
    
    return {'accuracy': accuracy, 'f1_true': f1_true, 'f1_false': f1_false, 'f1_macro': f1_macro, 'f1_micro': f1_micro}

# Load Data

In [14]:
def preprocess_function(examples):
    return tokenizer(examples["form"], examples["pair"], truncation=True)

In [15]:
train_dataset = pd.read_csv(TRAIN_DATA_PATH)
eval_dataset = pd.read_csv(EVAL_DATA_PATH)
train_dataset = datasets.Dataset.from_pandas(train_dataset) #.shuffle(seed=42)
eval_dataset = datasets.Dataset.from_pandas(eval_dataset) #.shuffle(seed=42)
train_dataset = train_dataset.map(preprocess_function, batched=False)
eval_dataset = eval_dataset.map(preprocess_function, batched=False)

  0%|          | 0/124700 [00:00<?, ?ex/s]

  0%|          | 412/124700 [00:00<00:30, 4116.46ex/s]

  1%|          | 824/124700 [00:00<00:31, 3929.12ex/s]

  1%|          | 1218/124700 [00:00<00:34, 3555.13ex/s]

  1%|▏         | 1698/124700 [00:00<00:30, 4008.79ex/s]

  2%|▏         | 2105/124700 [00:00<00:31, 3898.64ex/s]

  2%|▏         | 2501/124700 [00:00<00:31, 3916.13ex/s]

  2%|▏         | 2950/124700 [00:00<00:29, 4095.11ex/s]

  3%|▎         | 3362/124700 [00:00<00:31, 3805.60ex/s]

  3%|▎         | 3778/124700 [00:00<00:30, 3906.45ex/s]

  3%|▎         | 4173/124700 [00:01<00:32, 3757.24ex/s]

  4%|▎         | 4553/124700 [00:01<00:34, 3532.00ex/s]

  4%|▍         | 4996/124700 [00:01<00:31, 3779.98ex/s]

  4%|▍         | 5380/124700 [00:01<00:33, 3570.62ex/s]

  5%|▍         | 5870/124700 [00:01<00:30, 3936.74ex/s]

  5%|▌         | 6271/124700 [00:01<00:31, 3742.28ex/s]

  5%|▌         | 6655/124700 [00:01<00:31, 3767.66ex/s]

  6%|▌         | 7037/124700 [00:01<00:34, 3437.48ex/s]

  6%|▌         | 7460/124700 [00:01<00:32, 3647.88ex/s]

  6%|▋         | 7888/124700 [00:02<00:30, 3821.56ex/s]

  7%|▋         | 8278/124700 [00:02<00:31, 3673.75ex/s]

  7%|▋         | 8684/124700 [00:02<00:30, 3780.28ex/s]

  7%|▋         | 9067/124700 [00:02<00:31, 3627.92ex/s]

  8%|▊         | 9476/124700 [00:02<00:30, 3755.37ex/s]

  8%|▊         | 9926/124700 [00:02<00:28, 3966.98ex/s]

  8%|▊         | 10327/124700 [00:02<00:29, 3849.63ex/s]

  9%|▊         | 10768/124700 [00:02<00:28, 4008.15ex/s]

  9%|▉         | 11172/124700 [00:02<00:29, 3790.25ex/s]

  9%|▉         | 11556/124700 [00:03<00:29, 3803.73ex/s]

 10%|▉         | 11940/124700 [00:03<00:29, 3798.55ex/s]

 10%|▉         | 12322/124700 [00:03<00:30, 3681.41ex/s]

 10%|█         | 12736/124700 [00:03<00:29, 3808.71ex/s]

 11%|█         | 13119/124700 [00:03<00:31, 3499.69ex/s]

 11%|█         | 13546/124700 [00:03<00:29, 3710.45ex/s]

 11%|█         | 14000/124700 [00:03<00:29, 3754.98ex/s]

 12%|█▏        | 14426/124700 [00:03<00:28, 3893.20ex/s]

 12%|█▏        | 14887/124700 [00:03<00:26, 4095.37ex/s]

 12%|█▏        | 15301/124700 [00:04<00:27, 4047.99ex/s]

 13%|█▎        | 15753/124700 [00:04<00:26, 4181.77ex/s]

 13%|█▎        | 16174/124700 [00:04<00:27, 3945.28ex/s]

 13%|█▎        | 16641/124700 [00:04<00:26, 4146.69ex/s]

 14%|█▎        | 17060/124700 [00:04<00:26, 4002.46ex/s]

 14%|█▍        | 17488/124700 [00:04<00:26, 4077.95ex/s]

 14%|█▍        | 17899/124700 [00:04<00:26, 4074.86ex/s]

 15%|█▍        | 18309/124700 [00:04<00:26, 3986.24ex/s]

 15%|█▌        | 18732/124700 [00:04<00:26, 4054.78ex/s]

 15%|█▌        | 19139/124700 [00:04<00:27, 3840.10ex/s]

 16%|█▌        | 19565/124700 [00:05<00:26, 3958.58ex/s]

 16%|█▌        | 19964/124700 [00:05<00:26, 3898.66ex/s]

 16%|█▋        | 20356/124700 [00:05<00:27, 3764.70ex/s]

 17%|█▋        | 20811/124700 [00:05<00:26, 3985.59ex/s]

 17%|█▋        | 21213/124700 [00:05<00:27, 3809.93ex/s]

 17%|█▋        | 21598/124700 [00:05<00:26, 3820.61ex/s]

 18%|█▊        | 21983/124700 [00:05<00:27, 3790.46ex/s]

 18%|█▊        | 22364/124700 [00:05<00:28, 3639.56ex/s]

 18%|█▊        | 22767/124700 [00:05<00:27, 3748.76ex/s]

 19%|█▊        | 23144/124700 [00:06<00:27, 3690.82ex/s]

 19%|█▉        | 23532/124700 [00:06<00:27, 3744.72ex/s]

 19%|█▉        | 23981/124700 [00:06<00:25, 3959.64ex/s]

 20%|█▉        | 24379/124700 [00:06<00:25, 3933.36ex/s]

 20%|█▉        | 24848/124700 [00:06<00:24, 4153.63ex/s]

 20%|██        | 25265/124700 [00:06<00:24, 4102.27ex/s]

 21%|██        | 25781/124700 [00:06<00:22, 4411.31ex/s]

 21%|██        | 26224/124700 [00:06<00:24, 3943.03ex/s]

 21%|██▏       | 26629/124700 [00:06<00:26, 3767.93ex/s]

 22%|██▏       | 27014/124700 [00:07<00:26, 3734.45ex/s]

 22%|██▏       | 27393/124700 [00:07<00:26, 3627.13ex/s]

 22%|██▏       | 27770/124700 [00:07<00:26, 3663.89ex/s]

 23%|██▎       | 28140/124700 [00:07<00:27, 3573.98ex/s]

 23%|██▎       | 28557/124700 [00:07<00:25, 3740.18ex/s]

 23%|██▎       | 28971/124700 [00:07<00:24, 3853.11ex/s]

 24%|██▎       | 29359/124700 [00:07<00:25, 3807.73ex/s]

 24%|██▍       | 29742/124700 [00:07<00:26, 3569.64ex/s]

 24%|██▍       | 30103/124700 [00:07<00:26, 3537.81ex/s]

 25%|██▍       | 30561/124700 [00:07<00:24, 3831.92ex/s]

 25%|██▍       | 30996/124700 [00:08<00:23, 3979.31ex/s]

 25%|██▌       | 31397/124700 [00:08<00:25, 3726.21ex/s]

 26%|██▌       | 31843/124700 [00:08<00:23, 3929.80ex/s]

 26%|██▌       | 32241/124700 [00:08<00:24, 3820.61ex/s]

 26%|██▌       | 32644/124700 [00:08<00:23, 3878.09ex/s]

 26%|██▋       | 33035/124700 [00:08<00:24, 3678.80ex/s]

 27%|██▋       | 33424/124700 [00:08<00:24, 3736.51ex/s]

 27%|██▋       | 33801/124700 [00:09<00:39, 2326.66ex/s]

 27%|██▋       | 34173/124700 [00:09<00:34, 2607.79ex/s]

 28%|██▊       | 34574/124700 [00:09<00:30, 2920.78ex/s]

 28%|██▊       | 34944/124700 [00:09<00:28, 3108.83ex/s]

 28%|██▊       | 35297/124700 [00:09<00:29, 3052.89ex/s]

 29%|██▊       | 35682/124700 [00:09<00:27, 3257.61ex/s]

 29%|██▉       | 36032/124700 [00:09<00:27, 3261.12ex/s]

 29%|██▉       | 36449/124700 [00:09<00:25, 3507.62ex/s]

 30%|██▉       | 36814/124700 [00:09<00:26, 3302.11ex/s]

 30%|██▉       | 37186/124700 [00:09<00:25, 3415.49ex/s]

 30%|███       | 37672/124700 [00:10<00:22, 3819.23ex/s]

 31%|███       | 38064/124700 [00:10<00:23, 3701.54ex/s]

 31%|███       | 38506/124700 [00:10<00:22, 3902.61ex/s]

 31%|███       | 38962/124700 [00:10<00:20, 4090.84ex/s]

 32%|███▏      | 39377/124700 [00:10<00:21, 3967.23ex/s]

 32%|███▏      | 39797/124700 [00:10<00:21, 4031.50ex/s]

 32%|███▏      | 40204/124700 [00:10<00:22, 3811.80ex/s]

 33%|███▎      | 40633/124700 [00:10<00:21, 3944.92ex/s]

 33%|███▎      | 41032/124700 [00:10<00:23, 3581.37ex/s]

 33%|███▎      | 41433/124700 [00:11<00:22, 3693.66ex/s]

 34%|███▎      | 41886/124700 [00:11<00:21, 3925.10ex/s]

 34%|███▍      | 42286/124700 [00:11<00:21, 3812.55ex/s]

 34%|███▍      | 42684/124700 [00:11<00:21, 3859.08ex/s]

 35%|███▍      | 43074/124700 [00:11<00:22, 3690.51ex/s]

 35%|███▍      | 43501/124700 [00:11<00:21, 3852.09ex/s]

 35%|███▌      | 43946/124700 [00:11<00:20, 4021.59ex/s]

 36%|███▌      | 44352/124700 [00:11<00:20, 3976.61ex/s]

 36%|███▌      | 44786/124700 [00:11<00:19, 4080.43ex/s]

 36%|███▌      | 45197/124700 [00:12<00:20, 3909.54ex/s]

 37%|███▋      | 45629/124700 [00:12<00:19, 4023.99ex/s]

 37%|███▋      | 46034/124700 [00:12<00:20, 3920.68ex/s]

 37%|███▋      | 46484/124700 [00:12<00:19, 4084.29ex/s]

 38%|███▊      | 46939/124700 [00:12<00:18, 4217.28ex/s]

 38%|███▊      | 47363/124700 [00:12<00:18, 4089.21ex/s]

 38%|███▊      | 47797/124700 [00:12<00:18, 4161.26ex/s]

 39%|███▊      | 48215/124700 [00:12<00:19, 3983.17ex/s]

 39%|███▉      | 48616/124700 [00:12<00:19, 3967.49ex/s]

 39%|███▉      | 49015/124700 [00:13<00:24, 3109.00ex/s]

 40%|███▉      | 49439/124700 [00:13<00:22, 3383.37ex/s]

 40%|███▉      | 49804/124700 [00:13<00:21, 3447.27ex/s]

 40%|████      | 50169/124700 [00:13<00:22, 3380.26ex/s]

 41%|████      | 50610/124700 [00:13<00:20, 3657.73ex/s]

 41%|████      | 51000/124700 [00:13<00:20, 3611.40ex/s]

 41%|████      | 51424/124700 [00:13<00:19, 3786.08ex/s]

 42%|████▏     | 51811/124700 [00:13<00:19, 3734.27ex/s]

 42%|████▏     | 52190/124700 [00:13<00:21, 3405.87ex/s]

 42%|████▏     | 52659/124700 [00:14<00:19, 3750.32ex/s]

 43%|████▎     | 53044/124700 [00:14<00:19, 3689.41ex/s]

 43%|████▎     | 53466/124700 [00:14<00:18, 3837.04ex/s]

 43%|████▎     | 53888/124700 [00:14<00:17, 3944.95ex/s]

 44%|████▎     | 54288/124700 [00:14<00:18, 3749.80ex/s]

 44%|████▍     | 54671/124700 [00:14<00:18, 3771.94ex/s]

 44%|████▍     | 55052/124700 [00:14<00:20, 3442.70ex/s]

 44%|████▍     | 55404/124700 [00:14<00:21, 3219.86ex/s]

 45%|████▍     | 55833/124700 [00:14<00:19, 3501.52ex/s]

 45%|████▌     | 56192/124700 [00:15<00:20, 3396.33ex/s]

 45%|████▌     | 56608/124700 [00:15<00:18, 3604.57ex/s]

 46%|████▌     | 57000/124700 [00:15<00:18, 3571.85ex/s]

 46%|████▌     | 57463/124700 [00:15<00:17, 3865.54ex/s]

 46%|████▋     | 57913/124700 [00:15<00:16, 4045.13ex/s]

 47%|████▋     | 58322/124700 [00:15<00:16, 3984.89ex/s]

 47%|████▋     | 58724/124700 [00:15<00:16, 3974.51ex/s]

 47%|████▋     | 59124/124700 [00:15<00:18, 3500.87ex/s]

 48%|████▊     | 59516/124700 [00:15<00:18, 3612.88ex/s]

 48%|████▊     | 59962/124700 [00:16<00:16, 3846.90ex/s]

 48%|████▊     | 60356/124700 [00:16<00:20, 3213.37ex/s]

 49%|████▊     | 60777/124700 [00:16<00:18, 3461.75ex/s]

 49%|████▉     | 61170/124700 [00:16<00:17, 3584.02ex/s]

 49%|████▉     | 61623/124700 [00:16<00:16, 3842.02ex/s]

 50%|████▉     | 62021/124700 [00:16<00:16, 3875.61ex/s]

 50%|█████     | 62489/124700 [00:16<00:15, 4104.71ex/s]

 50%|█████     | 62973/124700 [00:16<00:14, 4317.79ex/s]

 51%|█████     | 63412/124700 [00:16<00:14, 4177.77ex/s]

 51%|█████     | 63835/124700 [00:17<00:14, 4114.57ex/s]

 52%|█████▏    | 64251/124700 [00:17<00:15, 4007.93ex/s]

 52%|█████▏    | 64655/124700 [00:17<00:16, 3719.43ex/s]

 52%|█████▏    | 65033/124700 [00:17<00:17, 3325.07ex/s]

 52%|█████▏    | 65467/124700 [00:17<00:16, 3585.61ex/s]

 53%|█████▎    | 65913/124700 [00:17<00:15, 3820.12ex/s]

 53%|█████▎    | 66306/124700 [00:17<00:15, 3690.16ex/s]

 54%|█████▎    | 66725/124700 [00:17<00:15, 3827.53ex/s]

 54%|█████▍    | 67115/124700 [00:17<00:15, 3696.33ex/s]

 54%|█████▍    | 67544/124700 [00:18<00:14, 3860.49ex/s]

 55%|█████▍    | 68000/124700 [00:18<00:14, 3887.94ex/s]

 55%|█████▍    | 68453/124700 [00:18<00:13, 4065.86ex/s]

 55%|█████▌    | 68928/124700 [00:18<00:13, 4259.44ex/s]

 56%|█████▌    | 69358/124700 [00:18<00:13, 4060.20ex/s]

 56%|█████▌    | 69837/124700 [00:18<00:12, 4264.80ex/s]

 56%|█████▋    | 70268/124700 [00:18<00:12, 4187.40ex/s]

 57%|█████▋    | 70695/124700 [00:18<00:12, 4210.38ex/s]

 57%|█████▋    | 71119/124700 [00:18<00:13, 4064.73ex/s]

 57%|█████▋    | 71591/124700 [00:18<00:12, 4250.24ex/s]

 58%|█████▊    | 72019/124700 [00:19<00:12, 4163.71ex/s]

 58%|█████▊    | 72500/124700 [00:19<00:12, 4349.08ex/s]

 59%|█████▊    | 72971/124700 [00:19<00:11, 4453.45ex/s]

 59%|█████▉    | 73419/124700 [00:19<00:11, 4389.80ex/s]

 59%|█████▉    | 73899/124700 [00:19<00:11, 4506.19ex/s]

 60%|█████▉    | 74351/124700 [00:19<00:11, 4361.34ex/s]

 60%|█████▉    | 74793/124700 [00:19<00:11, 4377.25ex/s]

 60%|██████    | 75232/124700 [00:19<00:11, 4203.47ex/s]

 61%|██████    | 75711/124700 [00:19<00:11, 4369.04ex/s]

 61%|██████    | 76151/124700 [00:20<00:11, 4331.23ex/s]

 61%|██████▏   | 76616/124700 [00:20<00:10, 4421.96ex/s]

 62%|██████▏   | 77060/124700 [00:20<00:11, 4252.63ex/s]

 62%|██████▏   | 77532/124700 [00:20<00:10, 4386.43ex/s]

 63%|██████▎   | 77973/124700 [00:20<00:10, 4386.31ex/s]

 63%|██████▎   | 78414/124700 [00:20<00:10, 4244.48ex/s]

 63%|██████▎   | 78851/124700 [00:20<00:10, 4269.50ex/s]

 64%|██████▎   | 79280/124700 [00:20<00:10, 4192.64ex/s]

 64%|██████▍   | 79742/124700 [00:20<00:10, 4314.98ex/s]

 64%|██████▍   | 80175/124700 [00:20<00:10, 4207.95ex/s]

 65%|██████▍   | 80644/124700 [00:21<00:10, 4347.34ex/s]

 65%|██████▌   | 81081/124700 [00:21<00:10, 4232.73ex/s]

 65%|██████▌   | 81522/124700 [00:21<00:10, 4282.39ex/s]

 66%|██████▌   | 81963/124700 [00:21<00:09, 4318.08ex/s]

 66%|██████▌   | 82396/124700 [00:21<00:10, 3899.89ex/s]

 66%|██████▋   | 82865/124700 [00:21<00:10, 4116.98ex/s]

 67%|██████▋   | 83285/124700 [00:21<00:10, 4098.53ex/s]

 67%|██████▋   | 83756/124700 [00:21<00:09, 4269.93ex/s]

 68%|██████▊   | 84188/124700 [00:21<00:10, 4036.90ex/s]

 68%|██████▊   | 84603/124700 [00:22<00:09, 4067.52ex/s]

 68%|██████▊   | 85014/124700 [00:22<00:09, 4062.90ex/s]

 69%|██████▊   | 85460/124700 [00:22<00:09, 4176.06ex/s]

 69%|██████▉   | 85955/124700 [00:22<00:08, 4400.57ex/s]

 69%|██████▉   | 86398/124700 [00:22<00:09, 4196.61ex/s]

 70%|██████▉   | 86844/124700 [00:22<00:08, 4270.47ex/s]

 70%|██████▉   | 87274/124700 [00:22<00:10, 3707.21ex/s]

 70%|███████   | 87660/124700 [00:22<00:10, 3679.16ex/s]

 71%|███████   | 88038/124700 [00:22<00:09, 3687.38ex/s]

 71%|███████   | 88486/124700 [00:23<00:09, 3905.84ex/s]

 71%|███████▏  | 88926/124700 [00:23<00:08, 4044.36ex/s]

 72%|███████▏  | 89336/124700 [00:23<00:08, 4019.67ex/s]

 72%|███████▏  | 89742/124700 [00:23<00:09, 3612.82ex/s]

 72%|███████▏  | 90113/124700 [00:23<00:10, 3440.11ex/s]

 73%|███████▎  | 90545/124700 [00:23<00:09, 3674.99ex/s]

 73%|███████▎  | 91000/124700 [00:23<00:08, 3752.29ex/s]

 73%|███████▎  | 91506/124700 [00:23<00:08, 4110.34ex/s]

 74%|███████▍  | 91982/124700 [00:23<00:07, 4290.99ex/s]

 74%|███████▍  | 92418/124700 [00:23<00:07, 4304.55ex/s]

 75%|███████▍  | 92918/124700 [00:24<00:07, 4505.53ex/s]

 75%|███████▍  | 93373/124700 [00:24<00:07, 4248.85ex/s]

 75%|███████▌  | 93854/124700 [00:24<00:07, 4405.43ex/s]

 76%|███████▌  | 94300/124700 [00:24<00:07, 4223.92ex/s]

 76%|███████▌  | 94728/124700 [00:24<00:07, 4239.16ex/s]

 76%|███████▋  | 95156/124700 [00:24<00:07, 4019.13ex/s]

 77%|███████▋  | 95601/124700 [00:24<00:07, 4137.83ex/s]

 77%|███████▋  | 96019/124700 [00:24<00:07, 3872.59ex/s]

 77%|███████▋  | 96428/124700 [00:24<00:07, 3932.19ex/s]

 78%|███████▊  | 96851/124700 [00:25<00:06, 4013.67ex/s]

 78%|███████▊  | 97256/124700 [00:25<00:07, 3833.09ex/s]

 78%|███████▊  | 97655/124700 [00:25<00:06, 3873.99ex/s]

 79%|███████▊  | 98046/124700 [00:25<00:07, 3600.19ex/s]

 79%|███████▉  | 98433/124700 [00:25<00:07, 3673.62ex/s]

 79%|███████▉  | 98838/124700 [00:25<00:06, 3778.85ex/s]

 80%|███████▉  | 99220/124700 [00:25<00:07, 3587.29ex/s]

 80%|███████▉  | 99663/124700 [00:25<00:06, 3821.69ex/s]

 80%|████████  | 100050/124700 [00:25<00:06, 3785.60ex/s]

 81%|████████  | 100487/124700 [00:26<00:06, 3952.80ex/s]

 81%|████████  | 100886/124700 [00:26<00:09, 2504.91ex/s]

 81%|████████  | 101205/124700 [00:26<00:09, 2557.32ex/s]

 81%|████████▏ | 101588/124700 [00:26<00:08, 2839.45ex/s]

 82%|████████▏ | 102000/124700 [00:26<00:07, 3048.35ex/s]

 82%|████████▏ | 102448/124700 [00:26<00:06, 3405.70ex/s]

 82%|████████▏ | 102843/124700 [00:26<00:06, 3547.57ex/s]

 83%|████████▎ | 103222/124700 [00:26<00:06, 3466.45ex/s]

 83%|████████▎ | 103614/124700 [00:27<00:05, 3589.63ex/s]

 83%|████████▎ | 104000/124700 [00:27<00:05, 3495.98ex/s]

 84%|████████▍ | 104447/124700 [00:27<00:05, 3764.29ex/s]

 84%|████████▍ | 104911/124700 [00:27<00:04, 4008.79ex/s]

 84%|████████▍ | 105320/124700 [00:27<00:05, 3798.98ex/s]

 85%|████████▍ | 105726/124700 [00:27<00:04, 3871.69ex/s]

 85%|████████▌ | 106119/124700 [00:27<00:05, 3677.31ex/s]

 85%|████████▌ | 106524/124700 [00:27<00:04, 3778.36ex/s]

 86%|████████▌ | 106992/124700 [00:27<00:04, 4033.24ex/s]

 86%|████████▌ | 107400/124700 [00:28<00:04, 3826.61ex/s]

 86%|████████▋ | 107836/124700 [00:28<00:04, 3974.59ex/s]

 87%|████████▋ | 108238/124700 [00:28<00:04, 3850.59ex/s]

 87%|████████▋ | 108638/124700 [00:28<00:04, 3890.99ex/s]

 87%|████████▋ | 109030/124700 [00:28<00:04, 3558.18ex/s]

 88%|████████▊ | 109448/124700 [00:28<00:04, 3725.63ex/s]

 88%|████████▊ | 109847/124700 [00:28<00:03, 3798.23ex/s]

 88%|████████▊ | 110232/124700 [00:28<00:04, 3349.98ex/s]

 89%|████████▊ | 110652/124700 [00:28<00:03, 3568.22ex/s]

 89%|████████▉ | 111021/124700 [00:29<00:04, 3301.95ex/s]

 89%|████████▉ | 111414/124700 [00:29<00:03, 3466.34ex/s]

 90%|████████▉ | 111771/124700 [00:29<00:03, 3327.14ex/s]

 90%|████████▉ | 112141/124700 [00:29<00:03, 3427.78ex/s]

 90%|█████████ | 112570/124700 [00:29<00:03, 3668.05ex/s]

 91%|█████████ | 112984/124700 [00:29<00:03, 3801.25ex/s]

 91%|█████████ | 113370/124700 [00:29<00:03, 3688.63ex/s]

 91%|█████████ | 113786/124700 [00:29<00:02, 3821.19ex/s]

 92%|█████████▏| 114172/124700 [00:29<00:03, 3465.64ex/s]

 92%|█████████▏| 114527/124700 [00:30<00:03, 3173.79ex/s]

 92%|█████████▏| 114985/124700 [00:30<00:02, 3540.73ex/s]

 93%|█████████▎| 115351/124700 [00:30<00:02, 3503.72ex/s]

 93%|█████████▎| 115768/124700 [00:30<00:02, 3686.56ex/s]

 93%|█████████▎| 116144/124700 [00:30<00:02, 3654.08ex/s]

 93%|█████████▎| 116570/124700 [00:30<00:02, 3825.44ex/s]

 94%|█████████▍| 116998/124700 [00:30<00:01, 3955.32ex/s]

 94%|█████████▍| 117398/124700 [00:30<00:01, 3912.16ex/s]

 94%|█████████▍| 117812/124700 [00:30<00:01, 3976.67ex/s]

 95%|█████████▍| 118212/124700 [00:31<00:01, 3963.64ex/s]

 95%|█████████▌| 118664/124700 [00:31<00:01, 4126.80ex/s]

 95%|█████████▌| 119078/124700 [00:31<00:01, 3859.68ex/s]

 96%|█████████▌| 119469/124700 [00:31<00:01, 3846.42ex/s]

 96%|█████████▌| 119861/124700 [00:31<00:01, 3865.71ex/s]

 96%|█████████▋| 120250/124700 [00:31<00:01, 3774.56ex/s]

 97%|█████████▋| 120645/124700 [00:31<00:01, 3824.64ex/s]

 97%|█████████▋| 121029/124700 [00:31<00:01, 3549.26ex/s]

 97%|█████████▋| 121389/124700 [00:31<00:00, 3488.11ex/s]

 98%|█████████▊| 121802/124700 [00:31<00:00, 3666.50ex/s]

 98%|█████████▊| 122172/124700 [00:32<00:00, 3367.22ex/s]

 98%|█████████▊| 122529/124700 [00:32<00:00, 3421.02ex/s]

 99%|█████████▊| 122973/124700 [00:32<00:00, 3704.97ex/s]

 99%|█████████▉| 123349/124700 [00:32<00:00, 3667.08ex/s]

 99%|█████████▉| 123770/124700 [00:32<00:00, 3820.01ex/s]

100%|█████████▉| 124156/124700 [00:32<00:00, 3551.83ex/s]

100%|█████████▉| 124547/124700 [00:32<00:00, 3650.99ex/s]

100%|██████████| 124700/124700 [00:32<00:00, 3801.22ex/s]




  0%|          | 0/30775 [00:00<?, ?ex/s]

  1%|▏         | 456/30775 [00:00<00:06, 4559.32ex/s]

  3%|▎         | 912/30775 [00:00<00:06, 4494.98ex/s]

  4%|▍         | 1362/30775 [00:00<00:07, 3993.75ex/s]

  6%|▌         | 1827/30775 [00:00<00:06, 4232.25ex/s]

  7%|▋         | 2256/30775 [00:00<00:06, 4078.13ex/s]

  9%|▊         | 2668/30775 [00:00<00:07, 3845.32ex/s]

 10%|▉         | 3057/30775 [00:00<00:07, 3772.71ex/s]

 11%|█▏        | 3490/30775 [00:00<00:06, 3935.38ex/s]

 13%|█▎        | 3940/30775 [00:00<00:06, 4101.07ex/s]

 14%|█▍        | 4353/30775 [00:01<00:06, 4023.82ex/s]

 15%|█▌        | 4759/30775 [00:01<00:06, 4033.70ex/s]

 17%|█▋        | 5164/30775 [00:01<00:07, 3312.93ex/s]

 18%|█▊        | 5517/30775 [00:01<00:07, 3363.57ex/s]

 19%|█▉        | 5896/30775 [00:01<00:07, 3477.52ex/s]

 20%|██        | 6257/30775 [00:01<00:07, 3441.26ex/s]

 22%|██▏       | 6700/30775 [00:01<00:06, 3717.15ex/s]

 23%|██▎       | 7080/30775 [00:01<00:06, 3629.50ex/s]

 24%|██▍       | 7470/30775 [00:01<00:06, 3703.90ex/s]

 26%|██▌       | 7875/30775 [00:02<00:06, 3802.21ex/s]

 27%|██▋       | 8259/30775 [00:02<00:06, 3668.33ex/s]

 28%|██▊       | 8651/30775 [00:02<00:05, 3739.63ex/s]

 29%|██▉       | 9028/30775 [00:02<00:05, 3686.83ex/s]

 31%|███       | 9481/30775 [00:02<00:05, 3930.77ex/s]

 32%|███▏      | 9892/30775 [00:02<00:05, 3981.20ex/s]

 33%|███▎      | 10292/30775 [00:02<00:05, 3939.49ex/s]

 35%|███▍      | 10762/30775 [00:02<00:04, 4161.88ex/s]

 36%|███▋      | 11180/30775 [00:02<00:04, 3968.73ex/s]

 38%|███▊      | 11656/30775 [00:03<00:04, 4194.21ex/s]

 39%|███▉      | 12079/30775 [00:03<00:04, 4129.57ex/s]

 41%|████      | 12520/30775 [00:03<00:04, 4210.07ex/s]

 42%|████▏     | 13000/30775 [00:03<00:04, 4252.26ex/s]

 44%|████▎     | 13427/30775 [00:03<00:04, 4176.95ex/s]

 45%|████▌     | 13850/30775 [00:03<00:04, 4184.01ex/s]

 46%|████▋     | 14269/30775 [00:03<00:04, 3996.82ex/s]

 48%|████▊     | 14708/30775 [00:03<00:03, 4106.49ex/s]

 49%|████▉     | 15121/30775 [00:03<00:04, 3892.14ex/s]

 50%|█████     | 15514/30775 [00:03<00:04, 3738.52ex/s]

 52%|█████▏    | 15891/30775 [00:04<00:04, 3610.91ex/s]

 53%|█████▎    | 16255/30775 [00:04<00:04, 3319.57ex/s]

 54%|█████▍    | 16612/30775 [00:04<00:04, 3383.79ex/s]

 55%|█████▌    | 17000/30775 [00:04<00:04, 3307.00ex/s]

 57%|█████▋    | 17461/30775 [00:04<00:03, 3658.23ex/s]

 58%|█████▊    | 17872/30775 [00:04<00:03, 3783.54ex/s]

 59%|█████▉    | 18293/30775 [00:04<00:03, 3902.61ex/s]

 61%|██████    | 18754/30775 [00:04<00:02, 4105.55ex/s]

 62%|██████▏   | 19169/30775 [00:04<00:03, 3841.88ex/s]

 64%|██████▎   | 19559/30775 [00:05<00:03, 3481.35ex/s]

 65%|██████▍   | 19990/30775 [00:05<00:02, 3698.65ex/s]

 66%|██████▌   | 20371/30775 [00:05<00:02, 3728.48ex/s]

 68%|██████▊   | 20802/30775 [00:05<00:02, 3890.58ex/s]

 69%|██████▉   | 21197/30775 [00:05<00:02, 3769.84ex/s]

 70%|███████   | 21634/30775 [00:05<00:02, 3938.23ex/s]

 72%|███████▏  | 22032/30775 [00:05<00:02, 3787.39ex/s]

 73%|███████▎  | 22500/30775 [00:05<00:02, 4038.95ex/s]

 75%|███████▍  | 22950/30775 [00:05<00:01, 4169.86ex/s]

 76%|███████▌  | 23371/30775 [00:06<00:01, 4047.46ex/s]

 77%|███████▋  | 23810/30775 [00:06<00:01, 4144.70ex/s]

 79%|███████▊  | 24227/30775 [00:06<00:01, 4041.78ex/s]

 80%|████████  | 24681/30775 [00:06<00:01, 4181.48ex/s]

 82%|████████▏ | 25102/30775 [00:06<00:01, 4043.46ex/s]

 83%|████████▎ | 25509/30775 [00:06<00:01, 4032.27ex/s]

 84%|████████▍ | 25914/30775 [00:06<00:01, 4002.09ex/s]

 86%|████████▌ | 26316/30775 [00:06<00:01, 3811.99ex/s]

 87%|████████▋ | 26700/30775 [00:06<00:01, 3803.22ex/s]

 88%|████████▊ | 27082/30775 [00:07<00:01, 3679.31ex/s]

 89%|████████▉ | 27452/30775 [00:07<00:00, 3528.72ex/s]

 91%|█████████ | 27869/30775 [00:07<00:00, 3707.10ex/s]

 92%|█████████▏| 28243/30775 [00:07<00:00, 3672.57ex/s]

 93%|█████████▎| 28702/30775 [00:07<00:00, 3934.32ex/s]

 95%|█████████▍| 29098/30775 [00:07<00:00, 3707.87ex/s]

 96%|█████████▌| 29513/30775 [00:07<00:00, 3831.23ex/s]

 97%|█████████▋| 29925/30775 [00:07<00:00, 3911.84ex/s]

 99%|█████████▊| 30320/30775 [00:07<00:00, 3698.84ex/s]

100%|█████████▉| 30750/30775 [00:07<00:00, 3866.53ex/s]

100%|██████████| 30775/30775 [00:07<00:00, 3851.67ex/s]




In [16]:
len(train_dataset), len(eval_dataset)

(124700, 30775)

In [17]:
k = random.randrange(len(train_dataset))
print(tokenizer.decode(train_dataset['input_ids'][k]), train_dataset['labels'][k])
k = random.randrange(len(eval_dataset))
print(tokenizer.decode(eval_dataset['input_ids'][k]), eval_dataset['labels'][k])

[CLS] Target 2. 흡수가 잘되는 영국산 순수 비타민C 20 % 함유한 리얼 비타민 앰플 [SEP] 영국산 순수 비타민C 20 % # 제품 전체 # 가격 [SEP] 1


[CLS] Target 손상이 많은 머릿결때문에 고민이었는데 아론샵 실크트리트먼트 쓰고는 머릿결 좋아보인다 소리 듣고 다녀요😚 [SEP] 아론샵 실크트리트먼트 # 제품 전체 # 디자인 [SEP] 1


# Load Trainer

In [18]:
args = TrainingArguments(
    output_dir=run_name,
    run_name=run_name,
    report_to=report_to,

    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,

    optim=optim,

    learning_rate=learning_rate,
    weight_decay=weight_decay,
    adam_epsilon=adam_epsilon,

    lr_scheduler_type=lr_scheduler_type,
    warmup_ratio=warmup_ratio,

    save_total_limit=save_total_limit,

    load_best_model_at_end=load_best_model_at_end,
    metric_for_best_model=metric_for_best_model,
    
    save_strategy=save_strategy,
    evaluation_strategy=evaluation_strategy,

    logging_strategy=logging_strategy,
    logging_first_step=logging_first_step, 
    logging_steps=logging_steps,
    
    fp16=fp16,
)

In [19]:
# es = EarlyStoppingCallback(early_stopping_patience=early_stopping_patience)

In [20]:
trainer = Trainer(
    model,
    args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    data_collator=data_collator,
    # callbacks=[es],
)

# Run Trainer

In [21]:
trainer.train()
wandb.finish()

The following columns in the training set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running training *****


  Num examples = 124700


  Num Epochs = 10


  Instantaneous batch size per device = 32


  Total train batch size (w. parallel, distributed & accumulation) = 128


  Gradient Accumulation steps = 1


  Total optimization steps = 9750


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


You're using a ElectraTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.




Epoch,Training Loss,Validation Loss,Accuracy,F1 True,F1 False,F1 Macro,F1 Micro
1,0.0788,0.078516,0.973777,0.638279,0.986396,0.812337,0.973777
2,0.0575,0.071502,0.97472,0.667805,0.98686,0.827333,0.97472
3,0.0373,0.075339,0.975695,0.688073,0.987355,0.837714,0.975695
4,0.0372,0.088638,0.975727,0.686266,0.987375,0.836821,0.975727
5,0.0255,0.0997,0.974785,0.681445,0.986873,0.834159,0.974785
6,0.0159,0.112664,0.974362,0.673292,0.986658,0.829975,0.974362
7,0.0113,0.137678,0.97472,0.675563,0.986847,0.831205,0.97472
8,0.0045,0.162438,0.974037,0.678471,0.986473,0.832472,0.974037
9,0.0046,0.162588,0.975532,0.685332,0.987271,0.836302,0.975532
10,0.0027,0.180984,0.974427,0.678644,0.986684,0.832664,0.974427


The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 30775


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-975


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-975/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-975/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-975/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-975/special_tokens_map.json




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 30775


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-1950


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-1950/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-1950/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-1950/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-1950/special_tokens_map.json




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 30775


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-2925


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-2925/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-2925/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-2925/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-2925/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-975] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 30775


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-3900


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-3900/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-3900/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-3900/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-3900/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-1950] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 30775


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-4875


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-4875/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-4875/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-4875/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-4875/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-3900] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 30775


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-5850


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-5850/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-5850/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-5850/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-5850/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-4875] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 30775


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-6825


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-6825/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-6825/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-6825/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-6825/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-5850] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 30775


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-7800


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-7800/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-7800/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-7800/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-7800/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-6825] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 30775


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-8775


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-8775/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-8775/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-8775/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-8775/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-7800] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 30775


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-9750


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-9750/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-9750/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-9750/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-9750/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-8775] due to args.save_total_limit




Training completed. Do not forget to share your model on huggingface.co/models =)




Loading best model from monologg_koelectra_base_v3_discriminator_uncleaned_v11/checkpoint-2925 (score: 0.8377140031714283).


Saving model checkpoint to /tmp/tmp1cc3kpm7


Configuration saved in /tmp/tmp1cc3kpm7/config.json


Model weights saved in /tmp/tmp1cc3kpm7/pytorch_model.bin


tokenizer config file saved in /tmp/tmp1cc3kpm7/tokenizer_config.json


Special tokens file saved in /tmp/tmp1cc3kpm7/special_tokens_map.json


0,1
eval/accuracy,▁▄██▅▃▄▂▇▃
eval/f1_false,▁▄██▄▃▄▂▇▃
eval/f1_macro,▁▅██▇▆▆▇█▇
eval/f1_micro,▁▄██▅▃▄▂▇▃
eval/f1_true,▁▅██▇▆▆▇█▇
eval/loss,▁▁▁▂▃▄▅▇▇█
eval/runtime,▂▆▄█▅▇▁█▇▅
eval/samples_per_second,▇▃▅▁▄▂█▁▂▄
eval/steps_per_second,▇▃▅▁▄▂█▁▂▄
train/epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
eval/accuracy,0.97443
eval/f1_false,0.98668
eval/f1_macro,0.83266
eval/f1_micro,0.97443
eval/f1_true,0.67864
eval/loss,0.18098
eval/runtime,140.3657
eval/samples_per_second,219.249
eval/steps_per_second,1.717
train/epoch,10.0


In [22]:
keep = [
    'added_tokens.json',
    'config.json',
    'pytorch_model.bin',
    'special_tokens_map.json',
    'tokenizer.json',
    'tokenizer_config.json',
    'vocab.txt'
]

ckpts = os.listdir(run_name)
for ckpt in ckpts:
    ckpt = os.path.join(run_name, ckpt)
    for item in os.listdir(ckpt):
        if item not in keep:
            os.remove(os.path.join(ckpt, item))

!mv wandb {run_name} {SAVE_PATH}/

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
