# Description


# Modules and Global Variables

In [1]:
from transformers import (
    ElectraTokenizerFast, ElectraForSequenceClassification, 
    DataCollatorWithPadding, 
    TrainingArguments, Trainer,
)

import torch
import wandb

import datasets
import evaluate

import numpy as np
import pandas as pd

import os
import re
import random

import demoji

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
print(f'torch.__version__: {torch.__version__}')
print(f'torch.cuda.is_available(): {torch.cuda.is_available()}')
NGPU = torch.cuda.device_count()
print(f'NGPU: {NGPU}')

torch.__version__: 1.12.1
torch.cuda.is_available(): True
NGPU: 4


In [3]:
### labels

ce_labels = ['True', 'False']
pc_labels = ['positive', 'negative', 'neutral']
pc_binary_labels = ['True', 'False']

labels = ce_labels

label2id = {k: i for i, k in enumerate(labels)}
id2label = {i: k for i, k in enumerate(labels)}
num_labels = len(labels)

print(label2id)
print(id2label)

{'True': 0, 'False': 1}
{0: 'True', 1: 'False'}


In [4]:
### paths and names

PROJECT_NAME = 'aspect_category_detection'
RUN_ID = 'uncleaned_v13'

DATA_V = 'uncleaned_v13'
DATA_T = 'ce' # ce or pc or pc_binary
AUGMENTATION = False
AUG_NAME = 'balanced'

model_checkpoint = 'monologg/koelectra-base-v3-discriminator'

notebook_name = 'trainer_for_acd_binary.ipynb'

### fixed

model_name = re.sub(r'[/-]', r'_', model_checkpoint).lower()
run_name = f'{model_name}_{RUN_ID}'

ROOT_PATH = './'
SAVE_PATH = os.path.join(ROOT_PATH, 'training_results', run_name, 'acd')
NOTEBOOK_PATH = os.path.join('./', notebook_name)

augornot = f'_{AUG_NAME}' if AUGMENTATION is True else ''
TRAIN_DATA_PATH = os.path.join(ROOT_PATH, 'dataset', DATA_V, f'{DATA_T}_train{augornot}.csv')
EVAL_DATA_PATH = os.path.join(ROOT_PATH, 'dataset', DATA_V, f'{DATA_T}_dev.csv')

!mkdir -p {SAVE_PATH}

In [5]:
if os.path.exists(SAVE_PATH):
    print(f'{SAVE_PATH} exists.')
else:
    print(f'{SAVE_PATH} does not exist.')
if os.path.exists(NOTEBOOK_PATH):
    print(f'{NOTEBOOK_PATH} exists.')
else:
    print(f'{NOTEBOOK_PATH} does not exist.')
if os.path.exists(TRAIN_DATA_PATH):
    print(f'{TRAIN_DATA_PATH} exists.')
else:
    print(f'{TRAIN_DATA_PATH} does not exist.')
if os.path.exists(EVAL_DATA_PATH):
    print(f'{EVAL_DATA_PATH} exists.')
else:
    print(f'{EVAL_DATA_PATH} does not exist.')

./training_results/monologg_koelectra_base_v3_discriminator_uncleaned_v13/acd exists.
./trainer_for_acd_binary.ipynb exists.
./dataset/uncleaned_v13/ce_train.csv exists.
./dataset/uncleaned_v13/ce_dev.csv exists.


In [6]:
### rest of training args

report_to="wandb"

fp16 = False

num_train_epochs = 20
batch_size = 32
gradient_accumulation_steps = 1

optim = 'adamw_hf' # 'adamw_torch'

learning_rate = 3e-6 / 8 * batch_size * NGPU # 5e-5
weight_decay = 0.01 # 0
adam_epsilon = 1e-8

lr_scheduler_type = 'linear'
warmup_ratio = 0

save_total_limit = 2

load_best_model_at_end = True
metric_for_best_model ='f1_macro'

save_strategy = "epoch"
evaluation_strategy = "epoch"

logging_strategy = "steps"
logging_first_step = True 
logging_steps = 100

print(learning_rate)

4.8e-05


# WandB Configuration

In [7]:
%env WANDB_PROJECT={PROJECT_NAME}
%env WANDB_NOTEBOOK_NAME={NOTEBOOK_PATH}
%env WANDB_LOG_MODEL=true
%env WANDB_WATCH=all
wandb.login()

env: WANDB_PROJECT=aspect_category_detection
env: WANDB_NOTEBOOK_NAME=./trainer_for_acd_binary.ipynb
env: WANDB_LOG_MODEL=true
env: WANDB_WATCH=all


[34m[1mwandb[0m: Currently logged in as: [33mdotsnangles[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

# Load Model, Tokenizer, and Collator

In [8]:
tokenizer = ElectraTokenizerFast.from_pretrained(model_checkpoint)
model = ElectraForSequenceClassification.from_pretrained(
    model_checkpoint, label2id=label2id, id2label=id2label, num_labels=num_labels
)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Some weights of the model checkpoint at monologg/koelectra-base-v3-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-v3-discriminator and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
train_path = f'./dataset/{DATA_V}/raw_train.csv'
dev_path = f'./dataset/{DATA_V}/raw_dev.csv'
test_path = f'./dataset/{DATA_V}/raw_test.csv'
train = pd.read_csv(train_path)
dev = pd.read_csv(dev_path)
test = pd.read_csv(test_path)

print(len(tokenizer))
tokenizer_train_data = pd.concat([train.sentence_form, dev.sentence_form, test.sentence_form]).to_frame().drop_duplicates()
tokenizer_train_data = tokenizer_train_data.sentence_form.to_list()
new_tokenizer = ElectraTokenizerFast.from_pretrained(model_checkpoint)
new_tokenizer = tokenizer.train_new_from_iterator(tokenizer_train_data, vocab_size=1)
new_tokens = set(list(new_tokenizer.vocab.keys())) - set(tokenizer.vocab.keys())
tokenizer.add_tokens(list(new_tokens))
print(len(new_tokenizer))
print(len(tokenizer))
model.resize_token_embeddings(len(tokenizer))

35000





3060
35212


Embedding(35212, 768)

In [10]:
print(len(new_tokens))
print(new_tokens)

212
{'##🌝', '##🖒', '##읒', '😡', '죱', 'ᴡ', '##🧚', '♪', '👌', '꺠', '쨕', '##쫜', 'ᴏ', '옄', '💬', '##ᴇ', '🐱', '##ɴ', '##🚗', '귯', '쏨', '💝', '😺', 'ɢ', '##띡', 'ᴘ', '곘', '🙃', '닠', '˃', '쫜', '🍼', '💇', '##®', '🌝', '💞', '##ᴡ', 'ෆ', '🚗', '##잍', '##💝', '뿤', '🔸', '##펏', '##👨', '##ᴘ', '챦', '웻', '##ᴏ', '##ˇ', '##젔', '##➰', '㉦', '🤡', '##쵝', '🎀', '##👠', 'ʀ', '🧚', '🖐', '🍎', '##곘', '뜌', 'ㅖ', 'ˇ', '⏰', '##죱', '➕', '##ɢ', '횽', '챳', '##👆', '🕺', 'ꈍ', '##ꈍ', '##💆', '🐄', '🕸', '🤟', '💧', '🥤', '##🕸', '◍', '##🍎', '띡', '똭', '##➕', '##💇', '👨', '##듕', '##🤮', '🕷', '##옄', '##☝', '##닠', 'ɪ', '##ʀ', '̵', '🎂', '👠', '##🤘', 'ʜ', '퐉', '##˚', '##😲', '💄', '##🤡', '##🎵', '♩', '##앝', '##💄', '##◍', '👦', '♬', '˂', 'ᴠ', '📸', '##💞', '##˂', '##💋', '◡', 'ღ', '##̵', '💡', '읒', '##😶', '👆', '##ᴍ', '##촥', '☝', '##ᴜ', '💯', 'ᴗ', '듕', '##❔', '##ᴀ', '##♪', '🤘', '##◡', '잍', '젔', '##🎂', '핡', '🌹', '##츌', '##🌹', '##귯', '😲', '𖤐', 'ᵕ', '˚', '😮', '##ɪ', '😴', '졓', '⁉', '💋', '##♩', '##💧', '##👌', '##쨕', '##㉦', '❔', '🐥', '밪', '##♬', '##😬', '##ᴛ', '͈', 'ᴇ', 'ᴛ'

In [11]:
model.config.label2id, model.config.id2label, model.num_labels

({'True': 0, 'False': 1}, {0: 'True', 1: 'False'}, 2)

# Define Metric

In [12]:
accuracy_metric = evaluate.load('accuracy')
f1_metric = evaluate.load('f1')

In [13]:
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    
    accuracy = accuracy_metric.compute(references=labels, predictions=predictions)['accuracy']
    f1_true, f1_false = tuple(f1_metric.compute(references=labels, predictions=predictions, average=None, labels=[0,1])['f1'])
    f1_macro = f1_metric.compute(references=labels, predictions=predictions, average='macro')['f1']
    f1_micro = f1_metric.compute(references=labels, predictions=predictions, average='micro')['f1']
    
    return {'accuracy': accuracy, 'f1_true': f1_true, 'f1_false': f1_false, 'f1_macro': f1_macro, 'f1_micro': f1_micro}

# Load Data

In [14]:
def preprocess_function(examples):
    return tokenizer(examples["form"], examples["pair"], truncation=True)

In [15]:
train_dataset = pd.read_csv(TRAIN_DATA_PATH)
eval_dataset = pd.read_csv(EVAL_DATA_PATH)
train_dataset = datasets.Dataset.from_pandas(train_dataset) #.shuffle(seed=42)
eval_dataset = datasets.Dataset.from_pandas(eval_dataset) #.shuffle(seed=42)
train_dataset = train_dataset.map(preprocess_function, batched=False)
eval_dataset = eval_dataset.map(preprocess_function, batched=False)

  0%|          | 0/75000 [00:00<?, ?ex/s]

  1%|          | 472/75000 [00:00<00:15, 4710.06ex/s]

  1%|▏         | 944/75000 [00:00<00:15, 4669.01ex/s]

  2%|▏         | 1411/75000 [00:00<00:17, 4187.38ex/s]

  2%|▏         | 1874/75000 [00:00<00:16, 4347.98ex/s]

  3%|▎         | 2314/75000 [00:00<00:16, 4323.98ex/s]

  4%|▎         | 2770/75000 [00:00<00:16, 4400.26ex/s]

  4%|▍         | 3213/75000 [00:00<00:18, 3792.11ex/s]

  5%|▍         | 3728/75000 [00:00<00:17, 4172.89ex/s]

  6%|▌         | 4160/75000 [00:00<00:17, 4074.44ex/s]

  6%|▌         | 4601/75000 [00:01<00:16, 4165.82ex/s]

  7%|▋         | 5026/75000 [00:01<00:16, 4116.45ex/s]

  7%|▋         | 5556/75000 [00:01<00:15, 4456.45ex/s]

  8%|▊         | 6008/75000 [00:01<00:15, 4369.60ex/s]

  9%|▊         | 6520/75000 [00:01<00:14, 4585.34ex/s]

  9%|▉         | 6983/75000 [00:01<00:14, 4581.03ex/s]

 10%|▉         | 7444/75000 [00:01<00:15, 4311.60ex/s]

 11%|█         | 7959/75000 [00:01<00:14, 4546.39ex/s]

 11%|█         | 8419/75000 [00:01<00:14, 4439.47ex/s]

 12%|█▏        | 8946/75000 [00:02<00:14, 4674.85ex/s]

 13%|█▎        | 9418/75000 [00:02<00:14, 4403.93ex/s]

 13%|█▎        | 9864/75000 [00:02<00:15, 4231.67ex/s]

 14%|█▎        | 10292/75000 [00:02<00:15, 4129.71ex/s]

 14%|█▍        | 10853/75000 [00:02<00:14, 4538.76ex/s]

 15%|█▌        | 11313/75000 [00:02<00:14, 4427.55ex/s]

 16%|█▌        | 11760/75000 [00:02<00:14, 4403.57ex/s]

 16%|█▋        | 12203/75000 [00:02<00:14, 4228.82ex/s]

 17%|█▋        | 12629/75000 [00:02<00:14, 4190.64ex/s]

 17%|█▋        | 13050/75000 [00:03<00:15, 3900.68ex/s]

 18%|█▊        | 13489/75000 [00:03<00:15, 4033.30ex/s]

 19%|█▊        | 13931/75000 [00:03<00:14, 4141.61ex/s]

 19%|█▉        | 14349/75000 [00:03<00:15, 3791.09ex/s]

 20%|█▉        | 14833/75000 [00:03<00:14, 4074.35ex/s]

 20%|██        | 15249/75000 [00:03<00:15, 3960.34ex/s]

 21%|██        | 15671/75000 [00:03<00:14, 4029.77ex/s]

 21%|██▏       | 16079/75000 [00:03<00:14, 3991.35ex/s]

 22%|██▏       | 16500/75000 [00:03<00:14, 4052.20ex/s]

 23%|██▎       | 16956/75000 [00:04<00:13, 4197.82ex/s]

 23%|██▎       | 17378/75000 [00:04<00:13, 4196.79ex/s]

 24%|██▍       | 17844/75000 [00:04<00:13, 4331.11ex/s]

 24%|██▍       | 18279/75000 [00:04<00:13, 4279.98ex/s]

 25%|██▍       | 18723/75000 [00:04<00:13, 4326.49ex/s]

 26%|██▌       | 19157/75000 [00:04<00:12, 4300.70ex/s]

 26%|██▌       | 19639/75000 [00:04<00:12, 4452.79ex/s]

 27%|██▋       | 20085/75000 [00:04<00:12, 4354.49ex/s]

 27%|██▋       | 20595/75000 [00:04<00:11, 4571.30ex/s]

 28%|██▊       | 21054/75000 [00:04<00:12, 4269.62ex/s]

 29%|██▊       | 21486/75000 [00:05<00:12, 4224.05ex/s]

 29%|██▉       | 21912/75000 [00:05<00:13, 4038.25ex/s]

 30%|██▉       | 22319/75000 [00:05<00:14, 3721.66ex/s]

 30%|███       | 22737/75000 [00:05<00:13, 3844.02ex/s]

 31%|███       | 23149/75000 [00:05<00:13, 3919.10ex/s]

 32%|███▏      | 23633/75000 [00:05<00:12, 4178.72ex/s]

 32%|███▏      | 24059/75000 [00:05<00:12, 4200.28ex/s]

 33%|███▎      | 24548/75000 [00:05<00:11, 4400.67ex/s]

 33%|███▎      | 25000/75000 [00:05<00:11, 4209.21ex/s]

 34%|███▍      | 25514/75000 [00:06<00:11, 4473.73ex/s]

 35%|███▍      | 26000/75000 [00:06<00:11, 4413.63ex/s]

 35%|███▌      | 26471/75000 [00:06<00:10, 4496.56ex/s]

 36%|███▌      | 26968/75000 [00:06<00:10, 4632.10ex/s]

 37%|███▋      | 27434/75000 [00:06<00:10, 4401.26ex/s]

 37%|███▋      | 27878/75000 [00:06<00:11, 4072.24ex/s]

 38%|███▊      | 28350/75000 [00:06<00:10, 4247.56ex/s]

 39%|███▊      | 28907/75000 [00:06<00:09, 4616.29ex/s]

 39%|███▉      | 29376/75000 [00:06<00:09, 4634.13ex/s]

 40%|███▉      | 29845/75000 [00:07<00:11, 3910.23ex/s]

 40%|████      | 30258/75000 [00:07<00:11, 3930.47ex/s]

 41%|████      | 30752/75000 [00:07<00:10, 4200.34ex/s]

 42%|████▏     | 31187/75000 [00:07<00:10, 4042.27ex/s]

 42%|████▏     | 31690/75000 [00:07<00:10, 4310.40ex/s]

 43%|████▎     | 32131/75000 [00:07<00:11, 3682.69ex/s]

 43%|████▎     | 32612/75000 [00:07<00:10, 3968.90ex/s]

 44%|████▍     | 33030/75000 [00:07<00:10, 3973.60ex/s]

 45%|████▍     | 33442/75000 [00:08<00:16, 2496.32ex/s]

 45%|████▌     | 33911/75000 [00:08<00:14, 2923.86ex/s]

 46%|████▌     | 34280/75000 [00:08<00:13, 3087.66ex/s]

 46%|████▋     | 34793/75000 [00:08<00:11, 3571.66ex/s]

 47%|████▋     | 35238/75000 [00:08<00:10, 3792.80ex/s]

 48%|████▊     | 35680/75000 [00:08<00:09, 3957.21ex/s]

 48%|████▊     | 36108/75000 [00:08<00:09, 3998.30ex/s]

 49%|████▊     | 36554/75000 [00:08<00:09, 4125.19ex/s]

 49%|████▉     | 37000/75000 [00:08<00:09, 4086.54ex/s]

 50%|█████     | 37511/75000 [00:09<00:08, 4363.69ex/s]

 51%|█████     | 37958/75000 [00:09<00:08, 4215.87ex/s]

 51%|█████     | 38388/75000 [00:09<00:08, 4205.69ex/s]

 52%|█████▏    | 38848/75000 [00:09<00:08, 4314.65ex/s]

 52%|█████▏    | 39284/75000 [00:09<00:08, 4087.26ex/s]

 53%|█████▎    | 39768/75000 [00:09<00:08, 4297.30ex/s]

 54%|█████▎    | 40203/75000 [00:09<00:08, 4217.31ex/s]

 54%|█████▍    | 40682/75000 [00:09<00:07, 4380.13ex/s]

 55%|█████▍    | 41134/75000 [00:09<00:07, 4418.06ex/s]

 56%|█████▌    | 41638/75000 [00:10<00:07, 4598.46ex/s]

 56%|█████▌    | 42100/75000 [00:10<00:07, 4523.30ex/s]

 57%|█████▋    | 42601/75000 [00:10<00:06, 4663.14ex/s]

 57%|█████▋    | 43069/75000 [00:10<00:07, 4494.54ex/s]

 58%|█████▊    | 43549/75000 [00:10<00:06, 4579.84ex/s]

 59%|█████▊    | 44009/75000 [00:10<00:07, 4374.58ex/s]

 59%|█████▉    | 44450/75000 [00:10<00:06, 4372.67ex/s]

 60%|█████▉    | 44890/75000 [00:10<00:07, 3855.72ex/s]

 60%|██████    | 45288/75000 [00:10<00:07, 3780.74ex/s]

 61%|██████    | 45809/75000 [00:11<00:07, 4165.54ex/s]

 62%|██████▏   | 46236/75000 [00:11<00:06, 4157.13ex/s]

 62%|██████▏   | 46707/75000 [00:11<00:06, 4311.22ex/s]

 63%|██████▎   | 47170/75000 [00:11<00:06, 4399.30ex/s]

 64%|██████▎   | 47680/75000 [00:11<00:05, 4603.05ex/s]

 64%|██████▍   | 48144/75000 [00:11<00:06, 4473.28ex/s]

 65%|██████▍   | 48631/75000 [00:11<00:05, 4586.63ex/s]

 65%|██████▌   | 49093/75000 [00:11<00:05, 4445.07ex/s]

 66%|██████▌   | 49573/75000 [00:11<00:05, 4546.22ex/s]

 67%|██████▋   | 50030/75000 [00:11<00:05, 4410.64ex/s]

 67%|██████▋   | 50494/75000 [00:12<00:05, 4474.57ex/s]

 68%|██████▊   | 51009/75000 [00:12<00:05, 4669.01ex/s]

 69%|██████▊   | 51501/75000 [00:12<00:04, 4738.73ex/s]

 69%|██████▉   | 52000/75000 [00:12<00:05, 4300.18ex/s]

 70%|██████▉   | 52487/75000 [00:12<00:05, 4454.81ex/s]

 71%|███████   | 52956/75000 [00:12<00:04, 4517.90ex/s]

 71%|███████   | 53414/75000 [00:12<00:04, 4357.28ex/s]

 72%|███████▏  | 53949/75000 [00:12<00:04, 4635.51ex/s]

 73%|███████▎  | 54427/75000 [00:12<00:04, 4675.52ex/s]

 73%|███████▎  | 54950/75000 [00:13<00:04, 4834.83ex/s]

 74%|███████▍  | 55437/75000 [00:13<00:04, 4649.66ex/s]

 75%|███████▍  | 55941/75000 [00:13<00:04, 4760.58ex/s]

 75%|███████▌  | 56420/75000 [00:13<00:03, 4659.24ex/s]

 76%|███████▌  | 56906/75000 [00:13<00:03, 4714.59ex/s]

 77%|███████▋  | 57380/75000 [00:13<00:04, 3798.34ex/s]

 77%|███████▋  | 57947/75000 [00:13<00:03, 4268.21ex/s]

 78%|███████▊  | 58447/75000 [00:13<00:03, 4460.65ex/s]

 79%|███████▊  | 59000/75000 [00:13<00:03, 4612.58ex/s]

 79%|███████▉  | 59519/75000 [00:14<00:03, 4771.29ex/s]

 80%|████████  | 60010/75000 [00:14<00:03, 4182.26ex/s]

 81%|████████  | 60450/75000 [00:14<00:03, 4090.81ex/s]

 81%|████████  | 60874/75000 [00:14<00:03, 4056.17ex/s]

 82%|████████▏ | 61290/75000 [00:14<00:03, 3799.08ex/s]

 82%|████████▏ | 61703/75000 [00:14<00:03, 3884.55ex/s]

 83%|████████▎ | 62099/75000 [00:14<00:03, 3666.56ex/s]

 83%|████████▎ | 62589/75000 [00:14<00:03, 3996.45ex/s]

 84%|████████▍ | 63000/75000 [00:14<00:03, 3917.31ex/s]

 85%|████████▍ | 63398/75000 [00:15<00:02, 3889.25ex/s]

 85%|████████▌ | 63851/75000 [00:15<00:02, 4053.43ex/s]

 86%|████████▌ | 64260/75000 [00:15<00:03, 3322.81ex/s]

 86%|████████▋ | 64753/75000 [00:15<00:02, 3724.32ex/s]

 87%|████████▋ | 65193/75000 [00:15<00:02, 3900.93ex/s]

 88%|████████▊ | 65724/75000 [00:15<00:02, 4285.72ex/s]

 88%|████████▊ | 66198/75000 [00:15<00:01, 4411.45ex/s]

 89%|████████▉ | 66706/75000 [00:15<00:01, 4602.04ex/s]

 90%|████████▉ | 67185/75000 [00:15<00:01, 4654.16ex/s]

 90%|█████████ | 67755/75000 [00:16<00:01, 4958.49ex/s]

 91%|█████████ | 68257/75000 [00:16<00:01, 4839.80ex/s]

 92%|█████████▏| 68756/75000 [00:16<00:01, 4880.65ex/s]

 92%|█████████▏| 69248/75000 [00:16<00:01, 4813.04ex/s]

 93%|█████████▎| 69777/75000 [00:16<00:01, 4950.33ex/s]

 94%|█████████▎| 70275/75000 [00:16<00:00, 4829.13ex/s]

 94%|█████████▍| 70760/75000 [00:16<00:00, 4758.30ex/s]

 95%|█████████▍| 71238/75000 [00:16<00:00, 4477.30ex/s]

 96%|█████████▌| 71695/75000 [00:16<00:00, 4502.61ex/s]

 96%|█████████▌| 72149/75000 [00:17<00:00, 4216.68ex/s]

 97%|█████████▋| 72617/75000 [00:17<00:00, 4342.91ex/s]

 97%|█████████▋| 73056/75000 [00:17<00:00, 3968.56ex/s]

 98%|█████████▊| 73461/75000 [00:17<00:00, 3908.20ex/s]

 99%|█████████▊| 73902/75000 [00:17<00:00, 4044.68ex/s]

 99%|█████████▉| 74312/75000 [00:17<00:00, 3933.04ex/s]

100%|█████████▉| 74839/75000 [00:17<00:00, 4306.52ex/s]

100%|██████████| 75000/75000 [00:17<00:00, 4233.16ex/s]




  0%|          | 0/69825 [00:00<?, ?ex/s]

  1%|          | 497/69825 [00:00<00:13, 4967.26ex/s]

  1%|▏         | 994/69825 [00:00<00:15, 4586.59ex/s]

  2%|▏         | 1455/69825 [00:00<00:16, 4256.75ex/s]

  3%|▎         | 1893/69825 [00:00<00:15, 4298.94ex/s]

  3%|▎         | 2325/69825 [00:00<00:16, 4194.66ex/s]

  4%|▍         | 2771/69825 [00:00<00:15, 4278.24ex/s]

  5%|▍         | 3201/69825 [00:00<00:16, 4111.49ex/s]

  5%|▌         | 3660/69825 [00:00<00:15, 4255.82ex/s]

  6%|▌         | 4088/69825 [00:00<00:16, 4048.06ex/s]

  7%|▋         | 4552/69825 [00:01<00:15, 4217.75ex/s]

  7%|▋         | 4987/69825 [00:01<00:15, 4256.31ex/s]

  8%|▊         | 5415/69825 [00:01<00:16, 4008.37ex/s]

  8%|▊         | 5864/69825 [00:01<00:15, 4142.95ex/s]

  9%|▉         | 6282/69825 [00:01<00:15, 4113.87ex/s]

 10%|▉         | 6738/69825 [00:01<00:14, 4242.50ex/s]

 10%|█         | 7165/69825 [00:01<00:15, 4152.61ex/s]

 11%|█         | 7612/69825 [00:01<00:14, 4243.58ex/s]

 12%|█▏        | 8038/69825 [00:01<00:15, 4074.84ex/s]

 12%|█▏        | 8503/69825 [00:02<00:14, 4237.70ex/s]

 13%|█▎        | 9000/69825 [00:02<00:14, 4328.18ex/s]

 14%|█▎        | 9537/69825 [00:02<00:13, 4626.82ex/s]

 14%|█▍        | 10002/69825 [00:02<00:13, 4486.87ex/s]

 15%|█▍        | 10453/69825 [00:02<00:13, 4445.76ex/s]

 16%|█▌        | 10900/69825 [00:02<00:13, 4306.08ex/s]

 16%|█▌        | 11333/69825 [00:02<00:14, 3972.70ex/s]

 17%|█▋        | 11749/69825 [00:02<00:14, 4022.02ex/s]

 17%|█▋        | 12156/69825 [00:02<00:14, 3939.86ex/s]

 18%|█▊        | 12594/69825 [00:02<00:14, 4062.20ex/s]

 19%|█▊        | 13003/69825 [00:03<00:14, 3814.57ex/s]

 19%|█▉        | 13429/69825 [00:03<00:14, 3936.53ex/s]

 20%|█▉        | 13831/69825 [00:03<00:14, 3958.02ex/s]

 20%|██        | 14230/69825 [00:03<00:14, 3771.91ex/s]

 21%|██        | 14620/69825 [00:03<00:14, 3807.19ex/s]

 21%|██▏       | 15004/69825 [00:03<00:14, 3739.05ex/s]

 22%|██▏       | 15466/69825 [00:03<00:13, 3989.89ex/s]

 23%|██▎       | 15933/69825 [00:03<00:12, 4184.97ex/s]

 23%|██▎       | 16354/69825 [00:03<00:13, 4054.19ex/s]

 24%|██▍       | 16789/69825 [00:04<00:12, 4138.40ex/s]

 25%|██▍       | 17205/69825 [00:04<00:12, 4095.80ex/s]

 25%|██▌       | 17710/69825 [00:04<00:11, 4372.89ex/s]

 26%|██▌       | 18150/69825 [00:04<00:12, 4220.35ex/s]

 27%|██▋       | 18600/69825 [00:04<00:11, 4298.32ex/s]

 27%|██▋       | 19032/69825 [00:04<00:12, 4058.68ex/s]

 28%|██▊       | 19473/69825 [00:04<00:12, 4156.67ex/s]

 29%|██▊       | 19979/69825 [00:04<00:11, 4412.52ex/s]

 29%|██▉       | 20424/69825 [00:04<00:11, 4145.62ex/s]

 30%|██▉       | 20882/69825 [00:05<00:11, 4265.94ex/s]

 31%|███       | 21314/69825 [00:05<00:11, 4055.26ex/s]

 31%|███▏      | 21845/69825 [00:05<00:10, 4404.28ex/s]

 32%|███▏      | 22292/69825 [00:05<00:11, 4275.26ex/s]

 33%|███▎      | 22724/69825 [00:05<00:16, 2885.17ex/s]

 33%|███▎      | 23151/69825 [00:05<00:14, 3180.18ex/s]

 34%|███▍      | 23626/69825 [00:05<00:13, 3546.60ex/s]

 34%|███▍      | 24030/69825 [00:05<00:13, 3438.65ex/s]

 35%|███▍      | 24408/69825 [00:06<00:13, 3256.09ex/s]

 36%|███▌      | 24907/69825 [00:06<00:12, 3687.06ex/s]

 36%|███▌      | 25302/69825 [00:06<00:12, 3670.23ex/s]

 37%|███▋      | 25747/69825 [00:06<00:11, 3878.68ex/s]

 37%|███▋      | 26176/69825 [00:06<00:10, 3991.03ex/s]

 38%|███▊      | 26647/69825 [00:06<00:10, 4191.53ex/s]

 39%|███▉      | 27076/69825 [00:06<00:10, 4099.79ex/s]

 39%|███▉      | 27518/69825 [00:06<00:10, 4190.17ex/s]

 40%|████      | 27969/69825 [00:06<00:09, 4282.68ex/s]

 41%|████      | 28402/69825 [00:07<00:10, 4092.01ex/s]

 41%|████▏     | 28824/69825 [00:07<00:09, 4127.29ex/s]

 42%|████▏     | 29240/69825 [00:07<00:10, 4009.44ex/s]

 42%|████▏     | 29673/69825 [00:07<00:09, 4100.31ex/s]

 43%|████▎     | 30086/69825 [00:07<00:10, 3926.10ex/s]

 44%|████▍     | 30565/69825 [00:07<00:09, 4170.93ex/s]

 44%|████▍     | 31000/69825 [00:07<00:09, 4208.64ex/s]

 45%|████▌     | 31482/69825 [00:07<00:08, 4385.23ex/s]

 46%|████▌     | 31952/69825 [00:07<00:08, 4476.44ex/s]

 46%|████▋     | 32402/69825 [00:07<00:08, 4423.33ex/s]

 47%|████▋     | 32885/69825 [00:08<00:08, 4542.47ex/s]

 48%|████▊     | 33341/69825 [00:08<00:09, 3966.14ex/s]

 48%|████▊     | 33812/69825 [00:08<00:08, 4165.74ex/s]

 49%|████▉     | 34241/69825 [00:08<00:08, 4076.73ex/s]

 50%|████▉     | 34711/69825 [00:08<00:08, 4248.02ex/s]

 50%|█████     | 35143/69825 [00:08<00:08, 4185.11ex/s]

 51%|█████     | 35647/69825 [00:08<00:07, 4427.34ex/s]

 52%|█████▏    | 36095/69825 [00:08<00:07, 4256.40ex/s]

 52%|█████▏    | 36581/69825 [00:08<00:07, 4427.27ex/s]

 53%|█████▎    | 37028/69825 [00:09<00:07, 4223.64ex/s]

 54%|█████▎    | 37498/69825 [00:09<00:07, 4357.38ex/s]

 54%|█████▍    | 37986/69825 [00:09<00:07, 4505.53ex/s]

 55%|█████▌    | 38440/69825 [00:09<00:06, 4505.41ex/s]

 56%|█████▌    | 38990/69825 [00:09<00:06, 4796.40ex/s]

 57%|█████▋    | 39472/69825 [00:09<00:06, 4741.88ex/s]

 57%|█████▋    | 39948/69825 [00:09<00:06, 4734.33ex/s]

 58%|█████▊    | 40423/69825 [00:09<00:06, 4591.93ex/s]

 59%|█████▊    | 40895/69825 [00:09<00:06, 4628.53ex/s]

 59%|█████▉    | 41360/69825 [00:09<00:06, 4302.45ex/s]

 60%|█████▉    | 41796/69825 [00:10<00:07, 3625.89ex/s]

 60%|██████    | 42188/69825 [00:10<00:07, 3698.34ex/s]

 61%|██████    | 42714/69825 [00:10<00:06, 4110.25ex/s]

 62%|██████▏   | 43148/69825 [00:10<00:06, 4172.71ex/s]

 63%|██████▎   | 43666/69825 [00:10<00:05, 4455.85ex/s]

 63%|██████▎   | 44122/69825 [00:10<00:05, 4339.91ex/s]

 64%|██████▍   | 44618/69825 [00:10<00:05, 4513.10ex/s]

 65%|██████▍   | 45076/69825 [00:10<00:05, 4305.84ex/s]

 65%|██████▌   | 45615/69825 [00:10<00:05, 4608.62ex/s]

 66%|██████▌   | 46082/69825 [00:11<00:05, 4433.49ex/s]

 67%|██████▋   | 46556/69825 [00:11<00:05, 4519.25ex/s]

 67%|██████▋   | 47013/69825 [00:11<00:05, 4306.46ex/s]

 68%|██████▊   | 47537/69825 [00:11<00:04, 4567.74ex/s]

 69%|██████▊   | 48000/69825 [00:11<00:04, 4433.10ex/s]

 69%|██████▉   | 48491/69825 [00:11<00:04, 4567.81ex/s]

 70%|███████   | 49000/69825 [00:11<00:04, 4645.10ex/s]

 71%|███████   | 49488/69825 [00:11<00:04, 4711.77ex/s]

 72%|███████▏  | 49992/69825 [00:11<00:04, 4805.50ex/s]

 72%|███████▏  | 50475/69825 [00:12<00:04, 4643.09ex/s]

 73%|███████▎  | 50995/69825 [00:12<00:03, 4801.43ex/s]

 74%|███████▎  | 51478/69825 [00:12<00:04, 4206.20ex/s]

 74%|███████▍  | 52000/69825 [00:12<00:04, 4354.90ex/s]

 75%|███████▌  | 52486/69825 [00:12<00:03, 4489.72ex/s]

 76%|███████▌  | 52944/69825 [00:12<00:03, 4475.31ex/s]

 76%|███████▋  | 53398/69825 [00:12<00:03, 4208.19ex/s]

 77%|███████▋  | 53882/69825 [00:12<00:03, 4381.56ex/s]

 78%|███████▊  | 54327/69825 [00:12<00:03, 4350.49ex/s]

 79%|███████▊  | 54864/69825 [00:13<00:03, 4639.43ex/s]

 79%|███████▉  | 55342/69825 [00:13<00:03, 4679.92ex/s]

 80%|████████  | 55883/69825 [00:13<00:02, 4891.49ex/s]

 81%|████████  | 56376/69825 [00:13<00:02, 4771.02ex/s]

 81%|████████▏ | 56900/69825 [00:13<00:02, 4906.15ex/s]

 82%|████████▏ | 57393/69825 [00:13<00:02, 4663.60ex/s]

 83%|████████▎ | 57866/69825 [00:13<00:02, 4680.77ex/s]

 84%|████████▎ | 58337/69825 [00:13<00:02, 4561.28ex/s]

 84%|████████▍ | 58796/69825 [00:13<00:02, 4357.48ex/s]

 85%|████████▍ | 59235/69825 [00:14<00:02, 4128.66ex/s]

 85%|████████▌ | 59673/69825 [00:14<00:02, 4196.76ex/s]

 86%|████████▌ | 60096/69825 [00:14<00:02, 4093.40ex/s]

 87%|████████▋ | 60508/69825 [00:14<00:02, 3666.27ex/s]

 87%|████████▋ | 60884/69825 [00:14<00:02, 3678.40ex/s]

 88%|████████▊ | 61311/69825 [00:14<00:02, 3838.66ex/s]

 89%|████████▊ | 61825/69825 [00:14<00:01, 4203.07ex/s]

 89%|████████▉ | 62252/69825 [00:14<00:01, 4132.50ex/s]

 90%|████████▉ | 62729/69825 [00:14<00:01, 4314.61ex/s]

 90%|█████████ | 63165/69825 [00:14<00:01, 4184.18ex/s]

 91%|█████████ | 63611/69825 [00:15<00:01, 4261.77ex/s]

 92%|█████████▏| 64058/69825 [00:15<00:01, 4318.61ex/s]

 92%|█████████▏| 64510/69825 [00:15<00:01, 4371.02ex/s]

 93%|█████████▎| 65000/69825 [00:15<00:01, 4318.59ex/s]

 94%|█████████▍| 65504/69825 [00:15<00:00, 4524.59ex/s]

 94%|█████████▍| 65962/69825 [00:15<00:00, 4540.14ex/s]

 95%|█████████▌| 66418/69825 [00:15<00:00, 4252.12ex/s]

 96%|█████████▌| 66848/69825 [00:15<00:00, 4261.66ex/s]

 96%|█████████▋| 67278/69825 [00:15<00:00, 4121.43ex/s]

 97%|█████████▋| 67765/69825 [00:16<00:00, 4333.22ex/s]

 98%|█████████▊| 68202/69825 [00:16<00:00, 4131.48ex/s]

 98%|█████████▊| 68721/69825 [00:16<00:00, 4426.81ex/s]

 99%|█████████▉| 69169/69825 [00:16<00:00, 4379.01ex/s]

100%|█████████▉| 69610/69825 [00:16<00:00, 4374.37ex/s]

100%|██████████| 69825/69825 [00:16<00:00, 4226.93ex/s]




In [16]:
len(train_dataset), len(eval_dataset)

(75000, 69825)

In [17]:
k = random.randrange(len(train_dataset))
print(tokenizer.decode(train_dataset['input_ids'][k]), train_dataset['labels'][k])
k = random.randrange(len(eval_dataset))
print(tokenizer.decode(eval_dataset['input_ids'][k]), eval_dataset['labels'][k])

[CLS] 그리고 인스타에서도 완전 인기몰이중 ~ ~ ~ [SEP] 본품 # 디자인 [SEP] 1


[CLS] 쫀쫀함이 느껴지는 점성있는 제형이라 피부에 닿았을 때 너무 부드럽고 흡수 된 후에는 끈적이지 않고 산뜻하네용 ㅎㅎ [SEP] 본품 # 품질 [SEP] 0


# Load Trainer

In [18]:
args = TrainingArguments(
    output_dir=run_name,
    run_name=run_name,
    report_to=report_to,

    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,

    optim=optim,

    learning_rate=learning_rate,
    weight_decay=weight_decay,
    adam_epsilon=adam_epsilon,

    lr_scheduler_type=lr_scheduler_type,
    warmup_ratio=warmup_ratio,

    save_total_limit=save_total_limit,

    load_best_model_at_end=load_best_model_at_end,
    metric_for_best_model=metric_for_best_model,
    
    save_strategy=save_strategy,
    evaluation_strategy=evaluation_strategy,

    logging_strategy=logging_strategy,
    logging_first_step=logging_first_step, 
    logging_steps=logging_steps,
    
    fp16=fp16,
)

In [19]:
# es = EarlyStoppingCallback(early_stopping_patience=early_stopping_patience)

In [20]:
trainer = Trainer(
    model,
    args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    data_collator=data_collator,
    # callbacks=[es],
)

# Run Trainer

In [21]:
trainer.train()
wandb.finish()

The following columns in the training set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running training *****


  Num examples = 75000


  Num Epochs = 20


  Instantaneous batch size per device = 32


  Total train batch size (w. parallel, distributed & accumulation) = 128


  Gradient Accumulation steps = 1


  Total optimization steps = 11720


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


You're using a ElectraTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.




Epoch,Training Loss,Validation Loss,Accuracy,F1 True,F1 False,F1 Macro,F1 Micro
1,0.0876,0.083426,0.970097,0.648603,0.984384,0.816494,0.970097
2,0.0665,0.072953,0.973434,0.656163,0.986183,0.821173,0.973434
3,0.0485,0.084682,0.973119,0.673508,0.985982,0.829745,0.973119
4,0.0426,0.090242,0.972603,0.679618,0.98569,0.832654,0.972603
5,0.0282,0.095632,0.974021,0.678938,0.986463,0.8327,0.974021
6,0.0223,0.111986,0.973734,0.690412,0.986285,0.838349,0.973734
7,0.0133,0.139149,0.972904,0.685714,0.985842,0.835778,0.972904
8,0.0125,0.148762,0.974021,0.677568,0.986465,0.832017,0.974021
9,0.0084,0.172338,0.971572,0.665205,0.985156,0.82518,0.971572
10,0.0087,0.174359,0.972445,0.677614,0.985608,0.831611,0.972445


The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-586


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-586/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-586/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-586/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-586/special_tokens_map.json




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-1172


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-1172/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-1172/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-1172/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-1172/special_tokens_map.json




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-1758


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-1758/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-1758/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-1758/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-1758/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-586] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-2344


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-2344/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-2344/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-2344/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-2344/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-1172] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-2930


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-2930/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-2930/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-2930/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-2930/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-1758] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-3516


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-3516/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-3516/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-3516/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-3516/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-2344] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-4102


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-4102/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-4102/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-4102/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-4102/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-2930] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-4688


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-4688/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-4688/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-4688/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-4688/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-4102] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-5274


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-5274/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-5274/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-5274/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-5274/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-4688] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-5860


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-5860/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-5860/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-5860/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-5860/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-5274] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-6446


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-6446/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-6446/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-6446/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-6446/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-5860] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-7032


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-7032/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-7032/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-7032/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-7032/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-6446] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-7618


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-7618/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-7618/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-7618/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-7618/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-3516] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-8204


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-8204/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-8204/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-8204/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-8204/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-7032] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-8790


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-8790/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-8790/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-8790/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-8790/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-8204] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-9376


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-9376/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-9376/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-9376/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-9376/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-8790] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-9962


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-9962/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-9962/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-9962/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-9962/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-7618] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-10548


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-10548/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-10548/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-10548/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-10548/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-9376] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-11134


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-11134/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-11134/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-11134/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-11134/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-10548] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-11720


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-11720/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-11720/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-11720/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-11720/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-9962] due to args.save_total_limit




Training completed. Do not forget to share your model on huggingface.co/models =)




Loading best model from monologg_koelectra_base_v3_discriminator_uncleaned_v13/checkpoint-11720 (score: 0.8393530379596958).


Saving model checkpoint to /tmp/tmpn4q2ccvb


Configuration saved in /tmp/tmpn4q2ccvb/config.json


Model weights saved in /tmp/tmpn4q2ccvb/pytorch_model.bin


tokenizer config file saved in /tmp/tmpn4q2ccvb/tokenizer_config.json


Special tokens file saved in /tmp/tmpn4q2ccvb/special_tokens_map.json


0,1
eval/accuracy,▁▆▅▅▇▆▅▇▃▄▅▅▆▆▇█▇███
eval/f1_false,▁▆▅▅▇▆▅▇▃▄▄▅▆▆▇█▇███
eval/f1_macro,▁▂▅▆▆█▇▆▄▆▆▇█▇█▇████
eval/f1_micro,▁▆▅▅▇▆▅▇▃▄▅▅▆▆▇█▇███
eval/f1_true,▁▂▅▆▆█▇▆▄▆▆▇█▇█▇████
eval/loss,▁▁▂▂▂▃▄▄▆▆▅▆▆█▇██▇██
eval/runtime,█▁▁▁▁▁▁▁▂▂▁▁▁▁▁▁▁▂▁▁
eval/samples_per_second,▁█▇█████▇▇███████▇█▇
eval/steps_per_second,▁█▇█████▇▇███████▇█▇
train/epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
eval/accuracy,0.97499
eval/f1_false,0.98697
eval/f1_macro,0.83935
eval/f1_micro,0.97499
eval/f1_true,0.69174
eval/loss,0.22686
eval/runtime,227.5388
eval/samples_per_second,306.871
eval/steps_per_second,2.4
train/epoch,20.0


In [22]:
keep = [
    'added_tokens.json',
    'config.json',
    'pytorch_model.bin',
    'special_tokens_map.json',
    'tokenizer.json',
    'tokenizer_config.json',
    'vocab.txt'
]

ckpts = os.listdir(run_name)
for ckpt in ckpts:
    ckpt = os.path.join(run_name, ckpt)
    for item in os.listdir(ckpt):
        if item not in keep:
            os.remove(os.path.join(ckpt, item))

!mv wandb {run_name} {SAVE_PATH}/

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
