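# Description

Fine-tunes the `snunlp/KR-ELECTRA-discriminator` checkpoint as a sequence classifier over (`form`, `pair`) text pairs with `True`/`False` labels for the `aspect_category_detection` project, using the Hugging Face `Trainer` and logging the run to Weights & Biases. Before training, the tokenizer vocabulary is extended with tokens found in the raw train/dev/test sentences that the pretrained vocabulary lacks.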

# Modules and Global Variables

In [1]:
from transformers import (
    ElectraTokenizerFast, ElectraForSequenceClassification, 
    DataCollatorWithPadding, 
    TrainingArguments, Trainer,
)

import torch
import wandb

import datasets
import evaluate

import numpy as np
import pandas as pd

import os
import re
import random

import demoji

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Count the visible CUDA devices (NGPU also scales the learning rate further down)
# and report the PyTorch build and CUDA availability alongside it.
NGPU = torch.cuda.device_count()
print(
    f'torch.__version__: {torch.__version__}',
    f'torch.cuda.is_available(): {torch.cuda.is_available()}',
    f'NGPU: {NGPU}',
    sep='\n',
)

torch.__version__: 1.12.1
torch.cuda.is_available(): True
NGPU: 4


In [3]:
### labels

# Candidate label sets, named after the ce / pc / pc_binary dataset types;
# `labels` picks the one used by this run.
ce_labels = ['True', 'False']
pc_labels = ['positive', 'negative', 'neutral']
pc_binary_labels = ['True', 'False']

labels = ce_labels

# Class index <-> label name maps; the index is the position of the name in `labels`.
num_labels = len(labels)
id2label = dict(enumerate(labels))
label2id = {name: idx for idx, name in id2label.items()}

print(label2id)
print(id2label)

{'True': 0, 'False': 1}
{0: 'True', 1: 'False'}


In [4]:
### paths and names

# Identifiers for the experiment tracker project and for this particular run.
PROJECT_NAME = 'aspect_category_detection'
RUN_ID = 'uncleaned_v13'

# Dataset selection: version directory, task type, and optional augmented training file.
DATA_V = 'uncleaned_v13'
DATA_T = 'ce' # ce or pc or pc_binary
AUGMENTATION = False
AUG_NAME = 'balanced'

model_checkpoint = 'snunlp/KR-ELECTRA-discriminator'

notebook_name = 'trainer_for_acd_binary.ipynb'

### fixed

# Filesystem-friendly model name: '/' and '-' become '_', everything lower-cased.
model_name = re.sub(r'[/-]', r'_', model_checkpoint).lower()
run_name = f'{model_name}_{RUN_ID}'

ROOT_PATH = './'
SAVE_PATH = os.path.join(ROOT_PATH, 'training_results', run_name, 'acd')
NOTEBOOK_PATH = os.path.join('./', notebook_name)

# The training file gets an '_<AUG_NAME>' suffix only when augmentation is switched on.
augornot = f'_{AUG_NAME}' if AUGMENTATION is True else ''
TRAIN_DATA_PATH = os.path.join(ROOT_PATH, 'dataset', DATA_V, f'{DATA_T}_train{augornot}.csv')
EVAL_DATA_PATH = os.path.join(ROOT_PATH, 'dataset', DATA_V, f'{DATA_T}_dev.csv')

# Create the output directory (and parents) without shelling out, so this works on any
# platform and with paths containing spaces; an already existing directory is fine.
os.makedirs(SAVE_PATH, exist_ok=True)

In [5]:
# Report, one line per path, whether the output directory, this notebook,
# and the train/eval CSV files are present on disk.
for checked_path in (SAVE_PATH, NOTEBOOK_PATH, TRAIN_DATA_PATH, EVAL_DATA_PATH):
    if os.path.exists(checked_path):
        print(f'{checked_path} exists.')
    else:
        print(f'{checked_path} does not exist.')

./training_results/snunlp_kr_electra_discriminator_uncleaned_v13/acd exists.
./trainer_for_acd_binary.ipynb exists.
./dataset/uncleaned_v13/ce_train.csv exists.
./dataset/uncleaned_v13/ce_dev.csv exists.


In [6]:
### rest of training args

# Experiment tracker the Trainer reports to.
report_to="wandb"

# Mixed-precision switch handed to TrainingArguments.
fp16 = False

num_train_epochs = 20
batch_size = 32 # per device: used for both per_device_train_batch_size and per_device_eval_batch_size
gradient_accumulation_steps = 1

optim = 'adamw_hf' # 'adamw_torch'

# Learning rate scaled linearly from a base of 3e-6 per 8 samples by the per-device batch
# size and the number of devices. On a CPU-only machine NGPU is 0, which would zero the
# learning rate and make training a no-op, so at least one device is always counted.
learning_rate = 3e-6 / 8 * batch_size * max(NGPU, 1) # 5e-5
weight_decay = 0.01 # 0
adam_epsilon = 1e-8

lr_scheduler_type = 'linear'
warmup_ratio = 0

# Keep at most this many checkpoints on disk.
save_total_limit = 2

# Reload the checkpoint with the best macro-F1 once training ends.
load_best_model_at_end = True
metric_for_best_model ='f1_macro'

# Save and evaluate on the same schedule (once per epoch).
save_strategy = "epoch"
evaluation_strategy = "epoch"

logging_strategy = "steps"
logging_first_step = True 
logging_steps = 100

print(learning_rate)

4.8e-05


# WandB Configuration

In [7]:
# Configure Weights & Biases through environment variables, then authenticate.
# Project name taken from PROJECT_NAME.
%env WANDB_PROJECT={PROJECT_NAME}
# Path of this notebook, taken from NOTEBOOK_PATH.
%env WANDB_NOTEBOOK_NAME={NOTEBOOK_PATH}
# NOTE(review): presumably tells the Trainer's W&B integration to upload model checkpoints — confirm against the transformers docs.
%env WANDB_LOG_MODEL=true
# NOTE(review): presumably enables logging of both gradients and parameters — confirm against the transformers docs.
%env WANDB_WATCH=all
wandb.login()

env: WANDB_PROJECT=aspect_category_detection
env: WANDB_NOTEBOOK_NAME=./trainer_for_acd_binary.ipynb
env: WANDB_LOG_MODEL=true
env: WANDB_WATCH=all


[34m[1mwandb[0m: Currently logged in as: [33mdotsnangles[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

# Load Model, Tokenizer, and Collator

In [8]:
# Tokenizer first: the padding collator is constructed around it.
tokenizer = ElectraTokenizerFast.from_pretrained(model_checkpoint)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Sequence-classification model loaded from the same checkpoint, configured with the
# active label set and its name <-> id maps.
model = ElectraForSequenceClassification.from_pretrained(
    model_checkpoint,
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id,
)

Some weights of the model checkpoint at snunlp/KR-ELECTRA-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at snunlp/KR-ELECTRA-discriminator and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
# Extend the pretrained vocabulary with tokens that occur in the raw corpus but are
# missing from the checkpoint's tokenizer, then grow the embedding matrix to match.

# Raw splits live next to the task CSVs, so build their paths from ROOT_PATH as well.
train_path = os.path.join(ROOT_PATH, 'dataset', DATA_V, 'raw_train.csv')
dev_path = os.path.join(ROOT_PATH, 'dataset', DATA_V, 'raw_dev.csv')
test_path = os.path.join(ROOT_PATH, 'dataset', DATA_V, 'raw_test.csv')
train = pd.read_csv(train_path)
dev = pd.read_csv(dev_path)
test = pd.read_csv(test_path)

print(len(tokenizer))

# Unique sentences across all three splits form the corpus for the throw-away tokenizer.
tokenizer_train_data = (
    pd.concat([train.sentence_form, dev.sentence_form, test.sentence_form])
    .drop_duplicates()
    .to_list()
)

# NOTE(review): vocab_size=1 appears to be chosen so the retrained tokenizer keeps only its
# base alphabet (the recorded run yields single characters and their '##' forms) — confirm.
new_tokenizer = tokenizer.train_new_from_iterator(tokenizer_train_data, vocab_size=1)
new_tokens = set(new_tokenizer.vocab.keys()) - set(tokenizer.vocab.keys())

# Add in sorted order: iterating a set of strings is not stable across interpreter runs,
# which would give the new tokens different ids from one run to the next.
# NOTE(review): the '##'-prefixed entries are added verbatim; verify that added tokens are
# actually matched as word-piece continuations, otherwise they only enlarge the embeddings.
tokenizer.add_tokens(sorted(new_tokens))
print(len(new_tokenizer))
print(len(tokenizer))
model.resize_token_embeddings(len(tokenizer))

30000





3060
30076


Embedding(30076, 768)

In [10]:
# Show how many tokens were added to the vocabulary, followed by the tokens themselves.
print(len(new_tokens), new_tokens, sep='\n')

76
{'🤡', '##ɢ', 'ᴠ', 'ɪ', '🚗', '⏰', '🍷', '🕺', '##ᵕ', '##ᴜ', '##🤡', '##🕷', '##쨕', 'ɴ', '❔', '읒', 'ʜ', '쨕', '##읒', '##🥤', '##ʀ', 'ᴜ', '##ᴠ', '##ᴛ', '##💆', 'ᴘ', '➕', 'ᴡ', '##쫜', '🐄', '##👠', 'ᵕ', '##㉦', '🕷', '💆', '💡', '##💄', '##ꈍ', '##ˇ', 'ɢ', '##◍', 'ᴛ', '##➕', '💇', 'ғ', '🍼', '##🕸', '##🚗', '쫜', '뿤', '💬', '🥤', 'ʀ', '쓩', '##ɴ', '##ᴘ', '◍', '##ɪ', '챳', '😯', 'ꈍ', 'ᴍ', '💄', '👠', '😺', '##뜌', '##ᴡ', '##💇', '##❔', '##죱', '🕸', '뜌', 'ˇ', '㉦', '죱', '##ᴍ'}


In [11]:
# Display the label configuration stored on the loaded model.
(
    model.config.label2id,
    model.config.id2label,
    model.num_labels,
)

({'True': 0, 'False': 1}, {0: 'True', 1: 'False'}, 2)

# Define Metric

In [12]:
# Metric objects from the `evaluate` library; compute_metrics calls their .compute().
accuracy_metric, f1_metric = (
    evaluate.load(metric_id) for metric_id in ('accuracy', 'f1')
)

In [13]:
def compute_metrics(eval_pred):
    """Compute accuracy and F1 scores for the Trainer's evaluation loop.

    Args:
        eval_pred: A pair ``(scores, references)``. ``scores`` holds one row of per-class
            scores per example (argmax over axis 1 gives the predicted class id);
            ``references`` holds the gold class ids.

    Returns:
        dict: ``accuracy``, one ``f1_<label>`` entry per class in ``id2label`` (label name
        lower-cased, in class-id order), then ``f1_macro`` and ``f1_micro``. For the
        binary True/False label set the keys are ``f1_true`` and ``f1_false``.
    """
    # `references` instead of `labels`, so the notebook-level `labels` list is not shadowed.
    scores, references = eval_pred
    predictions = np.argmax(scores, axis=1)

    def _f1(**options):
        # All F1 variants share the same references/predictions; only the averaging differs.
        return f1_metric.compute(references=references, predictions=predictions, **options)['f1']

    metrics = {
        'accuracy': accuracy_metric.compute(references=references, predictions=predictions)['accuracy'],
    }

    # Per-class F1 for every configured class rather than a hard-coded [0, 1], so the same
    # function also works when a label set with more than two classes is selected.
    class_ids = sorted(id2label)
    per_class_f1 = _f1(average=None, labels=class_ids)
    for class_id, score in zip(class_ids, per_class_f1):
        metrics[f'f1_{id2label[class_id].lower()}'] = score

    metrics['f1_macro'] = _f1(average='macro')
    metrics['f1_micro'] = _f1(average='micro')
    return metrics

# Load Data

In [14]:
def preprocess_function(examples):
    """Tokenize the ``form`` text together with its ``pair`` text as a two-segment input.

    Args:
        examples: Mapping with ``"form"`` and ``"pair"`` entries, as handed over by
            ``Dataset.map``.

    Returns:
        The tokenizer's encoding of the pair, truncated when it exceeds the tokenizer's
        length limit. No padding is applied here.
    """
    first_segment = examples["form"]
    second_segment = examples["pair"]
    return tokenizer(first_segment, second_segment, truncation=True)

In [15]:
# Read both CSV splits with pandas; the frames keep their own names so that
# `train_dataset` / `eval_dataset` only ever refer to Hugging Face datasets.
train_df = pd.read_csv(TRAIN_DATA_PATH)
eval_df = pd.read_csv(EVAL_DATA_PATH)

# Tokenize in batches: preprocess_function forwards the columns straight to the fast
# tokenizer, which accepts lists of texts, so the encodings are the same as with
# per-example mapping but are produced far faster.
train_dataset = datasets.Dataset.from_pandas(train_df).map(preprocess_function, batched=True)
eval_dataset = datasets.Dataset.from_pandas(eval_df).map(preprocess_function, batched=True)

  0%|          | 0/75000 [00:00<?, ?ex/s]

  1%|          | 444/75000 [00:00<00:16, 4433.95ex/s]

  1%|          | 898/75000 [00:00<00:16, 4491.03ex/s]

  2%|▏         | 1348/75000 [00:00<00:18, 4050.66ex/s]

  2%|▏         | 1781/75000 [00:00<00:17, 4153.39ex/s]

  3%|▎         | 2200/75000 [00:00<00:18, 3948.82ex/s]

  4%|▎         | 2650/75000 [00:00<00:17, 4124.08ex/s]

  4%|▍         | 3066/75000 [00:00<00:18, 3907.18ex/s]

  5%|▍         | 3533/75000 [00:00<00:17, 4131.51ex/s]

  5%|▌         | 3999/75000 [00:00<00:16, 4287.54ex/s]

  6%|▌         | 4432/75000 [00:01<00:17, 4070.81ex/s]

  7%|▋         | 4890/75000 [00:01<00:16, 4215.92ex/s]

  7%|▋         | 5326/75000 [00:01<00:16, 4255.87ex/s]

  8%|▊         | 5864/75000 [00:01<00:15, 4583.31ex/s]

  8%|▊         | 6326/75000 [00:01<00:15, 4482.05ex/s]

  9%|▉         | 6798/75000 [00:01<00:14, 4550.02ex/s]

 10%|▉         | 7255/75000 [00:01<00:15, 4298.02ex/s]

 10%|█         | 7739/75000 [00:01<00:15, 4450.72ex/s]

 11%|█         | 8188/75000 [00:01<00:15, 4444.97ex/s]

 12%|█▏        | 8696/75000 [00:02<00:14, 4629.02ex/s]

 12%|█▏        | 9162/75000 [00:02<00:14, 4477.57ex/s]

 13%|█▎        | 9614/75000 [00:02<00:14, 4487.75ex/s]

 13%|█▎        | 10065/75000 [00:02<00:14, 4330.57ex/s]

 14%|█▍        | 10598/75000 [00:02<00:13, 4615.06ex/s]

 15%|█▍        | 11063/75000 [00:02<00:13, 4620.25ex/s]

 15%|█▌        | 11553/75000 [00:02<00:13, 4699.61ex/s]

 16%|█▌        | 12025/75000 [00:02<00:13, 4554.82ex/s]

 17%|█▋        | 12483/75000 [00:02<00:13, 4488.70ex/s]

 17%|█▋        | 12934/75000 [00:02<00:13, 4468.16ex/s]

 18%|█▊        | 13382/75000 [00:03<00:14, 4198.67ex/s]

 18%|█▊        | 13831/75000 [00:03<00:14, 4279.55ex/s]

 19%|█▉        | 14262/75000 [00:03<00:14, 4168.01ex/s]

 20%|█▉        | 14764/75000 [00:03<00:13, 4409.49ex/s]

 20%|██        | 15208/75000 [00:03<00:14, 4175.50ex/s]

 21%|██        | 15630/75000 [00:03<00:14, 4161.91ex/s]

 21%|██▏       | 16049/75000 [00:03<00:14, 4073.76ex/s]

 22%|██▏       | 16470/75000 [00:03<00:14, 4111.90ex/s]

 23%|██▎       | 16922/75000 [00:03<00:13, 4227.89ex/s]

 23%|██▎       | 17351/75000 [00:04<00:13, 4245.48ex/s]

 24%|██▎       | 17802/75000 [00:04<00:13, 4320.67ex/s]

 24%|██▍       | 18235/75000 [00:04<00:14, 3991.53ex/s]

 25%|██▍       | 18660/75000 [00:04<00:13, 4062.95ex/s]

 25%|██▌       | 19071/75000 [00:04<00:13, 4066.96ex/s]

 26%|██▌       | 19520/75000 [00:04<00:13, 4187.00ex/s]

 27%|██▋       | 20000/75000 [00:04<00:12, 4259.93ex/s]

 27%|██▋       | 20493/75000 [00:04<00:12, 4452.90ex/s]

 28%|██▊       | 20975/75000 [00:04<00:11, 4558.84ex/s]

 29%|██▊       | 21433/75000 [00:04<00:12, 4386.98ex/s]

 29%|██▉       | 21874/75000 [00:05<00:12, 4235.90ex/s]

 30%|██▉       | 22300/75000 [00:05<00:13, 3910.56ex/s]

 30%|███       | 22740/75000 [00:05<00:12, 4042.99ex/s]

 31%|███       | 23150/75000 [00:05<00:12, 4025.12ex/s]

 32%|███▏      | 23661/75000 [00:05<00:11, 4331.87ex/s]

 32%|███▏      | 24099/75000 [00:05<00:12, 4188.58ex/s]

 33%|███▎      | 24575/75000 [00:05<00:11, 4348.54ex/s]

 33%|███▎      | 25014/75000 [00:05<00:12, 4131.77ex/s]

 34%|███▍      | 25482/75000 [00:05<00:11, 4283.77ex/s]

 35%|███▍      | 25976/75000 [00:06<00:10, 4470.25ex/s]

 35%|███▌      | 26427/75000 [00:06<00:11, 4210.87ex/s]

 36%|███▌      | 26904/75000 [00:06<00:11, 4364.67ex/s]

 36%|███▋      | 27346/75000 [00:06<00:11, 4293.21ex/s]

 37%|███▋      | 27782/75000 [00:06<00:10, 4311.50ex/s]

 38%|███▊      | 28216/75000 [00:06<00:11, 4183.79ex/s]

 38%|███▊      | 28637/75000 [00:06<00:11, 4009.08ex/s]

 39%|███▉      | 29103/75000 [00:06<00:10, 4190.72ex/s]

 39%|███▉      | 29601/75000 [00:06<00:10, 4413.96ex/s]

 40%|████      | 30046/75000 [00:07<00:10, 4382.93ex/s]

 41%|████      | 30496/75000 [00:07<00:10, 4412.09ex/s]

 41%|████▏     | 30939/75000 [00:07<00:10, 4031.03ex/s]

 42%|████▏     | 31350/75000 [00:07<00:16, 2641.97ex/s]

 42%|████▏     | 31861/75000 [00:07<00:13, 3146.94ex/s]

 43%|████▎     | 32245/75000 [00:07<00:13, 3246.13ex/s]

 44%|████▎     | 32663/75000 [00:07<00:12, 3470.43ex/s]

 44%|████▍     | 33051/75000 [00:07<00:12, 3446.16ex/s]

 45%|████▍     | 33425/75000 [00:08<00:11, 3504.57ex/s]

 45%|████▌     | 33907/75000 [00:08<00:10, 3859.69ex/s]

 46%|████▌     | 34342/75000 [00:08<00:10, 3995.88ex/s]

 46%|████▋     | 34865/75000 [00:08<00:09, 4347.01ex/s]

 47%|████▋     | 35312/75000 [00:08<00:09, 4174.77ex/s]

 48%|████▊     | 35752/75000 [00:08<00:09, 4236.94ex/s]

 48%|████▊     | 36183/75000 [00:08<00:09, 4104.40ex/s]

 49%|████▉     | 36611/75000 [00:08<00:09, 4152.84ex/s]

 49%|████▉     | 37031/75000 [00:08<00:09, 4110.41ex/s]

 50%|█████     | 37553/75000 [00:08<00:08, 4429.85ex/s]

 51%|█████     | 38000/75000 [00:09<00:08, 4415.88ex/s]

 51%|█████▏    | 38492/75000 [00:09<00:08, 4563.13ex/s]

 52%|█████▏    | 38951/75000 [00:09<00:08, 4404.38ex/s]

 53%|█████▎    | 39394/75000 [00:09<00:08, 4096.03ex/s]

 53%|█████▎    | 39852/75000 [00:09<00:08, 4228.62ex/s]

 54%|█████▎    | 40280/75000 [00:09<00:08, 4164.55ex/s]

 54%|█████▍    | 40763/75000 [00:09<00:07, 4352.19ex/s]

 55%|█████▍    | 41239/75000 [00:09<00:07, 4467.63ex/s]

 56%|█████▌    | 41735/75000 [00:09<00:07, 4609.57ex/s]

 56%|█████▋    | 42199/75000 [00:10<00:07, 4491.24ex/s]

 57%|█████▋    | 42671/75000 [00:10<00:07, 4556.02ex/s]

 58%|█████▊    | 43129/75000 [00:10<00:07, 4412.61ex/s]

 58%|█████▊    | 43627/75000 [00:10<00:06, 4574.95ex/s]

 59%|█████▉    | 44087/75000 [00:10<00:06, 4498.82ex/s]

 59%|█████▉    | 44563/75000 [00:10<00:06, 4571.62ex/s]

 60%|██████    | 45022/75000 [00:10<00:07, 4188.00ex/s]

 61%|██████    | 45510/75000 [00:10<00:06, 4378.42ex/s]

 61%|██████▏   | 46000/75000 [00:10<00:06, 4397.17ex/s]

 62%|██████▏   | 46501/75000 [00:11<00:06, 4569.35ex/s]

 63%|██████▎   | 47000/75000 [00:11<00:06, 4509.16ex/s]

 63%|██████▎   | 47455/75000 [00:11<00:06, 4439.70ex/s]

 64%|██████▍   | 47944/75000 [00:11<00:05, 4565.47ex/s]

 65%|██████▍   | 48403/75000 [00:11<00:06, 4411.45ex/s]

 65%|██████▌   | 48882/75000 [00:11<00:05, 4516.19ex/s]

 66%|██████▌   | 49336/75000 [00:11<00:05, 4398.93ex/s]

 66%|██████▋   | 49845/75000 [00:11<00:05, 4595.05ex/s]

 67%|██████▋   | 50307/75000 [00:11<00:05, 4597.10ex/s]

 68%|██████▊   | 50818/75000 [00:11<00:05, 4745.79ex/s]

 68%|██████▊   | 51294/75000 [00:12<00:05, 4679.46ex/s]

 69%|██████▉   | 51784/75000 [00:12<00:04, 4742.39ex/s]

 70%|██████▉   | 52260/75000 [00:12<00:05, 4347.85ex/s]

 70%|███████   | 52712/75000 [00:12<00:05, 4395.55ex/s]

 71%|███████   | 53161/75000 [00:12<00:04, 4419.51ex/s]

 72%|███████▏  | 53678/75000 [00:12<00:04, 4634.54ex/s]

 72%|███████▏  | 54145/75000 [00:12<00:04, 4459.94ex/s]

 73%|███████▎  | 54655/75000 [00:12<00:04, 4641.74ex/s]

 73%|███████▎  | 55123/75000 [00:12<00:04, 4416.02ex/s]

 74%|███████▍  | 55603/75000 [00:13<00:04, 4520.93ex/s]

 75%|███████▍  | 56059/75000 [00:13<00:04, 4471.52ex/s]

 75%|███████▌  | 56570/75000 [00:13<00:03, 4654.52ex/s]

 76%|███████▌  | 57038/75000 [00:13<00:04, 4282.14ex/s]

 77%|███████▋  | 57558/75000 [00:13<00:03, 4535.99ex/s]

 77%|███████▋  | 58019/75000 [00:13<00:03, 4452.58ex/s]

 78%|███████▊  | 58513/75000 [00:13<00:03, 4589.04ex/s]

 79%|███████▊  | 59000/75000 [00:13<00:03, 4601.43ex/s]

 79%|███████▉  | 59498/75000 [00:13<00:03, 4710.24ex/s]

 80%|███████▉  | 59972/75000 [00:13<00:03, 4592.39ex/s]

 81%|████████  | 60434/75000 [00:14<00:03, 4334.16ex/s]

 81%|████████  | 60872/75000 [00:14<00:03, 4244.95ex/s]

 82%|████████▏ | 61300/75000 [00:14<00:03, 3931.68ex/s]

 82%|████████▏ | 61741/75000 [00:14<00:03, 4060.51ex/s]

 83%|████████▎ | 62153/75000 [00:14<00:03, 3968.88ex/s]

 84%|████████▎ | 62647/75000 [00:14<00:02, 4238.37ex/s]

 84%|████████▍ | 63076/75000 [00:14<00:03, 3936.87ex/s]

 85%|████████▍ | 63477/75000 [00:14<00:02, 3919.87ex/s]

 85%|████████▌ | 63910/75000 [00:14<00:02, 4033.61ex/s]

 86%|████████▌ | 64318/75000 [00:15<00:02, 3747.34ex/s]

 86%|████████▋ | 64807/75000 [00:15<00:02, 4058.80ex/s]

 87%|████████▋ | 65245/75000 [00:15<00:02, 4147.02ex/s]

 88%|████████▊ | 65751/75000 [00:15<00:02, 4407.26ex/s]

 88%|████████▊ | 66197/75000 [00:15<00:02, 4395.12ex/s]

 89%|████████▉ | 66680/75000 [00:15<00:01, 4512.49ex/s]

 90%|████████▉ | 67134/75000 [00:15<00:01, 4453.28ex/s]

 90%|█████████ | 67687/75000 [00:15<00:01, 4766.14ex/s]

 91%|█████████ | 68166/75000 [00:15<00:01, 4694.05ex/s]

 92%|█████████▏| 68658/75000 [00:16<00:01, 4759.67ex/s]

 92%|█████████▏| 69136/75000 [00:16<00:01, 4610.40ex/s]

 93%|█████████▎| 69651/75000 [00:16<00:01, 4765.22ex/s]

 94%|█████████▎| 70130/75000 [00:16<00:01, 4558.98ex/s]

 94%|█████████▍| 70589/75000 [00:16<00:00, 4540.75ex/s]

 95%|█████████▍| 71045/75000 [00:16<00:00, 4409.36ex/s]

 95%|█████████▌| 71523/75000 [00:16<00:00, 4513.45ex/s]

 96%|█████████▌| 71977/75000 [00:16<00:00, 4520.30ex/s]

 97%|█████████▋| 72431/75000 [00:16<00:00, 4192.73ex/s]

 97%|█████████▋| 72856/75000 [00:17<00:00, 4065.65ex/s]

 98%|█████████▊| 73267/75000 [00:17<00:00, 3847.61ex/s]

 98%|█████████▊| 73675/75000 [00:17<00:00, 3909.20ex/s]

 99%|█████████▉| 74123/75000 [00:17<00:00, 4068.28ex/s]

100%|█████████▉| 74660/75000 [00:17<00:00, 4439.08ex/s]

100%|██████████| 75000/75000 [00:17<00:00, 4282.03ex/s]




  0%|          | 0/69825 [00:00<?, ?ex/s]

  0%|          | 346/69825 [00:00<00:20, 3456.71ex/s]

  1%|          | 692/69825 [00:00<00:26, 2641.41ex/s]

  2%|▏         | 1095/69825 [00:00<00:21, 3182.75ex/s]

  2%|▏         | 1628/69825 [00:00<00:17, 3950.05ex/s]

  3%|▎         | 2088/69825 [00:00<00:16, 4168.53ex/s]

  4%|▎         | 2613/69825 [00:00<00:14, 4520.14ex/s]

  4%|▍         | 3075/69825 [00:00<00:15, 4309.92ex/s]

  5%|▌         | 3515/69825 [00:00<00:16, 4020.80ex/s]

  6%|▌         | 3926/69825 [00:01<00:17, 3708.63ex/s]

  6%|▌         | 4359/69825 [00:01<00:16, 3875.71ex/s]

  7%|▋         | 4831/69825 [00:01<00:15, 4108.40ex/s]

  8%|▊         | 5250/69825 [00:01<00:16, 3973.91ex/s]

  8%|▊         | 5696/69825 [00:01<00:15, 4110.19ex/s]

  9%|▉         | 6113/69825 [00:01<00:15, 3999.78ex/s]

  9%|▉         | 6596/69825 [00:01<00:14, 4234.12ex/s]

 10%|█         | 7032/69825 [00:01<00:14, 4268.12ex/s]

 11%|█         | 7543/69825 [00:01<00:13, 4512.08ex/s]

 11%|█▏        | 7997/69825 [00:02<00:16, 3787.63ex/s]

 12%|█▏        | 8397/69825 [00:02<00:16, 3821.42ex/s]

 13%|█▎        | 8890/69825 [00:02<00:14, 4118.93ex/s]

 13%|█▎        | 9316/69825 [00:02<00:14, 4143.92ex/s]

 14%|█▍        | 9852/69825 [00:02<00:13, 4485.78ex/s]

 15%|█▍        | 10310/69825 [00:02<00:14, 4124.64ex/s]

 15%|█▌        | 10742/69825 [00:02<00:14, 4175.82ex/s]

 16%|█▌        | 11168/69825 [00:02<00:15, 3908.52ex/s]

 17%|█▋        | 11571/69825 [00:02<00:14, 3940.24ex/s]

 17%|█▋        | 12000/69825 [00:02<00:14, 3887.93ex/s]

 18%|█▊        | 12443/69825 [00:03<00:14, 4037.03ex/s]

 18%|█▊        | 12851/69825 [00:03<00:15, 3787.65ex/s]

 19%|█▉        | 13245/69825 [00:03<00:14, 3829.48ex/s]

 20%|█▉        | 13684/69825 [00:03<00:14, 3987.24ex/s]

 20%|██        | 14087/69825 [00:03<00:14, 3845.47ex/s]

 21%|██        | 14479/69825 [00:03<00:14, 3864.77ex/s]

 21%|██▏       | 14895/69825 [00:03<00:13, 3948.89ex/s]

 22%|██▏       | 15292/69825 [00:03<00:13, 3904.08ex/s]

 23%|██▎       | 15754/69825 [00:03<00:13, 4112.32ex/s]

 23%|██▎       | 16167/69825 [00:04<00:13, 4027.57ex/s]

 24%|██▍       | 16620/69825 [00:04<00:12, 4172.15ex/s]

 24%|██▍       | 17039/69825 [00:04<00:12, 4077.77ex/s]

 25%|██▌       | 17550/69825 [00:04<00:11, 4375.24ex/s]

 26%|██▌       | 18000/69825 [00:04<00:11, 4359.17ex/s]

 27%|██▋       | 18515/69825 [00:04<00:11, 4589.97ex/s]

 27%|██▋       | 18987/69825 [00:04<00:10, 4627.13ex/s]

 28%|██▊       | 19451/69825 [00:04<00:11, 4489.59ex/s]

 29%|██▊       | 19990/69825 [00:04<00:10, 4748.43ex/s]

 29%|██▉       | 20467/69825 [00:05<00:11, 4394.33ex/s]

 30%|██▉       | 20913/69825 [00:05<00:17, 2852.50ex/s]

 31%|███       | 21372/69825 [00:05<00:15, 3207.58ex/s]

 31%|███▏      | 21909/69825 [00:05<00:12, 3696.83ex/s]

 32%|███▏      | 22342/69825 [00:05<00:12, 3768.37ex/s]

 33%|███▎      | 22860/69825 [00:05<00:11, 4126.53ex/s]

 33%|███▎      | 23311/69825 [00:05<00:11, 4148.73ex/s]

 34%|███▍      | 23820/69825 [00:05<00:10, 4402.85ex/s]

 35%|███▍      | 24282/69825 [00:06<00:10, 4317.22ex/s]

 36%|███▌      | 24796/69825 [00:06<00:09, 4546.38ex/s]

 36%|███▌      | 25263/69825 [00:06<00:10, 4411.29ex/s]

 37%|███▋      | 25714/69825 [00:06<00:10, 4406.60ex/s]

 37%|███▋      | 26161/69825 [00:06<00:10, 4163.95ex/s]

 38%|███▊      | 26584/69825 [00:06<00:10, 4130.07ex/s]

 39%|███▊      | 27002/69825 [00:06<00:10, 4139.44ex/s]

 39%|███▉      | 27434/69825 [00:06<00:10, 4190.58ex/s]

 40%|███▉      | 27856/69825 [00:06<00:10, 4143.41ex/s]

 40%|████      | 28273/69825 [00:06<00:10, 4053.75ex/s]

 41%|████      | 28716/69825 [00:07<00:09, 4160.47ex/s]

 42%|████▏     | 29134/69825 [00:07<00:10, 3927.41ex/s]

 42%|████▏     | 29580/69825 [00:07<00:09, 4077.40ex/s]

 43%|████▎     | 30000/69825 [00:07<00:09, 3988.49ex/s]

 44%|████▎     | 30483/69825 [00:07<00:09, 4225.45ex/s]

 44%|████▍     | 31000/69825 [00:07<00:09, 4308.59ex/s]

 45%|████▌     | 31493/69825 [00:07<00:08, 4484.29ex/s]

 46%|████▌     | 31989/69825 [00:07<00:08, 4621.01ex/s]

 46%|████▋     | 32454/69825 [00:07<00:08, 4429.46ex/s]

 47%|████▋     | 32954/69825 [00:08<00:08, 4590.14ex/s]

 48%|████▊     | 33416/69825 [00:08<00:08, 4538.39ex/s]

 49%|████▊     | 33902/69825 [00:08<00:07, 4629.25ex/s]

 49%|████▉     | 34367/69825 [00:08<00:08, 4418.33ex/s]

 50%|████▉     | 34818/69825 [00:08<00:07, 4443.34ex/s]

 51%|█████     | 35265/69825 [00:08<00:08, 4251.90ex/s]

 51%|█████     | 35771/69825 [00:08<00:07, 4480.51ex/s]

 52%|█████▏    | 36223/69825 [00:08<00:07, 4472.89ex/s]

 53%|█████▎    | 36758/69825 [00:08<00:06, 4726.88ex/s]

 53%|█████▎    | 37234/69825 [00:08<00:07, 4644.46ex/s]

 54%|█████▍    | 37775/69825 [00:09<00:06, 4865.66ex/s]

 55%|█████▍    | 38264/69825 [00:09<00:06, 4871.82ex/s]

 56%|█████▌    | 38837/69825 [00:09<00:06, 5124.88ex/s]

 56%|█████▋    | 39351/69825 [00:09<00:05, 5102.86ex/s]

 57%|█████▋    | 39879/69825 [00:09<00:05, 5152.85ex/s]

 58%|█████▊    | 40395/69825 [00:09<00:05, 5001.35ex/s]

 59%|█████▊    | 40897/69825 [00:09<00:05, 4842.33ex/s]

 59%|█████▉    | 41383/69825 [00:09<00:06, 4314.25ex/s]

 60%|█████▉    | 41826/69825 [00:09<00:06, 4180.22ex/s]

 61%|██████    | 42252/69825 [00:10<00:06, 4046.14ex/s]

 61%|██████▏   | 42811/69825 [00:10<00:06, 4461.99ex/s]

 62%|██████▏   | 43266/69825 [00:10<00:06, 4380.34ex/s]

 63%|██████▎   | 43751/69825 [00:10<00:05, 4510.08ex/s]

 63%|██████▎   | 44207/69825 [00:10<00:05, 4328.87ex/s]

 64%|██████▍   | 44650/69825 [00:10<00:05, 4356.56ex/s]

 65%|██████▍   | 45089/69825 [00:10<00:05, 4325.03ex/s]

 65%|██████▌   | 45571/69825 [00:10<00:05, 4465.46ex/s]

 66%|██████▌   | 46020/69825 [00:10<00:05, 4448.29ex/s]

 67%|██████▋   | 46526/69825 [00:11<00:05, 4625.28ex/s]

 67%|██████▋   | 47000/69825 [00:11<00:05, 4506.87ex/s]

 68%|██████▊   | 47547/69825 [00:11<00:04, 4784.80ex/s]

 69%|██████▉   | 48028/69825 [00:11<00:04, 4403.73ex/s]

 69%|██████▉   | 48486/69825 [00:11<00:04, 4452.00ex/s]

 70%|███████   | 49000/69825 [00:11<00:04, 4526.53ex/s]

 71%|███████   | 49492/69825 [00:11<00:04, 4636.11ex/s]

 72%|███████▏  | 49982/69825 [00:11<00:04, 4709.96ex/s]

 72%|███████▏  | 50456/69825 [00:11<00:04, 4532.11ex/s]

 73%|███████▎  | 50987/69825 [00:11<00:03, 4753.14ex/s]

 74%|███████▎  | 51466/69825 [00:12<00:04, 4478.62ex/s]

 74%|███████▍  | 51969/69825 [00:12<00:03, 4631.72ex/s]

 75%|███████▌  | 52437/69825 [00:12<00:03, 4394.28ex/s]

 76%|███████▌  | 52900/69825 [00:12<00:03, 4459.44ex/s]

 76%|███████▋  | 53350/69825 [00:12<00:03, 4229.82ex/s]

 77%|███████▋  | 53839/69825 [00:12<00:03, 4411.39ex/s]

 78%|███████▊  | 54285/69825 [00:12<00:03, 4365.41ex/s]

 78%|███████▊  | 54789/69825 [00:12<00:03, 4556.85ex/s]

 79%|███████▉  | 55267/69825 [00:12<00:03, 4621.32ex/s]

 80%|███████▉  | 55822/69825 [00:13<00:02, 4891.32ex/s]

 81%|████████  | 56314/69825 [00:13<00:03, 4244.18ex/s]

 81%|████████▏ | 56764/69825 [00:13<00:03, 4311.57ex/s]

 82%|████████▏ | 57208/69825 [00:13<00:03, 4161.77ex/s]

 83%|████████▎ | 57659/69825 [00:13<00:02, 4256.80ex/s]

 83%|████████▎ | 58092/69825 [00:13<00:02, 4168.75ex/s]

 84%|████████▍ | 58559/69825 [00:13<00:02, 4309.35ex/s]

 84%|████████▍ | 59000/69825 [00:13<00:02, 4186.58ex/s]

 85%|████████▌ | 59442/69825 [00:13<00:02, 4250.97ex/s]

 86%|████████▌ | 59880/69825 [00:14<00:02, 4287.63ex/s]

 86%|████████▋ | 60311/69825 [00:14<00:02, 4118.81ex/s]

 87%|████████▋ | 60815/69825 [00:14<00:02, 4380.60ex/s]

 88%|████████▊ | 61260/69825 [00:14<00:01, 4399.87ex/s]

 89%|████████▊ | 61809/69825 [00:14<00:01, 4717.79ex/s]

 89%|████████▉ | 62284/69825 [00:14<00:01, 4297.23ex/s]

 90%|████████▉ | 62789/69825 [00:14<00:01, 4503.39ex/s]

 91%|█████████ | 63248/69825 [00:14<00:01, 4086.93ex/s]

 91%|█████████ | 63674/69825 [00:14<00:01, 4131.04ex/s]

 92%|█████████▏| 64096/69825 [00:15<00:01, 3941.18ex/s]

 92%|█████████▏| 64508/69825 [00:15<00:01, 3987.70ex/s]

 93%|█████████▎| 64918/69825 [00:15<00:01, 4018.60ex/s]

 94%|█████████▎| 65324/69825 [00:15<00:01, 3856.60ex/s]

 94%|█████████▍| 65714/69825 [00:15<00:01, 3868.44ex/s]

 95%|█████████▍| 66104/69825 [00:15<00:01, 3687.00ex/s]

 95%|█████████▌| 66550/69825 [00:15<00:00, 3903.21ex/s]

 96%|█████████▌| 66944/69825 [00:15<00:00, 3876.90ex/s]

 96%|█████████▋| 67335/69825 [00:15<00:00, 3722.37ex/s]

 97%|█████████▋| 67710/69825 [00:16<00:00, 3713.96ex/s]

 98%|█████████▊| 68084/69825 [00:16<00:00, 3438.72ex/s]

 98%|█████████▊| 68490/69825 [00:16<00:00, 3609.18ex/s]

 99%|█████████▉| 68997/69825 [00:16<00:00, 4020.62ex/s]

 99%|█████████▉| 69418/69825 [00:16<00:00, 4073.57ex/s]

100%|██████████| 69825/69825 [00:16<00:00, 4227.08ex/s]




In [16]:
# Number of examples in the training and evaluation splits.
tuple(len(split) for split in (train_dataset, eval_dataset))

(75000, 69825)

In [17]:
# Spot-check one random example per split: the decoded model input followed by its label.
# Index the row first (split[k]) so only that row is read; split['input_ids'][k] would
# materialize the entire column just to look at a single element.
for split in (train_dataset, eval_dataset):
    k = random.randrange(len(split))
    example = split[k]
    print(tokenizer.decode(example['input_ids']), example['labels'])

[CLS] 작년부터 유달리 열심히 쓰게 되는.. # 프리메라 # 망고버터컴포팅바디로션 # 망고버터컴포팅바디워시 ‘ - ‘.. [SEP] 패키지 / 구성품 # 일반 [SEP] 1


[CLS] 자연스런 물광효과 💧 [SEP] 본품 # 다양성 [SEP] 1


# Load Trainer

In [18]:
# Collect the hyperparameters from the configuration cells into the Trainer's arguments.
args = TrainingArguments(
    # Run identity: `run_name` doubles as the checkpoint output directory.
    run_name=run_name,
    output_dir=run_name,
    report_to=report_to,

    # Training length, batching and precision.
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    fp16=fp16,

    # Optimizer and learning-rate schedule.
    optim=optim,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    adam_epsilon=adam_epsilon,
    lr_scheduler_type=lr_scheduler_type,
    warmup_ratio=warmup_ratio,

    # Evaluation, checkpointing and best-model selection.
    evaluation_strategy=evaluation_strategy,
    save_strategy=save_strategy,
    save_total_limit=save_total_limit,
    load_best_model_at_end=load_best_model_at_end,
    metric_for_best_model=metric_for_best_model,

    # Logging cadence.
    logging_strategy=logging_strategy,
    logging_first_step=logging_first_step,
    logging_steps=logging_steps,
)

In [19]:
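# Early stopping is disabled for this run. To enable it, import EarlyStoppingCallback from
# transformers, define early_stopping_patience (neither is set up in the cells above),
# and pass callbacks=[es] to the Trainer:
# es = EarlyStoppingCallback(early_stopping_patience=early_stopping_patience)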

In [20]:
# Wire the model, arguments, data and metric function together; every argument is passed
# by keyword so the call does not depend on Trainer's positional parameter order.
trainer = Trainer(
    model=model,
    args=args,
    data_collator=data_collator,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
    # callbacks=[es],
)

# Run Trainer

In [21]:
# Train, and always close the W&B run afterwards. If training raises or the kernel is
# interrupted, the run is still finished (marked as failed through a non-zero exit code)
# instead of being left open for later cells to keep logging into.
try:
    trainer.train()
except BaseException:
    # BaseException so that KeyboardInterrupt from a kernel interrupt is covered too.
    wandb.finish(exit_code=1)
    raise
else:
    wandb.finish()

The following columns in the training set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, form, id. If pair, form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running training *****


  Num examples = 75000


  Num Epochs = 20


  Instantaneous batch size per device = 32


  Total train batch size (w. parallel, distributed & accumulation) = 128


  Gradient Accumulation steps = 1


  Total optimization steps = 11720


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


You're using a ElectraTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.




Epoch,Training Loss,Validation Loss,Accuracy,F1 True,F1 False,F1 Macro,F1 Micro
1,0.1119,0.105317,0.965657,0.565737,0.982122,0.773929,0.965657
2,0.0892,0.082957,0.971958,0.602194,0.985467,0.793831,0.971958
3,0.076,0.093807,0.97087,0.608092,0.984873,0.796483,0.97087
4,0.0615,0.077571,0.973534,0.668461,0.986217,0.827339,0.973534
5,0.0467,0.088471,0.973605,0.661773,0.986267,0.82402,0.973605
6,0.0403,0.09594,0.973333,0.66402,0.986116,0.825068,0.973333
7,0.0287,0.108023,0.973663,0.669898,0.986284,0.828091,0.973663
8,0.024,0.120167,0.973663,0.658115,0.986304,0.822209,0.973663
9,0.0149,0.129781,0.97382,0.675311,0.98636,0.830836,0.97382
10,0.0149,0.15931,0.973806,0.653664,0.986388,0.820026,0.973806


The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, form, id. If pair, form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-586


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-586/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-586/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-586/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-586/special_tokens_map.json




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, form, id. If pair, form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-1172


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-1172/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-1172/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-1172/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-1172/special_tokens_map.json




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, form, id. If pair, form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-1758


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-1758/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-1758/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-1758/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-1758/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-586] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, form, id. If pair, form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-2344


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-2344/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-2344/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-2344/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-2344/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-1172] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, form, id. If pair, form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-2930


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-2930/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-2930/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-2930/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-2930/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-1758] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, form, id. If pair, form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-3516


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-3516/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-3516/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-3516/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-3516/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-2930] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, form, id. If pair, form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-4102


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-4102/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-4102/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-4102/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-4102/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-2344] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, form, id. If pair, form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-4688


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-4688/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-4688/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-4688/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-4688/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-3516] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, form, id. If pair, form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-5274


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-5274/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-5274/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-5274/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-5274/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-4102] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, form, id. If pair, form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-5860


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-5860/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-5860/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-5860/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-5860/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-4688] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, form, id. If pair, form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-6446


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-6446/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-6446/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-6446/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-6446/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-5860] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, form, id. If pair, form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-7032


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-7032/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-7032/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-7032/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-7032/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-6446] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, form, id. If pair, form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-7618


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-7618/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-7618/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-7618/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-7618/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-7032] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, form, id. If pair, form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-8204


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-8204/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-8204/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-8204/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-8204/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-7618] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, form, id. If pair, form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-8790


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-8790/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-8790/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-8790/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-8790/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-8204] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, form, id. If pair, form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-9376


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-9376/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-9376/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-9376/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-9376/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-8790] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, form, id. If pair, form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-9962


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-9962/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-9962/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-9962/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-9962/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-9376] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, form, id. If pair, form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-10548


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-10548/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-10548/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-10548/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-10548/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-9962] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, form, id. If pair, form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-11134


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-11134/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-11134/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-11134/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-11134/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-10548] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, form, id. If pair, form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-11720


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-11720/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-11720/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-11720/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-11720/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-11134] due to args.save_total_limit




Training completed. Do not forget to share your model on huggingface.co/models =)




Loading best model from snunlp_kr_electra_discriminator_uncleaned_v13/checkpoint-5274 (score: 0.8308355397765451).


Saving model checkpoint to /tmp/tmp7kq1_qqa


Configuration saved in /tmp/tmp7kq1_qqa/config.json


Model weights saved in /tmp/tmp7kq1_qqa/pytorch_model.bin


tokenizer config file saved in /tmp/tmp7kq1_qqa/tokenizer_config.json


Special tokens file saved in /tmp/tmp7kq1_qqa/special_tokens_map.json


0,1
eval/accuracy,▁▆▅▇▇▇▇▇██▇▇▇▇█▇████
eval/f1_false,▁▆▅▇▇▇▇▇██▇▇▇▇█▇████
eval/f1_macro,▁▃▄█▇▇█▇█▇▇█▇██▇█▇██
eval/f1_micro,▁▆▅▇▇▇▇▇██▇▇▇▇█▇████
eval/f1_true,▁▃▄█▇▇█▇█▇▇█▇██▇█▇▇▇
eval/loss,▂▁▂▁▂▂▃▃▄▅▅▅▅▆▇▇████
eval/runtime,▅▄▄▅▄▄▆▃▅▇▅▄▅▆█▆▁▆▂▄
eval/samples_per_second,▄▅▅▄▅▅▃▆▄▂▄▅▄▃▁▃█▃▇▅
eval/steps_per_second,▄▅▅▄▅▅▃▆▄▂▄▅▄▃▁▃█▃▆▅
train/epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
eval/accuracy,0.97429
eval/f1_false,0.98663
eval/f1_macro,0.82677
eval/f1_micro,0.97429
eval/f1_true,0.66691
eval/loss,0.21497
eval/runtime,212.2224
eval/samples_per_second,329.018
eval/steps_per_second,2.573
train/epoch,20.0


In [22]:
# File names to preserve inside each checkpoint directory; every other
# regular file found there is deleted by the loop below.
keep = [
    'added_tokens.json',
    'config.json',
    'pytorch_model.bin',
    'special_tokens_map.json',
    'tokenizer.json',
    'tokenizer_config.json',
    'vocab.txt'
]

# Walk every entry of the run output directory (`run_name`) and strip each
# sub-directory down to the files listed in `keep`.
ckpts = os.listdir(run_name)
for ckpt in ckpts:
    ckpt = os.path.join(run_name, ckpt)
    # The run directory may also hold plain files; os.listdir() would raise
    # NotADirectoryError on those, so only descend into directories.
    if not os.path.isdir(ckpt):
        continue
    for item in os.listdir(ckpt):
        if item in keep:
            continue
        item_path = os.path.join(ckpt, item)
        # os.remove() cannot delete directories and would abort the whole
        # cleanup; leave real sub-directories untouched (symlinks are still
        # unlinked, as before).
        if os.path.isdir(item_path) and not os.path.islink(item_path):
            continue
        os.remove(item_path)

# Move the `wandb` directory (presumably W&B's local run logs -- confirm) and
# the run output directory `run_name` into SAVE_PATH. IPython substitutes
# {run_name} and {SAVE_PATH} with the Python values before calling the shell.
# NOTE(review): this shell escape forks the kernel process, which appears to
# be what triggers the huggingface/tokenizers parallelism warning in the
# cell output.
!mv wandb {run_name} {SAVE_PATH}/

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
