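# Description

This notebook fine-tunes `snunlp/KR-ELECTRA-discriminator` as a binary (`True`/`False`) sentence-pair classifier for aspect category detection: each example pairs a sentence (`form`) with an entity#property category (`pair`). The tokenizer is first extended with characters, emojis and placeholder tokens found in the raw data; training runs through the Hugging Face `Trainer` and is logged to Weights & Biases.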

# Modules and Global Variables

In [1]:
from transformers import (
    AutoConfig, AutoTokenizer, AutoModelForSequenceClassification, 
    DefaultDataCollator, DataCollatorWithPadding, 
    TrainingArguments, Trainer,
)

from transformers.optimization import (
    AdamW, get_linear_schedule_with_warmup,
    Adafactor, AdafactorSchedule,
)

import torch
import wandb

import datasets
import evaluate

from sklearn.metrics import accuracy_score, f1_score

import numpy as np
import pandas as pd

import os
import re
import random

import demoji

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Environment report: PyTorch version, CUDA availability and visible GPU count.
NGPU = torch.cuda.device_count()
print(
    f'torch.__version__: {torch.__version__}',
    f'torch.cuda.is_available(): {torch.cuda.is_available()}',
    f'NGPU: {NGPU}',
    sep='\n',
)
# Left disabled: manually wrapping the model when NGPU > 1 via
#     model = torch.nn.DataParallel(model, device_ids=list(range(NGPU)))

torch.__version__: 1.12.1
torch.cuda.is_available(): True
NGPU: 4


In [3]:
### labels

# Candidate label vocabularies, one per task variant.
ce_labels = ['True', 'False']
pc_labels = ['positive', 'negative', 'neutral']
pc_binary_labels = ['True', 'False']

# Label set used by this run.
labels = ce_labels
num_labels = len(labels)

# Class index <-> label name lookups; the index is the position in `labels`.
id2label = dict(enumerate(labels))
label2id = {name: idx for idx, name in id2label.items()}

print(label2id, id2label, sep='\n')

{'True': 0, 'False': 1}
{0: 'True', 1: 'False'}


In [4]:
### paths and names

# Run identity.
PROJECT_NAME = 'aspect_category_detection'
RUN_ID = 'uncleaned_v4'

# Dataset selection: version directory, task variant, optional augmented train file.
DATA_V = 'uncleaned_v4'
DATA_T = 'ce' # ce or pc or pc_binary
AUGMENTATION = False
AUG_NAME = 'balanced'

model_checkpoint = 'snunlp/KR-ELECTRA-discriminator'

notebook_name = 'acd_binary_trainer.ipynb'

### fixed

# Checkpoint id with '/' and '-' replaced by '_' and lower-cased, so it can be
# used as a directory / run name.
model_name = re.sub(r'[/-]', r'_', model_checkpoint).lower()
run_name = f'{model_name}_{RUN_ID}'

ROOT_PATH = './'
SAVE_PATH = os.path.join(ROOT_PATH, 'training_results', run_name, 'acd')
NOTEBOOK_PATH = os.path.join('./', notebook_name)

# The train file name gets an `_<AUG_NAME>` suffix only when AUGMENTATION is True.
augornot = f'_{AUG_NAME}' if AUGMENTATION is True else ''
TRAIN_DATA_PATH = os.path.join(ROOT_PATH, 'dataset', DATA_V, f'{DATA_T}_train{augornot}.csv')
EVAL_DATA_PATH = os.path.join(ROOT_PATH, 'dataset', DATA_V, f'{DATA_T}_dev.csv')

# Pure-Python equivalent of `mkdir -p`: creates missing parents, is a no-op when
# the directory already exists, and does not depend on a POSIX shell or on the
# path being free of spaces/shell metacharacters.
os.makedirs(SAVE_PATH, exist_ok=True)

In [5]:
# Report, for each path this notebook relies on, whether it is present on disk.
# One loop instead of four copy-pasted if/else blocks; the printed lines are
# the same as before.
for path_to_check in (SAVE_PATH, NOTEBOOK_PATH, TRAIN_DATA_PATH, EVAL_DATA_PATH):
    verdict = 'exists' if os.path.exists(path_to_check) else 'does not exist'
    print(f'{path_to_check} {verdict}.')

./training_results/snunlp_kr_electra_discriminator_uncleaned_v4/acd exists.
./acd_binary_trainer.ipynb exists.
./dataset/uncleaned_v4/ce_train.csv exists.
./dataset/uncleaned_v4/ce_dev.csv exists.


In [6]:
### rest of training args

# --- experiment tracking / precision ---
report_to = "wandb"
fp16 = False

# --- schedule length and batch geometry ---
num_train_epochs = 10
batch_size = 25
gradient_accumulation_steps = 1

# --- optimizer ---
optim = 'adamw_torch'  # 'adamw_hf'
learning_rate = 3e-6  # 5e-5
weight_decay = 0.01  # 0
adam_epsilon = 1e-8

# --- learning-rate schedule ---
lr_scheduler_type = 'cosine'
warmup_ratio = 0

# --- evaluation and checkpointing (both once per epoch) ---
evaluation_strategy = "epoch"
save_strategy = "epoch"
save_total_limit = 2
load_best_model_at_end = True
metric_for_best_model = 'eval_loss'

# --- logging cadence ---
logging_strategy = "steps"
logging_steps = 500
logging_first_step = True

# WandB Configuration

In [7]:
# Configure the Weights & Biases integration through environment variables,
# then authenticate. Project and notebook path come from PROJECT_NAME and
# NOTEBOOK_PATH.
%env WANDB_PROJECT={PROJECT_NAME}
%env WANDB_NOTEBOOK_NAME={NOTEBOOK_PATH}
# NOTE(review): the next two are presumably read by the transformers W&B
# callback (model upload / gradient and parameter watching) - confirm against
# its documentation.
%env WANDB_LOG_MODEL=true
%env WANDB_WATCH=all
wandb.login()

env: WANDB_PROJECT=aspect_category_detection
env: WANDB_NOTEBOOK_NAME=./acd_binary_trainer.ipynb
env: WANDB_LOG_MODEL=true
env: WANDB_WATCH=all


[34m[1mwandb[0m: Currently logged in as: [33mdotsnangles[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

# Load Model, Tokenizer, and Collator

In [8]:
# Tokenizer and sequence-classification model are both loaded from
# `model_checkpoint`; the label mappings and class count are passed along to
# the model loader.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint,
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id,
)
# Batch collator built around this tokenizer instance.
data_collator = DataCollatorWithPadding(tokenizer)

Some weights of the model checkpoint at snunlp/KR-ELECTRA-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at snunlp/KR-ELECTRA-discriminator and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
# Raw corpora; only their `sentence_form` column is used below, to discover
# characters and emojis that the pretrained vocabulary lacks.
train_path = os.path.join(ROOT_PATH, 'dataset', DATA_V, 'raw_train.csv')
dev_path = os.path.join(ROOT_PATH, 'dataset', DATA_V, 'raw_dev.csv')
test_path = os.path.join(ROOT_PATH, 'dataset', DATA_V, 'raw_test.csv')
train = pd.read_csv(train_path)
dev = pd.read_csv(dev_path)
test = pd.read_csv(test_path)

### new
entity_property_pair = [
    '본품#가격', '본품#다양성', '본품#디자인', '본품#인지도', '본품#일반', '본품#편의성', '본품#품질',
    '브랜드#가격', '브랜드#디자인', '브랜드#인지도', '브랜드#일반', '브랜드#품질',
    '제품 전체#가격', '제품 전체#다양성', '제품 전체#디자인', '제품 전체#인지도', '제품 전체#일반', '제품 전체#편의성', '제품 전체#품질',
    '패키지/구성품#가격', '패키지/구성품#다양성', '패키지/구성품#디자인', '패키지/구성품#일반', '패키지/구성품#편의성', '패키지/구성품#품질'
]
# Placeholder tokens that should each be kept as a single token.
special_tokens = ['&name&', '&affiliation&', '&social-security-num&', '&tel-num&', '&card-num&', '&bank-account&', '&num&', '&online-account&']
# Not used in this cell; kept because later cells may rely on it.
ep_labels = pd.Series(entity_property_pair, name='sentence_form', copy=True)

# Unique sentences across all three splits, computed once and reused for both
# emoji discovery and tokenizer retraining (duplicates add no new emoji).
tokenizer_train_data = pd.concat(
    [train.sentence_form, dev.sentence_form, test.sentence_form],
    ignore_index=True,
).drop_duplicates().to_list()

# demoji.findall returns a mapping keyed by emoji; sorted for a stable order.
emojis = sorted(demoji.findall(' '.join(tokenizer_train_data)))

tokens2add = special_tokens + emojis

# Fresh copy of the pretrained tokenizer; the new tokens are added to this
# instance. NOTE(review): `data_collator` was built from the earlier tokenizer
# instance - confirm that is acceptable.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
print(len(tokenizer))
# NOTE(review): vocab_size=1 presumably makes the retrained vocabulary collapse
# to little more than the characters seen in the data (3018 entries in the
# recorded run) - confirm.
new_tokenizer = tokenizer.train_new_from_iterator(tokenizer_train_data, vocab_size=1)
new_tokens = set(list(new_tokenizer.vocab.keys()) + tokens2add) - set(tokenizer.vocab.keys())
# Add in sorted order: iterating the set directly would assign token ids in an
# order that can change between kernel sessions, so a tokenizer rebuilt the
# same way elsewhere would not line up with the trained embedding rows.
tokenizer.add_tokens(sorted(new_tokens))
print(len(new_tokenizer))
print(len(tokenizer))
# Grow the embedding matrix so every newly added token id has a row.
model.resize_token_embeddings(len(tokenizer))

30000





3018
30111


Embedding(30111, 768)

In [10]:
# How many tokens were added to the tokenizer, followed by the tokens themselves.
print(len(new_tokens), new_tokens, sep='\n')

111
{'##👠', '♥️', 'ɴ', '##뜌', 'ᴘ', '👠', '##ʀ', '👋🏻', '💆', '🤘🏻', 'ᴛ', '##ᴡ', 'ʀ', '&social-security-num&', '✔️', '🚗', '🙆\u200d♂️', '##ɢ', '🙌🏻', '##💄', 'ғ', '👩\u200d👦', '##◍', '😯', '🙋\u200d♀️', '🤡', '➕', '🍼', '👨\u200d👧', '👏🏻', '💄', '💪🏻', '##💆', '##죱', '&card-num&', '&affiliation&', 'ᴜ', '##ᴜ', '##쫜', '##ˇ', '⁉️', '👌🏻', '◍', '&tel-num&', '##ᴠ', 'ɪ', '##💇', '〰️', '##➕', '☝️', '🙆🏻', '🍷', '💆\u200d♀️', '🥤', '☝🏻', '💆🏻\u200d♀️', '쨕', '😺', '뜌', '⏰', 'ˇ', '뿤', '☺️', 'ɢ', '##ɪ', 'ꈍ', '💇🏼\u200d♀️', '💡', '✌️', '🐄', '👉🏻', '🙏🏻', '‼️', '🧚\u200d♀️', 'ᴡ', '##🥤', '👦🏼', '🏃\u200d♀️', '##ꈍ', '쓩', '쫜', '&online-account&', '##ᴍ', '##ɴ', 'ᵕ', '##ᵕ', '##쨕', '죱', '🕺', '🙋🏻', '🙋🏻\u200d♀️', '💇', '##ᴛ', 'ᴠ', '##🚗', '##읒', '##㉦', '읒', '❤️', '❣️', '&num&', '##ᴘ', '##🤡', '💬', '&bank-account&', '&name&', '✌🏻', '챳', 'ᴍ', '㉦', 'ʜ'}


In [11]:
# Sanity check: the label mappings and class count as stored on the loaded model.
model.config.label2id, model.config.id2label, model.num_labels

({'True': 0, 'False': 1}, {0: 'True', 1: 'False'}, 2)

In [12]:
# Disabled manual check: builds every "<entity#property>#<polarity>" string and
# prints its tokenizer encode/decode round trip and raw token ids.
# entity_property_pair = [
#     '본품#가격', '본품#다양성', '본품#디자인', '본품#인지도', '본품#일반', '본품#편의성', '본품#품질',
#     '브랜드#가격', '브랜드#디자인', '브랜드#인지도', '브랜드#일반', '브랜드#품질',
#     '제품 전체#가격', '제품 전체#다양성', '제품 전체#디자인', '제품 전체#인지도', '제품 전체#일반', '제품 전체#편의성', '제품 전체#품질',
#     '패키지/구성품#가격', '패키지/구성품#다양성', '패키지/구성품#디자인', '패키지/구성품#일반', '패키지/구성품#편의성', '패키지/구성품#품질'
# ]
# polarity_id_to_name = ['positive', 'negative', 'neutral']
# tokenizer_tester = []
# for pair in entity_property_pair:
#     for polarity in polarity_id_to_name:
#         tokenizer_tester.append('#'.join([pair, polarity]))
# for e in tokenizer_tester:
#     print(tokenizer.decode(tokenizer.encode(e)))
# for e in tokenizer_tester:
#     print(tokenizer.encode(e))

# Define Metric

In [13]:
# Metric objects used by `compute_metrics`.
accuracy_metric, f1_metric = (
    evaluate.load(metric_name) for metric_name in ('accuracy', 'f1')
)

In [14]:
def compute_metrics(eval_pred):
    """Compute accuracy and F1 scores for one evaluation pass.

    Args:
        eval_pred: A ``(predictions, labels)`` pair. ``predictions`` is reduced
            with ``argmax`` over axis 1 (assumed to be per-class scores, one
            row per example - TODO confirm); ``labels`` are the reference
            class ids.

    Returns:
        dict with keys ``accuracy``, ``f1_true`` (class id 0), ``f1_false``
        (class id 1), ``f1_macro`` and ``f1_micro``.
    """
    scores, references = eval_pred
    predicted_ids = np.argmax(scores, axis=1)
    shared = {'references': references, 'predictions': predicted_ids}

    accuracy = accuracy_metric.compute(**shared)['accuracy']
    # Per-class F1 in the explicit label order [0, 1] == ['True', 'False'].
    per_class = f1_metric.compute(average=None, labels=[0, 1], **shared)['f1']
    f1_true, f1_false = tuple(per_class)
    f1_macro = f1_metric.compute(average='macro', **shared)['f1']
    f1_micro = f1_metric.compute(average='micro', **shared)['f1']

    return {
        'accuracy': accuracy,
        'f1_true': f1_true,
        'f1_false': f1_false,
        'f1_macro': f1_macro,
        'f1_micro': f1_micro,
    }

# Load Data

In [15]:
def preprocess_function(examples):
    """Tokenize ``examples["form"]`` and ``examples["pair"]`` together as a text pair.

    Truncation is enabled; whatever the tokenizer returns is passed back as is.
    """
    first_segment = examples["form"]
    second_segment = examples["pair"]
    return tokenizer(text=first_segment, text_pair=second_segment, truncation=True)

In [16]:
# CSV -> pandas -> datasets.Dataset -> tokenized one example at a time
# (batched=False) with `preprocess_function`.
# Disabled experiments: merging the dev set into train via pd.concat, and
# `.shuffle(seed=42)` on either split.
train_dataset = (
    datasets.Dataset
    .from_pandas(pd.read_csv(TRAIN_DATA_PATH))
    .map(preprocess_function, batched=False)
)
eval_dataset = (
    datasets.Dataset
    .from_pandas(pd.read_csv(EVAL_DATA_PATH))
    .map(preprocess_function, batched=False)
)

  0%|          | 0/67900 [00:00<?, ?ex/s]

  1%|          | 500/67900 [00:00<00:13, 4996.54ex/s]

  1%|▏         | 1000/67900 [00:00<00:32, 2080.53ex/s]

  2%|▏         | 1451/67900 [00:00<00:24, 2709.41ex/s]

  3%|▎         | 1902/67900 [00:00<00:20, 3194.11ex/s]

  3%|▎         | 2301/67900 [00:00<00:19, 3387.25ex/s]

  4%|▍         | 2746/67900 [00:00<00:17, 3686.37ex/s]

  5%|▍         | 3170/67900 [00:00<00:16, 3841.03ex/s]

  5%|▌         | 3664/67900 [00:01<00:15, 4157.57ex/s]

  6%|▌         | 4104/67900 [00:01<00:15, 4080.84ex/s]

  7%|▋         | 4583/67900 [00:01<00:14, 4284.13ex/s]

  7%|▋         | 5025/67900 [00:01<00:15, 4093.96ex/s]

  8%|▊         | 5510/67900 [00:01<00:14, 4306.42ex/s]

  9%|▉         | 5982/67900 [00:01<00:13, 4424.78ex/s]

  9%|▉         | 6432/67900 [00:01<00:14, 4256.11ex/s]

 10%|█         | 6867/67900 [00:01<00:14, 4280.21ex/s]

 11%|█         | 7300/67900 [00:01<00:16, 3624.89ex/s]

 11%|█▏        | 7738/67900 [00:02<00:15, 3818.62ex/s]

 12%|█▏        | 8137/67900 [00:02<00:15, 3793.01ex/s]

 13%|█▎        | 8600/67900 [00:02<00:14, 4019.85ex/s]

 13%|█▎        | 9013/67900 [00:02<00:14, 4037.32ex/s]

 14%|█▍        | 9426/67900 [00:02<00:14, 4063.22ex/s]

 14%|█▍        | 9841/67900 [00:02<00:14, 4087.82ex/s]

 15%|█▌        | 10254/67900 [00:02<00:14, 3881.79ex/s]

 16%|█▌        | 10677/67900 [00:02<00:14, 3979.92ex/s]

 16%|█▋        | 11089/67900 [00:02<00:14, 4019.10ex/s]

 17%|█▋        | 11526/67900 [00:02<00:13, 4121.12ex/s]

 18%|█▊        | 12000/67900 [00:03<00:13, 4093.01ex/s]

 18%|█▊        | 12412/67900 [00:03<00:13, 4084.28ex/s]

 19%|█▉        | 12831/67900 [00:03<00:13, 4114.21ex/s]

 20%|█▉        | 13244/67900 [00:03<00:13, 3943.62ex/s]

 20%|██        | 13641/67900 [00:03<00:13, 3929.98ex/s]

 21%|██        | 14036/67900 [00:03<00:13, 3909.85ex/s]

 21%|██▏       | 14512/67900 [00:03<00:12, 4156.29ex/s]

 22%|██▏       | 14929/67900 [00:03<00:12, 4078.43ex/s]

 23%|██▎       | 15338/67900 [00:03<00:13, 3954.63ex/s]

 23%|██▎       | 15761/67900 [00:04<00:12, 4033.43ex/s]

 24%|██▍       | 16166/67900 [00:04<00:13, 3750.00ex/s]

 24%|██▍       | 16577/67900 [00:04<00:13, 3849.32ex/s]

 25%|██▌       | 17032/67900 [00:04<00:12, 4048.30ex/s]

 26%|██▌       | 17534/67900 [00:04<00:11, 4328.77ex/s]

 27%|██▋       | 18000/67900 [00:04<00:11, 4279.71ex/s]

 27%|██▋       | 18476/67900 [00:04<00:11, 4412.11ex/s]

 28%|██▊       | 18920/67900 [00:04<00:11, 4331.44ex/s]

 29%|██▊       | 19356/67900 [00:04<00:11, 4177.74ex/s]

 29%|██▉       | 19822/67900 [00:04<00:11, 4312.51ex/s]

 30%|██▉       | 20256/67900 [00:05<00:11, 4245.52ex/s]

 31%|███       | 20756/67900 [00:05<00:10, 4459.42ex/s]

 31%|███       | 21204/67900 [00:05<00:11, 4234.09ex/s]

 32%|███▏      | 21631/67900 [00:05<00:11, 4158.92ex/s]

 32%|███▏      | 22050/67900 [00:05<00:11, 4090.83ex/s]

 33%|███▎      | 22497/67900 [00:05<00:10, 4197.76ex/s]

 34%|███▍      | 22919/67900 [00:05<00:10, 4192.38ex/s]

 34%|███▍      | 23340/67900 [00:05<00:10, 4079.42ex/s]

 35%|███▌      | 23830/67900 [00:05<00:10, 4315.16ex/s]

 36%|███▌      | 24264/67900 [00:06<00:10, 4076.84ex/s]

 36%|███▋      | 24705/67900 [00:06<00:10, 4170.12ex/s]

 37%|███▋      | 25126/67900 [00:06<00:10, 4045.75ex/s]

 38%|███▊      | 25577/67900 [00:06<00:10, 4175.56ex/s]

 38%|███▊      | 26000/67900 [00:06<00:10, 4043.96ex/s]

 39%|███▉      | 26509/67900 [00:06<00:09, 4338.93ex/s]

 40%|███▉      | 26963/67900 [00:06<00:09, 4396.55ex/s]

 40%|████      | 27406/67900 [00:06<00:09, 4352.51ex/s]

 41%|████      | 27923/67900 [00:06<00:08, 4589.10ex/s]

 42%|████▏     | 28384/67900 [00:07<00:09, 4378.63ex/s]

 42%|████▏     | 28839/67900 [00:07<00:08, 4426.99ex/s]

 43%|████▎     | 29285/67900 [00:07<00:09, 4282.91ex/s]

 44%|████▍     | 29716/67900 [00:07<00:09, 4187.28ex/s]

 44%|████▍     | 30137/67900 [00:07<00:09, 4012.21ex/s]

 45%|████▌     | 30557/67900 [00:07<00:09, 4064.49ex/s]

 46%|████▌     | 31000/67900 [00:07<00:09, 4062.29ex/s]

 46%|████▋     | 31440/67900 [00:07<00:08, 4156.60ex/s]

 47%|████▋     | 31862/67900 [00:07<00:08, 4172.00ex/s]

 48%|████▊     | 32309/67900 [00:07<00:08, 4257.01ex/s]

 48%|████▊     | 32856/67900 [00:08<00:07, 4611.77ex/s]

 49%|████▉     | 33349/67900 [00:08<00:07, 4704.66ex/s]

 50%|████▉     | 33891/67900 [00:08<00:06, 4914.55ex/s]

 51%|█████     | 34384/67900 [00:08<00:06, 4837.13ex/s]

 51%|█████▏    | 34869/67900 [00:08<00:06, 4826.11ex/s]

 52%|█████▏    | 35353/67900 [00:08<00:06, 4769.81ex/s]

 53%|█████▎    | 35912/67900 [00:08<00:06, 5010.89ex/s]

 54%|█████▎    | 36414/67900 [00:08<00:07, 4349.13ex/s]

 54%|█████▍    | 36918/67900 [00:08<00:06, 4532.03ex/s]

 55%|█████▌    | 37385/67900 [00:09<00:06, 4535.43ex/s]

 56%|█████▌    | 37887/67900 [00:09<00:06, 4670.16ex/s]

 56%|█████▋    | 38362/67900 [00:09<00:06, 4642.53ex/s]

 57%|█████▋    | 38892/67900 [00:09<00:06, 4831.12ex/s]

 58%|█████▊    | 39383/67900 [00:09<00:05, 4852.94ex/s]

 59%|█████▉    | 39900/67900 [00:09<00:05, 4943.33ex/s]

 59%|█████▉    | 40397/67900 [00:09<00:05, 4684.94ex/s]

 60%|██████    | 40985/67900 [00:09<00:05, 5024.36ex/s]

 61%|██████    | 41493/67900 [00:09<00:05, 4741.57ex/s]

 62%|██████▏   | 41974/67900 [00:09<00:05, 4749.53ex/s]

 63%|██████▎   | 42454/67900 [00:10<00:05, 4612.26ex/s]

 63%|██████▎   | 42921/67900 [00:10<00:05, 4626.58ex/s]

 64%|██████▍   | 43387/67900 [00:10<00:05, 4439.23ex/s]

 65%|██████▍   | 43852/67900 [00:10<00:05, 4494.63ex/s]

 65%|██████▌   | 44304/67900 [00:10<00:05, 4382.02ex/s]

 66%|██████▌   | 44811/67900 [00:10<00:05, 4577.79ex/s]

 67%|██████▋   | 45276/67900 [00:10<00:04, 4596.76ex/s]

 68%|██████▊   | 45877/67900 [00:10<00:04, 5008.36ex/s]

 68%|██████▊   | 46380/67900 [00:10<00:04, 4934.85ex/s]

 69%|██████▉   | 46876/67900 [00:11<00:04, 4842.32ex/s]

 70%|██████▉   | 47362/67900 [00:11<00:04, 4347.23ex/s]

 70%|███████   | 47842/67900 [00:11<00:04, 4467.49ex/s]

 71%|███████   | 48300/67900 [00:11<00:04, 4497.36ex/s]

 72%|███████▏  | 48827/67900 [00:11<00:04, 4717.55ex/s]

 73%|███████▎  | 49304/67900 [00:11<00:03, 4651.66ex/s]

 73%|███████▎  | 49833/67900 [00:11<00:03, 4834.99ex/s]

 74%|███████▍  | 50320/67900 [00:11<00:03, 4687.81ex/s]

 75%|███████▍  | 50792/67900 [00:11<00:03, 4672.13ex/s]

 75%|███████▌  | 51262/67900 [00:11<00:03, 4562.48ex/s]

 76%|███████▌  | 51744/67900 [00:12<00:03, 4635.87ex/s]

 77%|███████▋  | 52210/67900 [00:12<00:03, 4360.40ex/s]

 78%|███████▊  | 52735/67900 [00:12<00:03, 4608.10ex/s]

 78%|███████▊  | 53201/67900 [00:12<00:03, 4256.33ex/s]

 79%|███████▉  | 53702/67900 [00:12<00:03, 4460.82ex/s]

 80%|███████▉  | 54156/67900 [00:12<00:03, 4326.14ex/s]

 80%|████████  | 54649/67900 [00:12<00:02, 4494.44ex/s]

 81%|████████  | 55104/67900 [00:12<00:02, 4371.67ex/s]

 82%|████████▏ | 55545/67900 [00:12<00:02, 4362.61ex/s]

 82%|████████▏ | 56000/67900 [00:13<00:02, 4276.09ex/s]

 83%|████████▎ | 56430/67900 [00:13<00:02, 4223.50ex/s]

 84%|████████▍ | 56871/67900 [00:13<00:02, 4274.30ex/s]

 84%|████████▍ | 57300/67900 [00:13<00:02, 3977.11ex/s]

 85%|████████▌ | 57733/67900 [00:13<00:02, 4072.65ex/s]

 86%|████████▌ | 58145/67900 [00:13<00:02, 3970.91ex/s]

 86%|████████▋ | 58602/67900 [00:13<00:02, 4139.28ex/s]

 87%|████████▋ | 59019/67900 [00:13<00:02, 4134.64ex/s]

 88%|████████▊ | 59512/67900 [00:13<00:01, 4363.54ex/s]

 88%|████████▊ | 60000/67900 [00:14<00:01, 4395.49ex/s]

 89%|████████▉ | 60522/67900 [00:14<00:01, 4633.70ex/s]

 90%|████████▉ | 61000/67900 [00:14<00:01, 4571.17ex/s]

 91%|█████████ | 61577/67900 [00:14<00:01, 4917.49ex/s]

 91%|█████████▏| 62071/67900 [00:14<00:01, 4806.39ex/s]

 92%|█████████▏| 62554/67900 [00:14<00:01, 4750.39ex/s]

 93%|█████████▎| 63031/67900 [00:14<00:01, 4598.21ex/s]

 94%|█████████▎| 63536/67900 [00:14<00:00, 4726.00ex/s]

 94%|█████████▍| 64011/67900 [00:14<00:00, 4632.58ex/s]

 95%|█████████▌| 64524/67900 [00:14<00:00, 4775.70ex/s]

 96%|█████████▌| 65004/67900 [00:15<00:00, 4662.65ex/s]

 96%|█████████▋| 65472/67900 [00:15<00:00, 4666.06ex/s]

 97%|█████████▋| 65940/67900 [00:15<00:00, 4531.15ex/s]

 98%|█████████▊| 66395/67900 [00:15<00:00, 4221.72ex/s]

 98%|█████████▊| 66822/67900 [00:15<00:00, 4172.30ex/s]

 99%|█████████▉| 67243/67900 [00:15<00:00, 4044.43ex/s]

100%|█████████▉| 67777/67900 [00:15<00:00, 4405.40ex/s]

100%|██████████| 67900/67900 [00:15<00:00, 4308.22ex/s]




  0%|          | 0/65675 [00:00<?, ?ex/s]

  0%|          | 120/65675 [00:00<01:37, 671.15ex/s]

  1%|          | 665/65675 [00:00<00:23, 2790.79ex/s]

  2%|▏         | 1074/65675 [00:00<00:19, 3294.30ex/s]

  2%|▏         | 1496/65675 [00:00<00:17, 3624.11ex/s]

  3%|▎         | 1936/65675 [00:00<00:16, 3883.10ex/s]

  4%|▎         | 2352/65675 [00:00<00:15, 3971.36ex/s]

  4%|▍         | 2888/65675 [00:00<00:14, 4411.06ex/s]

  5%|▌         | 3382/65675 [00:00<00:13, 4575.03ex/s]

  6%|▌         | 3896/65675 [00:00<00:13, 4747.07ex/s]

  7%|▋         | 4377/65675 [00:01<00:13, 4676.63ex/s]

  7%|▋         | 4885/65675 [00:01<00:12, 4796.41ex/s]

  8%|▊         | 5368/65675 [00:01<00:12, 4750.98ex/s]

  9%|▉         | 5917/65675 [00:01<00:12, 4970.08ex/s]

 10%|▉         | 6416/65675 [00:01<00:12, 4918.32ex/s]

 11%|█         | 6950/65675 [00:01<00:11, 5042.24ex/s]

 11%|█▏        | 7456/65675 [00:01<00:12, 4832.03ex/s]

 12%|█▏        | 7953/65675 [00:01<00:11, 4870.65ex/s]

 13%|█▎        | 8442/65675 [00:01<00:12, 4762.93ex/s]

 14%|█▎        | 8953/65675 [00:02<00:11, 4861.72ex/s]

 14%|█▍        | 9441/65675 [00:02<00:11, 4718.28ex/s]

 15%|█▌        | 9915/65675 [00:02<00:12, 4488.59ex/s]

 16%|█▌        | 10367/65675 [00:02<00:13, 4124.42ex/s]

 16%|█▋        | 10796/65675 [00:02<00:13, 4167.01ex/s]

 17%|█▋        | 11218/65675 [00:02<00:13, 3991.33ex/s]

 18%|█▊        | 11681/65675 [00:02<00:12, 4161.67ex/s]

 18%|█▊        | 12102/65675 [00:02<00:12, 4129.89ex/s]

 19%|█▉        | 12518/65675 [00:02<00:12, 4105.51ex/s]

 20%|█▉        | 12940/65675 [00:03<00:12, 4136.34ex/s]

 20%|██        | 13356/65675 [00:03<00:13, 3892.29ex/s]

 21%|██        | 13795/65675 [00:03<00:12, 4029.87ex/s]

 22%|██▏       | 14202/65675 [00:03<00:12, 3975.84ex/s]

 22%|██▏       | 14679/65675 [00:03<00:12, 4201.66ex/s]

 23%|██▎       | 15102/65675 [00:03<00:12, 4121.73ex/s]

 24%|██▎       | 15575/65675 [00:03<00:11, 4296.99ex/s]

 24%|██▍       | 16007/65675 [00:03<00:11, 4272.06ex/s]

 25%|██▌       | 16495/65675 [00:03<00:11, 4448.30ex/s]

 26%|██▌       | 16948/65675 [00:03<00:10, 4471.68ex/s]

 26%|██▋       | 17397/65675 [00:04<00:11, 4146.10ex/s]

 27%|██▋       | 17870/65675 [00:04<00:11, 4310.11ex/s]

 28%|██▊       | 18329/65675 [00:04<00:10, 4387.61ex/s]

 29%|██▊       | 18772/65675 [00:04<00:10, 4377.25ex/s]

 29%|██▉       | 19213/65675 [00:04<00:11, 4116.95ex/s]

 30%|███       | 19716/65675 [00:04<00:10, 4373.30ex/s]

 31%|███       | 20159/65675 [00:04<00:10, 4300.94ex/s]

 31%|███▏      | 20599/65675 [00:04<00:10, 4328.51ex/s]

 32%|███▏      | 21035/65675 [00:04<00:10, 4318.09ex/s]

 33%|███▎      | 21519/65675 [00:05<00:09, 4470.39ex/s]

 33%|███▎      | 21972/65675 [00:05<00:09, 4487.72ex/s]

 34%|███▍      | 22422/65675 [00:05<00:10, 4182.61ex/s]

 35%|███▍      | 22895/65675 [00:05<00:09, 4334.75ex/s]

 36%|███▌      | 23333/65675 [00:05<00:10, 4068.43ex/s]

 36%|███▌      | 23753/65675 [00:05<00:10, 4103.96ex/s]

 37%|███▋      | 24168/65675 [00:05<00:10, 4006.04ex/s]

 37%|███▋      | 24572/65675 [00:05<00:10, 3990.63ex/s]

 38%|███▊      | 24979/65675 [00:05<00:10, 4011.99ex/s]

 39%|███▊      | 25382/65675 [00:05<00:10, 3713.60ex/s]

 39%|███▉      | 25776/65675 [00:06<00:10, 3775.99ex/s]

 40%|███▉      | 26158/65675 [00:06<00:11, 3506.19ex/s]

 40%|████      | 26574/65675 [00:06<00:10, 3683.39ex/s]

 41%|████      | 26963/65675 [00:06<00:10, 3738.23ex/s]

 42%|████▏     | 27342/65675 [00:06<00:10, 3570.50ex/s]

 42%|████▏     | 27751/65675 [00:06<00:10, 3713.73ex/s]

 43%|████▎     | 28127/65675 [00:06<00:10, 3589.58ex/s]

 44%|████▎     | 28582/65675 [00:06<00:09, 3859.77ex/s]

 44%|████▍     | 29009/65675 [00:06<00:09, 3976.18ex/s]

 45%|████▍     | 29496/65675 [00:07<00:08, 4235.19ex/s]

 46%|████▌     | 29941/65675 [00:07<00:08, 4295.88ex/s]

 46%|████▌     | 30373/65675 [00:07<00:08, 4091.61ex/s]

 47%|████▋     | 30830/65675 [00:07<00:08, 4226.78ex/s]

 48%|████▊     | 31256/65675 [00:07<00:08, 4017.35ex/s]

 48%|████▊     | 31700/65675 [00:07<00:08, 4134.36ex/s]

 49%|████▉     | 32117/65675 [00:07<00:08, 4117.73ex/s]

 50%|████▉     | 32577/65675 [00:07<00:07, 4256.54ex/s]

 50%|█████     | 33005/65675 [00:07<00:08, 4040.43ex/s]

 51%|█████     | 33468/65675 [00:08<00:07, 4207.22ex/s]

 52%|█████▏    | 33938/65675 [00:08<00:07, 4348.08ex/s]

 52%|█████▏    | 34376/65675 [00:08<00:07, 4203.54ex/s]

 53%|█████▎    | 34841/65675 [00:08<00:07, 4329.86ex/s]

 54%|█████▎    | 35277/65675 [00:08<00:07, 4077.61ex/s]

 54%|█████▍    | 35772/65675 [00:08<00:06, 4322.10ex/s]

 55%|█████▌    | 36209/65675 [00:08<00:07, 3936.42ex/s]

 56%|█████▌    | 36681/65675 [00:08<00:06, 4145.86ex/s]

 57%|█████▋    | 37107/65675 [00:08<00:06, 4176.90ex/s]

 57%|█████▋    | 37656/65675 [00:08<00:06, 4549.13ex/s]

 58%|█████▊    | 38149/65675 [00:09<00:05, 4659.00ex/s]

 59%|█████▉    | 38638/65675 [00:09<00:05, 4725.44ex/s]

 60%|█████▉    | 39115/65675 [00:09<00:05, 4481.27ex/s]

 60%|██████    | 39624/65675 [00:09<00:05, 4653.13ex/s]

 61%|██████    | 40094/65675 [00:09<00:06, 4212.69ex/s]

 62%|██████▏   | 40553/65675 [00:09<00:05, 4313.33ex/s]

 62%|██████▏   | 41007/65675 [00:09<00:05, 4374.95ex/s]

 63%|██████▎   | 41518/65675 [00:09<00:05, 4583.30ex/s]

 64%|██████▍   | 41982/65675 [00:09<00:05, 4582.53ex/s]

 65%|██████▍   | 42445/65675 [00:10<00:05, 4398.06ex/s]

 65%|██████▌   | 42892/65675 [00:10<00:05, 4415.87ex/s]

 66%|██████▌   | 43337/65675 [00:10<00:05, 4278.55ex/s]

 67%|██████▋   | 43792/65675 [00:10<00:05, 4353.42ex/s]

 67%|██████▋   | 44230/65675 [00:10<00:04, 4316.26ex/s]

 68%|██████▊   | 44664/65675 [00:10<00:05, 3690.95ex/s]

 69%|██████▊   | 45050/65675 [00:10<00:05, 3481.16ex/s]

 69%|██████▉   | 45569/65675 [00:10<00:05, 3922.23ex/s]

 70%|███████   | 46006/65675 [00:10<00:04, 4040.93ex/s]

 71%|███████   | 46452/65675 [00:11<00:04, 4155.66ex/s]

 72%|███████▏  | 46961/65675 [00:11<00:04, 4420.42ex/s]

 72%|███████▏  | 47426/65675 [00:11<00:04, 4486.27ex/s]

 73%|███████▎  | 47955/65675 [00:11<00:03, 4718.77ex/s]

 74%|███████▎  | 48432/65675 [00:11<00:03, 4566.82ex/s]

 75%|███████▍  | 48950/65675 [00:11<00:03, 4741.67ex/s]

 75%|███████▌  | 49428/65675 [00:11<00:03, 4458.65ex/s]

 76%|███████▌  | 49927/65675 [00:11<00:03, 4607.15ex/s]

 77%|███████▋  | 50393/65675 [00:11<00:03, 4484.07ex/s]

 78%|███████▊  | 50970/65675 [00:12<00:03, 4848.75ex/s]

 78%|███████▊  | 51460/65675 [00:12<00:02, 4762.33ex/s]

 79%|███████▉  | 51965/65675 [00:12<00:02, 4844.41ex/s]

 80%|███████▉  | 52453/65675 [00:12<00:02, 4691.06ex/s]

 81%|████████  | 52925/65675 [00:12<00:02, 4643.20ex/s]

 81%|████████▏ | 53392/65675 [00:12<00:02, 4426.59ex/s]

 82%|████████▏ | 53908/65675 [00:12<00:02, 4630.60ex/s]

 83%|████████▎ | 54375/65675 [00:12<00:02, 4592.70ex/s]

 84%|████████▎ | 54858/65675 [00:12<00:02, 4661.00ex/s]

 84%|████████▍ | 55326/65675 [00:12<00:02, 4384.92ex/s]

 85%|████████▍ | 55781/65675 [00:13<00:02, 4429.03ex/s]

 86%|████████▌ | 56228/65675 [00:13<00:02, 4160.98ex/s]

 86%|████████▋ | 56684/65675 [00:13<00:02, 4270.65ex/s]

 87%|████████▋ | 57141/65675 [00:13<00:01, 4352.46ex/s]

 88%|████████▊ | 57656/65675 [00:13<00:01, 4581.17ex/s]

 89%|████████▊ | 58132/65675 [00:13<00:01, 4631.82ex/s]

 89%|████████▉ | 58639/65675 [00:13<00:01, 4759.44ex/s]

 90%|█████████ | 59117/65675 [00:13<00:01, 4551.87ex/s]

 91%|█████████ | 59585/65675 [00:13<00:01, 4585.81ex/s]

 91%|█████████▏| 60046/65675 [00:14<00:01, 4303.60ex/s]

 92%|█████████▏| 60515/65675 [00:14<00:01, 4410.10ex/s]

 93%|█████████▎| 60986/65675 [00:14<00:01, 4495.33ex/s]

 94%|█████████▎| 61439/65675 [00:14<00:00, 4424.79ex/s]

 94%|█████████▍| 61913/65675 [00:14<00:00, 4513.58ex/s]

 95%|█████████▍| 62367/65675 [00:14<00:00, 4276.91ex/s]

 96%|█████████▌| 62810/65675 [00:14<00:00, 4318.21ex/s]

 96%|█████████▋| 63245/65675 [00:14<00:00, 4087.70ex/s]

 97%|█████████▋| 63748/65675 [00:14<00:00, 4350.43ex/s]

 98%|█████████▊| 64188/65675 [00:14<00:00, 4267.95ex/s]

 99%|█████████▊| 64738/65675 [00:15<00:00, 4617.86ex/s]

 99%|█████████▉| 65219/65675 [00:15<00:00, 4672.88ex/s]

100%|██████████| 65675/65675 [00:15<00:00, 4294.59ex/s]




In [17]:
# Number of examples in (train, eval).
tuple(len(split) for split in (train_dataset, eval_dataset))

(67900, 65675)

In [18]:
# Spot-check one random example per split: decoded input text followed by its label.
# The row is fetched directly (`split[k]`) instead of first materializing the
# whole `input_ids` / `labels` columns just to read a single element.
for split in (train_dataset, eval_dataset):
    k = random.randrange(len(split))
    example = split[k]
    print(tokenizer.decode(example['input_ids']), example['labels'])

[CLS] 세련되고 우아한향 ~ [SEP] 브랜드 # 인지도 [SEP] 1


[CLS] # 셀더마 에서 요번에 제대로 간편한 # 뷰티템 골라왔네요 🙌 [SEP] 본품 # 인지도 [SEP] 1


# Load Trainer

In [19]:
# All values come from the configuration cells above; this only wires them in.
# NOTE(review): checkpoints are written under `run_name` (relative to the
# working directory), not under SAVE_PATH - confirm this is intended.
args = TrainingArguments(
    # identity / reporting
    output_dir=run_name,
    run_name=run_name,
    report_to=report_to,
    # precision
    fp16=fp16,
    # schedule length and batch geometry
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    # optimizer
    optim=optim,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    adam_epsilon=adam_epsilon,
    # learning-rate schedule
    lr_scheduler_type=lr_scheduler_type,
    warmup_ratio=warmup_ratio,
    # evaluation and checkpointing
    evaluation_strategy=evaluation_strategy,
    save_strategy=save_strategy,
    save_total_limit=save_total_limit,
    load_best_model_at_end=load_best_model_at_end,
    metric_for_best_model=metric_for_best_model,
    # logging
    logging_strategy=logging_strategy,
    logging_steps=logging_steps,
    logging_first_step=logging_first_step,
)

In [20]:
# Early stopping is currently disabled. Before enabling it, note that
# `EarlyStoppingCallback` is not among the names imported from transformers in
# the import cell, and `early_stopping_patience` is not defined in the visible
# configuration cells.
# es = EarlyStoppingCallback(early_stopping_patience=early_stopping_patience)

In [21]:
# Assemble the Trainer from the pieces built in the earlier cells.
trainer = Trainer(
    model=model,
    args=args,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    # callbacks=[es],
)

# Run Trainer

In [22]:
# Run fine-tuning and always close the W&B run afterwards. Previously the run
# was left open whenever `trainer.train()` raised or was interrupted.
try:
    trainer.train()
except BaseException:
    # Also covers KeyboardInterrupt: close the run with a non-zero exit code,
    # then re-raise so the original error is still shown.
    wandb.finish(exit_code=1)
    raise
else:
    wandb.finish()

The following columns in the training set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, form, id. If pair, form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running training *****


  Num examples = 67900


  Num Epochs = 10


  Instantaneous batch size per device = 25


  Total train batch size (w. parallel, distributed & accumulation) = 100


  Gradient Accumulation steps = 1


  Total optimization steps = 6790


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


You're using a ElectraTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.




Epoch,Training Loss,Validation Loss,Accuracy,F1 True,F1 False,F1 Macro,F1 Micro
1,0.1556,0.101942,0.964294,0.499039,0.981487,0.740263,0.964294
2,0.1002,0.079909,0.971085,0.628594,0.984957,0.806775,0.971085
3,0.0741,0.069281,0.973521,0.683415,0.986183,0.834799,0.973521
4,0.0679,0.067175,0.974937,0.695749,0.98693,0.841339,0.974937
5,0.0646,0.068944,0.975759,0.704857,0.987361,0.846109,0.975759
6,0.0578,0.066492,0.976094,0.717423,0.987519,0.852471,0.976094
7,0.0548,0.068769,0.976277,0.71938,0.987615,0.853498,0.976277
8,0.0542,0.068498,0.97582,0.71794,0.987369,0.852654,0.97582
9,0.0518,0.068875,0.976627,0.721163,0.987802,0.854483,0.976627
10,0.053,0.069034,0.976825,0.720631,0.987911,0.854271,0.976825


The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, form, id. If pair, form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 65675


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-679


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-679/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-679/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-679/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-679/special_tokens_map.json




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, form, id. If pair, form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 65675


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-1358


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-1358/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-1358/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-1358/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-1358/special_tokens_map.json




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, form, id. If pair, form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 65675


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-2037


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-2037/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-2037/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-2037/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-2037/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-679] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, form, id. If pair, form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 65675


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-2716


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-2716/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-2716/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-2716/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-2716/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-1358] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, form, id. If pair, form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 65675


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-3395


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-3395/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-3395/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-3395/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-3395/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-2037] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, form, id. If pair, form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 65675


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-4074


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-4074/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-4074/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-4074/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-4074/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-2716] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, form, id. If pair, form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 65675


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-4753


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-4753/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-4753/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-4753/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-4753/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-3395] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, form, id. If pair, form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 65675


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-5432


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-5432/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-5432/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-5432/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-5432/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-4753] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, form, id. If pair, form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 65675


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-6111


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-6111/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-6111/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-6111/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-6111/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-5432] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, form, id. If pair, form, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 65675


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-6790


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-6790/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-6790/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-6790/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-6790/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-6111] due to args.save_total_limit




Training completed. Do not forget to share your model on huggingface.co/models =)




Loading best model from snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-4074 (score: 0.06649243086576462).


Saving model checkpoint to /tmp/tmpmloyzy7c


Configuration saved in /tmp/tmpmloyzy7c/config.json


Model weights saved in /tmp/tmpmloyzy7c/pytorch_model.bin


tokenizer config file saved in /tmp/tmpmloyzy7c/tokenizer_config.json


Special tokens file saved in /tmp/tmpmloyzy7c/special_tokens_map.json


0,1
eval/accuracy,▁▅▆▇▇██▇██
eval/f1_false,▁▅▆▇▇██▇██
eval/f1_macro,▁▅▇▇▇█████
eval/f1_micro,▁▅▆▇▇██▇██
eval/f1_true,▁▅▇▇▇█████
eval/loss,█▄▂▁▁▁▁▁▁▂
eval/runtime,▃▂▂▂▃▂▂▁█▃
eval/samples_per_second,▆▇▇▇▆▆▇█▁▆
eval/steps_per_second,▆▇▇▇▆▆▇█▁▆
train/epoch,▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇███

0,1
eval/accuracy,0.97683
eval/f1_false,0.98791
eval/f1_macro,0.85427
eval/f1_micro,0.97683
eval/f1_true,0.72063
eval/loss,0.06903
eval/runtime,240.5754
eval/samples_per_second,272.991
eval/steps_per_second,2.731
train/epoch,10.0


In [23]:
keep = [
    'added_tokens.json',
    'config.json',
    'pytorch_model.bin',
    'special_tokens_map.json',
    'tokenizer.json',
    'tokenizer_config.json',
    'vocab.txt'
]

ckpts = os.listdir(run_name)
for ckpt in ckpts:
    ckpt = os.path.join(run_name, ckpt)
    for item in os.listdir(ckpt):
        if item not in keep:
            os.remove(os.path.join(ckpt, item))

!mv wandb {run_name} {SAVE_PATH}/

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
