# Description


# Modules and Global Variables

In [1]:
from transformers import (
    AutoConfig, AutoTokenizer, AutoModelForSequenceClassification, 
    DefaultDataCollator, DataCollatorWithPadding, 
    TrainingArguments, Trainer,
)

from transformers.optimization import (
    AdamW, get_linear_schedule_with_warmup,
    Adafactor, AdafactorSchedule,
)

import torch
import wandb

import datasets
import evaluate

from sklearn.metrics import accuracy_score, f1_score

import numpy as np
import pandas as pd

import os
import re
import random

import demoji

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
print(f'torch.__version__: {torch.__version__}')
print(f'torch.cuda.is_available(): {torch.cuda.is_available()}')
NGPU = torch.cuda.device_count()
print(f'NGPU: {NGPU}')
# NGPU = torch.cuda.device_count()
# if NGPU > 1:
#     model = torch.nn.DataParallel(model, device_ids=list(range(NGPU)))

torch.__version__: 1.12.1
torch.cuda.is_available(): True
NGPU: 4


In [3]:
### labels

ce_labels = ['True', 'False']
pc_labels = ['positive', 'negative', 'neutral']
pc_binary_labels = ['True', 'False']

labels = ce_labels

label2id = {k: i for i, k in enumerate(labels)}
id2label = {i: k for i, k in enumerate(labels)}
num_labels = len(labels)

print(label2id)
print(id2label)

{'True': 0, 'False': 1}
{0: 'True', 1: 'False'}


In [4]:
### paths and names

PROJECT_NAME = 'aspect_category_detection'
RUN_ID = 'uncleaned_v4'

DATA_V = 'uncleaned_v4'
DATA_T = 'ce' # ce or pc or pc_binary
AUGMENTATION = False
AUG_NAME = 'balanced'

model_checkpoint = 'snunlp/KR-ELECTRA-discriminator'

notebook_name = 'acd_binary_trainer.ipynb'

### fixed

model_name = re.sub(r'[/-]', r'_', model_checkpoint).lower()
run_name = f'{model_name}_{RUN_ID}'

ROOT_PATH = './'
SAVE_PATH = os.path.join(ROOT_PATH, 'training_results', run_name, 'acd')
NOTEBOOK_PATH = os.path.join('./', notebook_name)

augornot = f'_{AUG_NAME}' if AUGMENTATION is True else ''
TRAIN_DATA_PATH = os.path.join(ROOT_PATH, 'dataset', DATA_V, f'{DATA_T}_train{augornot}.csv')
EVAL_DATA_PATH = os.path.join(ROOT_PATH, 'dataset', DATA_V, f'{DATA_T}_dev.csv')

!mkdir -p {SAVE_PATH}

In [5]:
if os.path.exists(SAVE_PATH):
    print(f'{SAVE_PATH} exists.')
else:
    print(f'{SAVE_PATH} does not exist.')
if os.path.exists(NOTEBOOK_PATH):
    print(f'{NOTEBOOK_PATH} exists.')
else:
    print(f'{NOTEBOOK_PATH} does not exist.')
if os.path.exists(TRAIN_DATA_PATH):
    print(f'{TRAIN_DATA_PATH} exists.')
else:
    print(f'{TRAIN_DATA_PATH} does not exist.')
if os.path.exists(EVAL_DATA_PATH):
    print(f'{EVAL_DATA_PATH} exists.')
else:
    print(f'{EVAL_DATA_PATH} does not exist.')

./training_results/snunlp_kr_electra_discriminator_uncleaned_v4/acd exists.
./acd_binary_trainer.ipynb exists.
./dataset/uncleaned_v4/ce_train.csv exists.
./dataset/uncleaned_v4/ce_dev.csv exists.


In [6]:
### rest of training args

report_to="wandb"

fp16 = False

num_train_epochs = 10
batch_size = 25 * 2
gradient_accumulation_steps = 1

optim = 'adamw_torch' # 'adamw_hf'

learning_rate = 3e-6 / 8 * batch_size * 4 # 5e-5
weight_decay = 0.01 # 0
adam_epsilon = 1e-8

lr_scheduler_type = 'cosine'
warmup_ratio = 0

save_total_limit = 2

load_best_model_at_end = True
metric_for_best_model ='eval_loss'

save_strategy = "epoch"
evaluation_strategy = "epoch"

logging_strategy = "steps"
logging_first_step = True 
logging_steps = 5

# WandB Configuration

In [7]:
%env WANDB_PROJECT={PROJECT_NAME}
%env WANDB_NOTEBOOK_NAME={NOTEBOOK_PATH}
%env WANDB_LOG_MODEL=true
%env WANDB_WATCH=all
wandb.login()

env: WANDB_PROJECT=aspect_category_detection
env: WANDB_NOTEBOOK_NAME=./acd_binary_trainer.ipynb
env: WANDB_LOG_MODEL=true
env: WANDB_WATCH=all


[34m[1mwandb[0m: Currently logged in as: [33mdotsnangles[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

# Load Model, Tokenizer, and Collator

In [8]:
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint, label2id=label2id, id2label=id2label, num_labels=num_labels
)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Some weights of the model checkpoint at snunlp/KR-ELECTRA-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at snunlp/KR-ELECTRA-discriminator and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
train_path = f'./dataset/{DATA_V}/raw_train.csv'
dev_path = f'./dataset/{DATA_V}/raw_dev.csv'
test_path = f'./dataset/{DATA_V}/raw_test.csv'
train = pd.read_csv(train_path)
dev = pd.read_csv(dev_path)
test = pd.read_csv(test_path)

### new
entity_property_pair = [
    '본품#가격', '본품#다양성', '본품#디자인', '본품#인지도', '본품#일반', '본품#편의성', '본품#품질',
    '브랜드#가격', '브랜드#디자인', '브랜드#인지도', '브랜드#일반', '브랜드#품질',
    '제품 전체#가격', '제품 전체#다양성', '제품 전체#디자인', '제품 전체#인지도', '제품 전체#일반', '제품 전체#편의성', '제품 전체#품질',
    '패키지/구성품#가격', '패키지/구성품#다양성', '패키지/구성품#디자인', '패키지/구성품#일반', '패키지/구성품#편의성', '패키지/구성품#품질'
]
special_tokens = ['&name&', '&affiliation&', '&social-security-num&', '&tel-num&', '&card-num&', '&bank-account&', '&num&', '&online-account&']
emojis = pd.concat([train.sentence_form, dev.sentence_form, test.sentence_form], ignore_index=True, verify_integrity=True).to_frame()
emojis = list(set(demoji.findall(' '.join(emojis.sentence_form.to_list())).keys()))
ep_labels = pd.Series(entity_property_pair, name='sentence_form', copy=True)

tokens2add = special_tokens + emojis

tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
print(len(tokenizer))
tokenizer_train_data = pd.concat([train.sentence_form, dev.sentence_form, test.sentence_form], ignore_index=True, verify_integrity=True).to_frame().drop_duplicates()
tokenizer_train_data = tokenizer_train_data.sentence_form.to_list()
new_tokenizer = tokenizer.train_new_from_iterator(tokenizer_train_data, vocab_size=1)
new_tokens = set(list(new_tokenizer.vocab.keys()) + tokens2add) - set(tokenizer.vocab.keys())
tokenizer.add_tokens(list(new_tokens))
print(len(new_tokenizer))
print(len(tokenizer))
model.resize_token_embeddings(len(tokenizer))

30000





3018
30111


Embedding(30111, 768)

In [10]:
print(len(new_tokens))
print(new_tokens)

111
{'◍', 'ᴠ', '⁉️', '##ᴍ', 'ʀ', '💇', '&tel-num&', '##🥤', '##쨕', '👨\u200d👧', '##ᵕ', '🙆🏻', '##ˇ', '💬', '💄', '㉦', 'ғ', '🤘🏻', '쨕', '🏃\u200d♀️', '☺️', 'ꈍ', '##ꈍ', '##➕', '##ᴡ', '##ɴ', '&bank-account&', '💡', '👦🏼', '🕺', '😺', '👏🏻', '&affiliation&', '👩\u200d👦', '🐄', '💆🏻\u200d♀️', '##◍', '✌🏻', '뿤', '🙋🏻', '##ᴛ', 'ᴜ', 'ᴛ', '➕', '🙆\u200d♂️', '##ᴠ', '챳', 'ɢ', '☝️', '✌️', '💆', '💇🏼\u200d♀️', 'ᵕ', '##읒', '👌🏻', '👉🏻', '&social-security-num&', '😯', 'ᴡ', '✔️', '&name&', '##ᴜ', '##㉦', '쓩', '##ɪ', '🥤', '👠', '##💇', '🙋\u200d♀️', '👋🏻', '🙋🏻\u200d♀️', '〰️', '##뜌', '읒', 'ɪ', 'ᴘ', '##👠', '💆\u200d♀️', '&online-account&', '##죱', '&num&', '⏰', 'ˇ', '##🤡', '뜌', '##쫜', '##💄', 'ʜ', '##🚗', 'ᴍ', '🤡', '&card-num&', '🧚\u200d♀️', '##ᴘ', '‼️', '❤️', '죱', '##💆', '♥️', '💪🏻', '❣️', '🙌🏻', '🍷', '🚗', '쫜', '🙏🏻', '☝🏻', '##ʀ', 'ɴ', '##ɢ', '🍼'}


In [11]:
model.config.label2id, model.config.id2label, model.num_labels

({'True': 0, 'False': 1}, {0: 'True', 1: 'False'}, 2)

In [12]:
# entity_property_pair = [
#     '본품#가격', '본품#다양성', '본품#디자인', '본품#인지도', '본품#일반', '본품#편의성', '본품#품질',
#     '브랜드#가격', '브랜드#디자인', '브랜드#인지도', '브랜드#일반', '브랜드#품질',
#     '제품 전체#가격', '제품 전체#다양성', '제품 전체#디자인', '제품 전체#인지도', '제품 전체#일반', '제품 전체#편의성', '제품 전체#품질',
#     '패키지/구성품#가격', '패키지/구성품#다양성', '패키지/구성품#디자인', '패키지/구성품#일반', '패키지/구성품#편의성', '패키지/구성품#품질'
# ]
# polarity_id_to_name = ['positive', 'negative', 'neutral']
# tokenizer_tester = []
# for pair in entity_property_pair:
#     for polarity in polarity_id_to_name:
#         tokenizer_tester.append('#'.join([pair, polarity]))
# for e in tokenizer_tester:
#     print(tokenizer.decode(tokenizer.encode(e)))
# for e in tokenizer_tester:
#     print(tokenizer.encode(e))

# Define Metric

In [13]:
accuracy_metric = evaluate.load('accuracy')
f1_metric = evaluate.load('f1')

In [14]:
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    
    accuracy = accuracy_metric.compute(references=labels, predictions=predictions)['accuracy']
    f1_true, f1_false = tuple(f1_metric.compute(references=labels, predictions=predictions, average=None, labels=[0,1])['f1'])
    f1_macro = f1_metric.compute(references=labels, predictions=predictions, average='macro')['f1']
    f1_micro = f1_metric.compute(references=labels, predictions=predictions, average='micro')['f1']
    
    return {'accuracy': accuracy, 'f1_true': f1_true, 'f1_false': f1_false, 'f1_macro': f1_macro, 'f1_micro': f1_micro}

# Load Data

In [15]:
def preprocess_function(examples):
    return tokenizer(examples["form"], examples["pair"], truncation=True)

In [16]:
train_dataset = pd.read_csv(TRAIN_DATA_PATH)
eval_dataset = pd.read_csv(EVAL_DATA_PATH)
# train_dataset = pd.concat([train_dataset, eval_dataset])
train_dataset = datasets.Dataset.from_pandas(train_dataset) #.shuffle(seed=42)
eval_dataset = datasets.Dataset.from_pandas(eval_dataset) #.shuffle(seed=42)
train_dataset = train_dataset.map(preprocess_function, batched=False)
eval_dataset = eval_dataset.map(preprocess_function, batched=False)

  0%|          | 0/67900 [00:00<?, ?ex/s]

  1%|          | 491/67900 [00:00<00:13, 4907.80ex/s]

  1%|▏         | 982/67900 [00:00<00:30, 2192.09ex/s]

  2%|▏         | 1345/67900 [00:00<00:25, 2583.57ex/s]

  3%|▎         | 1872/67900 [00:00<00:19, 3336.11ex/s]

  3%|▎         | 2321/67900 [00:00<00:17, 3664.17ex/s]

  4%|▍         | 2819/67900 [00:00<00:16, 4045.23ex/s]

  5%|▍         | 3266/67900 [00:00<00:15, 4161.84ex/s]

  6%|▌         | 3762/67900 [00:01<00:14, 4395.46ex/s]

  6%|▌         | 4224/67900 [00:01<00:16, 3749.96ex/s]

  7%|▋         | 4726/67900 [00:01<00:15, 4080.54ex/s]

  8%|▊         | 5161/67900 [00:01<00:15, 3926.30ex/s]

  8%|▊         | 5633/67900 [00:01<00:15, 4139.40ex/s]

  9%|▉         | 6063/67900 [00:01<00:15, 4026.07ex/s]

 10%|▉         | 6513/67900 [00:01<00:14, 4154.29ex/s]

 10%|█         | 6998/67900 [00:01<00:13, 4350.53ex/s]

 11%|█         | 7441/67900 [00:01<00:14, 4248.66ex/s]

 12%|█▏        | 7960/67900 [00:02<00:13, 4514.96ex/s]

 12%|█▏        | 8460/67900 [00:02<00:12, 4654.63ex/s]

 13%|█▎        | 8930/67900 [00:02<00:12, 4643.11ex/s]

 14%|█▍        | 9398/67900 [00:02<00:13, 4330.31ex/s]

 14%|█▍        | 9837/67900 [00:02<00:13, 4304.96ex/s]

 15%|█▌        | 10272/67900 [00:02<00:14, 3998.59ex/s]

 16%|█▌        | 10679/67900 [00:02<00:14, 4007.44ex/s]

 16%|█▋        | 11101/67900 [00:02<00:13, 4065.71ex/s]

 17%|█▋        | 11540/67900 [00:02<00:13, 4157.86ex/s]

 18%|█▊        | 12000/67900 [00:02<00:13, 4127.75ex/s]

 18%|█▊        | 12458/67900 [00:03<00:13, 4256.40ex/s]

 19%|█▉        | 12886/67900 [00:03<00:14, 3752.71ex/s]

 20%|█▉        | 13273/67900 [00:03<00:15, 3554.17ex/s]

 20%|██        | 13721/67900 [00:03<00:14, 3796.69ex/s]

 21%|██        | 14111/67900 [00:03<00:14, 3802.92ex/s]

 21%|██▏       | 14548/67900 [00:03<00:13, 3961.22ex/s]

 22%|██▏       | 15000/67900 [00:03<00:13, 3936.15ex/s]

 23%|██▎       | 15438/67900 [00:03<00:12, 4059.94ex/s]

 23%|██▎       | 15947/67900 [00:03<00:11, 4351.87ex/s]

 24%|██▍       | 16387/67900 [00:04<00:11, 4330.45ex/s]

 25%|██▍       | 16914/67900 [00:04<00:11, 4603.77ex/s]

 26%|██▌       | 17378/67900 [00:04<00:11, 4530.73ex/s]

 26%|██▋       | 17863/67900 [00:04<00:10, 4622.55ex/s]

 27%|██▋       | 18327/67900 [00:04<00:11, 4402.41ex/s]

 28%|██▊       | 18780/67900 [00:04<00:11, 4437.13ex/s]

 28%|██▊       | 19227/67900 [00:04<00:11, 4206.19ex/s]

 29%|██▉       | 19660/67900 [00:04<00:11, 4239.60ex/s]

 30%|██▉       | 20087/67900 [00:04<00:11, 4151.95ex/s]

 30%|███       | 20564/67900 [00:05<00:10, 4326.62ex/s]

 31%|███       | 21020/67900 [00:05<00:10, 4392.53ex/s]

 32%|███▏      | 21505/67900 [00:05<00:10, 4524.94ex/s]

 32%|███▏      | 21974/67900 [00:05<00:10, 4572.02ex/s]

 33%|███▎      | 22433/67900 [00:05<00:10, 4517.33ex/s]

 34%|███▎      | 22909/67900 [00:05<00:09, 4587.74ex/s]

 34%|███▍      | 23369/67900 [00:05<00:10, 4409.50ex/s]

 35%|███▌      | 23846/67900 [00:05<00:09, 4512.32ex/s]

 36%|███▌      | 24299/67900 [00:05<00:10, 4305.04ex/s]

 37%|███▋      | 24796/67900 [00:05<00:09, 4492.69ex/s]

 37%|███▋      | 25271/67900 [00:06<00:09, 4566.74ex/s]

 38%|███▊      | 25817/67900 [00:06<00:08, 4826.16ex/s]

 39%|███▊      | 26302/67900 [00:06<00:08, 4671.01ex/s]

 40%|███▉      | 26847/67900 [00:06<00:08, 4895.08ex/s]

 40%|████      | 27340/67900 [00:06<00:08, 4761.64ex/s]

 41%|████      | 27907/67900 [00:06<00:07, 5021.49ex/s]

 42%|████▏     | 28412/67900 [00:06<00:08, 4876.21ex/s]

 43%|████▎     | 28905/67900 [00:06<00:07, 4891.04ex/s]

 43%|████▎     | 29396/67900 [00:06<00:08, 4654.36ex/s]

 44%|████▍     | 29865/67900 [00:07<00:08, 4590.87ex/s]

 45%|████▍     | 30337/67900 [00:07<00:08, 4626.98ex/s]

 45%|████▌     | 30802/67900 [00:07<00:08, 4508.15ex/s]

 46%|████▌     | 31255/67900 [00:07<00:09, 3919.06ex/s]

 47%|████▋     | 31730/67900 [00:07<00:08, 4135.76ex/s]

 47%|████▋     | 32177/67900 [00:07<00:08, 4226.00ex/s]

 48%|████▊     | 32635/67900 [00:07<00:08, 4324.09ex/s]

 49%|████▊     | 33075/67900 [00:07<00:08, 4279.29ex/s]

 49%|████▉     | 33574/67900 [00:07<00:07, 4480.99ex/s]

 50%|█████     | 34027/67900 [00:08<00:07, 4372.19ex/s]

 51%|█████     | 34471/67900 [00:08<00:07, 4390.67ex/s]

 51%|█████▏    | 34914/67900 [00:08<00:07, 4401.66ex/s]

 52%|█████▏    | 35356/67900 [00:08<00:07, 4303.28ex/s]

 53%|█████▎    | 35852/67900 [00:08<00:07, 4493.74ex/s]

 53%|█████▎    | 36304/67900 [00:08<00:07, 4335.82ex/s]

 54%|█████▍    | 36751/67900 [00:08<00:07, 4372.12ex/s]

 55%|█████▍    | 37190/67900 [00:08<00:07, 4058.69ex/s]

 55%|█████▌    | 37652/67900 [00:08<00:07, 4212.17ex/s]

 56%|█████▌    | 38078/67900 [00:08<00:07, 4114.72ex/s]

 57%|█████▋    | 38526/67900 [00:09<00:06, 4216.96ex/s]

 57%|█████▋    | 38960/67900 [00:09<00:06, 4250.57ex/s]

 58%|█████▊    | 39388/67900 [00:09<00:07, 3981.41ex/s]

 59%|█████▊    | 39791/67900 [00:09<00:07, 3824.17ex/s]

 59%|█████▉    | 40178/67900 [00:09<00:08, 3440.69ex/s]

 60%|█████▉    | 40618/67900 [00:09<00:07, 3691.15ex/s]

 60%|██████    | 41000/67900 [00:09<00:07, 3585.08ex/s]

 61%|██████    | 41413/67900 [00:09<00:07, 3732.06ex/s]

 62%|██████▏   | 41845/67900 [00:09<00:06, 3895.40ex/s]

 62%|██████▏   | 42240/67900 [00:10<00:06, 3791.45ex/s]

 63%|██████▎   | 42649/67900 [00:10<00:06, 3875.70ex/s]

 63%|██████▎   | 43040/67900 [00:10<00:06, 3785.72ex/s]

 64%|██████▍   | 43448/67900 [00:10<00:06, 3867.60ex/s]

 65%|██████▍   | 43877/67900 [00:10<00:06, 3988.67ex/s]

 65%|██████▌   | 44278/67900 [00:10<00:06, 3890.92ex/s]

 66%|██████▌   | 44709/67900 [00:10<00:05, 4011.13ex/s]

 66%|██████▋   | 45112/67900 [00:10<00:05, 3914.91ex/s]

 67%|██████▋   | 45573/67900 [00:10<00:05, 4114.73ex/s]

 68%|██████▊   | 45987/67900 [00:11<00:05, 3797.33ex/s]

 68%|██████▊   | 46373/67900 [00:11<00:05, 3771.06ex/s]

 69%|██████▉   | 46925/67900 [00:11<00:04, 4263.84ex/s]

 70%|██████▉   | 47380/67900 [00:11<00:04, 4343.81ex/s]

 71%|███████   | 47917/67900 [00:11<00:04, 4641.18ex/s]

 71%|███████▏  | 48391/67900 [00:11<00:04, 4669.01ex/s]

 72%|███████▏  | 48953/67900 [00:11<00:03, 4948.52ex/s]

 73%|███████▎  | 49457/67900 [00:11<00:03, 4974.09ex/s]

 74%|███████▎  | 50000/67900 [00:11<00:03, 5039.63ex/s]

 74%|███████▍  | 50535/67900 [00:11<00:03, 5128.70ex/s]

 75%|███████▌  | 51049/67900 [00:12<00:03, 4770.61ex/s]

 76%|███████▌  | 51532/67900 [00:12<00:03, 4541.23ex/s]

 77%|███████▋  | 52000/67900 [00:12<00:03, 4525.37ex/s]

 77%|███████▋  | 52507/67900 [00:12<00:03, 4677.16ex/s]

 78%|███████▊  | 53000/67900 [00:12<00:03, 4406.55ex/s]

 79%|███████▊  | 53447/67900 [00:12<00:03, 3975.90ex/s]

 79%|███████▉  | 53916/67900 [00:12<00:03, 4161.31ex/s]

 80%|████████  | 54342/67900 [00:12<00:03, 4080.28ex/s]

 81%|████████  | 54797/67900 [00:12<00:03, 4208.28ex/s]

 81%|████████▏ | 55224/67900 [00:13<00:03, 4075.40ex/s]

 82%|████████▏ | 55636/67900 [00:13<00:03, 3515.80ex/s]

 82%|████████▏ | 56003/67900 [00:13<00:03, 3484.27ex/s]

 83%|████████▎ | 56471/67900 [00:13<00:03, 3798.19ex/s]

 84%|████████▍ | 56914/67900 [00:13<00:02, 3969.61ex/s]

 84%|████████▍ | 57321/67900 [00:13<00:02, 3799.77ex/s]

 85%|████████▌ | 57741/67900 [00:13<00:02, 3909.15ex/s]

 86%|████████▌ | 58139/67900 [00:13<00:02, 3909.88ex/s]

 86%|████████▋ | 58614/67900 [00:13<00:02, 4149.26ex/s]

 87%|████████▋ | 59034/67900 [00:14<00:02, 4115.84ex/s]

 88%|████████▊ | 59525/67900 [00:14<00:01, 4344.72ex/s]

 88%|████████▊ | 59963/67900 [00:14<00:01, 4165.33ex/s]

 89%|████████▉ | 60385/67900 [00:14<00:01, 4180.70ex/s]

 90%|████████▉ | 60871/67900 [00:14<00:01, 4375.82ex/s]

 90%|█████████ | 61325/67900 [00:14<00:01, 4422.93ex/s]

 91%|█████████ | 61769/67900 [00:14<00:01, 4294.22ex/s]

 92%|█████████▏| 62201/67900 [00:14<00:01, 4227.96ex/s]

 92%|█████████▏| 62675/67900 [00:14<00:01, 4374.70ex/s]

 93%|█████████▎| 63114/67900 [00:15<00:01, 4291.38ex/s]

 94%|█████████▎| 63620/67900 [00:15<00:00, 4512.34ex/s]

 94%|█████████▍| 64073/67900 [00:15<00:00, 4488.18ex/s]

 95%|█████████▌| 64574/67900 [00:15<00:00, 4641.22ex/s]

 96%|█████████▌| 65040/67900 [00:15<00:00, 4496.82ex/s]

 96%|█████████▋| 65499/67900 [00:15<00:00, 4522.49ex/s]

 97%|█████████▋| 65953/67900 [00:15<00:00, 4414.12ex/s]

 98%|█████████▊| 66396/67900 [00:15<00:00, 4085.97ex/s]

 98%|█████████▊| 66813/67900 [00:15<00:00, 4108.54ex/s]

 99%|█████████▉| 67228/67900 [00:15<00:00, 4097.61ex/s]

100%|█████████▉| 67749/67900 [00:16<00:00, 4415.04ex/s]

100%|██████████| 67900/67900 [00:16<00:00, 4217.20ex/s]




  0%|          | 0/65675 [00:00<?, ?ex/s]

  0%|          | 121/65675 [00:00<01:56, 564.03ex/s]

  1%|          | 626/65675 [00:00<00:27, 2355.76ex/s]

  2%|▏         | 1075/65675 [00:00<00:20, 3129.79ex/s]

  2%|▏         | 1583/65675 [00:00<00:16, 3793.65ex/s]

  3%|▎         | 2013/65675 [00:00<00:16, 3852.93ex/s]

  4%|▎         | 2431/65675 [00:00<00:17, 3532.68ex/s]

  4%|▍         | 2810/65675 [00:00<00:17, 3600.13ex/s]

  5%|▍         | 3259/65675 [00:00<00:16, 3857.11ex/s]

  6%|▌         | 3774/65675 [00:01<00:14, 4234.40ex/s]

  6%|▋         | 4211/65675 [00:01<00:14, 4256.52ex/s]

  7%|▋         | 4695/65675 [00:01<00:13, 4426.02ex/s]

  8%|▊         | 5145/65675 [00:01<00:13, 4409.47ex/s]

  9%|▊         | 5660/65675 [00:01<00:12, 4626.64ex/s]

  9%|▉         | 6137/65675 [00:01<00:12, 4668.33ex/s]

 10%|█         | 6657/65675 [00:01<00:12, 4825.82ex/s]

 11%|█         | 7142/65675 [00:01<00:12, 4753.49ex/s]

 12%|█▏        | 7640/65675 [00:01<00:12, 4817.97ex/s]

 12%|█▏        | 8124/65675 [00:01<00:12, 4608.31ex/s]

 13%|█▎        | 8629/65675 [00:02<00:12, 4735.40ex/s]

 14%|█▍        | 9105/65675 [00:02<00:12, 4487.45ex/s]

 15%|█▍        | 9558/65675 [00:02<00:12, 4456.08ex/s]

 15%|█▌        | 10007/65675 [00:02<00:13, 4198.42ex/s]

 16%|█▌        | 10451/65675 [00:02<00:12, 4263.36ex/s]

 17%|█▋        | 10910/65675 [00:02<00:12, 4354.51ex/s]

 17%|█▋        | 11349/65675 [00:02<00:13, 4103.90ex/s]

 18%|█▊        | 11771/65675 [00:02<00:13, 4135.97ex/s]

 19%|█▊        | 12188/65675 [00:02<00:13, 3969.75ex/s]

 19%|█▉        | 12622/65675 [00:03<00:13, 4073.37ex/s]

 20%|█▉        | 13033/65675 [00:03<00:13, 3970.83ex/s]

 21%|██        | 13489/65675 [00:03<00:12, 4137.30ex/s]

 21%|██        | 13934/65675 [00:03<00:12, 4226.82ex/s]

 22%|██▏       | 14359/65675 [00:03<00:12, 4031.25ex/s]

 23%|██▎       | 14800/65675 [00:03<00:12, 4138.55ex/s]

 23%|██▎       | 15217/65675 [00:03<00:12, 3942.66ex/s]

 24%|██▍       | 15731/65675 [00:03<00:11, 4279.79ex/s]

 25%|██▍       | 16192/65675 [00:03<00:11, 4373.50ex/s]

 25%|██▌       | 16685/65675 [00:04<00:10, 4533.32ex/s]

 26%|██▌       | 17142/65675 [00:04<00:11, 4308.57ex/s]

 27%|██▋       | 17577/65675 [00:04<00:11, 4253.52ex/s]

 27%|██▋       | 18006/65675 [00:04<00:11, 4130.94ex/s]

 28%|██▊       | 18486/65675 [00:04<00:10, 4320.22ex/s]

 29%|██▉       | 18935/65675 [00:04<00:10, 4367.77ex/s]

 30%|██▉       | 19383/65675 [00:04<00:10, 4399.98ex/s]

 30%|███       | 19859/65675 [00:04<00:10, 4503.84ex/s]

 31%|███       | 20311/65675 [00:04<00:10, 4490.26ex/s]

 32%|███▏      | 20761/65675 [00:04<00:10, 4446.75ex/s]

 32%|███▏      | 21207/65675 [00:05<00:10, 4290.00ex/s]

 33%|███▎      | 21650/65675 [00:05<00:10, 4329.59ex/s]

 34%|███▎      | 22085/65675 [00:05<00:10, 4117.83ex/s]

 34%|███▍      | 22500/65675 [00:05<00:10, 4065.04ex/s]

 35%|███▍      | 22939/65675 [00:05<00:10, 4156.22ex/s]

 36%|███▌      | 23357/65675 [00:05<00:10, 3861.97ex/s]

 36%|███▌      | 23748/65675 [00:05<00:10, 3826.95ex/s]

 37%|███▋      | 24134/65675 [00:05<00:11, 3620.29ex/s]

 37%|███▋      | 24535/65675 [00:05<00:11, 3726.53ex/s]

 38%|███▊      | 24938/65675 [00:06<00:10, 3810.82ex/s]

 39%|███▊      | 25323/65675 [00:06<00:11, 3593.29ex/s]

 39%|███▉      | 25702/65675 [00:06<00:10, 3647.49ex/s]

 40%|███▉      | 26070/65675 [00:06<00:11, 3449.43ex/s]

 40%|████      | 26461/65675 [00:06<00:10, 3576.07ex/s]

 41%|████      | 26857/65675 [00:06<00:10, 3683.70ex/s]

 41%|████▏     | 27229/65675 [00:06<00:10, 3586.99ex/s]

 42%|████▏     | 27644/65675 [00:06<00:10, 3744.17ex/s]

 43%|████▎     | 28021/65675 [00:06<00:10, 3633.82ex/s]

 43%|████▎     | 28458/65675 [00:07<00:09, 3843.17ex/s]

 44%|████▍     | 28946/65675 [00:07<00:08, 4142.80ex/s]

 45%|████▍     | 29364/65675 [00:07<00:08, 4103.11ex/s]

 45%|████▌     | 29831/65675 [00:07<00:08, 4266.42ex/s]

 46%|████▌     | 30260/65675 [00:07<00:08, 4253.77ex/s]

 47%|████▋     | 30721/65675 [00:07<00:08, 4357.28ex/s]

 47%|████▋     | 31158/65675 [00:07<00:08, 4188.92ex/s]

 48%|████▊     | 31579/65675 [00:07<00:08, 3885.66ex/s]

 49%|████▊     | 32000/65675 [00:07<00:08, 3881.39ex/s]

 49%|████▉     | 32498/65675 [00:07<00:07, 4185.02ex/s]

 50%|█████     | 32977/65675 [00:08<00:07, 4355.38ex/s]

 51%|█████     | 33417/65675 [00:08<00:07, 4156.82ex/s]

 52%|█████▏    | 33888/65675 [00:08<00:07, 4310.45ex/s]

 52%|█████▏    | 34324/65675 [00:08<00:07, 4172.13ex/s]

 53%|█████▎    | 34745/65675 [00:08<00:07, 4027.77ex/s]

 54%|█████▎    | 35151/65675 [00:08<00:07, 3832.41ex/s]

 54%|█████▍    | 35671/65675 [00:08<00:07, 4208.02ex/s]

 55%|█████▌    | 36138/65675 [00:08<00:06, 4337.88ex/s]

 56%|█████▌    | 36680/65675 [00:08<00:06, 4646.00ex/s]

 57%|█████▋    | 37150/65675 [00:09<00:06, 4402.46ex/s]

 57%|█████▋    | 37649/65675 [00:09<00:06, 4566.01ex/s]

 58%|█████▊    | 38111/65675 [00:09<00:06, 4452.24ex/s]

 59%|█████▊    | 38560/65675 [00:09<00:06, 4428.32ex/s]

 59%|█████▉    | 39006/65675 [00:09<00:06, 4210.68ex/s]

 60%|██████    | 39431/65675 [00:09<00:06, 4195.03ex/s]

 61%|██████    | 39916/65675 [00:09<00:05, 4377.85ex/s]

 61%|██████▏   | 40357/65675 [00:09<00:05, 4241.34ex/s]

 62%|██████▏   | 40850/65675 [00:09<00:05, 4435.64ex/s]

 63%|██████▎   | 41297/65675 [00:10<00:05, 4299.06ex/s]

 64%|██████▎   | 41801/65675 [00:10<00:05, 4510.20ex/s]

 64%|██████▍   | 42265/65675 [00:10<00:05, 4546.47ex/s]

 65%|██████▌   | 42760/65675 [00:10<00:04, 4664.06ex/s]

 66%|██████▌   | 43229/65675 [00:10<00:05, 4379.48ex/s]

 67%|██████▋   | 43695/65675 [00:10<00:04, 4457.45ex/s]

 67%|██████▋   | 44145/65675 [00:10<00:05, 4257.42ex/s]

 68%|██████▊   | 44658/65675 [00:10<00:04, 4501.85ex/s]

 69%|██████▊   | 45113/65675 [00:10<00:04, 4411.24ex/s]

 70%|██████▉   | 45661/65675 [00:10<00:04, 4715.02ex/s]

 70%|███████   | 46137/65675 [00:11<00:04, 4570.20ex/s]

 71%|███████   | 46598/65675 [00:11<00:04, 4516.12ex/s]

 72%|███████▏  | 47052/65675 [00:11<00:04, 4436.19ex/s]

 72%|███████▏  | 47560/65675 [00:11<00:03, 4619.87ex/s]

 73%|███████▎  | 48030/65675 [00:11<00:03, 4641.78ex/s]

 74%|███████▍  | 48510/65675 [00:11<00:03, 4683.68ex/s]

 75%|███████▍  | 49000/65675 [00:11<00:03, 4634.39ex/s]

 75%|███████▌  | 49465/65675 [00:11<00:03, 4617.81ex/s]

 76%|███████▌  | 49934/65675 [00:11<00:03, 4638.84ex/s]

 77%|███████▋  | 50399/65675 [00:12<00:03, 4317.15ex/s]

 78%|███████▊  | 50915/65675 [00:12<00:03, 4553.45ex/s]

 78%|███████▊  | 51376/65675 [00:12<00:03, 4489.20ex/s]

 79%|███████▉  | 51903/65675 [00:12<00:02, 4713.04ex/s]

 80%|███████▉  | 52378/65675 [00:12<00:03, 4342.25ex/s]

 80%|████████  | 52840/65675 [00:12<00:02, 4418.14ex/s]

 81%|████████  | 53288/65675 [00:12<00:02, 4275.77ex/s]

 82%|████████▏ | 53758/65675 [00:12<00:02, 4394.50ex/s]

 83%|████████▎ | 54202/65675 [00:12<00:02, 4235.92ex/s]

 83%|████████▎ | 54686/65675 [00:12<00:02, 4404.99ex/s]

 84%|████████▍ | 55131/65675 [00:13<00:02, 4060.60ex/s]

 85%|████████▍ | 55578/65675 [00:13<00:02, 4171.08ex/s]

 85%|████████▌ | 56002/65675 [00:13<00:02, 3957.49ex/s]

 86%|████████▌ | 56407/65675 [00:13<00:02, 3982.07ex/s]

 87%|████████▋ | 56912/65675 [00:13<00:02, 4282.25ex/s]

 87%|████████▋ | 57375/65675 [00:13<00:01, 4379.96ex/s]

 88%|████████▊ | 57869/65675 [00:13<00:01, 4542.36ex/s]

 89%|████████▉ | 58359/65675 [00:13<00:01, 4644.75ex/s]

 90%|████████▉ | 58886/65675 [00:13<00:01, 4828.00ex/s]

 90%|█████████ | 59371/65675 [00:14<00:01, 4708.39ex/s]

 91%|█████████ | 59856/65675 [00:14<00:01, 4747.66ex/s]

 92%|█████████▏| 60333/65675 [00:14<00:01, 4685.45ex/s]

 93%|█████████▎| 60803/65675 [00:14<00:01, 4620.09ex/s]

 93%|█████████▎| 61266/65675 [00:14<00:00, 4581.53ex/s]

 94%|█████████▍| 61802/65675 [00:14<00:00, 4808.67ex/s]

 95%|█████████▍| 62284/65675 [00:14<00:00, 4665.35ex/s]

 96%|█████████▌| 62770/65675 [00:14<00:00, 4720.81ex/s]

 96%|█████████▋| 63244/65675 [00:14<00:00, 4544.30ex/s]

 97%|█████████▋| 63753/65675 [00:14<00:00, 4699.38ex/s]

 98%|█████████▊| 64225/65675 [00:15<00:00, 4566.48ex/s]

 99%|█████████▊| 64728/65675 [00:15<00:00, 4698.83ex/s]

 99%|█████████▉| 65200/65675 [00:15<00:00, 4623.47ex/s]

100%|█████████▉| 65664/65675 [00:15<00:00, 4564.07ex/s]

100%|██████████| 65675/65675 [00:15<00:00, 4261.98ex/s]




In [17]:
len(train_dataset), len(eval_dataset)

(67900, 65675)

In [18]:
k = random.randrange(len(train_dataset))
print(tokenizer.decode(train_dataset['input_ids'][k]), train_dataset['labels'][k])
k = random.randrange(len(eval_dataset))
print(tokenizer.decode(eval_dataset['input_ids'][k]), eval_dataset['labels'][k])

[CLS] 부드럽고 또 3중 흡수채널이라.. 아이가 신나게 놀면서 뛰어도 잘흡수.. 보송보송해서 꾸준히 사용하고 싶어유.. [SEP] 본품 # 디자인 [SEP] 1
[CLS] 사용하지 않을땐 접어서 팬트리에 보관쏘옥 꺼내두고 자랑하고 싶은 예쁜 다리미판 히히 ◡̈ [SEP] 제품 전체 # 품질 [SEP] 1


# Load Trainer

In [19]:
args = TrainingArguments(
    output_dir=run_name,
    run_name=run_name,
    report_to=report_to,

    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,

    optim=optim,

    learning_rate=learning_rate,
    weight_decay=weight_decay,
    adam_epsilon=adam_epsilon,

    lr_scheduler_type=lr_scheduler_type,
    warmup_ratio=warmup_ratio,

    save_total_limit=save_total_limit,

    load_best_model_at_end=load_best_model_at_end,
    metric_for_best_model=metric_for_best_model,
    
    save_strategy=save_strategy,
    evaluation_strategy=evaluation_strategy,

    logging_strategy=logging_strategy,
    logging_first_step=logging_first_step, 
    logging_steps=logging_steps,
    
    fp16=fp16,
)

In [20]:
# es = EarlyStoppingCallback(early_stopping_patience=early_stopping_patience)

In [21]:
trainer = Trainer(
    model,
    args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    data_collator=data_collator,
    # callbacks=[es],
)

# Run Trainer

In [22]:
trainer.train()
wandb.finish()

The following columns in the training set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running training *****


  Num examples = 67900


  Num Epochs = 10


  Instantaneous batch size per device = 50


  Total train batch size (w. parallel, distributed & accumulation) = 200


  Gradient Accumulation steps = 1


  Total optimization steps = 3400


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


You're using a ElectraTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.




Epoch,Training Loss,Validation Loss,Accuracy,F1 True,F1 False,F1 Macro,F1 Micro
1,0.075,0.079504,0.973704,0.639231,0.986355,0.812793,0.973704
2,0.0639,0.069872,0.974085,0.680075,0.986495,0.833285,0.974085
3,0.0447,0.071419,0.974496,0.676828,0.986724,0.831776,0.974496
4,0.042,0.084195,0.973445,0.678585,0.98615,0.832367,0.973445
5,0.022,0.09731,0.975105,0.687798,0.987035,0.837417,0.975105
6,0.021,0.102318,0.975074,0.707313,0.986983,0.847148,0.975074
7,0.0147,0.130165,0.974328,0.693566,0.986603,0.840084,0.974328
8,0.0013,0.156045,0.975516,0.696833,0.987243,0.842038,0.975516
9,0.0007,0.162191,0.974922,0.698958,0.986916,0.842937,0.974922
10,0.002,0.163778,0.974983,0.700347,0.986947,0.843647,0.974983


The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 65675


  Batch size = 200


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-340


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-340/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-340/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-340/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-340/special_tokens_map.json




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 65675


  Batch size = 200


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-680


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-680/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-680/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-680/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-680/special_tokens_map.json




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 65675


  Batch size = 200


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-1020


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-1020/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-1020/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-1020/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-1020/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-340] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 65675


  Batch size = 200


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-1360


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-1360/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-1360/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-1360/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-1360/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-1020] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 65675


  Batch size = 200


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-1700


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-1700/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-1700/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-1700/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-1700/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-1360] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 65675


  Batch size = 200


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-2040


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-2040/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-2040/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-2040/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-2040/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-1700] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 65675


  Batch size = 200


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-2380


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-2380/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-2380/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-2380/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-2380/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-2040] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 65675


  Batch size = 200


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-2720


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-2720/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-2720/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-2720/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-2720/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-2380] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 65675


  Batch size = 200


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-3060


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-3060/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-3060/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-3060/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-3060/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-2720] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 65675


  Batch size = 200


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-3400


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-3400/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-3400/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-3400/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-3400/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-3060] due to args.save_total_limit




Training completed. Do not forget to share your model on huggingface.co/models =)




Loading best model from snunlp_kr_electra_discriminator_uncleaned_v4/checkpoint-680 (score: 0.06987227499485016).


Saving model checkpoint to /tmp/tmpamrndpgs


Configuration saved in /tmp/tmpamrndpgs/config.json


Model weights saved in /tmp/tmpamrndpgs/pytorch_model.bin


tokenizer config file saved in /tmp/tmpamrndpgs/tokenizer_config.json


Special tokens file saved in /tmp/tmpamrndpgs/special_tokens_map.json


0,1
eval/accuracy,▂▃▅▁▇▇▄█▆▆
eval/f1_false,▂▃▅▁▇▆▄█▆▆
eval/f1_macro,▁▅▅▅▆█▇▇▇▇
eval/f1_micro,▂▃▅▁▇▇▄█▆▆
eval/f1_true,▁▅▅▅▆█▇▇▇▇
eval/loss,▂▁▁▂▃▃▅▇██
eval/runtime,█▂▇▁▂▅▄▆▂▁
eval/samples_per_second,▁▇▂█▇▄▅▃▇█
eval/steps_per_second,▁▇▂█▇▄▅▃▇█
train/epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███

0,1
eval/accuracy,0.97498
eval/f1_false,0.98695
eval/f1_macro,0.84365
eval/f1_micro,0.97498
eval/f1_true,0.70035
eval/loss,0.16378
eval/runtime,141.9313
eval/samples_per_second,462.724
eval/steps_per_second,2.318
train/epoch,10.0


In [23]:
keep = [
    'added_tokens.json',
    'config.json',
    'pytorch_model.bin',
    'special_tokens_map.json',
    'tokenizer.json',
    'tokenizer_config.json',
    'vocab.txt'
]

ckpts = os.listdir(run_name)
for ckpt in ckpts:
    ckpt = os.path.join(run_name, ckpt)
    for item in os.listdir(ckpt):
        if item not in keep:
            os.remove(os.path.join(ckpt, item))

!mv wandb {run_name} {SAVE_PATH}/

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
