# Description


# Modules and Global Variables

In [1]:
from transformers import (
    AutoConfig, AutoTokenizer, AutoModelForSequenceClassification, 
    ElectraTokenizer, ElectraForSequenceClassification, 
    DefaultDataCollator, DataCollatorWithPadding, 
    TrainingArguments, Trainer,
)

from transformers.optimization import (
    AdamW, get_linear_schedule_with_warmup,
    Adafactor, AdafactorSchedule,
)

import torch
import wandb

import datasets
import evaluate

from sklearn.metrics import accuracy_score, f1_score

import numpy as np
import pandas as pd

import os
import re
import random

import demoji

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
print(f'torch.__version__: {torch.__version__}')
print(f'torch.cuda.is_available(): {torch.cuda.is_available()}')
NGPU = torch.cuda.device_count()
print(f'NGPU: {NGPU}')
# NGPU = torch.cuda.device_count()
# if NGPU > 1:
#     model = torch.nn.DataParallel(model, device_ids=list(range(NGPU)))

torch.__version__: 1.12.1
torch.cuda.is_available(): True
NGPU: 4


In [3]:
### labels

ce_labels = ['True', 'False']
pc_labels = ['positive', 'negative', 'neutral']
pc_binary_labels = ['True', 'False']

labels = ce_labels

label2id = {k: i for i, k in enumerate(labels)}
id2label = {i: k for i, k in enumerate(labels)}
num_labels = len(labels)

print(label2id)
print(id2label)

{'True': 0, 'False': 1}
{0: 'True', 1: 'False'}


In [4]:
### paths and names

PROJECT_NAME = 'aspect_category_detection'
RUN_ID = 'uncleaned_v8'

DATA_V = 'uncleaned_v8'
DATA_T = 'ce' # ce or pc or pc_binary
AUGMENTATION = False
AUG_NAME = 'balanced'

model_checkpoint = 'monologg/koelectra-base-v3-discriminator'

notebook_name = 'acd_binary_trainer.ipynb'

### fixed

model_name = re.sub(r'[/-]', r'_', model_checkpoint).lower()
run_name = f'{model_name}_{RUN_ID}'

ROOT_PATH = './'
SAVE_PATH = os.path.join(ROOT_PATH, 'training_results', run_name, 'acd')
NOTEBOOK_PATH = os.path.join('./', notebook_name)

augornot = f'_{AUG_NAME}' if AUGMENTATION is True else ''
TRAIN_DATA_PATH = os.path.join(ROOT_PATH, 'dataset', DATA_V, f'{DATA_T}_train{augornot}.csv')
EVAL_DATA_PATH = os.path.join(ROOT_PATH, 'dataset', DATA_V, f'{DATA_T}_dev.csv')

!mkdir -p {SAVE_PATH}

In [5]:
if os.path.exists(SAVE_PATH):
    print(f'{SAVE_PATH} exists.')
else:
    print(f'{SAVE_PATH} does not exist.')
if os.path.exists(NOTEBOOK_PATH):
    print(f'{NOTEBOOK_PATH} exists.')
else:
    print(f'{NOTEBOOK_PATH} does not exist.')
if os.path.exists(TRAIN_DATA_PATH):
    print(f'{TRAIN_DATA_PATH} exists.')
else:
    print(f'{TRAIN_DATA_PATH} does not exist.')
if os.path.exists(EVAL_DATA_PATH):
    print(f'{EVAL_DATA_PATH} exists.')
else:
    print(f'{EVAL_DATA_PATH} does not exist.')

./training_results/monologg_koelectra_base_v3_discriminator_uncleaned_v8/acd exists.
./acd_binary_trainer.ipynb exists.
./dataset/uncleaned_v8/ce_train.csv exists.
./dataset/uncleaned_v8/ce_dev.csv exists.


In [6]:
### rest of training args

report_to="wandb"

fp16 = False

num_train_epochs = 10
batch_size = 32
gradient_accumulation_steps = 1

optim = 'adamw_hf' # 'adamw_torch'

learning_rate = 3e-6 / 8 * batch_size * NGPU # 5e-5
weight_decay = 0.01 # 0
adam_epsilon = 1e-8

lr_scheduler_type = 'linear'
warmup_ratio = 0

save_total_limit = 2

load_best_model_at_end = True
metric_for_best_model ='f1_macro'

save_strategy = "epoch"
evaluation_strategy = "epoch"

logging_strategy = "steps"
logging_first_step = True 
logging_steps = 50

print(learning_rate)

4.8e-05


# WandB Configuration

In [7]:
%env WANDB_PROJECT={PROJECT_NAME}
%env WANDB_NOTEBOOK_NAME={NOTEBOOK_PATH}
%env WANDB_LOG_MODEL=true
%env WANDB_WATCH=all
wandb.login()

env: WANDB_PROJECT=aspect_category_detection
env: WANDB_NOTEBOOK_NAME=./acd_binary_trainer.ipynb
env: WANDB_LOG_MODEL=true
env: WANDB_WATCH=all


[34m[1mwandb[0m: Currently logged in as: [33mdotsnangles[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

# Load Model, Tokenizer, and Collator

In [8]:
tokenizer = ElectraTokenizer.from_pretrained(model_checkpoint)
model = ElectraForSequenceClassification.from_pretrained(
    model_checkpoint, label2id=label2id, id2label=id2label, num_labels=num_labels
)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Some weights of the model checkpoint at monologg/koelectra-base-v3-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-v3-discriminator and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
train_path = f'./dataset/{DATA_V}/raw_train.csv'
dev_path = f'./dataset/{DATA_V}/raw_dev.csv'
test_path = f'./dataset/{DATA_V}/raw_test.csv'
train = pd.read_csv(train_path)
dev = pd.read_csv(dev_path)
test = pd.read_csv(test_path)

### new
entity_property_pair = [
    '본품#가격', '본품#다양성', '본품#디자인', '본품#인지도', '본품#일반', '본품#편의성', '본품#품질',
    '브랜드#가격', '브랜드#디자인', '브랜드#인지도', '브랜드#일반', '브랜드#품질',
    '제품 전체#가격', '제품 전체#다양성', '제품 전체#디자인', '제품 전체#인지도', '제품 전체#일반', '제품 전체#편의성', '제품 전체#품질',
    '패키지/구성품#가격', '패키지/구성품#다양성', '패키지/구성품#디자인', '패키지/구성품#일반', '패키지/구성품#편의성', '패키지/구성품#품질'
]
special_tokens = ['&name&', '&affiliation&', '&social-security-num&', '&tel-num&', '&card-num&', '&bank-account&', '&num&', '&online-account&']
emojis = pd.concat([train.sentence_form, dev.sentence_form, test.sentence_form], ignore_index=True, verify_integrity=True).to_frame()
emojis = list(set(demoji.findall(' '.join(emojis.sentence_form.to_list())).keys()))
ep_labels = pd.Series(entity_property_pair, name='sentence_form', copy=True)

tokens2add = special_tokens + emojis

tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
print(len(tokenizer))
tokenizer_train_data = pd.concat([train.sentence_form, dev.sentence_form, test.sentence_form], ignore_index=True, verify_integrity=True).to_frame().drop_duplicates()
tokenizer_train_data = tokenizer_train_data.sentence_form.to_list()
new_tokenizer = tokenizer.train_new_from_iterator(tokenizer_train_data, vocab_size=1)
new_tokens = set(list(new_tokenizer.vocab.keys()) + tokens2add) - set(tokenizer.vocab.keys())
tokenizer.add_tokens(list(new_tokens))
print(len(new_tokenizer))
print(len(tokenizer))
model.resize_token_embeddings(len(tokenizer))

35000





3060
35254


Embedding(35254, 768)

In [10]:
print(len(new_tokens))
print(new_tokens)

254
{'ʜ', '🙃', 'ᴠ', 'ɪ', '♬', '##죱', '##➰', '✔️', '🐱', '💆🏻\u200d♀️', '🙋🏻', '😴', '˂', '💧', 'ɢ', '##🐥', '뿤', '🖐', '🍎', '🙋🏻\u200d♀️', '🎵', '🌝', '💄', '🧚', '🙋\u200d♀️', '☝', '##ᴡ', '##˂', 'ᴗ', '##💋', 'ꈍ', 'ᴡ', '📸', '🐥', 'ᵕ', '##®', '##😬', '##🎂', '✌️', '귯', '👆', '🕸', '##ɴ', '##닠', '##💯', '펏', '##👆', '✌🏻', '##젔', '##👨', '##ɪ', '👠', '˚', '잍', '##🥤', '##듕', '##ᴍ', '##💝', '젔', '🎀', '⁉', '💇', '##🍎', '##🤮', '👩\u200d👦', '##◡', '쏨', '☺️', '💝', '💪🏻', '㉦', '##ʀ', '🔸', '🏃\u200d♀️', '🐄', '##ɢ', '&online-account&', '닠', '##🌝', '😮', '##🤘', '👌🏻', '##˚', '🍷', 'ᴜ', '🤟', '##😶', '♪', '##🕸', 'ᴇ', '##◍', '쓩', '##ᴜ', '##ᵕ', '❔', '##쏨', '##🖒', '##🚗', '##ᴏ', '##ᴠ', '◡', '핡', '##㉦', '➕', '◍', '##🤡', '##👦', '‼️', '##☝', '💇🏼\u200d♀️', '##츌', '##😮', '##앝', '곘', '👨', '👦🏼', '쵝', '🍼', '##촥', '##옄', '##🧚', '😺', '꺠', 'ɴ', '츌', '횽', '🤘', '촥', '🚗', '❤️', '👉🏻', '똭', '🍰', '##ᴛ', '##쨕', '🧚\u200d♀️', '닼', '☝🏻', '&social-security-num&', '띡', '쨕', 'ᴍ', '💬', '🕺', '💋', '🙌🏻', '&num&', '졓', '👍🏻', '💞', '##♩', '💯', '##ᴘ', '👌', '##🕷', '😲'

In [11]:
model.config.label2id, model.config.id2label, model.num_labels

({'True': 0, 'False': 1}, {0: 'True', 1: 'False'}, 2)

In [12]:
# entity_property_pair = [
#     '본품#가격', '본품#다양성', '본품#디자인', '본품#인지도', '본품#일반', '본품#편의성', '본품#품질',
#     '브랜드#가격', '브랜드#디자인', '브랜드#인지도', '브랜드#일반', '브랜드#품질',
#     '제품 전체#가격', '제품 전체#다양성', '제품 전체#디자인', '제품 전체#인지도', '제품 전체#일반', '제품 전체#편의성', '제품 전체#품질',
#     '패키지/구성품#가격', '패키지/구성품#다양성', '패키지/구성품#디자인', '패키지/구성품#일반', '패키지/구성품#편의성', '패키지/구성품#품질'
# ]
# polarity_id_to_name = ['positive', 'negative', 'neutral']
# tokenizer_tester = []
# for pair in entity_property_pair:
#     for polarity in polarity_id_to_name:
#         tokenizer_tester.append('#'.join([pair, polarity]))
# for e in tokenizer_tester:
#     print(tokenizer.decode(tokenizer.encode(e)))
# for e in tokenizer_tester:
#     print(tokenizer.encode(e))

# Define Metric

In [13]:
accuracy_metric = evaluate.load('accuracy')
f1_metric = evaluate.load('f1')

In [14]:
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    
    accuracy = accuracy_metric.compute(references=labels, predictions=predictions)['accuracy']
    f1_true, f1_false = tuple(f1_metric.compute(references=labels, predictions=predictions, average=None, labels=[0,1])['f1'])
    f1_macro = f1_metric.compute(references=labels, predictions=predictions, average='macro')['f1']
    f1_micro = f1_metric.compute(references=labels, predictions=predictions, average='micro')['f1']
    
    return {'accuracy': accuracy, 'f1_true': f1_true, 'f1_false': f1_false, 'f1_macro': f1_macro, 'f1_micro': f1_micro}

# Load Data

In [15]:
def preprocess_function(examples):
    return tokenizer(examples["form"], examples["pair"], truncation=True)

In [16]:
train_dataset = pd.read_csv(TRAIN_DATA_PATH)
eval_dataset = pd.read_csv(EVAL_DATA_PATH)
# train_dataset = pd.concat([train_dataset, eval_dataset])
train_dataset = datasets.Dataset.from_pandas(train_dataset) #.shuffle(seed=42)
eval_dataset = datasets.Dataset.from_pandas(eval_dataset) #.shuffle(seed=42)
train_dataset = train_dataset.map(preprocess_function, batched=False)
eval_dataset = eval_dataset.map(preprocess_function, batched=False)

  0%|          | 0/115850 [00:00<?, ?ex/s]

  0%|          | 365/115850 [00:00<00:31, 3644.36ex/s]

  1%|          | 738/115850 [00:00<00:31, 3691.71ex/s]

  1%|          | 1108/115850 [00:00<00:33, 3391.38ex/s]

  1%|▏         | 1544/115850 [00:00<00:30, 3752.97ex/s]

  2%|▏         | 1951/115850 [00:00<00:29, 3861.79ex/s]

  2%|▏         | 2340/115850 [00:00<00:30, 3759.54ex/s]

  2%|▏         | 2719/115850 [00:00<00:44, 2546.43ex/s]

  3%|▎         | 3023/115850 [00:00<00:42, 2636.92ex/s]

  3%|▎         | 3408/115850 [00:01<00:38, 2936.99ex/s]

  3%|▎         | 3815/115850 [00:01<00:34, 3231.79ex/s]

  4%|▎         | 4166/115850 [00:01<00:36, 3083.34ex/s]

  4%|▍         | 4593/115850 [00:01<00:32, 3398.53ex/s]

  4%|▍         | 5000/115850 [00:01<00:32, 3449.70ex/s]

  5%|▍         | 5470/115850 [00:01<00:29, 3792.80ex/s]

  5%|▌         | 5871/115850 [00:01<00:28, 3852.15ex/s]

  5%|▌         | 6266/115850 [00:01<00:29, 3713.77ex/s]

  6%|▌         | 6694/115850 [00:01<00:28, 3871.45ex/s]

  6%|▌         | 7088/115850 [00:02<00:30, 3520.91ex/s]

  6%|▋         | 7508/115850 [00:02<00:29, 3703.12ex/s]

  7%|▋         | 7911/115850 [00:02<00:28, 3792.49ex/s]

  7%|▋         | 8297/115850 [00:02<00:29, 3640.91ex/s]

  7%|▋         | 8667/115850 [00:02<00:29, 3590.17ex/s]

  8%|▊         | 9030/115850 [00:02<00:30, 3494.58ex/s]

  8%|▊         | 9481/115850 [00:02<00:28, 3778.04ex/s]

  9%|▊         | 9926/115850 [00:02<00:26, 3967.98ex/s]

  9%|▉         | 10327/115850 [00:02<00:27, 3814.44ex/s]

  9%|▉         | 10748/115850 [00:03<00:26, 3925.02ex/s]

 10%|▉         | 11144/115850 [00:03<00:28, 3697.12ex/s]

 10%|▉         | 11582/115850 [00:03<00:26, 3887.66ex/s]

 10%|█         | 12000/115850 [00:03<00:27, 3774.14ex/s]

 11%|█         | 12452/115850 [00:03<00:25, 3980.52ex/s]

 11%|█         | 12915/115850 [00:03<00:24, 4163.50ex/s]

 12%|█▏        | 13336/115850 [00:03<00:24, 4131.95ex/s]

 12%|█▏        | 13785/115850 [00:03<00:24, 4233.90ex/s]

 12%|█▏        | 14211/115850 [00:03<00:25, 4046.51ex/s]

 13%|█▎        | 14631/115850 [00:03<00:24, 4088.60ex/s]

 13%|█▎        | 15043/115850 [00:04<00:26, 3863.36ex/s]

 13%|█▎        | 15520/115850 [00:04<00:24, 4116.09ex/s]

 14%|█▍        | 15974/115850 [00:04<00:23, 4236.63ex/s]

 14%|█▍        | 16402/115850 [00:04<00:24, 4026.08ex/s]

 15%|█▍        | 16835/115850 [00:04<00:24, 4109.73ex/s]

 15%|█▍        | 17250/115850 [00:04<00:25, 3913.23ex/s]

 15%|█▌        | 17670/115850 [00:04<00:24, 3991.70ex/s]

 16%|█▌        | 18073/115850 [00:04<00:24, 3937.38ex/s]

 16%|█▌        | 18469/115850 [00:04<00:24, 3922.52ex/s]

 16%|█▋        | 18863/115850 [00:05<00:27, 3583.51ex/s]

 17%|█▋        | 19228/115850 [00:05<00:28, 3446.21ex/s]

 17%|█▋        | 19629/115850 [00:05<00:26, 3597.72ex/s]

 17%|█▋        | 20000/115850 [00:05<00:26, 3603.65ex/s]

 18%|█▊        | 20450/115850 [00:05<00:24, 3856.69ex/s]

 18%|█▊        | 20907/115850 [00:05<00:23, 4061.33ex/s]

 18%|█▊        | 21319/115850 [00:05<00:23, 4076.50ex/s]

 19%|█▉        | 21733/115850 [00:05<00:22, 4094.01ex/s]

 19%|█▉        | 22145/115850 [00:05<00:23, 4020.51ex/s]

 20%|█▉        | 22635/115850 [00:06<00:21, 4277.73ex/s]

 20%|█▉        | 23065/115850 [00:06<00:21, 4226.52ex/s]

 20%|██        | 23530/115850 [00:06<00:21, 4349.16ex/s]

 21%|██        | 24000/115850 [00:06<00:21, 4357.43ex/s]

 21%|██        | 24437/115850 [00:06<00:21, 4272.07ex/s]

 21%|██▏       | 24865/115850 [00:06<00:21, 4255.67ex/s]

 22%|██▏       | 25292/115850 [00:06<00:23, 3916.53ex/s]

 22%|██▏       | 25689/115850 [00:06<00:23, 3871.15ex/s]

 23%|██▎       | 26080/115850 [00:06<00:24, 3713.16ex/s]

 23%|██▎       | 26506/115850 [00:06<00:23, 3863.49ex/s]

 23%|██▎       | 26941/115850 [00:07<00:22, 3998.93ex/s]

 24%|██▎       | 27344/115850 [00:07<00:23, 3798.60ex/s]

 24%|██▍       | 27812/115850 [00:07<00:21, 4043.86ex/s]

 24%|██▍       | 28221/115850 [00:07<00:22, 3924.34ex/s]

 25%|██▍       | 28652/115850 [00:07<00:21, 4032.58ex/s]

 25%|██▌       | 29059/115850 [00:07<00:22, 3923.58ex/s]

 25%|██▌       | 29515/115850 [00:07<00:21, 4104.23ex/s]

 26%|██▌       | 29969/115850 [00:07<00:20, 4230.28ex/s]

 26%|██▌       | 30395/115850 [00:07<00:21, 3945.20ex/s]

 27%|██▋       | 30840/115850 [00:08<00:20, 4085.39ex/s]

 27%|██▋       | 31254/115850 [00:08<00:22, 3834.41ex/s]

 27%|██▋       | 31711/115850 [00:08<00:20, 4035.96ex/s]

 28%|██▊       | 32121/115850 [00:08<00:21, 3908.32ex/s]

 28%|██▊       | 32520/115850 [00:08<00:21, 3930.12ex/s]

 28%|██▊       | 32961/115850 [00:08<00:20, 4066.69ex/s]

 29%|██▉       | 33371/115850 [00:08<00:21, 3852.86ex/s]

 29%|██▉       | 33782/115850 [00:08<00:20, 3924.10ex/s]

 30%|██▉       | 34178/115850 [00:08<00:22, 3703.23ex/s]

 30%|██▉       | 34640/115850 [00:09<00:20, 3957.90ex/s]

 30%|███       | 35041/115850 [00:09<00:20, 3865.95ex/s]

 31%|███       | 35472/115850 [00:09<00:20, 3989.44ex/s]

 31%|███       | 35914/115850 [00:09<00:19, 4112.94ex/s]

 31%|███▏      | 36329/115850 [00:09<00:20, 3930.18ex/s]

 32%|███▏      | 36754/115850 [00:09<00:19, 4018.11ex/s]

 32%|███▏      | 37159/115850 [00:09<00:23, 3305.81ex/s]

 32%|███▏      | 37628/115850 [00:09<00:21, 3654.06ex/s]

 33%|███▎      | 38016/115850 [00:09<00:22, 3533.38ex/s]

 33%|███▎      | 38431/115850 [00:10<00:20, 3694.67ex/s]

 34%|███▎      | 38854/115850 [00:10<00:20, 3841.42ex/s]

 34%|███▍      | 39249/115850 [00:10<00:20, 3752.58ex/s]

 34%|███▍      | 39681/115850 [00:10<00:19, 3909.30ex/s]

 35%|███▍      | 40079/115850 [00:10<00:19, 3854.54ex/s]

 35%|███▍      | 40524/115850 [00:10<00:18, 4023.73ex/s]

 35%|███▌      | 40942/115850 [00:10<00:18, 4067.17ex/s]

 36%|███▌      | 41352/115850 [00:10<00:19, 3817.06ex/s]

 36%|███▌      | 41783/115850 [00:10<00:18, 3954.55ex/s]

 36%|███▋      | 42183/115850 [00:11<00:20, 3591.06ex/s]

 37%|███▋      | 42628/115850 [00:11<00:19, 3822.96ex/s]

 37%|███▋      | 43019/115850 [00:11<00:19, 3829.81ex/s]

 37%|███▋      | 43429/115850 [00:11<00:18, 3905.85ex/s]

 38%|███▊      | 43858/115850 [00:11<00:17, 4014.35ex/s]

 38%|███▊      | 44264/115850 [00:11<00:19, 3751.32ex/s]

 39%|███▊      | 44695/115850 [00:11<00:18, 3906.64ex/s]

 39%|███▉      | 45091/115850 [00:11<00:18, 3834.39ex/s]

 39%|███▉      | 45526/115850 [00:11<00:17, 3978.93ex/s]

 40%|███▉      | 45928/115850 [00:11<00:17, 3946.75ex/s]

 40%|███▉      | 46325/115850 [00:12<00:19, 3628.19ex/s]

 40%|████      | 46729/115850 [00:12<00:18, 3739.76ex/s]

 41%|████      | 47109/115850 [00:12<00:19, 3550.30ex/s]

 41%|████      | 47492/115850 [00:12<00:18, 3627.27ex/s]

 41%|████▏     | 47897/115850 [00:12<00:18, 3745.89ex/s]

 42%|████▏     | 48276/115850 [00:12<00:18, 3747.58ex/s]

 42%|████▏     | 48740/115850 [00:12<00:16, 4005.03ex/s]

 42%|████▏     | 49144/115850 [00:12<00:17, 3892.85ex/s]

 43%|████▎     | 49579/115850 [00:12<00:16, 4023.11ex/s]

 43%|████▎     | 49984/115850 [00:13<00:16, 3956.67ex/s]

 43%|████▎     | 50382/115850 [00:13<00:17, 3734.16ex/s]

 44%|████▍     | 50773/115850 [00:13<00:17, 3782.95ex/s]

 44%|████▍     | 51154/115850 [00:13<00:18, 3535.96ex/s]

 45%|████▍     | 51581/115850 [00:13<00:17, 3736.02ex/s]

 45%|████▍     | 51961/115850 [00:13<00:17, 3753.60ex/s]

 45%|████▌     | 52340/115850 [00:13<00:18, 3520.02ex/s]

 45%|████▌     | 52697/115850 [00:13<00:17, 3527.47ex/s]

 46%|████▌     | 53054/115850 [00:13<00:19, 3246.10ex/s]

 46%|████▌     | 53455/115850 [00:14<00:18, 3451.74ex/s]

 47%|████▋     | 53901/115850 [00:14<00:16, 3730.20ex/s]

 47%|████▋     | 54281/115850 [00:14<00:17, 3513.28ex/s]

 47%|████▋     | 54639/115850 [00:14<00:17, 3528.10ex/s]

 47%|████▋     | 55000/115850 [00:14<00:17, 3504.81ex/s]

 48%|████▊     | 55437/115850 [00:14<00:16, 3749.41ex/s]

 48%|████▊     | 55884/115850 [00:14<00:15, 3955.08ex/s]

 49%|████▊     | 56283/115850 [00:14<00:15, 3870.85ex/s]

 49%|████▉     | 56714/115850 [00:14<00:14, 3997.86ex/s]

 49%|████▉     | 57116/115850 [00:15<00:15, 3812.50ex/s]

 50%|████▉     | 57535/115850 [00:15<00:14, 3918.15ex/s]

 50%|█████     | 57950/115850 [00:15<00:14, 3983.03ex/s]

 50%|█████     | 58351/115850 [00:15<00:14, 3886.78ex/s]

 51%|█████     | 58760/115850 [00:15<00:14, 3943.68ex/s]

 51%|█████     | 59156/115850 [00:15<00:14, 3787.55ex/s]

 51%|█████▏    | 59537/115850 [00:15<00:15, 3651.21ex/s]

 52%|█████▏    | 59938/115850 [00:15<00:14, 3750.60ex/s]

 52%|█████▏    | 60316/115850 [00:15<00:15, 3576.94ex/s]

 52%|█████▏    | 60713/115850 [00:15<00:14, 3686.64ex/s]

 53%|█████▎    | 61085/115850 [00:16<00:16, 3368.97ex/s]

 53%|█████▎    | 61553/115850 [00:16<00:14, 3722.97ex/s]

 54%|█████▎    | 61980/115850 [00:16<00:13, 3873.92ex/s]

 54%|█████▍    | 62375/115850 [00:16<00:14, 3730.55ex/s]

 54%|█████▍    | 62785/115850 [00:16<00:13, 3833.30ex/s]

 55%|█████▍    | 63173/115850 [00:16<00:14, 3717.26ex/s]

 55%|█████▍    | 63611/115850 [00:16<00:13, 3902.82ex/s]

 55%|█████▌    | 64005/115850 [00:16<00:13, 3873.04ex/s]

 56%|█████▌    | 64466/115850 [00:16<00:12, 4083.92ex/s]

 56%|█████▌    | 64917/115850 [00:17<00:12, 4206.34ex/s]

 56%|█████▋    | 65340/115850 [00:17<00:12, 3997.93ex/s]

 57%|█████▋    | 65754/115850 [00:17<00:12, 4037.16ex/s]

 57%|█████▋    | 66161/115850 [00:17<00:13, 3769.30ex/s]

 57%|█████▋    | 66543/115850 [00:17<00:14, 3496.52ex/s]

 58%|█████▊    | 66996/115850 [00:17<00:12, 3773.54ex/s]

 58%|█████▊    | 67388/115850 [00:17<00:12, 3812.32ex/s]

 59%|█████▊    | 67775/115850 [00:17<00:18, 2575.20ex/s]

 59%|█████▉    | 68121/115850 [00:18<00:17, 2763.13ex/s]

 59%|█████▉    | 68526/115850 [00:18<00:15, 3063.38ex/s]

 60%|█████▉    | 68966/115850 [00:18<00:13, 3397.23ex/s]

 60%|█████▉    | 69342/115850 [00:18<00:13, 3490.21ex/s]

 60%|██████    | 69758/115850 [00:18<00:12, 3673.23ex/s]

 61%|██████    | 70146/115850 [00:18<00:12, 3518.46ex/s]

 61%|██████    | 70570/115850 [00:18<00:12, 3714.95ex/s]

 61%|██████▏   | 71000/115850 [00:18<00:12, 3702.24ex/s]

 62%|██████▏   | 71430/115850 [00:18<00:11, 3866.51ex/s]

 62%|██████▏   | 71872/115850 [00:19<00:10, 4023.16ex/s]

 62%|██████▏   | 72281/115850 [00:19<00:12, 3590.19ex/s]

 63%|██████▎   | 72653/115850 [00:19<00:13, 3263.86ex/s]

 63%|██████▎   | 73000/115850 [00:19<00:13, 3243.97ex/s]

 63%|██████▎   | 73396/115850 [00:19<00:12, 3432.01ex/s]

 64%|██████▎   | 73831/115850 [00:19<00:11, 3683.07ex/s]

 64%|██████▍   | 74208/115850 [00:19<00:11, 3661.94ex/s]

 64%|██████▍   | 74663/115850 [00:19<00:10, 3911.34ex/s]

 65%|██████▍   | 75060/115850 [00:19<00:10, 3785.15ex/s]

 65%|██████▌   | 75488/115850 [00:20<00:10, 3923.74ex/s]

 66%|██████▌   | 75896/115850 [00:20<00:10, 3968.32ex/s]

 66%|██████▌   | 76296/115850 [00:20<00:10, 3746.31ex/s]

 66%|██████▌   | 76675/115850 [00:20<00:12, 3245.91ex/s]

 66%|██████▋   | 77031/115850 [00:20<00:11, 3325.13ex/s]

 67%|██████▋   | 77462/115850 [00:20<00:10, 3589.03ex/s]

 67%|██████▋   | 77871/115850 [00:20<00:10, 3724.19ex/s]

 68%|██████▊   | 78252/115850 [00:20<00:10, 3515.50ex/s]

 68%|██████▊   | 78650/115850 [00:20<00:10, 3641.84ex/s]

 68%|██████▊   | 79021/115850 [00:21<00:10, 3567.70ex/s]

 69%|██████▊   | 79444/115850 [00:21<00:09, 3729.15ex/s]

 69%|██████▉   | 79880/115850 [00:21<00:09, 3908.62ex/s]

 69%|██████▉   | 80275/115850 [00:21<00:09, 3697.70ex/s]

 70%|██████▉   | 80680/115850 [00:21<00:09, 3794.64ex/s]

 70%|██████▉   | 81064/115850 [00:21<00:09, 3623.52ex/s]

 70%|███████   | 81459/115850 [00:21<00:09, 3714.79ex/s]

 71%|███████   | 81840/115850 [00:21<00:09, 3737.78ex/s]

 71%|███████   | 82217/115850 [00:21<00:09, 3510.79ex/s]

 71%|███████▏  | 82646/115850 [00:22<00:08, 3726.37ex/s]

 72%|███████▏  | 83024/115850 [00:22<00:09, 3533.44ex/s]

 72%|███████▏  | 83423/115850 [00:22<00:08, 3660.20ex/s]

 72%|███████▏  | 83806/115850 [00:22<00:08, 3708.25ex/s]

 73%|███████▎  | 84181/115850 [00:22<00:09, 3266.76ex/s]

 73%|███████▎  | 84610/115850 [00:22<00:08, 3538.41ex/s]

 73%|███████▎  | 85000/115850 [00:22<00:08, 3509.66ex/s]

 74%|███████▎  | 85389/115850 [00:22<00:08, 3613.79ex/s]

 74%|███████▍  | 85804/115850 [00:22<00:07, 3764.53ex/s]

 74%|███████▍  | 86186/115850 [00:22<00:08, 3632.81ex/s]

 75%|███████▍  | 86571/115850 [00:23<00:07, 3692.78ex/s]

 75%|███████▌  | 86973/115850 [00:23<00:07, 3784.49ex/s]

 75%|███████▌  | 87355/115850 [00:23<00:07, 3689.34ex/s]

 76%|███████▌  | 87738/115850 [00:23<00:07, 3729.50ex/s]

 76%|███████▌  | 88113/115850 [00:23<00:07, 3489.24ex/s]

 76%|███████▋  | 88511/115850 [00:23<00:07, 3625.43ex/s]

 77%|███████▋  | 88910/115850 [00:23<00:07, 3727.77ex/s]

 77%|███████▋  | 89286/115850 [00:23<00:07, 3629.90ex/s]

 77%|███████▋  | 89710/115850 [00:23<00:06, 3804.37ex/s]

 78%|███████▊  | 90093/115850 [00:24<00:07, 3670.99ex/s]

 78%|███████▊  | 90501/115850 [00:24<00:06, 3783.78ex/s]

 78%|███████▊  | 90901/115850 [00:24<00:06, 3845.39ex/s]

 79%|███████▉  | 91288/115850 [00:24<00:06, 3604.28ex/s]

 79%|███████▉  | 91703/115850 [00:24<00:06, 3755.94ex/s]

 79%|███████▉  | 92083/115850 [00:24<00:06, 3684.18ex/s]

 80%|███████▉  | 92550/115850 [00:24<00:05, 3965.66ex/s]

 80%|████████  | 92992/115850 [00:24<00:05, 4096.53ex/s]

 81%|████████  | 93405/115850 [00:24<00:05, 3855.62ex/s]

 81%|████████  | 93815/115850 [00:25<00:05, 3922.58ex/s]

 81%|████████▏ | 94211/115850 [00:25<00:05, 3794.98ex/s]

 82%|████████▏ | 94606/115850 [00:25<00:05, 3837.28ex/s]

 82%|████████▏ | 95000/115850 [00:25<00:05, 3806.75ex/s]

 82%|████████▏ | 95420/115850 [00:25<00:05, 3918.89ex/s]

 83%|████████▎ | 95853/115850 [00:25<00:04, 4037.49ex/s]

 83%|████████▎ | 96259/115850 [00:25<00:05, 3742.96ex/s]

 83%|████████▎ | 96671/115850 [00:25<00:04, 3846.17ex/s]

 84%|████████▍ | 97060/115850 [00:25<00:05, 3679.32ex/s]

 84%|████████▍ | 97494/115850 [00:25<00:04, 3862.27ex/s]

 85%|████████▍ | 97941/115850 [00:26<00:04, 4034.49ex/s]

 85%|████████▍ | 98349/115850 [00:26<00:04, 3784.47ex/s]

 85%|████████▌ | 98733/115850 [00:26<00:04, 3762.77ex/s]

 86%|████████▌ | 99113/115850 [00:26<00:04, 3727.43ex/s]

 86%|████████▌ | 99524/115850 [00:26<00:04, 3836.50ex/s]

 86%|████████▋ | 99985/115850 [00:26<00:03, 4058.62ex/s]

 87%|████████▋ | 100394/115850 [00:26<00:03, 3977.30ex/s]

 87%|████████▋ | 100828/115850 [00:26<00:03, 4080.64ex/s]

 87%|████████▋ | 101238/115850 [00:26<00:03, 3718.17ex/s]

 88%|████████▊ | 101651/115850 [00:27<00:03, 3831.00ex/s]

 88%|████████▊ | 102040/115850 [00:27<00:03, 3648.38ex/s]

 88%|████████▊ | 102487/115850 [00:27<00:03, 3875.18ex/s]

 89%|████████▉ | 102909/115850 [00:27<00:03, 3970.25ex/s]

 89%|████████▉ | 103311/115850 [00:27<00:03, 3860.06ex/s]

 90%|████████▉ | 103758/115850 [00:27<00:02, 4032.05ex/s]

 90%|████████▉ | 104165/115850 [00:27<00:02, 3909.23ex/s]

 90%|█████████ | 104592/115850 [00:27<00:02, 4010.40ex/s]

 91%|█████████ | 105000/115850 [00:27<00:02, 3815.46ex/s]

 91%|█████████ | 105434/115850 [00:28<00:02, 3962.07ex/s]

 91%|█████████▏| 105861/115850 [00:28<00:02, 4048.95ex/s]

 92%|█████████▏| 106269/115850 [00:28<00:02, 3899.77ex/s]

 92%|█████████▏| 106755/115850 [00:28<00:02, 4173.31ex/s]

 93%|█████████▎| 107176/115850 [00:28<00:02, 3919.51ex/s]

 93%|█████████▎| 107625/115850 [00:28<00:02, 4077.24ex/s]

 93%|█████████▎| 108038/115850 [00:28<00:01, 3972.54ex/s]

 94%|█████████▎| 108464/115850 [00:28<00:01, 4051.59ex/s]

 94%|█████████▍| 108929/115850 [00:28<00:01, 4223.12ex/s]

 94%|█████████▍| 109355/115850 [00:28<00:01, 4040.02ex/s]

 95%|█████████▍| 109800/115850 [00:29<00:01, 4154.56ex/s]

 95%|█████████▌| 110219/115850 [00:29<00:01, 3879.04ex/s]

 95%|█████████▌| 110625/115850 [00:29<00:01, 3928.65ex/s]

 96%|█████████▌| 111022/115850 [00:29<00:01, 3743.16ex/s]

 96%|█████████▌| 111444/115850 [00:29<00:01, 3874.56ex/s]

 97%|█████████▋| 111873/115850 [00:29<00:00, 3991.71ex/s]

 97%|█████████▋| 112276/115850 [00:29<00:00, 3837.90ex/s]

 97%|█████████▋| 112681/115850 [00:29<00:00, 3894.33ex/s]

 98%|█████████▊| 113073/115850 [00:29<00:00, 3683.76ex/s]

 98%|█████████▊| 113457/115850 [00:30<00:00, 3726.58ex/s]

 98%|█████████▊| 113870/115850 [00:30<00:00, 3840.31ex/s]

 99%|█████████▊| 114257/115850 [00:30<00:00, 3701.74ex/s]

 99%|█████████▉| 114630/115850 [00:30<00:00, 3682.39ex/s]

 99%|█████████▉| 115000/115850 [00:30<00:00, 3219.72ex/s]

100%|█████████▉| 115410/115850 [00:30<00:00, 3451.54ex/s]

100%|█████████▉| 115819/115850 [00:30<00:00, 3624.84ex/s]

100%|██████████| 115850/115850 [00:30<00:00, 3769.64ex/s]




  0%|          | 0/28975 [00:00<?, ?ex/s]

  2%|▏         | 454/28975 [00:00<00:06, 4534.44ex/s]

  3%|▎         | 908/28975 [00:00<00:07, 3795.97ex/s]

  4%|▍         | 1296/28975 [00:00<00:07, 3580.41ex/s]

  6%|▌         | 1740/28975 [00:00<00:07, 3885.33ex/s]

  7%|▋         | 2135/28975 [00:00<00:07, 3758.87ex/s]

  9%|▊         | 2515/28975 [00:00<00:07, 3524.32ex/s]

 10%|▉         | 2872/28975 [00:00<00:07, 3464.78ex/s]

 11%|█         | 3242/28975 [00:00<00:07, 3533.09ex/s]

 13%|█▎        | 3709/28975 [00:00<00:06, 3868.70ex/s]

 14%|█▍        | 4118/28975 [00:01<00:06, 3932.93ex/s]

 16%|█▌        | 4602/28975 [00:01<00:05, 4200.33ex/s]

 17%|█▋        | 5025/28975 [00:01<00:05, 4049.21ex/s]

 19%|█▉        | 5459/28975 [00:01<00:05, 4131.94ex/s]

 20%|██        | 5875/28975 [00:01<00:05, 4122.98ex/s]

 22%|██▏       | 6289/28975 [00:01<00:05, 3842.45ex/s]

 23%|██▎       | 6730/28975 [00:01<00:05, 4001.63ex/s]

 25%|██▍       | 7135/28975 [00:01<00:05, 3910.00ex/s]

 26%|██▌       | 7591/28975 [00:01<00:05, 4095.24ex/s]

 28%|██▊       | 8004/28975 [00:02<00:05, 3833.86ex/s]

 29%|██▉       | 8393/28975 [00:02<00:05, 3824.71ex/s]

 30%|███       | 8809/28975 [00:02<00:05, 3919.32ex/s]

 32%|███▏      | 9204/28975 [00:02<00:05, 3709.04ex/s]

 33%|███▎      | 9645/28975 [00:02<00:04, 3902.82ex/s]

 35%|███▍      | 10040/28975 [00:02<00:04, 3814.02ex/s]

 36%|███▌      | 10452/28975 [00:02<00:04, 3899.77ex/s]

 38%|███▊      | 10871/28975 [00:02<00:04, 3983.22ex/s]

 39%|███▉      | 11272/28975 [00:02<00:04, 3775.38ex/s]

 40%|████      | 11679/28975 [00:03<00:04, 3856.53ex/s]

 42%|████▏     | 12068/28975 [00:03<00:04, 3768.79ex/s]

 43%|████▎     | 12509/28975 [00:03<00:04, 3950.58ex/s]

 45%|████▍     | 12919/28975 [00:03<00:04, 3989.61ex/s]

 46%|████▌     | 13320/28975 [00:03<00:04, 3493.80ex/s]

 47%|████▋     | 13753/28975 [00:03<00:04, 3717.55ex/s]

 49%|████▉     | 14136/28975 [00:03<00:04, 3644.35ex/s]

 50%|█████     | 14518/28975 [00:03<00:03, 3692.03ex/s]

 51%|█████▏    | 14901/28975 [00:03<00:03, 3729.27ex/s]

 53%|█████▎    | 15279/28975 [00:03<00:03, 3680.27ex/s]

 54%|█████▍    | 15705/28975 [00:04<00:03, 3847.27ex/s]

 56%|█████▌    | 16093/28975 [00:04<00:03, 3656.04ex/s]

 57%|█████▋    | 16463/28975 [00:04<00:05, 2464.42ex/s]

 58%|█████▊    | 16874/28975 [00:04<00:04, 2814.99ex/s]

 60%|█████▉    | 17285/28975 [00:04<00:03, 3117.65ex/s]

 61%|██████    | 17698/28975 [00:04<00:03, 3369.84ex/s]

 62%|██████▏   | 18071/28975 [00:04<00:03, 3327.83ex/s]

 64%|██████▍   | 18494/28975 [00:05<00:02, 3565.07ex/s]

 65%|██████▌   | 18930/28975 [00:05<00:02, 3781.64ex/s]

 67%|██████▋   | 19325/28975 [00:05<00:02, 3624.98ex/s]

 68%|██████▊   | 19752/28975 [00:05<00:02, 3802.32ex/s]

 70%|██████▉   | 20143/28975 [00:05<00:02, 3683.45ex/s]

 71%|███████   | 20533/28975 [00:05<00:02, 3743.54ex/s]

 72%|███████▏  | 20942/28975 [00:05<00:02, 3841.20ex/s]

 74%|███████▎  | 21331/28975 [00:05<00:02, 3349.10ex/s]

 75%|███████▍  | 21721/28975 [00:05<00:02, 3492.75ex/s]

 76%|███████▌  | 22082/28975 [00:05<00:01, 3462.33ex/s]

 78%|███████▊  | 22505/28975 [00:06<00:01, 3674.57ex/s]

 79%|███████▉  | 22897/28975 [00:06<00:01, 3744.19ex/s]

 80%|████████  | 23277/28975 [00:06<00:01, 3547.12ex/s]

 82%|████████▏ | 23655/28975 [00:06<00:01, 3611.53ex/s]

 83%|████████▎ | 24021/28975 [00:06<00:01, 3457.47ex/s]

 84%|████████▍ | 24434/28975 [00:06<00:01, 3645.29ex/s]

 86%|████████▌ | 24884/28975 [00:06<00:01, 3888.15ex/s]

 87%|████████▋ | 25277/28975 [00:06<00:00, 3825.96ex/s]

 89%|████████▊ | 25675/28975 [00:06<00:00, 3869.80ex/s]

 90%|████████▉ | 26065/28975 [00:07<00:00, 3651.79ex/s]

 92%|█████████▏| 26519/28975 [00:07<00:00, 3900.28ex/s]

 93%|█████████▎| 26986/28975 [00:07<00:00, 4119.26ex/s]

 95%|█████████▍| 27402/28975 [00:07<00:00, 3920.04ex/s]

 96%|█████████▌| 27846/28975 [00:07<00:00, 4065.10ex/s]

 98%|█████████▊| 28257/28975 [00:07<00:00, 3902.04ex/s]

 99%|█████████▉| 28665/28975 [00:07<00:00, 3952.07ex/s]

100%|██████████| 28975/28975 [00:07<00:00, 3728.91ex/s]




In [17]:
len(train_dataset), len(eval_dataset)

(115850, 28975)

In [18]:
k = random.randrange(len(train_dataset))
print(tokenizer.decode(train_dataset['input_ids'][k]), train_dataset['labels'][k])
k = random.randrange(len(eval_dataset))
print(tokenizer.decode(eval_dataset['input_ids'][k]), eval_dataset['labels'][k])

[CLS] Target # 내 피부 컨디션에 따라 수분진정, 수분집중 앰플과 크림을 골라 사용할 수 있는데 먼저 피부가 예민해져서 고민이신 분들... 수부지피부이신 분들은 # 수분진정 라인을! [SEP] Target # 패키지 / 구성품 # 가격 [SEP] 1


[CLS] Target # 진짜 어마어마한 구성이라 울 아들 # 크리스마스선물 로 정했어요 ♥️ [SEP] Target # 제품 전체 # 다양성 [SEP] 1


# Load Trainer

In [19]:
args = TrainingArguments(
    output_dir=run_name,
    run_name=run_name,
    report_to=report_to,

    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,

    optim=optim,

    learning_rate=learning_rate,
    weight_decay=weight_decay,
    adam_epsilon=adam_epsilon,

    lr_scheduler_type=lr_scheduler_type,
    warmup_ratio=warmup_ratio,

    save_total_limit=save_total_limit,

    load_best_model_at_end=load_best_model_at_end,
    metric_for_best_model=metric_for_best_model,
    
    save_strategy=save_strategy,
    evaluation_strategy=evaluation_strategy,

    logging_strategy=logging_strategy,
    logging_first_step=logging_first_step, 
    logging_steps=logging_steps,
    
    fp16=fp16,
)

In [20]:
# es = EarlyStoppingCallback(early_stopping_patience=early_stopping_patience)

In [21]:
trainer = Trainer(
    model,
    args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    data_collator=data_collator,
    # callbacks=[es],
)

# Run Trainer

In [22]:
trainer.train()
wandb.finish()

The following columns in the training set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, id, form. If pair, id, form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running training *****


  Num examples = 115850


  Num Epochs = 10


  Instantaneous batch size per device = 32


  Total train batch size (w. parallel, distributed & accumulation) = 128


  Gradient Accumulation steps = 1


  Total optimization steps = 9060


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"




Epoch,Training Loss,Validation Loss,Accuracy,F1 True,F1 False,F1 Macro,F1 Micro
1,0.091,0.076139,0.974116,0.625,0.986595,0.805798,0.974116
2,0.0621,0.074407,0.974323,0.651359,0.98667,0.819015,0.974323
3,0.0462,0.08848,0.975289,0.681211,0.987146,0.834179,0.975289
4,0.032,0.091542,0.974495,0.686466,0.986707,0.836586,0.974495
5,0.0323,0.102323,0.97484,0.666971,0.986926,0.826949,0.97484
6,0.022,0.120227,0.973046,0.669208,0.98595,0.827579,0.973046
7,0.0145,0.146881,0.973356,0.680464,0.986099,0.833281,0.973356
8,0.007,0.165418,0.97377,0.683069,0.986319,0.834694,0.97377
9,0.0045,0.194238,0.974323,0.690773,0.986605,0.838689,0.974323
10,0.0022,0.203447,0.973425,0.678363,0.98614,0.832251,0.973425


The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, id, form. If pair, id, form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 28975


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-906


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-906/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-906/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-906/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-906/special_tokens_map.json




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, id, form. If pair, id, form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 28975


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-1812


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-1812/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-1812/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-1812/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-1812/special_tokens_map.json




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, id, form. If pair, id, form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 28975


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-2718


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-2718/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-2718/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-2718/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-2718/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-906] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, id, form. If pair, id, form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 28975


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-3624


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-3624/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-3624/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-3624/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-3624/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-1812] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, id, form. If pair, id, form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 28975


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-4530


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-4530/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-4530/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-4530/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-4530/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-2718] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, id, form. If pair, id, form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 28975


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-5436


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-5436/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-5436/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-5436/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-5436/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-4530] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, id, form. If pair, id, form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 28975


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-6342


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-6342/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-6342/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-6342/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-6342/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-5436] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, id, form. If pair, id, form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 28975


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-7248


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-7248/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-7248/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-7248/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-7248/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-6342] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, id, form. If pair, id, form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 28975


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-8154


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-8154/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-8154/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-8154/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-8154/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-3624] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, id, form. If pair, id, form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 28975


  Batch size = 128


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-9060


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-9060/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-9060/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-9060/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-9060/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-7248] due to args.save_total_limit




Training completed. Do not forget to share your model on huggingface.co/models =)




Loading best model from monologg_koelectra_base_v3_discriminator_uncleaned_v8/checkpoint-8154 (score: 0.8386891406080794).


Saving model checkpoint to /tmp/tmpyktvp64a


Configuration saved in /tmp/tmpyktvp64a/config.json


Model weights saved in /tmp/tmpyktvp64a/pytorch_model.bin


tokenizer config file saved in /tmp/tmpyktvp64a/tokenizer_config.json


Special tokens file saved in /tmp/tmpyktvp64a/special_tokens_map.json


0,1
eval/accuracy,▄▅█▆▇▁▂▃▅▂
eval/f1_false,▅▅█▅▇▁▂▃▅▂
eval/f1_macro,▁▄▇█▆▆▇▇█▇
eval/f1_micro,▄▅█▆▇▁▂▃▅▂
eval/f1_true,▁▄▇█▅▆▇▇█▇
eval/loss,▁▁▂▂▃▃▅▆██
eval/runtime,▁▄▂▁▆▄▁▆█▃
eval/samples_per_second,█▅▇█▃▅█▃▁▆
eval/steps_per_second,█▅▇█▃▅█▃▁▆
train/epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
eval/accuracy,0.97343
eval/f1_false,0.98614
eval/f1_macro,0.83225
eval/f1_micro,0.97343
eval/f1_true,0.67836
eval/loss,0.20345
eval/runtime,90.2567
eval/samples_per_second,321.029
eval/steps_per_second,2.515
train/epoch,10.0


In [23]:
keep = [
    'added_tokens.json',
    'config.json',
    'pytorch_model.bin',
    'special_tokens_map.json',
    'tokenizer.json',
    'tokenizer_config.json',
    'vocab.txt'
]

ckpts = os.listdir(run_name)
for ckpt in ckpts:
    ckpt = os.path.join(run_name, ckpt)
    for item in os.listdir(ckpt):
        if item not in keep:
            os.remove(os.path.join(ckpt, item))

!mv wandb {run_name} {SAVE_PATH}/

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
