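# Description

Fine-tunes `snunlp/KR-ELECTRA-discriminator` as a three-class (positive / negative / neutral) aspect sentiment classifier. Each example is a sentence (`form`) paired with an aspect string (`pair`). Training uses the Hugging Face `Trainer`, logs to Weights & Biases, and selects the best checkpoint by macro-F1 on the dev set.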

# Modules and Global Variables

In [1]:
import os
import random
import re

from transformers import (
    AutoTokenizer, AutoModelForSequenceClassification,
    ElectraTokenizerFast, ElectraForSequenceClassification,
    DataCollatorWithPadding,
    TrainingArguments, Trainer,
    set_seed,
)

import torch
import wandb

import datasets
import evaluate

import numpy as np
import pandas as pd

import demoji

In [2]:
# Report the PyTorch build and how many CUDA devices are visible;
# NGPU is reused later when the learning rate is scaled.
NGPU = torch.cuda.device_count()
for _label, _value in (
    ('torch.__version__', torch.__version__),
    ('torch.cuda.is_available()', torch.cuda.is_available()),
    ('NGPU', NGPU),
):
    print(f'{_label}: {_value}')

torch.__version__: 1.7.1
torch.cuda.is_available(): True
NGPU: 4


In [3]:
### labels

# Label inventories for each task variant; `labels` selects the active one.
ce_labels = ['True', 'False']
pc_labels = ['positive', 'negative', 'neutral']
pc_binary_labels = ['True', 'False']

labels = pc_labels

# A class id is simply the position of the label inside `labels`.
id2label = dict(enumerate(labels))
label2id = {name: class_id for class_id, name in id2label.items()}
num_labels = len(labels)

print(label2id)
print(id2label)

{'positive': 0, 'negative': 1, 'neutral': 2}
{0: 'positive', 1: 'negative', 2: 'neutral'}


In [4]:
### paths and names

# Experiment identity: W&B project plus a free-form id for this run.
PROJECT_NAME = 'aspect_sentiment_classification_multi'
RUN_ID = 'uncleaned_v13_40_epochs'

# Dataset selection: version folder, task prefix, and optional augmented train split.
DATA_V = 'uncleaned_v13'
DATA_T = 'pc' # ce or pc
AUGMENTATION = False
AUG_NAME = 'balanced'

model_checkpoint = 'snunlp/KR-ELECTRA-discriminator'

notebook_name = 'trainer_for_asc_m_new.ipynb'

### fixed

# Checkpoint id made filesystem-friendly: '/' and '-' become '_', lower-cased.
model_name = re.sub(r'[/-]', r'_', model_checkpoint).lower()
run_name = f'{model_name}_{RUN_ID}'

ROOT_PATH = './'
SAVE_PATH = os.path.join(ROOT_PATH, 'training_results', run_name, 'asc')
NOTEBOOK_PATH = os.path.join(ROOT_PATH, notebook_name)

# The train file gets an '_<AUG_NAME>' suffix only when augmentation is enabled.
augornot = f'_{AUG_NAME}' if AUGMENTATION is True else ''
TRAIN_DATA_PATH = os.path.join(ROOT_PATH, 'dataset', DATA_V, f'{DATA_T}_train{augornot}.csv')
EVAL_DATA_PATH = os.path.join(ROOT_PATH, 'dataset', DATA_V, f'{DATA_T}_dev.csv')

# Pure-Python equivalent of `mkdir -p`: creates parents, no error if it already exists.
os.makedirs(SAVE_PATH, exist_ok=True)

In [5]:
# Report, one line per path, whether each location this run depends on is present.
# Purely informational: nothing is raised when a path is missing.
for _required_path in (SAVE_PATH, NOTEBOOK_PATH, TRAIN_DATA_PATH, EVAL_DATA_PATH):
    _status = 'exists' if os.path.exists(_required_path) else 'does not exist'
    print(f'{_required_path} {_status}.')

./training_results/snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/asc exists.
./trainer_for_asc_m_new.ipynb exists.
./dataset/uncleaned_v13/pc_train.csv exists.
./dataset/uncleaned_v13/pc_dev.csv exists.


In [6]:
### rest of training args

report_to="wandb"

fp16 = False

num_train_epochs = 40
batch_size = 32  # per device; passed as per_device_*_batch_size
gradient_accumulation_steps = 1

optim = 'adamw_hf' # 'adamw_torch'

# Linear scaling rule: 3e-6 per 8 examples, scaled by the total batch size
# (per-device batch size x number of devices). The device count is clamped to
# at least 1 so a CPU-only run (NGPU == 0) does not end up with a learning
# rate of exactly 0.
learning_rate = 3e-6 / 8 * batch_size * max(NGPU, 1) # 5e-5
weight_decay = 0.01 # 0
adam_epsilon = 1e-8

lr_scheduler_type = 'linear'
warmup_ratio = 0

# Only the most recent checkpoints are kept on disk.
save_total_limit = 2

# Best checkpoint is chosen by the 'f1_macro' value returned from compute_metrics.
load_best_model_at_end = True
metric_for_best_model ='f1_macro'

# Saving and evaluation both happen once per epoch.
save_strategy = "epoch"
evaluation_strategy = "epoch"

logging_strategy = "steps"
logging_first_step = True
logging_steps = 100

print(learning_rate)

4.8e-05


# WandB Configuration

In [7]:
# Configure Weights & Biases through environment variables, then authenticate.
# Each %env magic echoes the assignment it makes.

# Project the run is filed under (PROJECT_NAME from the paths-and-names cell).
%env WANDB_PROJECT={PROJECT_NAME}
# Path of this notebook, handed to W&B.
%env WANDB_NOTEBOOK_NAME={NOTEBOOK_PATH}
# NOTE(review): per the W&B Hugging Face integration docs this uploads model
# checkpoints as artifacts — confirm the storage cost is intended.
%env WANDB_LOG_MODEL=true
# NOTE(review): per the same docs, 'all' logs both gradients and parameters — confirm.
%env WANDB_WATCH=all
# Last expression of the cell, so its return value is displayed as the cell output.
wandb.login()

env: WANDB_PROJECT=aspect_sentiment_classification_multi
env: WANDB_NOTEBOOK_NAME=./trainer_for_asc_m_new.ipynb
env: WANDB_LOG_MODEL=true
env: WANDB_WATCH=all


[34m[1mwandb[0m: Currently logged in as: [33mdotsnangles[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

# Load Model, Tokenizer, and Collator

In [8]:
# Seed the Python, NumPy and PyTorch RNGs *before* the model is built.
# The classification head is newly initialised when the checkpoint is loaded
# (see the load-time warning), so without a seed its starting weights, and
# therefore the whole run, differ on every execution.
SEED = 42
set_seed(SEED)

# Tokenizer and encoder come from the same pretrained checkpoint; the label
# maps are stored in the model config so predictions decode to label names.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint, label2id=label2id, id2label=id2label, num_labels=num_labels
)
# Batches are padded by the collator, using the tokenizer's padding settings.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Some weights of the model checkpoint at snunlp/KR-ELECTRA-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at snunlp/KR-ELECTRA-discriminator and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
# Extend the pretrained vocabulary with symbols that occur in the raw corpus
# but are unknown to the checkpoint's tokenizer, then grow the embedding
# matrix to match.
train_path = os.path.join(ROOT_PATH, 'dataset', DATA_V, 'raw_train.csv')
dev_path = os.path.join(ROOT_PATH, 'dataset', DATA_V, 'raw_dev.csv')
test_path = os.path.join(ROOT_PATH, 'dataset', DATA_V, 'raw_test.csv')
train = pd.read_csv(train_path)
dev = pd.read_csv(dev_path)
test = pd.read_csv(test_path)

print(len(tokenizer))

# Unique sentences across all three raw splits, in first-occurrence order.
tokenizer_train_data = (
    pd.concat([train.sentence_form, dev.sentence_form, test.sentence_form])
    .drop_duplicates()
    .to_list()
)

# vocab_size=1 asks for the smallest vocabulary the trainer can produce from
# this corpus; only its entries missing from the pretrained vocabulary are kept.
new_tokenizer = tokenizer.train_new_from_iterator(tokenizer_train_data, vocab_size=1)
new_tokens = set(new_tokenizer.vocab.keys()) - set(tokenizer.vocab.keys())

# Add in sorted order: set iteration order for strings changes between
# interpreter runs, which would otherwise give the new tokens different ids
# (and embedding rows) on every execution.
# NOTE(review): the '##'-prefixed entries appear to be added as literal
# strings rather than WordPiece continuations — confirm whether they should
# be filtered out before add_tokens.
tokenizer.add_tokens(sorted(new_tokens))
print(len(new_tokenizer))
print(len(tokenizer))

# Resize the model's token embeddings to the extended vocabulary size.
model.resize_token_embeddings(len(tokenizer))

30000





3060
30076


Embedding(30076, 768)

In [10]:
# Number of tokens added to the vocabulary, followed by the tokens themselves.
print(len(new_tokens), new_tokens, sep='\n')

76
{'##ˇ', '##ᵕ', '##ᴠ', 'ᵕ', 'ɪ', '##ɪ', '🚗', 'ɢ', '##ɢ', '💡', '😯', '💬', '죱', '🕷', '##㉦', '💇', 'ɴ', 'ᴡ', '##🕸', '◍', '🍼', '➕', '쫜', 'ʀ', '##💄', '뜌', '##💇', '##ᴡ', '##💆', 'ꈍ', '㉦', '##ᴍ', '🤡', '❔', 'ʜ', '##ᴛ', '🥤', '읒', '##ᴘ', 'ᴜ', '##➕', '🐄', 'ғ', '##🥤', '🍷', '##읒', '##🚗', '쓩', 'ᴛ', 'ᴍ', 'ᴘ', '##죱', '##◍', 'ˇ', '##🕷', '##👠', '👠', '💆', '⏰', '😺', '##쫜', '🕸', '##ɴ', '##❔', '##ꈍ', 'ᴠ', '쨕', '##ʀ', '##뜌', '🕺', '챳', '💄', '##🤡', '##ᴜ', '뿤', '##쨕'}


In [11]:
# Sanity check: display the label maps and class count the loaded model carries.
model.config.label2id, model.config.id2label, model.num_labels

({'positive': 0, 'negative': 1, 'neutral': 2},
 {0: 'positive', 1: 'negative', 2: 'neutral'},
 3)

# Define Metric

In [12]:
# Load both `evaluate` metrics once; compute_metrics reuses them at every evaluation.
accuracy_metric, f1_metric = (
    evaluate.load(metric_id) for metric_id in ('accuracy', 'f1')
)

In [13]:
def compute_metrics(eval_pred):
    """Turn raw evaluation outputs into the metrics reported by the Trainer.

    Args:
        eval_pred: a ``(predictions, labels)`` pair. ``predictions`` holds one
            row of class scores per example; the arg-max over axis 1 is taken
            as the predicted class id. ``labels`` holds the reference class ids.

    Returns:
        dict with, in order: ``accuracy``; one ``f1_<label name>`` entry per
        class in ``id2label`` (``f1_positive``, ``f1_negative``, ``f1_neutral``
        for the three-class setup); ``f1_macro``; ``f1_micro``.
    """
    scores, references = eval_pred
    predictions = np.argmax(scores, axis=1)

    # Take the class list from the notebook's label map instead of hard-coding
    # [0, 1, 2], so the binary label sets produce correctly named entries.
    class_ids = sorted(id2label)

    metrics = {
        'accuracy': accuracy_metric.compute(
            references=references, predictions=predictions
        )['accuracy'],
    }

    per_class_f1 = f1_metric.compute(
        references=references, predictions=predictions, average=None, labels=class_ids
    )['f1']
    for class_id, class_f1 in zip(class_ids, per_class_f1):
        metrics[f'f1_{id2label[class_id]}'] = float(class_f1)

    # Pin the macro average to the same class list as the per-class scores so
    # it is always the mean of the values reported above, even when a class is
    # absent from both the references and the predictions.
    metrics['f1_macro'] = f1_metric.compute(
        references=references, predictions=predictions, average='macro', labels=class_ids
    )['f1']
    metrics['f1_micro'] = f1_metric.compute(
        references=references, predictions=predictions, average='micro'
    )['f1']

    return metrics

# Load Data

In [14]:
def preprocess_function(examples):
    """Tokenize a batch as sentence pairs: ``form`` first, ``pair`` second.

    Sequences are truncated by the tokenizer; no padding is applied here.
    """
    first_segment = examples["form"]
    second_segment = examples["pair"]
    return tokenizer(text=first_segment, text_pair=second_segment, truncation=True)

In [15]:
# For each split: read the CSV, wrap it as a `datasets.Dataset`, then tokenize
# it in batches. The train split is fully processed before the dev split.
train_dataset, eval_dataset = (
    datasets.Dataset.from_pandas(pd.read_csv(csv_path)).map(preprocess_function, batched=True)
    for csv_path in (TRAIN_DATA_PATH, EVAL_DATA_PATH)
)

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

In [16]:
# Number of examples in the tokenized train and dev splits.
len(train_dataset), len(eval_dataset)

(3196, 3002)

In [17]:
# Spot check: decode one randomly chosen example from each split and print it
# next to its label. Index by row rather than `split['input_ids'][k]`, which
# pulls the whole column just to read a single entry.
for _split in (train_dataset, eval_dataset):
    k = random.randrange(len(_split))
    _example = _split[k]
    print(tokenizer.decode(_example['input_ids']), _example['labels'])

[CLS] 3개월간 꾸준히 사용했더니 쳐지던 턱살에 탄력이 생기는게 얼굴이 전체적으로 작아진 느낌 [SEP] 본품 # 품질 [SEP] 0
[CLS] 수면 골든 타임인 밤 11시부터 2시 요거 바르고 푹 자면 더욱 효과적인 슬리핑케어 효과를 볼 수 있어요 [SEP] 본품 # 품질 [SEP] 0


# Load Trainer

In [18]:
# Gather every Trainer setting in one mapping, grouped by concern, and build
# the TrainingArguments from it. All values come from the configuration cells.
training_kwargs = dict(
    # run identity and reporting (checkpoints are written under `run_name`)
    output_dir=run_name,
    run_name=run_name,
    report_to=report_to,
    # schedule length and batch geometry
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    # optimizer
    optim=optim,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    adam_epsilon=adam_epsilon,
    # learning-rate schedule
    lr_scheduler_type=lr_scheduler_type,
    warmup_ratio=warmup_ratio,
    # checkpointing and best-model selection
    save_total_limit=save_total_limit,
    load_best_model_at_end=load_best_model_at_end,
    metric_for_best_model=metric_for_best_model,
    save_strategy=save_strategy,
    evaluation_strategy=evaluation_strategy,
    # logging cadence
    logging_strategy=logging_strategy,
    logging_first_step=logging_first_step,
    logging_steps=logging_steps,
    # precision
    fp16=fp16,
)
args = TrainingArguments(**training_kwargs)

In [19]:
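# Early stopping is disabled for this run. To enable it, import EarlyStoppingCallback
# from transformers, define early_stopping_patience (neither is done in the cells above),
# uncomment the line below, and pass callbacks=[es] to the Trainer.
# es = EarlyStoppingCallback(early_stopping_patience=early_stopping_patience)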

In [20]:
# Wire the model, arguments, data, and metric function into a Trainer.
# The tokenizer is passed as well; the training log shows its files being
# written with every checkpoint.
trainer = Trainer(
    model=model,
    args=args,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    # callbacks=[es],
)

# Run Trainer

In [21]:
# Always close the W&B run, even when training fails or is interrupted from
# the notebook; otherwise the run stays open. A non-zero exit code is passed
# on failure so the run is not recorded as a normal completion, and the
# original exception is re-raised unchanged.
try:
    trainer.train()
except BaseException:
    wandb.finish(exit_code=1)
    raise
else:
    wandb.finish()

The following columns in the training set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running training *****


  Num examples = 3196


  Num Epochs = 40


  Instantaneous batch size per device = 32


  Total train batch size (w. parallel, distributed & accumulation) = 128


  Gradient Accumulation steps = 1


  Total optimization steps = 1000


  Number of trainable parameters = 109142019


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


You're using a ElectraTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.




Epoch,Training Loss,Validation Loss,Accuracy,F1 Positive,F1 Negative,F1 Neutral,F1 Macro,F1 Micro
1,1.1538,0.136527,0.972685,0.986153,0.0,0.0,0.328718,0.972685
2,1.1538,0.116235,0.970686,0.987372,0.466667,0.0,0.48468,0.970686
3,1.1538,0.13382,0.960027,0.9828,0.367647,0.0,0.450149,0.960027
4,0.1842,0.101645,0.973018,0.98858,0.516129,0.133333,0.546014,0.973018
5,0.1842,0.108087,0.971352,0.987363,0.561404,0.197802,0.58219,0.971352
6,0.1842,0.105797,0.973351,0.988917,0.388889,0.291262,0.556356,0.973351
7,0.1842,0.126341,0.97435,0.988063,0.604651,0.309278,0.633997,0.97435
8,0.07,0.191781,0.943704,0.973396,0.530612,0.205882,0.569964,0.943704
9,0.07,0.138374,0.97435,0.989269,0.352941,0.30303,0.548414,0.97435
10,0.07,0.150776,0.968021,0.986995,0.193548,0.294574,0.491706,0.968021


The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-25


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-25/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-25/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-25/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-25/special_tokens_map.json




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-50


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-50/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-50/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-50/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-50/special_tokens_map.json




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-75


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-75/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-75/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-75/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-75/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-25] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-100


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-100/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-100/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-100/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-100/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-50] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-125


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-125/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-125/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-125/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-125/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-75] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-150


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-150/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-150/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-150/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-150/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-100] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-175


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-175/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-175/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-175/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-175/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-125] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-200


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-200/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-200/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-200/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-200/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-150] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-225


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-225/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-225/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-225/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-225/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-200] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-250


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-250/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-250/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-250/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-250/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-225] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-275


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-275/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-275/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-275/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-275/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-250] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-300


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-300/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-300/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-300/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-300/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-275] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-325


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-325/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-325/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-325/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-325/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-300] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-350


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-350/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-350/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-350/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-350/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-325] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-375


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-375/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-375/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-375/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-375/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-350] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-400


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-400/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-400/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-400/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-400/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-375] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-425


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-425/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-425/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-425/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-425/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-400] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-450


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-450/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-450/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-450/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-450/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-425] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-475


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-475/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-475/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-475/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-475/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-450] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-500


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-500/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-500/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-500/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-500/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-475] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-525


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-525/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-525/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-525/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-525/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-500] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-550


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-550/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-550/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-550/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-550/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-525] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-575


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-575/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-575/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-575/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-575/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-550] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-600


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-600/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-600/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-600/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-600/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-575] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-625


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-625/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-625/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-625/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-625/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-600] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-650


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-650/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-650/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-650/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-650/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-625] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-675


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-675/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-675/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-675/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-675/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-650] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-700


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-700/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-700/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-700/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-700/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-675] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-725


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-725/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-725/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-725/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-725/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-700] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-750


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-750/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-750/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-750/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-750/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-725] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-775


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-775/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-775/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-775/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-775/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-750] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-800


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-800/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-800/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-800/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-800/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-775] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-825


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-825/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-825/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-825/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-825/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-800] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-850


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-850/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-850/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-850/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-850/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-825] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-875


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-875/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-875/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-875/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-875/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-850] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-900


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-900/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-900/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-900/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-900/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-875] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-925


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-925/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-925/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-925/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-925/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-900] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-950


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-950/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-950/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-950/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-950/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-925] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-975


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-975/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-975/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-975/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-975/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-950] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, form, pair. If id, form, pair are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 3002


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-1000


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-1000/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-1000/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-1000/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-1000/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-975] due to args.save_total_limit




Training completed. Do not forget to share your model on huggingface.co/models =)




Loading best model from snunlp_kr_electra_discriminator_uncleaned_v13_40_epochs/checkpoint-175 (score: 0.6339974230347505).


Saving model checkpoint to /tmp/tmp3hgz14r0


Configuration saved in /tmp/tmp3hgz14r0/config.json


Model weights saved in /tmp/tmp3hgz14r0/pytorch_model.bin


tokenizer config file saved in /tmp/tmp3hgz14r0/tokenizer_config.json


Special tokens file saved in /tmp/tmp3hgz14r0/special_tokens_map.json


0,1
eval/accuracy,█▇▅█▇██▁█▇▆▄▆▄▆█▅▆▇▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
eval/f1_macro,▁▅▄▆▇▆█▇▆▅▅▇▆▆▆▇▆▆▆▆█▇▇▇▇▇▇▇████████████
eval/f1_micro,█▇▅█▇██▁█▇▆▄▆▄▆█▅▆▇▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
eval/f1_negative,▁▆▅▇▇▆█▇▅▃▄▇▅▅▅▆▅▆▆▅█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
eval/f1_neutral,▁▁▁▄▅▇█▆███▆▇▇▇█▇▇▇▇█████▇██████████████
eval/f1_positive,▇▇▅█▇█▇▁█▇▇▄▇▄▆█▅▆▇▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
eval/loss,▃▂▃▁▁▁▂▆▃▄▄▇▅█▆▆█▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇███████
eval/runtime,▁█▁█▁▂▅▁▁▂▁▂▂▂▇▂▂▂▂▂▂▂▂▂▂▁▂▂█▂▂▂▇▂▂▂▂▇▁▂
eval/samples_per_second,█▁█▁█▇▃██▇▇▇▇▇▂▆▇▇▇▇▇▇▇▇▆█▇▇▁▆▇▇▂▇▇▇▇▁█▇
eval/steps_per_second,█▁█▁█▇▃██▇▇▇▇▇▂▆▇▇▇▇▇▇▇▇▆█▇▇▁▆▇▇▂▇▇▇▇▁█▇

0,1
eval/accuracy,0.97002
eval/f1_macro,0.61961
eval/f1_micro,0.97002
eval/f1_negative,0.55814
eval/f1_neutral,0.31405
eval/f1_positive,0.98664
eval/loss,0.22474
eval/runtime,7.4848
eval/samples_per_second,401.078
eval/steps_per_second,3.206


In [22]:
keep = [
    'added_tokens.json',
    'config.json',
    'pytorch_model.bin',
    'special_tokens_map.json',
    'tokenizer.json',
    'tokenizer_config.json',
    'vocab.txt'
]

ckpts = os.listdir(run_name)
for ckpt in ckpts:
    ckpt = os.path.join(run_name, ckpt)
    for item in os.listdir(ckpt):
        if item not in keep:
            os.remove(os.path.join(ckpt, item))

!mv wandb {run_name} {SAVE_PATH}/

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
