# Description

# Modules and Global Variables

In [1]:
from transformers import (
    AutoConfig, AutoTokenizer, AutoModelForSequenceClassification, 
    DefaultDataCollator, DataCollatorWithPadding, 
    TrainingArguments, Trainer,
)

from transformers.optimization import (
    AdamW, get_linear_schedule_with_warmup,
    Adafactor, AdafactorSchedule,
)

import torch
import wandb

import datasets
import evaluate

from sklearn.metrics import accuracy_score, f1_score

import numpy as np
import pandas as pd

import os
import re
import random

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
print(f'torch.__version__: {torch.__version__}')
print(f'torch.cuda.is_available(): {torch.cuda.is_available()}')
NGPU = torch.cuda.device_count()
print(f'NGPU: {NGPU}')
# NGPU = torch.cuda.device_count()
# if NGPU > 1:
#     model = torch.nn.DataParallel(model, device_ids=list(range(NGPU)))

torch.__version__: 1.12.1
torch.cuda.is_available(): True
NGPU: 4


In [3]:
### labels

ce_labels = ['True', 'False']
pc_labels = ['positive', 'negative', 'neutral']
pc_binary_labels = ['True', 'False']

labels = pc_binary_labels

label2id = {k: i for i, k in enumerate(labels)}
id2label = {i: k for i, k in enumerate(labels)}
num_labels = len(labels)

print(label2id)
print(id2label)

{'True': 0, 'False': 1}
{0: 'True', 1: 'False'}


In [4]:
### paths and names

PROJECT_NAME = 'aspect_sentiment_classification_binary'
RUN_ID = 'cleaned_v2_total_v1'

DATA_V = 'cleaned_v2_total_v1'
DATA_T = 'pc_binary' # ce or pc
AUGMENTATION = False
AUG_NAME = 'aug'

model_checkpoint = 'snunlp/KR-ELECTRA-discriminator'

notebook_name = 'asc_binary_trainer.ipynb'

### fixed

model_name = re.sub(r'[/-]', r'_', model_checkpoint).lower()
run_name = f'{model_name}_{RUN_ID}'

ROOT_PATH = './'
SAVE_PATH = os.path.join(ROOT_PATH, 'training_results', run_name, 'asc')
NOTEBOOK_PATH = os.path.join('./', notebook_name)

augornot = f'_{AUG_NAME}' if AUGMENTATION is True else ''
TRAIN_DATA_PATH = os.path.join(ROOT_PATH, 'dataset', DATA_V, f'{DATA_T}_train{augornot}.csv')
EVAL_DATA_PATH = os.path.join(ROOT_PATH, 'dataset', DATA_V, f'{DATA_T}_dev.csv')

!mkdir -p {SAVE_PATH}

In [5]:
if os.path.exists(SAVE_PATH):
    print(f'{SAVE_PATH} exists.')
else:
    print(f'{SAVE_PATH} does not exist.')
if os.path.exists(NOTEBOOK_PATH):
    print(f'{NOTEBOOK_PATH} exists.')
else:
    print(f'{NOTEBOOK_PATH} does not exist.')
if os.path.exists(TRAIN_DATA_PATH):
    print(f'{TRAIN_DATA_PATH} exists.')
else:
    print(f'{TRAIN_DATA_PATH} does not exist.')
if os.path.exists(EVAL_DATA_PATH):
    print(f'{EVAL_DATA_PATH} exists.')
else:
    print(f'{EVAL_DATA_PATH} does not exist.')

./training_results/snunlp_kr_electra_discriminator_cleaned_v2_total_v1/asc exists.
./asc_binary_trainer.ipynb exists.
./dataset/cleaned_v2_total_v1/pc_binary_train.csv exists.
./dataset/cleaned_v2_total_v1/pc_binary_dev.csv exists.


In [6]:
### rest of training args

report_to="wandb"

fp16 = False

num_train_epochs = 30
batch_size = 32
gradient_accumulation_steps = 1

optim = 'adamw_torch' # 'adamw_hf'

learning_rate = 5e-5 # 3e-6
weight_decay = 0.01 # 0
adam_epsilon = 1e-8

lr_scheduler_type = 'cosine'
warmup_ratio = 0

save_total_limit = 2

load_best_model_at_end = True
metric_for_best_model ='f1_macro'

save_strategy = "epoch"
evaluation_strategy = "epoch"

logging_strategy = "steps"
logging_first_step = True 
logging_steps = 500

# WandB Configuration

In [7]:
%env WANDB_PROJECT={PROJECT_NAME}
%env WANDB_NOTEBOOK_NAME={NOTEBOOK_PATH}
%env WANDB_LOG_MODEL=true
%env WANDB_WATCH=all
wandb.login()

env: WANDB_PROJECT=aspect_sentiment_classification_binary
env: WANDB_NOTEBOOK_NAME=./asc_binary_trainer.ipynb
env: WANDB_LOG_MODEL=true
env: WANDB_WATCH=all


[34m[1mwandb[0m: Currently logged in as: [33mdotsnangles[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

# Load Model, Tokenizer, and Collator

In [8]:
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint, label2id=label2id, id2label=id2label, num_labels=num_labels
)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Some weights of the model checkpoint at snunlp/KR-ELECTRA-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at snunlp/KR-ELECTRA-discriminator and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
train_path = './dataset/cleaned_v2_total_v1/raw_train.csv'
dev_path = './dataset/cleaned_v2_total_v1/raw_dev.csv'
test_path = './dataset/cleaned_v2_total_v1/raw_test.csv'
train = pd.read_csv(train_path)
dev = pd.read_csv(dev_path)
test = pd.read_csv(test_path)

In [10]:
### new
entity_property_pair = [
    '본품#가격', '본품#다양성', '본품#디자인', '본품#인지도', '본품#일반', '본품#편의성', '본품#품질',
    '브랜드#가격', '브랜드#디자인', '브랜드#인지도', '브랜드#일반', '브랜드#품질',
    '제품 전체#가격', '제품 전체#다양성', '제품 전체#디자인', '제품 전체#인지도', '제품 전체#일반', '제품 전체#편의성', '제품 전체#품질',
    '패키지/구성품#가격', '패키지/구성품#다양성', '패키지/구성품#디자인', '패키지/구성품#일반', '패키지/구성품#편의성', '패키지/구성품#품질'
]


# more_tokens = ['&name&', '&affiliation&', '&social-security-num&', '&tel-num&', '&card-num&', '&bank-account&', '&num&', '&online-account&']

# emojis = pd.concat([train.sentence_form, dev.sentence_form, test.sentence_form], ignore_index=True, verify_integrity=True).to_frame()
# emojis = list(set(demoji.findall(' '.join(emojis.sentence_form.to_list())).keys()))

# tokensToAdd = more_tokens + emojis
ep_labels = pd.Series(entity_property_pair, name='sentence_form', copy=True)

In [11]:
data = pd.concat([train.sentence_form, dev.sentence_form, test.sentence_form], ignore_index=True, verify_integrity=True).to_frame()
print(len(data))
data = data.drop_duplicates()
print(len(data.drop_duplicates()))

7920
7894


In [12]:
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
print(len(tokenizer))

tokenizerTrainData = data.sentence_form.to_list()
newTokenizer = tokenizer.train_new_from_iterator(tokenizerTrainData, vocab_size=1)

new_tokens = set(list(newTokenizer.vocab.keys())) - set(tokenizer.vocab.keys())
tokenizer.add_tokens(list(new_tokens))
print(len(newTokenizer))
print(len(tokenizer))

model.resize_token_embeddings(len(tokenizer))

30000





2615
30013


Embedding(30013, 768)

In [13]:
new_tokens

{'##뜌', '##읒', '##죱', '##쨕', '##쫜', '뜌', '뿤', '쓩', '읒', '죱', '쨕', '쫜', '챳'}

In [14]:
model.config.label2id, model.config.id2label, model.num_labels

({'True': 0, 'False': 1}, {0: 'True', 1: 'False'}, 2)

In [15]:
entity_property_pair = [
    '본품#가격', '본품#다양성', '본품#디자인', '본품#인지도', '본품#일반', '본품#편의성', '본품#품질',
    '브랜드#가격', '브랜드#디자인', '브랜드#인지도', '브랜드#일반', '브랜드#품질',
    '제품 전체#가격', '제품 전체#다양성', '제품 전체#디자인', '제품 전체#인지도', '제품 전체#일반', '제품 전체#편의성', '제품 전체#품질',
    '패키지/구성품#가격', '패키지/구성품#다양성', '패키지/구성품#디자인', '패키지/구성품#일반', '패키지/구성품#편의성', '패키지/구성품#품질'
]

polarity_id_to_name = ['positive', 'negative', 'neutral']

tokenizer_tester = []
for pair in entity_property_pair:
    for polarity in polarity_id_to_name:
        tokenizer_tester.append('#'.join([pair, polarity]))

for e in tokenizer_tester:
    print(tokenizer.decode(tokenizer.encode(e)))

for e in tokenizer_tester:
    print(tokenizer.encode(e))

[CLS] 본품 # 가격 # positive [SEP]
[CLS] 본품 # 가격 # negative [SEP]
[CLS] 본품 # 가격 # neutral [SEP]
[CLS] 본품 # 다양성 # positive [SEP]
[CLS] 본품 # 다양성 # negative [SEP]
[CLS] 본품 # 다양성 # neutral [SEP]
[CLS] 본품 # 디자인 # positive [SEP]
[CLS] 본품 # 디자인 # negative [SEP]
[CLS] 본품 # 디자인 # neutral [SEP]
[CLS] 본품 # 인지도 # positive [SEP]
[CLS] 본품 # 인지도 # negative [SEP]
[CLS] 본품 # 인지도 # neutral [SEP]
[CLS] 본품 # 일반 # positive [SEP]
[CLS] 본품 # 일반 # negative [SEP]
[CLS] 본품 # 일반 # neutral [SEP]
[CLS] 본품 # 편의성 # positive [SEP]
[CLS] 본품 # 편의성 # negative [SEP]
[CLS] 본품 # 편의성 # neutral [SEP]
[CLS] 본품 # 품질 # positive [SEP]
[CLS] 본품 # 품질 # negative [SEP]
[CLS] 본품 # 품질 # neutral [SEP]
[CLS] 브랜드 # 가격 # positive [SEP]
[CLS] 브랜드 # 가격 # negative [SEP]
[CLS] 브랜드 # 가격 # neutral [SEP]
[CLS] 브랜드 # 디자인 # positive [SEP]
[CLS] 브랜드 # 디자인 # negative [SEP]
[CLS] 브랜드 # 디자인 # neutral [SEP]
[CLS] 브랜드 # 인지도 # positive [SEP]
[CLS] 브랜드 # 인지도 # negative [SEP]
[CLS] 브랜드 # 인지도 # neutral [SEP]
[CLS] 브랜드 # 일반 # positive [SEP]
[CLS] 브랜드 # 일반 # nega

# Define Metric

In [16]:
accuracy_metric = evaluate.load('accuracy')
f1_metric = evaluate.load('f1')

In [17]:
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    
    accuracy = accuracy_metric.compute(references=labels, predictions=predictions)['accuracy']
    f1_true, f1_false = tuple(f1_metric.compute(references=labels, predictions=predictions, average=None, labels=[0,1])['f1'])
    f1_macro = f1_metric.compute(references=labels, predictions=predictions, average='macro')['f1']
    f1_micro = f1_metric.compute(references=labels, predictions=predictions, average='micro')['f1']
    
    return {'accuracy': accuracy, 'f1_true': f1_true, 'f1_false': f1_false, 'f1_macro': f1_macro, 'f1_micro': f1_micro}

# Load Data

In [18]:
def preprocess_function(examples):
    return tokenizer(examples["sentence_form"], examples["entity_property"], truncation=True)

In [19]:
train_dataset = pd.read_csv(TRAIN_DATA_PATH)
eval_dataset = pd.read_csv(EVAL_DATA_PATH)
# train_dataset = pd.concat([train_dataset, eval_dataset])
train_dataset = datasets.Dataset.from_pandas(train_dataset).shuffle(seed=42)
eval_dataset = datasets.Dataset.from_pandas(eval_dataset).shuffle(seed=42)
train_dataset = train_dataset.map(preprocess_function, batched=False)
eval_dataset = eval_dataset.map(preprocess_function, batched=False)

  0%|          | 0/18594 [00:00<?, ?ex/s]

  2%|▏         | 396/18594 [00:00<00:04, 3959.25ex/s]

  4%|▍         | 803/18594 [00:00<00:04, 4020.09ex/s]

  6%|▋         | 1206/18594 [00:00<00:04, 3660.79ex/s]

  9%|▊         | 1621/18594 [00:00<00:04, 3842.30ex/s]

 11%|█         | 2009/18594 [00:00<00:04, 3739.21ex/s]

 13%|█▎        | 2432/18594 [00:00<00:04, 3896.53ex/s]

 15%|█▌        | 2845/18594 [00:00<00:03, 3967.86ex/s]

 17%|█▋        | 3244/18594 [00:00<00:03, 3868.09ex/s]

 20%|█▉        | 3658/18594 [00:00<00:03, 3948.56ex/s]

 22%|██▏       | 4055/18594 [00:01<00:03, 3832.15ex/s]

 24%|██▍       | 4477/18594 [00:01<00:03, 3942.89ex/s]

 26%|██▋       | 4886/18594 [00:01<00:03, 3985.42ex/s]

 28%|██▊       | 5286/18594 [00:01<00:03, 3850.32ex/s]

 31%|███       | 5693/18594 [00:01<00:03, 3912.74ex/s]

 33%|███▎      | 6086/18594 [00:01<00:03, 3824.25ex/s]

 35%|███▍      | 6502/18594 [00:01<00:03, 3920.26ex/s]

 37%|███▋      | 6898/18594 [00:01<00:02, 3930.25ex/s]

 39%|███▉      | 7292/18594 [00:01<00:02, 3815.67ex/s]

 41%|████▏     | 7686/18594 [00:01<00:02, 3849.55ex/s]

 43%|████▎     | 8072/18594 [00:02<00:02, 3790.05ex/s]

 46%|████▌     | 8486/18594 [00:02<00:02, 3889.45ex/s]

 48%|████▊     | 8897/18594 [00:02<00:02, 3954.00ex/s]

 50%|████▉     | 9294/18594 [00:02<00:02, 3837.79ex/s]

 52%|█████▏    | 9713/18594 [00:02<00:02, 3937.62ex/s]

 54%|█████▍    | 10108/18594 [00:02<00:02, 3840.25ex/s]

 57%|█████▋    | 10517/18594 [00:02<00:02, 3911.10ex/s]

 59%|█████▉    | 10930/18594 [00:02<00:01, 3974.93ex/s]

 61%|██████    | 11329/18594 [00:02<00:01, 3847.55ex/s]

 63%|██████▎   | 11744/18594 [00:03<00:01, 3933.79ex/s]

 65%|██████▌   | 12139/18594 [00:03<00:01, 3845.46ex/s]

 68%|██████▊   | 12552/18594 [00:03<00:01, 3925.09ex/s]

 70%|██████▉   | 12962/18594 [00:03<00:01, 3975.52ex/s]

 72%|███████▏  | 13361/18594 [00:03<00:01, 3841.72ex/s]

 74%|███████▍  | 13777/18594 [00:03<00:01, 3931.81ex/s]

 76%|███████▌  | 14172/18594 [00:03<00:01, 3835.59ex/s]

 78%|███████▊  | 14584/18594 [00:03<00:01, 3917.38ex/s]

 81%|████████  | 14994/18594 [00:03<00:00, 3968.99ex/s]

 83%|████████▎ | 15392/18594 [00:03<00:00, 3877.24ex/s]

 85%|████████▌ | 15807/18594 [00:04<00:00, 3954.95ex/s]

 87%|████████▋ | 16204/18594 [00:04<00:00, 3831.62ex/s]

 89%|████████▉ | 16614/18594 [00:04<00:00, 3908.49ex/s]

 91%|█████████▏| 17007/18594 [00:04<00:00, 3806.96ex/s]

 94%|█████████▎| 17426/18594 [00:04<00:00, 3917.01ex/s]

 96%|█████████▌| 17836/18594 [00:04<00:00, 3967.88ex/s]

 98%|█████████▊| 18234/18594 [00:04<00:00, 3855.68ex/s]

100%|██████████| 18594/18594 [00:04<00:00, 3890.08ex/s]




  0%|          | 0/9006 [00:00<?, ?ex/s]

  5%|▍         | 409/9006 [00:00<00:02, 4081.68ex/s]

  9%|▉         | 823/9006 [00:00<00:01, 4115.68ex/s]

 14%|█▎        | 1235/9006 [00:00<00:02, 3869.21ex/s]

 18%|█▊        | 1652/9006 [00:00<00:01, 3982.08ex/s]

 23%|██▎       | 2052/9006 [00:00<00:01, 3837.66ex/s]

 27%|██▋       | 2466/9006 [00:00<00:01, 3935.83ex/s]

 32%|███▏      | 2875/9006 [00:00<00:01, 3982.55ex/s]

 36%|███▋      | 3275/9006 [00:00<00:01, 3880.87ex/s]

 41%|████      | 3687/9006 [00:00<00:01, 3953.11ex/s]

 45%|████▌     | 4084/9006 [00:01<00:01, 3816.51ex/s]

 50%|████▉     | 4486/9006 [00:01<00:01, 3874.01ex/s]

 54%|█████▍    | 4891/9006 [00:01<00:01, 3924.96ex/s]

 59%|█████▊    | 5285/9006 [00:01<00:00, 3809.18ex/s]

 63%|██████▎   | 5688/9006 [00:01<00:00, 3872.30ex/s]

 67%|██████▋   | 6077/9006 [00:01<00:00, 3767.44ex/s]

 72%|███████▏  | 6489/9006 [00:01<00:00, 3867.62ex/s]

 77%|███████▋  | 6897/9006 [00:01<00:00, 3927.30ex/s]

 81%|████████  | 7291/9006 [00:01<00:00, 3815.60ex/s]

 85%|████████▌ | 7691/9006 [00:01<00:00, 3868.21ex/s]

 90%|████████▉ | 8079/9006 [00:02<00:00, 3770.13ex/s]

 94%|█████████▍| 8479/9006 [00:02<00:00, 3836.23ex/s]

 99%|█████████▊| 8879/9006 [00:02<00:00, 3882.36ex/s]

100%|██████████| 9006/9006 [00:02<00:00, 3860.86ex/s]




In [20]:
len(train_dataset), len(eval_dataset)

(18594, 9006)

In [21]:
k = random.randrange(len(train_dataset))
tokenizer.decode(train_dataset['input_ids'][k]), train_dataset['labels'][k]

('[CLS] 모모쏘우 핸드크림 이면 걱정 끝 [SEP] 제품 전체 # 일반 # neutral [SEP]', 1)

# Load Trainer

In [22]:
args = TrainingArguments(
    output_dir=run_name,
    run_name=run_name,
    report_to=report_to,

    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,

    optim=optim,

    learning_rate=learning_rate,
    weight_decay=weight_decay,
    adam_epsilon=adam_epsilon,

    lr_scheduler_type=lr_scheduler_type,
    warmup_ratio=warmup_ratio,

    save_total_limit=save_total_limit,

    load_best_model_at_end=load_best_model_at_end,
    metric_for_best_model=metric_for_best_model,
    
    save_strategy=save_strategy,
    evaluation_strategy=evaluation_strategy,

    logging_strategy=logging_strategy,
    logging_first_step=logging_first_step, 
    logging_steps=logging_steps,
    
    fp16=fp16,
)

In [23]:
# es = EarlyStoppingCallback(early_stopping_patience=early_stopping_patience)

In [24]:
trainer = Trainer(
    model,
    args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    data_collator=data_collator,
    # callbacks=[es],
)

# Run Trainer

In [25]:
trainer.train()
wandb.finish()

The following columns in the training set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, entity_property, sentence_form. If id, entity_property, sentence_form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running training *****


  Num examples = 18594


  Num Epochs = 30


  Instantaneous batch size per device = 32


  Total train batch size (w. parallel, distributed & accumulation) = 128


  Gradient Accumulation steps = 1


  Total optimization steps = 4380


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


You're using a ElectraTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.




Epoch,Training Loss,Validation Loss,Accuracy,F1 True,F1 False,F1 Macro,F1 Micro
1,0.7048,0.05519,0.985232,0.977673,0.988967,0.98332,0.985232
2,0.7048,0.033876,0.987897,0.981597,0.990984,0.98629,0.987897
3,0.7048,0.03052,0.98934,0.984043,0.991997,0.98802,0.98934
4,0.0743,0.022421,0.992005,0.988076,0.993987,0.991031,0.992005
5,0.0743,0.01389,0.996114,0.994173,0.997085,0.995629,0.996114
6,0.0743,0.007683,0.998001,0.997,0.998501,0.997751,0.998001
7,0.0196,0.008365,0.997668,0.996498,0.998252,0.997375,0.997668
8,0.0196,0.005309,0.998223,0.997334,0.998668,0.998001,0.998223
9,0.0196,0.007417,0.998001,0.997,0.998501,0.997751,0.998001
10,0.0196,0.008664,0.996669,0.995013,0.997499,0.996256,0.996669


The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, entity_property, sentence_form. If id, entity_property, sentence_form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 9006


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-146


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-146/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-146/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-146/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-146/special_tokens_map.json




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, entity_property, sentence_form. If id, entity_property, sentence_form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 9006


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-292


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-292/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-292/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-292/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-292/special_tokens_map.json




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, entity_property, sentence_form. If id, entity_property, sentence_form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 9006


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-438


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-438/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-438/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-438/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-438/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-146] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, entity_property, sentence_form. If id, entity_property, sentence_form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 9006


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-584


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-584/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-584/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-584/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-584/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-292] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, entity_property, sentence_form. If id, entity_property, sentence_form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 9006


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-730


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-730/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-730/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-730/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-730/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-438] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, entity_property, sentence_form. If id, entity_property, sentence_form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 9006


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-876


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-876/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-876/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-876/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-876/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-584] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, entity_property, sentence_form. If id, entity_property, sentence_form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 9006


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1022


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1022/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1022/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1022/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1022/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-730] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, entity_property, sentence_form. If id, entity_property, sentence_form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 9006


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1168


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1168/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1168/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1168/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1168/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-876] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, entity_property, sentence_form. If id, entity_property, sentence_form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 9006


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1314


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1314/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1314/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1314/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1314/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1022] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, entity_property, sentence_form. If id, entity_property, sentence_form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 9006


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1460


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1460/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1460/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1460/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1460/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1314] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, entity_property, sentence_form. If id, entity_property, sentence_form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 9006


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1606


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1606/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1606/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1606/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1606/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1168] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, entity_property, sentence_form. If id, entity_property, sentence_form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 9006


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1752


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1752/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1752/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1752/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1752/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1460] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, entity_property, sentence_form. If id, entity_property, sentence_form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 9006


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1898


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1898/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1898/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1898/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1898/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1606] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, entity_property, sentence_form. If id, entity_property, sentence_form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 9006


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2044


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2044/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2044/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2044/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2044/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1752] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, entity_property, sentence_form. If id, entity_property, sentence_form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 9006


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2190


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2190/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2190/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2190/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2190/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-1898] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, entity_property, sentence_form. If id, entity_property, sentence_form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 9006


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2336


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2336/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2336/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2336/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2336/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2044] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, entity_property, sentence_form. If id, entity_property, sentence_form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 9006


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2482


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2482/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2482/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2482/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2482/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2190] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, entity_property, sentence_form. If id, entity_property, sentence_form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 9006


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2628


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2628/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2628/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2628/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2628/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2336] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, entity_property, sentence_form. If id, entity_property, sentence_form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 9006


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2774


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2774/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2774/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2774/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2774/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2628] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, entity_property, sentence_form. If id, entity_property, sentence_form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 9006


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2920


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2920/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2920/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2920/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2920/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2774] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, entity_property, sentence_form. If id, entity_property, sentence_form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 9006


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3066


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3066/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3066/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3066/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3066/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2920] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, entity_property, sentence_form. If id, entity_property, sentence_form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 9006


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3212


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3212/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3212/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3212/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3212/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3066] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, entity_property, sentence_form. If id, entity_property, sentence_form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 9006


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3358


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3358/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3358/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3358/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3358/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-2482] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, entity_property, sentence_form. If id, entity_property, sentence_form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 9006


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3504


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3504/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3504/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3504/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3504/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3212] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, entity_property, sentence_form. If id, entity_property, sentence_form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 9006


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3650


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3650/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3650/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3650/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3650/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3358] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, entity_property, sentence_form. If id, entity_property, sentence_form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 9006


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3796


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3796/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3796/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3796/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3796/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3504] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, entity_property, sentence_form. If id, entity_property, sentence_form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 9006


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3942


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3942/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3942/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3942/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3942/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3650] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, entity_property, sentence_form. If id, entity_property, sentence_form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 9006


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-4088


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-4088/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-4088/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-4088/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-4088/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3796] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, entity_property, sentence_form. If id, entity_property, sentence_form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 9006


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-4234


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-4234/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-4234/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-4234/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-4234/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-4088] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, entity_property, sentence_form. If id, entity_property, sentence_form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 9006


  Batch size = 128


Saving model checkpoint to snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-4380


Configuration saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-4380/config.json


Model weights saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-4380/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-4380/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-4380/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-4234] due to args.save_total_limit




Training completed. Do not forget to share your model on huggingface.co/models =)




Loading best model from snunlp_kr_electra_discriminator_cleaned_v2_total_v1/checkpoint-3942 (score: 0.9992504996668887).


Saving model checkpoint to /tmp/tmpdiewrgbt


Configuration saved in /tmp/tmpdiewrgbt/config.json


Model weights saved in /tmp/tmpdiewrgbt/pytorch_model.bin


tokenizer config file saved in /tmp/tmpdiewrgbt/tokenizer_config.json


Special tokens file saved in /tmp/tmpdiewrgbt/special_tokens_map.json


0,1
eval/accuracy,▁▂▃▄▆▇▇▇▇▇█▇██████████████████
eval/f1_false,▁▂▃▄▆▇▇▇▇▇█▇██████████████████
eval/f1_macro,▁▂▃▄▆▇▇▇▇▇█▇██████████████████
eval/f1_micro,▁▂▃▄▆▇▇▇▇▇█▇██████████████████
eval/f1_true,▁▂▃▄▆▇▇▇▇▇█▇██████████████████
eval/loss,█▅▅▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/runtime,▁▂▆▃▃▁▇▂▂▅▆▂▃▃▁▂▅▂▂▂▁▁▁▂▂▂▁▃█▃
eval/samples_per_second,█▇▃▆▆█▂▇▇▄▃▇▆▆█▇▄▇▇▇███▇▇▇█▆▁▆
eval/steps_per_second,█▇▃▆▆█▂▇▇▄▃▇▆▆█▇▄▇▇▇███▇▇▇█▆▁▆
train/epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████

0,1
eval/accuracy,0.99922
eval/f1_false,0.99942
eval/f1_macro,0.99913
eval/f1_micro,0.99922
eval/f1_true,0.99883
eval/loss,0.00146
eval/runtime,29.1216
eval/samples_per_second,309.255
eval/steps_per_second,2.438
train/epoch,30.0


In [26]:
keep = [
    'added_tokens.json',
    'config.json',
    'pytorch_model.bin',
    'special_tokens_map.json',
    'tokenizer.json',
    'tokenizer_config.json',
    'vocab.txt'
]

ckpts = os.listdir(run_name)
for ckpt in ckpts:
    ckpt = os.path.join(run_name, ckpt)
    for item in os.listdir(ckpt):
        if item not in keep:
            os.remove(os.path.join(ckpt, item))

!mv wandb {run_name} {SAVE_PATH}/

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
