# Description


# Modules and Global Variables

In [1]:
from transformers import (
    AutoConfig, AutoTokenizer, AutoModelForSequenceClassification, 
    DefaultDataCollator, DataCollatorWithPadding, 
    TrainingArguments, Trainer,
)

from transformers.optimization import (
    AdamW, get_linear_schedule_with_warmup,
    Adafactor, AdafactorSchedule,
)

import torch
import wandb

import datasets
import evaluate

from sklearn.metrics import accuracy_score, f1_score

import numpy as np
import pandas as pd

import os
import re
import random

import demoji

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Report the PyTorch build, whether CUDA is usable, and how many GPUs are visible.
NGPU = torch.cuda.device_count()
print(
    f'torch.__version__: {torch.__version__}',
    f'torch.cuda.is_available(): {torch.cuda.is_available()}',
    f'NGPU: {NGPU}',
    sep='\n',
)
# Manual multi-GPU wrapping, intentionally left disabled:
# if NGPU > 1:
#     model = torch.nn.DataParallel(model, device_ids=list(range(NGPU)))

torch.__version__: 1.12.1
torch.cuda.is_available(): True
NGPU: 4


In [3]:
### labels

# Candidate label vocabularies, one per DATA_T variant (ce / pc / pc_binary).
ce_labels = ['True', 'False']
pc_labels = ['positive', 'negative', 'neutral']
pc_binary_labels = ['True', 'False']

# Label set used by this run.
labels = ce_labels

# Class ids follow list position: the first label gets id 0.
id2label = dict(enumerate(labels))
label2id = {name: idx for idx, name in id2label.items()}
num_labels = len(labels)

print(label2id, id2label, sep='\n')

{'True': 0, 'False': 1}
{0: 'True', 1: 'False'}


In [4]:
### paths and names

# Run identity: W&B project name and the suffix that distinguishes this run.
PROJECT_NAME = 'aspect_category_detection'
RUN_ID = 'uncleaned_v2'

# Dataset selection: version directory, task variant, and optional augmented train file.
DATA_V = 'uncleaned_v2'
DATA_T = 'ce' # ce or pc or pc_binary
AUGMENTATION = False
AUG_NAME = 'balanced'

model_checkpoint = 'snunlp/KR-ELECTRA-discriminator'

notebook_name = 'acd_binary_trainer.ipynb'

### fixed (derived from the settings above; normally not edited)

# Checkpoint id made filesystem-friendly: '/' and '-' become '_', then lower-cased.
model_name = re.sub(r'[/-]', r'_', model_checkpoint).lower()
run_name = f'{model_name}_{RUN_ID}'

ROOT_PATH = './'
SAVE_PATH = os.path.join(ROOT_PATH, 'training_results', run_name, 'acd')
NOTEBOOK_PATH = os.path.join('./', notebook_name)

# The train file name gets an '_<AUG_NAME>' suffix only when augmentation is switched on.
augornot = f'_{AUG_NAME}' if AUGMENTATION is True else ''
TRAIN_DATA_PATH = os.path.join(ROOT_PATH, 'dataset', DATA_V, f'{DATA_T}_train{augornot}.csv')
EVAL_DATA_PATH = os.path.join(ROOT_PATH, 'dataset', DATA_V, f'{DATA_T}_dev.csv')

# Create the output directory (and parents) in pure Python instead of shelling out to
# `mkdir -p`: portable, no shell interpolation, and a failure raises instead of being ignored.
os.makedirs(SAVE_PATH, exist_ok=True)

In [5]:
# Report, in a fixed order, which of the configured paths are present on disk.
for checked_path in (SAVE_PATH, NOTEBOOK_PATH, TRAIN_DATA_PATH, EVAL_DATA_PATH):
    if not os.path.exists(checked_path):
        print(f'{checked_path} does not exist.')
        continue
    print(f'{checked_path} exists.')

./training_results/snunlp_kr_electra_discriminator_uncleaned_v2/acd exists.
./acd_binary_trainer.ipynb exists.
./dataset/uncleaned_v2/ce_train.csv exists.
./dataset/uncleaned_v2/ce_dev.csv exists.


In [6]:
### rest of training args

# Experiment tracking backend and mixed-precision switch.
report_to = 'wandb'
fp16 = False

# Training length and batching.
num_train_epochs = 20
batch_size = 25
gradient_accumulation_steps = 1

# Optimizer settings; the trailing notes record alternative values.
optim = 'adamw_torch'  # alternative: 'adamw_hf'
learning_rate = 3e-6  # alternative: 5e-5
weight_decay = 0.01  # alternative: 0
adam_epsilon = 1e-8

# Learning-rate schedule.
lr_scheduler_type = 'cosine'
warmup_ratio = 0.1

# Checkpointing and evaluation both happen once per epoch; the best checkpoint is
# chosen by the 'f1_macro' entry of the metrics dict.
save_strategy = 'epoch'
evaluation_strategy = 'epoch'
save_total_limit = 2
load_best_model_at_end = True
metric_for_best_model = 'f1_macro'

# Step-based logging.
logging_strategy = 'steps'
logging_first_step = True
logging_steps = 500

# WandB Configuration

In [7]:
# Configure Weights & Biases through environment variables, then authenticate.
# The project and notebook path come from the names/paths cell above.
# NOTE(review): WANDB_LOG_MODEL=true and WANDB_WATCH=all presumably enable checkpoint
# upload and gradient/parameter logging in the Trainer's W&B callback - confirm the
# storage and runtime cost is wanted for a 20-epoch run.
%env WANDB_PROJECT={PROJECT_NAME}
%env WANDB_NOTEBOOK_NAME={NOTEBOOK_PATH}
%env WANDB_LOG_MODEL=true
%env WANDB_WATCH=all
wandb.login()

env: WANDB_PROJECT=aspect_category_detection
env: WANDB_NOTEBOOK_NAME=./acd_binary_trainer.ipynb
env: WANDB_LOG_MODEL=true
env: WANDB_WATCH=all


[34m[1mwandb[0m: Currently logged in as: [33mdotsnangles[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

# Load Model, Tokenizer, and Collator

In [8]:
# Tokenizer for the chosen checkpoint, plus a collator that pads each batch with it.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Sequence classifier whose output head is sized and named from the label maps.
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint,
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id,
)

Some weights of the model checkpoint at snunlp/KR-ELECTRA-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at snunlp/KR-ELECTRA-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
# Raw splits: only their `sentence_form` text is used here, to discover characters and
# emojis that the pretrained vocabulary does not cover.
train_path = os.path.join(ROOT_PATH, 'dataset', DATA_V, 'raw_train.csv')
dev_path = os.path.join(ROOT_PATH, 'dataset', DATA_V, 'raw_dev.csv')
test_path = os.path.join(ROOT_PATH, 'dataset', DATA_V, 'raw_test.csv')
train = pd.read_csv(train_path)
dev = pd.read_csv(dev_path)
test = pd.read_csv(test_path)

### new
entity_property_pair = [
    '본품#가격', '본품#다양성', '본품#디자인', '본품#인지도', '본품#일반', '본품#편의성', '본품#품질',
    '브랜드#가격', '브랜드#디자인', '브랜드#인지도', '브랜드#일반', '브랜드#품질',
    '제품 전체#가격', '제품 전체#다양성', '제품 전체#디자인', '제품 전체#인지도', '제품 전체#일반', '제품 전체#편의성', '제품 전체#품질',
    '패키지/구성품#가격', '패키지/구성품#다양성', '패키지/구성품#디자인', '패키지/구성품#일반', '패키지/구성품#편의성', '패키지/구성품#품질'
]
# Anonymization placeholders that should each map to a single token.
special_tokens = ['&name&', '&affiliation&', '&social-security-num&', '&tel-num&', '&card-num&', '&bank-account&', '&num&', '&online-account&']

# Every sentence from the three splits, concatenated once and reused below.
all_sentences = pd.concat(
    [train.sentence_form, dev.sentence_form, test.sentence_form],
    ignore_index=True, verify_integrity=True,
)

# Distinct emojis found anywhere in the corpus, sorted so the list is reproducible.
emojis = sorted(demoji.findall(' '.join(all_sentences.to_list())))
ep_labels = pd.Series(entity_property_pair, name='sentence_form', copy=True)

tokens2add = special_tokens + emojis

# Start from a fresh copy of the pretrained tokenizer so re-running this cell is idempotent.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
print(len(tokenizer))
tokenizer_train_data = all_sentences.drop_duplicates().to_list()
# vocab_size=1 is far below what can be honoured, so the retrained tokenizer presumably
# keeps only its base alphabet (the recorded run printed 3060 entries) - used here as a
# cheap way to list the characters present in the corpus.
new_tokenizer = tokenizer.train_new_from_iterator(tokenizer_train_data, vocab_size=1)
new_tokens = set(list(new_tokenizer.vocab.keys()) + tokens2add) - set(tokenizer.vocab.keys())
# NOTE(review): '##'-prefixed entries are registered as literal added tokens, not as
# word-piece continuations - confirm they are actually wanted.
# Added tokens receive ids in the order given; iterating the set directly would make
# those ids depend on per-process string hashing, so sort for a reproducible vocabulary.
tokenizer.add_tokens(sorted(new_tokens))
print(len(new_tokenizer))
print(len(tokenizer))
# Point the collator at the tokenizer that is actually in use from here on.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
model.resize_token_embeddings(len(tokenizer))

30000





3060
30117


Embedding(30117, 768)

In [10]:
# Show how many tokens were added to the vocabulary, followed by the tokens themselves.
print(len(new_tokens), new_tokens, sep='\n')

117
{'ʜ', '💆\u200d♀️', '&social-security-num&', '##💄', '##◍', '##ꈍ', '✌️', '##🕸', '뿤', '👨\u200d👧', '☺️', '##쫜', '&affiliation&', '🙋🏻\u200d♀️', '쫜', '♥️', 'ˇ', 'ᴡ', '🙌🏻', '💬', 'ᴜ', '🕸', '쓩', '🚗', 'ᴍ', '&card-num&', '👠', '➕', 'ʀ', 'ɴ', '뜌', '☝🏻', '〰️', '&online-account&', '🕷', '##ʀ', '🧚\u200d♀️', '##❔', 'ɢ', '👉🏻', '##👠', '🏃\u200d♀️', 'ɪ', '##ᴛ', '##🕷', '##ᴠ', '##뜌', '##➕', '👩\u200d👦', '##💇', '☝️', '👌🏻', '##ᴡ', '💄', '㉦', 'ᴛ', '❣️', '##ɪ', '◍', '챳', '🤡', 'ᴠ', '✔️', '💇🏼\u200d♀️', '쨕', '##ᴍ', '💆🏻\u200d♀️', '👦🏼', 'ᵕ', '❔', '##🥤', '🐄', '🥤', '🍼', '❤️', '##㉦', '&tel-num&', '##ˇ', '🙆\u200d♂️', '⁉️', '✌🏻', '&name&', '##ɴ', 'ꈍ', '&num&', '##ɢ', '🙆🏻', '&bank-account&', '💡', '🙏🏻', '‼️', '👏🏻', '🍷', '👋🏻', '##ᵕ', '🙋\u200d♀️', '⏰', '##죱', '😺', '😯', '🤘🏻', '읒', '🕺', '💪🏻', '💇', '##읒', '##🤡', '##쨕', 'ᴘ', '🙋🏻', '죱', '💆', '##ᴜ', '##💆', '##🚗', 'ғ', '##ᴘ'}


In [11]:
# Sanity check: display the label maps and class count stored on the loaded model.
model.config.label2id, model.config.id2label, model.num_labels

({'True': 0, 'False': 1}, {0: 'True', 1: 'False'}, 2)

In [12]:
# entity_property_pair = [
#     '본품#가격', '본품#다양성', '본품#디자인', '본품#인지도', '본품#일반', '본품#편의성', '본품#품질',
#     '브랜드#가격', '브랜드#디자인', '브랜드#인지도', '브랜드#일반', '브랜드#품질',
#     '제품 전체#가격', '제품 전체#다양성', '제품 전체#디자인', '제품 전체#인지도', '제품 전체#일반', '제품 전체#편의성', '제품 전체#품질',
#     '패키지/구성품#가격', '패키지/구성품#다양성', '패키지/구성품#디자인', '패키지/구성품#일반', '패키지/구성품#편의성', '패키지/구성품#품질'
# ]
# polarity_id_to_name = ['positive', 'negative', 'neutral']
# tokenizer_tester = []
# for pair in entity_property_pair:
#     for polarity in polarity_id_to_name:
#         tokenizer_tester.append('#'.join([pair, polarity]))
# for e in tokenizer_tester:
#     print(tokenizer.decode(tokenizer.encode(e)))
# for e in tokenizer_tester:
#     print(tokenizer.encode(e))

# Define Metric

In [13]:
# Metric objects used by compute_metrics, loaded once up front.
accuracy_metric, f1_metric = (evaluate.load(metric_id) for metric_id in ('accuracy', 'f1'))

In [14]:
def compute_metrics(eval_pred):
    """Score one evaluation pass.

    Args:
        eval_pred: a ``(predictions, labels)`` pair. The predicted class of each
            example is the argmax of ``predictions`` along axis 1.

    Returns:
        dict with keys ``accuracy``, ``f1_true``, ``f1_false``, ``f1_macro`` and
        ``f1_micro``; ``f1_true`` / ``f1_false`` are the per-class F1 scores for
        label ids 0 and 1 respectively.
    """
    scores, gold = eval_pred
    predicted = np.argmax(scores, axis=1)
    pair = {'references': gold, 'predictions': predicted}

    # Per-class F1, requested explicitly in id order [0, 1].
    per_class = f1_metric.compute(average=None, labels=[0,1], **pair)['f1']
    f1_true, f1_false = tuple(per_class)

    result = {}
    result['accuracy'] = accuracy_metric.compute(**pair)['accuracy']
    result['f1_true'] = f1_true
    result['f1_false'] = f1_false
    result['f1_macro'] = f1_metric.compute(average='macro', **pair)['f1']
    result['f1_micro'] = f1_metric.compute(average='micro', **pair)['f1']
    return result

# Load Data

In [15]:
def preprocess_function(examples):
    """Tokenize the ``form`` / ``pair`` fields of ``examples`` as a two-segment input.

    Args:
        examples: mapping that provides ``"form"`` (first segment) and ``"pair"``
            (second segment).

    Returns:
        Whatever the module-level ``tokenizer`` returns for that pair, with
        truncation enabled.
    """
    first_segment = examples["form"]
    second_segment = examples["pair"]
    return tokenizer(first_segment, second_segment, truncation=True)

In [16]:
def _load_tokenized_split(csv_path):
    """Read one CSV split and tokenize it example by example with preprocess_function."""
    split_frame = pd.read_csv(csv_path)
    split_dataset = datasets.Dataset.from_pandas(split_frame)
    return split_dataset.map(preprocess_function, batched=False)


# The splits stay separate and in file order; merging dev into train
# (pd.concat) and .shuffle(seed=42) were tried earlier and are left disabled.
train_dataset = _load_tokenized_split(TRAIN_DATA_PATH)
eval_dataset = _load_tokenized_split(EVAL_DATA_PATH)

  0%|          | 0/75000 [00:00<?, ?ex/s]

  1%|          | 388/75000 [00:00<00:19, 3876.47ex/s]

  1%|          | 777/75000 [00:00<00:19, 3882.61ex/s]

  2%|▏         | 1166/75000 [00:00<00:32, 2258.69ex/s]

  2%|▏         | 1547/75000 [00:00<00:27, 2683.92ex/s]

  3%|▎         | 1922/75000 [00:00<00:24, 2983.98ex/s]

  3%|▎         | 2262/75000 [00:00<00:23, 3048.53ex/s]

  4%|▎         | 2637/75000 [00:00<00:22, 3249.23ex/s]

  4%|▍         | 3000/75000 [00:00<00:22, 3179.01ex/s]

  5%|▍         | 3387/75000 [00:01<00:21, 3372.95ex/s]

  5%|▌         | 3770/75000 [00:01<00:20, 3502.08ex/s]

  6%|▌         | 4130/75000 [00:01<00:20, 3415.56ex/s]

  6%|▌         | 4489/75000 [00:01<00:20, 3463.27ex/s]

  6%|▋         | 4870/75000 [00:01<00:19, 3562.96ex/s]

  7%|▋         | 5231/75000 [00:01<00:20, 3401.54ex/s]

  8%|▊         | 5634/75000 [00:01<00:19, 3579.75ex/s]

  8%|▊         | 6000/75000 [00:01<00:20, 3447.42ex/s]

  9%|▊         | 6398/75000 [00:01<00:19, 3598.10ex/s]

  9%|▉         | 6784/75000 [00:02<00:18, 3672.15ex/s]

 10%|▉         | 7154/75000 [00:02<00:19, 3488.51ex/s]

 10%|█         | 7529/75000 [00:02<00:18, 3561.19ex/s]

 11%|█         | 7935/75000 [00:02<00:18, 3703.07ex/s]

 11%|█         | 8308/75000 [00:02<00:19, 3507.50ex/s]

 12%|█▏        | 8697/75000 [00:02<00:18, 3613.08ex/s]

 12%|█▏        | 9062/75000 [00:02<00:18, 3489.21ex/s]

 13%|█▎        | 9434/75000 [00:02<00:18, 3553.88ex/s]

 13%|█▎        | 9817/75000 [00:02<00:17, 3631.25ex/s]

 14%|█▎        | 10183/75000 [00:02<00:18, 3491.31ex/s]

 14%|█▍        | 10592/75000 [00:03<00:17, 3659.88ex/s]

 15%|█▍        | 11000/75000 [00:03<00:17, 3619.71ex/s]

 15%|█▌        | 11384/75000 [00:03<00:17, 3681.12ex/s]

 16%|█▌        | 11754/75000 [00:03<00:17, 3662.38ex/s]

 16%|█▌        | 12122/75000 [00:03<00:17, 3505.69ex/s]

 17%|█▋        | 12475/75000 [00:03<00:17, 3501.55ex/s]

 17%|█▋        | 12844/75000 [00:03<00:17, 3554.52ex/s]

 18%|█▊        | 13201/75000 [00:03<00:18, 3392.83ex/s]

 18%|█▊        | 13588/75000 [00:03<00:17, 3526.53ex/s]

 19%|█▊        | 13966/75000 [00:04<00:16, 3599.39ex/s]

 19%|█▉        | 14328/75000 [00:04<00:17, 3429.15ex/s]

 20%|█▉        | 14713/75000 [00:04<00:17, 3546.29ex/s]

 20%|██        | 15071/75000 [00:04<00:18, 3285.45ex/s]

 21%|██        | 15454/75000 [00:04<00:17, 3433.70ex/s]

 21%|██        | 15803/75000 [00:04<00:17, 3448.16ex/s]

 22%|██▏       | 16152/75000 [00:04<00:17, 3297.78ex/s]

 22%|██▏       | 16506/75000 [00:04<00:17, 3364.23ex/s]

 23%|██▎       | 16883/75000 [00:04<00:16, 3478.19ex/s]

 23%|██▎       | 17234/75000 [00:05<00:17, 3356.99ex/s]

 23%|██▎       | 17590/75000 [00:05<00:16, 3414.65ex/s]

 24%|██▍       | 17967/75000 [00:05<00:16, 3516.97ex/s]

 24%|██▍       | 18321/75000 [00:05<00:16, 3349.03ex/s]

 25%|██▍       | 18697/75000 [00:05<00:16, 3464.77ex/s]

 25%|██▌       | 19046/75000 [00:05<00:16, 3423.50ex/s]

 26%|██▌       | 19409/75000 [00:05<00:15, 3482.60ex/s]

 26%|██▋       | 19811/75000 [00:05<00:15, 3638.68ex/s]

 27%|██▋       | 20177/75000 [00:05<00:15, 3519.04ex/s]

 27%|██▋       | 20555/75000 [00:05<00:15, 3593.49ex/s]

 28%|██▊       | 20952/75000 [00:06<00:14, 3703.20ex/s]

 28%|██▊       | 21324/75000 [00:06<00:15, 3532.66ex/s]

 29%|██▉       | 21711/75000 [00:06<00:14, 3628.87ex/s]

 29%|██▉       | 22077/75000 [00:06<00:16, 3286.06ex/s]

 30%|██▉       | 22413/75000 [00:06<00:16, 3242.98ex/s]

 30%|███       | 22752/75000 [00:06<00:15, 3283.41ex/s]

 31%|███       | 23085/75000 [00:06<00:16, 3123.02ex/s]

 31%|███       | 23418/75000 [00:06<00:16, 3179.28ex/s]

 32%|███▏      | 23772/75000 [00:06<00:15, 3280.01ex/s]

 32%|███▏      | 24103/75000 [00:07<00:15, 3183.04ex/s]

 33%|███▎      | 24466/75000 [00:07<00:15, 3306.72ex/s]

 33%|███▎      | 24800/75000 [00:07<00:15, 3314.45ex/s]

 34%|███▎      | 25133/75000 [00:07<00:16, 3069.69ex/s]

 34%|███▍      | 25487/75000 [00:07<00:15, 3199.40ex/s]

 34%|███▍      | 25863/75000 [00:07<00:14, 3356.60ex/s]

 35%|███▍      | 26203/75000 [00:07<00:15, 3245.76ex/s]

 35%|███▌      | 26572/75000 [00:07<00:14, 3370.28ex/s]

 36%|███▌      | 26949/75000 [00:07<00:13, 3484.18ex/s]

 36%|███▋      | 27300/75000 [00:08<00:14, 3360.02ex/s]

 37%|███▋      | 27667/75000 [00:08<00:13, 3447.84ex/s]

 37%|███▋      | 28014/75000 [00:08<00:13, 3374.02ex/s]

 38%|███▊      | 28408/75000 [00:08<00:13, 3535.21ex/s]

 38%|███▊      | 28795/75000 [00:08<00:12, 3631.02ex/s]

 39%|███▉      | 29160/75000 [00:08<00:13, 3372.81ex/s]

 39%|███▉      | 29528/75000 [00:08<00:13, 3457.84ex/s]

 40%|███▉      | 29922/75000 [00:08<00:12, 3595.40ex/s]

 40%|████      | 30285/75000 [00:08<00:13, 3436.28ex/s]

 41%|████      | 30669/75000 [00:08<00:12, 3548.24ex/s]

 41%|████▏     | 31027/75000 [00:09<00:13, 3351.27ex/s]

 42%|████▏     | 31431/75000 [00:09<00:12, 3542.11ex/s]

 42%|████▏     | 31814/75000 [00:09<00:11, 3623.26ex/s]

 43%|████▎     | 32180/75000 [00:09<00:12, 3407.50ex/s]

 43%|████▎     | 32552/75000 [00:09<00:12, 3494.12ex/s]

 44%|████▍     | 32918/75000 [00:09<00:11, 3538.65ex/s]

 44%|████▍     | 33275/75000 [00:09<00:12, 3314.69ex/s]

 45%|████▍     | 33638/75000 [00:09<00:12, 3402.23ex/s]

 45%|████▌     | 34000/75000 [00:09<00:12, 3332.70ex/s]

 46%|████▌     | 34378/75000 [00:10<00:11, 3456.40ex/s]

 46%|████▋     | 34759/75000 [00:10<00:11, 3556.41ex/s]

 47%|████▋     | 35118/75000 [00:10<00:11, 3504.10ex/s]

 47%|████▋     | 35487/75000 [00:10<00:11, 3556.92ex/s]

 48%|████▊     | 35876/75000 [00:10<00:10, 3653.36ex/s]

 48%|████▊     | 36243/75000 [00:10<00:11, 3508.06ex/s]

 49%|████▉     | 36613/75000 [00:10<00:10, 3561.55ex/s]

 49%|████▉     | 37000/75000 [00:10<00:11, 3445.47ex/s]

 50%|████▉     | 37387/75000 [00:10<00:10, 3563.11ex/s]

 50%|█████     | 37787/75000 [00:10<00:10, 3688.34ex/s]

 51%|█████     | 38158/75000 [00:11<00:10, 3498.37ex/s]

 51%|█████▏    | 38550/75000 [00:11<00:10, 3614.91ex/s]

 52%|█████▏    | 38915/75000 [00:11<00:09, 3624.88ex/s]

 52%|█████▏    | 39280/75000 [00:11<00:10, 3384.86ex/s]

 53%|█████▎    | 39657/75000 [00:11<00:10, 3491.85ex/s]

 53%|█████▎    | 40010/75000 [00:11<00:10, 3421.00ex/s]

 54%|█████▍    | 40409/75000 [00:11<00:09, 3582.14ex/s]

 54%|█████▍    | 40786/75000 [00:11<00:09, 3635.00ex/s]

 55%|█████▍    | 41152/75000 [00:11<00:09, 3547.24ex/s]

 55%|█████▌    | 41550/75000 [00:12<00:09, 3671.20ex/s]

 56%|█████▌    | 41952/75000 [00:12<00:08, 3770.41ex/s]

 56%|█████▋    | 42331/75000 [00:12<00:09, 3599.56ex/s]

 57%|█████▋    | 42714/75000 [00:12<00:08, 3664.20ex/s]

 57%|█████▋    | 43083/75000 [00:12<00:09, 3546.15ex/s]

 58%|█████▊    | 43465/75000 [00:12<00:08, 3623.61ex/s]

 58%|█████▊    | 43863/75000 [00:12<00:08, 3725.23ex/s]

 59%|█████▉    | 44238/75000 [00:12<00:08, 3516.39ex/s]

 59%|█████▉    | 44612/75000 [00:12<00:08, 3579.27ex/s]

 60%|█████▉    | 44973/75000 [00:13<00:08, 3565.15ex/s]

 60%|██████    | 45332/75000 [00:13<00:08, 3406.15ex/s]

 61%|██████    | 45742/75000 [00:13<00:08, 3600.79ex/s]

 61%|██████▏   | 46105/75000 [00:13<00:08, 3497.27ex/s]

 62%|██████▏   | 46499/75000 [00:13<00:07, 3620.88ex/s]

 62%|██████▏   | 46873/75000 [00:13<00:07, 3654.50ex/s]

 63%|██████▎   | 47241/75000 [00:13<00:07, 3558.34ex/s]

 64%|██████▎   | 47635/75000 [00:13<00:07, 3667.79ex/s]

 64%|██████▍   | 48004/75000 [00:13<00:07, 3531.05ex/s]

 65%|██████▍   | 48395/75000 [00:13<00:07, 3637.74ex/s]

 65%|██████▌   | 48785/75000 [00:14<00:07, 3712.03ex/s]

 66%|██████▌   | 49158/75000 [00:14<00:07, 3586.69ex/s]

 66%|██████▌   | 49560/75000 [00:14<00:06, 3710.06ex/s]

 67%|██████▋   | 49943/75000 [00:14<00:06, 3744.59ex/s]

 67%|██████▋   | 50319/75000 [00:14<00:06, 3611.15ex/s]

 68%|██████▊   | 50712/75000 [00:14<00:06, 3702.28ex/s]

 68%|██████▊   | 51084/75000 [00:14<00:06, 3634.05ex/s]

 69%|██████▊   | 51487/75000 [00:14<00:06, 3748.33ex/s]

 69%|██████▉   | 51895/75000 [00:14<00:06, 3844.18ex/s]

 70%|██████▉   | 52281/75000 [00:15<00:06, 3656.68ex/s]

 70%|███████   | 52664/75000 [00:15<00:06, 3703.80ex/s]

 71%|███████   | 53037/75000 [00:15<00:06, 3540.62ex/s]

 71%|███████   | 53424/75000 [00:15<00:05, 3632.47ex/s]

 72%|███████▏  | 53823/75000 [00:15<00:05, 3734.03ex/s]

 72%|███████▏  | 54199/75000 [00:15<00:05, 3560.74ex/s]

 73%|███████▎  | 54595/75000 [00:15<00:05, 3671.17ex/s]

 73%|███████▎  | 55000/75000 [00:15<00:05, 3588.17ex/s]

 74%|███████▍  | 55399/75000 [00:15<00:05, 3699.39ex/s]

 74%|███████▍  | 55803/75000 [00:15<00:05, 3795.25ex/s]

 75%|███████▍  | 56185/75000 [00:16<00:05, 3602.13ex/s]

 75%|███████▌  | 56579/75000 [00:16<00:04, 3696.35ex/s]

 76%|███████▌  | 56952/75000 [00:16<00:04, 3705.74ex/s]

 76%|███████▋  | 57325/75000 [00:16<00:04, 3556.40ex/s]

 77%|███████▋  | 57733/75000 [00:16<00:04, 3704.77ex/s]

 77%|███████▋  | 58106/75000 [00:16<00:04, 3378.95ex/s]

 78%|███████▊  | 58511/75000 [00:16<00:04, 3560.34ex/s]

 79%|███████▊  | 58921/75000 [00:16<00:04, 3709.36ex/s]

 79%|███████▉  | 59298/75000 [00:16<00:04, 3563.63ex/s]

 80%|███████▉  | 59669/75000 [00:17<00:04, 3602.22ex/s]

 80%|████████  | 60033/75000 [00:17<00:04, 3447.02ex/s]

 81%|████████  | 60417/75000 [00:17<00:04, 3555.61ex/s]

 81%|████████  | 60776/75000 [00:17<00:04, 3538.09ex/s]

 82%|████████▏ | 61132/75000 [00:17<00:04, 3382.89ex/s]

 82%|████████▏ | 61496/75000 [00:17<00:03, 3454.69ex/s]

 82%|████████▏ | 61848/75000 [00:17<00:03, 3469.58ex/s]

 83%|████████▎ | 62197/75000 [00:17<00:03, 3325.88ex/s]

 83%|████████▎ | 62583/75000 [00:17<00:03, 3477.12ex/s]

 84%|████████▍ | 62939/75000 [00:17<00:03, 3498.93ex/s]

 84%|████████▍ | 63291/75000 [00:18<00:03, 3247.57ex/s]

 85%|████████▍ | 63656/75000 [00:18<00:03, 3359.13ex/s]

 85%|████████▌ | 64000/75000 [00:18<00:03, 3228.60ex/s]

 86%|████████▌ | 64372/75000 [00:18<00:03, 3365.89ex/s]

 86%|████████▋ | 64756/75000 [00:18<00:02, 3499.88ex/s]

 87%|████████▋ | 65110/75000 [00:18<00:02, 3422.07ex/s]

 87%|████████▋ | 65508/75000 [00:18<00:02, 3578.36ex/s]

 88%|████████▊ | 65869/75000 [00:18<00:02, 3333.58ex/s]

 88%|████████▊ | 66207/75000 [00:19<00:02, 3108.94ex/s]

 89%|████████▊ | 66537/75000 [00:19<00:02, 3159.44ex/s]

 89%|████████▉ | 66882/75000 [00:19<00:02, 3238.32ex/s]

 90%|████████▉ | 67210/75000 [00:19<00:02, 2977.34ex/s]

 90%|█████████ | 67575/75000 [00:19<00:02, 3159.58ex/s]

 91%|█████████ | 67959/75000 [00:19<00:02, 3347.71ex/s]

 91%|█████████ | 68300/75000 [00:19<00:02, 2578.17ex/s]

 92%|█████████▏| 68678/75000 [00:19<00:02, 2863.43ex/s]

 92%|█████████▏| 69000/75000 [00:19<00:02, 2833.29ex/s]

 92%|█████████▏| 69316/75000 [00:20<00:01, 2915.46ex/s]

 93%|█████████▎| 69658/75000 [00:20<00:01, 3051.06ex/s]

 93%|█████████▎| 70000/75000 [00:20<00:01, 2821.54ex/s]

 94%|█████████▎| 70295/75000 [00:20<00:01, 2707.87ex/s]

 94%|█████████▍| 70672/75000 [00:20<00:01, 2985.64ex/s]

 95%|█████████▍| 70981/75000 [00:20<00:01, 2803.32ex/s]

 95%|█████████▌| 71270/75000 [00:20<00:01, 2684.17ex/s]

 95%|█████████▌| 71582/75000 [00:20<00:01, 2798.24ex/s]

 96%|█████████▌| 71951/75000 [00:20<00:01, 3042.06ex/s]

 96%|█████████▋| 72262/75000 [00:21<00:00, 2996.68ex/s]

 97%|█████████▋| 72615/75000 [00:21<00:00, 3146.34ex/s]

 97%|█████████▋| 72968/75000 [00:21<00:00, 3254.82ex/s]

 98%|█████████▊| 73297/75000 [00:21<00:00, 3137.61ex/s]

 98%|█████████▊| 73634/75000 [00:21<00:00, 3203.27ex/s]

 99%|█████████▊| 74000/75000 [00:21<00:00, 3230.81ex/s]

 99%|█████████▉| 74380/75000 [00:21<00:00, 3392.42ex/s]

100%|█████████▉| 74784/75000 [00:21<00:00, 3578.88ex/s]

100%|██████████| 75000/75000 [00:21<00:00, 3428.15ex/s]




  0%|          | 0/69825 [00:00<?, ?ex/s]

  1%|          | 374/69825 [00:00<00:18, 3735.14ex/s]

  1%|          | 748/69825 [00:00<00:18, 3682.91ex/s]

  2%|▏         | 1117/69825 [00:00<00:20, 3419.83ex/s]

  2%|▏         | 1509/69825 [00:00<00:18, 3605.54ex/s]

  3%|▎         | 1882/69825 [00:00<00:18, 3645.93ex/s]

  3%|▎         | 2249/69825 [00:00<00:19, 3516.60ex/s]

  4%|▎         | 2610/69825 [00:00<00:18, 3543.88ex/s]

  4%|▍         | 2989/69825 [00:00<00:18, 3616.84ex/s]

  5%|▍         | 3352/69825 [00:00<00:19, 3404.03ex/s]

  5%|▌         | 3734/69825 [00:01<00:18, 3523.56ex/s]

  6%|▌         | 4090/69825 [00:01<00:19, 3398.86ex/s]

  6%|▋         | 4483/69825 [00:01<00:18, 3550.68ex/s]

  7%|▋         | 4852/69825 [00:01<00:18, 3590.42ex/s]

  7%|▋         | 5213/69825 [00:01<00:18, 3422.38ex/s]

  8%|▊         | 5575/69825 [00:01<00:18, 3478.22ex/s]

  9%|▊         | 5945/69825 [00:01<00:18, 3540.50ex/s]

  9%|▉         | 6301/69825 [00:01<00:18, 3396.55ex/s]

 10%|▉         | 6670/69825 [00:01<00:18, 3477.97ex/s]

 10%|█         | 7020/69825 [00:02<00:18, 3364.99ex/s]

 11%|█         | 7394/69825 [00:02<00:17, 3470.31ex/s]

 11%|█         | 7760/69825 [00:02<00:17, 3523.43ex/s]

 12%|█▏        | 8114/69825 [00:02<00:18, 3374.66ex/s]

 12%|█▏        | 8506/69825 [00:02<00:17, 3527.75ex/s]

 13%|█▎        | 8905/69825 [00:02<00:16, 3661.17ex/s]

 13%|█▎        | 9274/69825 [00:02<00:16, 3570.27ex/s]

 14%|█▍        | 9675/69825 [00:02<00:16, 3695.57ex/s]

 14%|█▍        | 10047/69825 [00:02<00:17, 3501.22ex/s]

 15%|█▍        | 10419/69825 [00:02<00:16, 3562.02ex/s]

 15%|█▌        | 10789/69825 [00:03<00:16, 3600.38ex/s]

 16%|█▌        | 11151/69825 [00:03<00:17, 3403.14ex/s]

 16%|█▋        | 11496/69825 [00:03<00:17, 3415.89ex/s]

 17%|█▋        | 11870/69825 [00:03<00:16, 3506.31ex/s]

 18%|█▊        | 12223/69825 [00:03<00:17, 3357.08ex/s]

 18%|█▊        | 12580/69825 [00:03<00:16, 3415.41ex/s]

 19%|█▊        | 12952/69825 [00:03<00:16, 3500.29ex/s]

 19%|█▉        | 13304/69825 [00:03<00:16, 3343.25ex/s]

 20%|█▉        | 13664/69825 [00:03<00:16, 3413.98ex/s]

 20%|██        | 14008/69825 [00:04<00:17, 3223.55ex/s]

 21%|██        | 14372/69825 [00:04<00:16, 3339.11ex/s]

 21%|██        | 14721/69825 [00:04<00:16, 3380.25ex/s]

 22%|██▏       | 15062/69825 [00:04<00:16, 3246.89ex/s]

 22%|██▏       | 15429/69825 [00:04<00:16, 3366.21ex/s]

 23%|██▎       | 15799/69825 [00:04<00:15, 3462.00ex/s]

 23%|██▎       | 16148/69825 [00:04<00:16, 3335.52ex/s]

 24%|██▎       | 16511/69825 [00:04<00:15, 3413.25ex/s]

 24%|██▍       | 16876/69825 [00:04<00:15, 3481.26ex/s]

 25%|██▍       | 17226/69825 [00:04<00:15, 3389.85ex/s]

 25%|██▌       | 17629/69825 [00:05<00:14, 3574.57ex/s]

 26%|██▌       | 18000/69825 [00:05<00:14, 3475.13ex/s]

 26%|██▋       | 18384/69825 [00:05<00:14, 3577.33ex/s]

 27%|██▋       | 18780/69825 [00:05<00:13, 3685.45ex/s]

 27%|██▋       | 19151/69825 [00:05<00:14, 3426.86ex/s]

 28%|██▊       | 19525/69825 [00:05<00:14, 3514.24ex/s]

 29%|██▊       | 19944/69825 [00:05<00:13, 3706.01ex/s]

 29%|██▉       | 20319/69825 [00:05<00:14, 3476.80ex/s]

 30%|██▉       | 20675/69825 [00:05<00:14, 3497.82ex/s]

 30%|███       | 21029/69825 [00:06<00:14, 3422.39ex/s]

 31%|███       | 21417/69825 [00:06<00:13, 3551.99ex/s]

 31%|███▏      | 21825/69825 [00:06<00:12, 3702.69ex/s]

 32%|███▏      | 22198/69825 [00:06<00:13, 3538.65ex/s]

 32%|███▏      | 22573/69825 [00:06<00:13, 3597.79ex/s]

 33%|███▎      | 22983/69825 [00:06<00:12, 3740.61ex/s]

 33%|███▎      | 23360/69825 [00:06<00:13, 3559.60ex/s]

 34%|███▍      | 23732/69825 [00:06<00:12, 3602.87ex/s]

 35%|███▍      | 24095/69825 [00:06<00:13, 3434.39ex/s]

 35%|███▌      | 24469/69825 [00:07<00:12, 3518.68ex/s]

 36%|███▌      | 24851/69825 [00:07<00:12, 3601.75ex/s]

 36%|███▌      | 25214/69825 [00:07<00:13, 3196.75ex/s]

 37%|███▋      | 25543/69825 [00:07<00:14, 3133.01ex/s]

 37%|███▋      | 25863/69825 [00:07<00:14, 3128.51ex/s]

 37%|███▋      | 26181/69825 [00:07<00:14, 2988.41ex/s]

 38%|███▊      | 26504/69825 [00:07<00:14, 3052.25ex/s]

 38%|███▊      | 26817/69825 [00:07<00:13, 3073.19ex/s]

 39%|███▉      | 27127/69825 [00:07<00:15, 2845.30ex/s]

 39%|███▉      | 27427/69825 [00:08<00:14, 2886.26ex/s]

 40%|███▉      | 27740/69825 [00:08<00:14, 2953.91ex/s]

 40%|████      | 28039/69825 [00:08<00:14, 2926.76ex/s]

 41%|████      | 28413/69825 [00:08<00:13, 3160.65ex/s]

 41%|████      | 28771/69825 [00:08<00:12, 3280.61ex/s]

 42%|████▏     | 29102/69825 [00:08<00:13, 3079.88ex/s]

 42%|████▏     | 29459/69825 [00:08<00:12, 3218.12ex/s]

 43%|████▎     | 29813/69825 [00:08<00:12, 3309.54ex/s]

 43%|████▎     | 30147/69825 [00:08<00:12, 3208.47ex/s]

 44%|████▎     | 30536/69825 [00:08<00:11, 3401.96ex/s]

 44%|████▍     | 30935/69825 [00:09<00:10, 3571.18ex/s]

 45%|████▍     | 31295/69825 [00:09<00:11, 3445.66ex/s]

 45%|████▌     | 31680/69825 [00:09<00:10, 3561.32ex/s]

 46%|████▌     | 32039/69825 [00:09<00:11, 3420.72ex/s]

 46%|████▋     | 32414/69825 [00:09<00:10, 3512.41ex/s]

 47%|████▋     | 32787/69825 [00:09<00:10, 3573.29ex/s]

 47%|████▋     | 33147/69825 [00:09<00:10, 3448.66ex/s]

 48%|████▊     | 33522/69825 [00:09<00:10, 3532.79ex/s]

 49%|████▊     | 33901/69825 [00:09<00:09, 3605.43ex/s]

 49%|████▉     | 34263/69825 [00:10<00:10, 3428.83ex/s]

 50%|████▉     | 34637/69825 [00:10<00:10, 3515.36ex/s]

 50%|█████     | 35000/69825 [00:10<00:10, 3335.61ex/s]

 51%|█████     | 35381/69825 [00:10<00:09, 3467.38ex/s]

 51%|█████     | 35764/69825 [00:10<00:09, 3569.69ex/s]

 52%|█████▏    | 36124/69825 [00:10<00:09, 3450.95ex/s]

 52%|█████▏    | 36521/69825 [00:10<00:09, 3597.54ex/s]

 53%|█████▎    | 36896/69825 [00:10<00:09, 3640.83ex/s]

 53%|█████▎    | 37262/69825 [00:10<00:09, 3491.60ex/s]

 54%|█████▍    | 37654/69825 [00:10<00:08, 3611.71ex/s]

 54%|█████▍    | 38018/69825 [00:11<00:09, 3518.59ex/s]

 55%|█████▌    | 38419/69825 [00:11<00:08, 3658.43ex/s]

 56%|█████▌    | 38808/69825 [00:11<00:08, 3725.49ex/s]

 56%|█████▌    | 39183/69825 [00:11<00:08, 3653.86ex/s]

 57%|█████▋    | 39567/69825 [00:11<00:08, 3707.70ex/s]

 57%|█████▋    | 39950/69825 [00:11<00:07, 3740.69ex/s]

 58%|█████▊    | 40325/69825 [00:11<00:08, 3601.06ex/s]

 58%|█████▊    | 40721/69825 [00:11<00:07, 3701.42ex/s]

 59%|█████▉    | 41093/69825 [00:11<00:08, 3471.90ex/s]

 59%|█████▉    | 41444/69825 [00:12<00:08, 3480.76ex/s]

 60%|█████▉    | 41802/69825 [00:12<00:07, 3507.50ex/s]

 60%|██████    | 42155/69825 [00:12<00:08, 3311.51ex/s]

 61%|██████    | 42554/69825 [00:12<00:07, 3499.84ex/s]

 62%|██████▏   | 42954/69825 [00:12<00:07, 3642.33ex/s]

 62%|██████▏   | 43322/69825 [00:12<00:07, 3516.93ex/s]

 63%|██████▎   | 43733/69825 [00:12<00:07, 3683.97ex/s]

 63%|██████▎   | 44105/69825 [00:12<00:07, 3548.92ex/s]

 64%|██████▎   | 44502/69825 [00:12<00:06, 3667.39ex/s]

 64%|██████▍   | 44893/69825 [00:12<00:06, 3734.91ex/s]

 65%|██████▍   | 45269/69825 [00:13<00:06, 3624.68ex/s]

 65%|██████▌   | 45651/69825 [00:13<00:06, 3680.16ex/s]

 66%|██████▌   | 46021/69825 [00:13<00:06, 3533.57ex/s]

 66%|██████▋   | 46412/69825 [00:13<00:06, 3640.44ex/s]

 67%|██████▋   | 46797/69825 [00:13<00:06, 3698.84ex/s]

 68%|██████▊   | 47169/69825 [00:13<00:06, 3515.49ex/s]

 68%|██████▊   | 47564/69825 [00:13<00:06, 3636.78ex/s]

 69%|██████▊   | 47939/69825 [00:13<00:05, 3668.86ex/s]

 69%|██████▉   | 48308/69825 [00:13<00:06, 3531.81ex/s]

 70%|██████▉   | 48713/69825 [00:14<00:05, 3677.26ex/s]

 70%|███████   | 49083/69825 [00:14<00:05, 3542.38ex/s]

 71%|███████   | 49445/69825 [00:14<00:05, 3563.72ex/s]

 71%|███████▏  | 49828/69825 [00:14<00:05, 3638.18ex/s]

 72%|███████▏  | 50194/69825 [00:14<00:05, 3508.45ex/s]

 72%|███████▏  | 50595/69825 [00:14<00:05, 3651.67ex/s]

 73%|███████▎  | 51000/69825 [00:14<00:05, 3614.30ex/s]

 74%|███████▎  | 51388/69825 [00:14<00:05, 3686.77ex/s]

 74%|███████▍  | 51785/69825 [00:14<00:04, 3768.44ex/s]

 75%|███████▍  | 52164/69825 [00:14<00:04, 3642.10ex/s]

 75%|███████▌  | 52547/69825 [00:15<00:04, 3693.77ex/s]

 76%|███████▌  | 52925/69825 [00:15<00:04, 3717.05ex/s]

 76%|███████▋  | 53298/69825 [00:15<00:04, 3511.04ex/s]

 77%|███████▋  | 53678/69825 [00:15<00:04, 3592.78ex/s]

 77%|███████▋  | 54040/69825 [00:15<00:04, 3514.93ex/s]

 78%|███████▊  | 54437/69825 [00:15<00:04, 3644.96ex/s]

 79%|███████▊  | 54835/69825 [00:15<00:04, 3740.37ex/s]

 79%|███████▉  | 55211/69825 [00:15<00:04, 3592.72ex/s]

 80%|███████▉  | 55615/69825 [00:15<00:03, 3718.82ex/s]

 80%|████████  | 55992/69825 [00:16<00:03, 3731.43ex/s]

 81%|████████  | 56367/69825 [00:16<00:03, 3540.77ex/s]

 81%|████████▏ | 56749/69825 [00:16<00:03, 3618.99ex/s]

 82%|████████▏ | 57114/69825 [00:16<00:03, 3472.39ex/s]

 82%|████████▏ | 57487/69825 [00:16<00:03, 3543.25ex/s]

 83%|████████▎ | 57873/69825 [00:16<00:03, 3633.58ex/s]

 83%|████████▎ | 58239/69825 [00:16<00:03, 3248.50ex/s]

 84%|████████▍ | 58609/69825 [00:16<00:03, 3369.83ex/s]

 84%|████████▍ | 58995/69825 [00:16<00:03, 3504.70ex/s]

 85%|████████▌ | 59352/69825 [00:17<00:03, 3339.65ex/s]

 86%|████████▌ | 59709/69825 [00:17<00:02, 3402.36ex/s]

 86%|████████▌ | 60054/69825 [00:17<00:03, 2583.57ex/s]

 87%|████████▋ | 60445/69825 [00:17<00:03, 2893.56ex/s]

 87%|████████▋ | 60824/69825 [00:17<00:02, 3118.57ex/s]

 88%|████████▊ | 61162/69825 [00:17<00:02, 3119.61ex/s]

 88%|████████▊ | 61554/69825 [00:17<00:02, 3334.76ex/s]

 89%|████████▊ | 61967/69825 [00:17<00:02, 3555.72ex/s]

 89%|████████▉ | 62335/69825 [00:17<00:02, 3428.95ex/s]

 90%|████████▉ | 62715/69825 [00:18<00:02, 3532.56ex/s]

 90%|█████████ | 63076/69825 [00:18<00:01, 3402.03ex/s]

 91%|█████████ | 63453/69825 [00:18<00:01, 3504.66ex/s]

 91%|█████████▏| 63838/69825 [00:18<00:01, 3602.03ex/s]

 92%|█████████▏| 64202/69825 [00:18<00:01, 3386.62ex/s]

 92%|█████████▏| 64555/69825 [00:18<00:01, 3425.92ex/s]

 93%|█████████▎| 64942/69825 [00:18<00:01, 3551.80ex/s]

 94%|█████████▎| 65301/69825 [00:18<00:01, 3447.24ex/s]

 94%|█████████▍| 65684/69825 [00:18<00:01, 3556.22ex/s]

 95%|█████████▍| 66043/69825 [00:19<00:01, 3189.58ex/s]

 95%|█████████▌| 66371/69825 [00:19<00:01, 3134.93ex/s]

 96%|█████████▌| 66691/69825 [00:19<00:01, 3108.49ex/s]

 96%|█████████▌| 67006/69825 [00:19<00:00, 2854.47ex/s]

 96%|█████████▋| 67343/69825 [00:19<00:00, 2989.86ex/s]

 97%|█████████▋| 67659/69825 [00:19<00:00, 3035.87ex/s]

 97%|█████████▋| 67974/69825 [00:19<00:00, 3066.44ex/s]

 98%|█████████▊| 68284/69825 [00:19<00:00, 2802.50ex/s]

 98%|█████████▊| 68636/69825 [00:19<00:00, 2996.98ex/s]

 99%|█████████▊| 68942/69825 [00:20<00:00, 2943.87ex/s]

 99%|█████████▉| 69241/69825 [00:20<00:00, 2909.91ex/s]

100%|█████████▉| 69568/69825 [00:20<00:00, 3011.41ex/s]

100%|██████████| 69825/69825 [00:20<00:00, 3436.42ex/s]




In [17]:
# Report the number of examples in the training and evaluation splits.
tuple(len(split) for split in (train_dataset, eval_dataset))

(75000, 69825)

In [18]:
# Decode one random example from each split to eyeball the model input and its label.
# The row is indexed first (dataset[k]) rather than the column (dataset['input_ids'][k]):
# column-first access reads the entire column just to pick a single element,
# and it was being done twice per split (input_ids and labels).
for split in (train_dataset, eval_dataset):
    k = random.randrange(len(split))
    sample = split[k]
    print(tokenizer.decode(sample['input_ids']), sample['labels'])

[CLS] < < # 핫팩추천 > > [SEP] 상품평 문장의 대범주 유형은 < < 브랜드 > > 이고 소범주 유형은 < < 디자인 > > 이다. [SEP] 1


[CLS] < < # 짱좋아 > > [SEP] 상품평 문장의 대범주 유형은 < < 본품 > > 이고 소범주 유형은 < < 가격 > > 이다. [SEP] 1


# Load Trainer

In [19]:
# Gather every training hyperparameter into a single TrainingArguments object.
# The right-hand-side values are plain variables, presumably set in an earlier
# configuration cell.
args = TrainingArguments(
    # -- run identity and experiment tracking --
    # Checkpoints are written under `run_name` (relative to the working directory).
    output_dir=run_name,
    run_name=run_name,
    report_to=report_to,

    # -- logging cadence --
    logging_strategy=logging_strategy,
    logging_steps=logging_steps,
    logging_first_step=logging_first_step,

    # -- evaluation / checkpointing and best-model selection --
    evaluation_strategy=evaluation_strategy,
    save_strategy=save_strategy,
    save_total_limit=save_total_limit,
    metric_for_best_model=metric_for_best_model,
    load_best_model_at_end=load_best_model_at_end,

    # -- training length and batch geometry --
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,

    # -- optimizer and learning-rate schedule --
    optim=optim,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    adam_epsilon=adam_epsilon,
    lr_scheduler_type=lr_scheduler_type,
    warmup_ratio=warmup_ratio,

    # -- numeric precision --
    fp16=fp16,
)

In [20]:
# es = EarlyStoppingCallback(early_stopping_patience=early_stopping_patience)

In [21]:
# Wire the model, training arguments, data and metric function into one Trainer.
# All arguments are passed by keyword so the call does not depend on the
# positional order of Trainer's signature.
trainer = Trainer(
    model=model,
    args=args,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    # callbacks=[es],  # early stopping is currently disabled
)

# Run Trainer

In [22]:
# Run training and always close the Weights & Biases run afterwards.
# Without the guard, an exception or a manual interrupt during training skips
# wandb.finish() and leaves the run open in the kernel.
try:
    trainer.train()
except BaseException:
    # Close the run with a non-zero exit code so it is recorded as failed,
    # then re-raise so the notebook still surfaces the original error.
    wandb.finish(exit_code=1)
    raise
else:
    wandb.finish()

The following columns in the training set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: form, pair, id. If form, pair, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running training *****


  Num examples = 75000


  Num Epochs = 20


  Instantaneous batch size per device = 25


  Total train batch size (w. parallel, distributed & accumulation) = 100


  Gradient Accumulation steps = 1


  Total optimization steps = 15000


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


You're using a ElectraTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.




Epoch,Training Loss,Validation Loss,Accuracy,F1 True,F1 False,F1 Macro,F1 Micro
1,0.3896,0.15654,0.957007,0.0,0.978031,0.489016,0.957007
2,0.1262,0.112611,0.96348,0.394011,0.981173,0.687592,0.96348
3,0.1133,0.101988,0.9657,0.52433,0.982209,0.753269,0.9657
4,0.0984,0.090367,0.96875,0.615574,0.983713,0.799644,0.96875
5,0.0902,0.081059,0.97193,0.633919,0.985405,0.809662,0.97193
6,0.0797,0.075756,0.972531,0.646647,0.98571,0.816179,0.972531
7,0.0747,0.076712,0.971285,0.659766,0.98501,0.822388,0.971285
8,0.0688,0.07766,0.973047,0.670172,0.985949,0.828061,0.973047
9,0.0656,0.080574,0.970054,0.662688,0.984331,0.823509,0.970054
10,0.0631,0.077417,0.972975,0.676607,0.985898,0.831253,0.972975


The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: form, pair, id. If form, pair, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-750


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-750/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-750/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-750/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-750/special_tokens_map.json




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: form, pair, id. If form, pair, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-1500


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-1500/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-1500/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-1500/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-1500/special_tokens_map.json




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: form, pair, id. If form, pair, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-2250


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-2250/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-2250/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-2250/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-2250/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-750] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: form, pair, id. If form, pair, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-3000


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-3000/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-3000/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-3000/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-3000/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-1500] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: form, pair, id. If form, pair, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-3750


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-3750/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-3750/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-3750/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-3750/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-2250] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: form, pair, id. If form, pair, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-4500


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-4500/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-4500/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-4500/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-4500/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-3000] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: form, pair, id. If form, pair, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-5250


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-5250/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-5250/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-5250/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-5250/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-3750] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: form, pair, id. If form, pair, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-6000


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-6000/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-6000/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-6000/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-6000/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-4500] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: form, pair, id. If form, pair, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-6750


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-6750/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-6750/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-6750/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-6750/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-5250] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: form, pair, id. If form, pair, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-7500


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-7500/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-7500/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-7500/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-7500/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-6000] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: form, pair, id. If form, pair, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-8250


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-8250/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-8250/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-8250/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-8250/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-6750] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: form, pair, id. If form, pair, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-9000


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-9000/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-9000/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-9000/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-9000/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-7500] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: form, pair, id. If form, pair, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-9750


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-9750/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-9750/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-9750/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-9750/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-8250] due to args.save_total_limit




wandb: Network error (ReadTimeout), entering retry loop.


wandb: Network error (ReadTimeout), entering retry loop.


The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: form, pair, id. If form, pair, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-10500


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-10500/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-10500/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-10500/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-10500/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-9750] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: form, pair, id. If form, pair, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-11250


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-11250/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-11250/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-11250/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-11250/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-10500] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: form, pair, id. If form, pair, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-12000


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-12000/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-12000/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-12000/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-12000/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-11250] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: form, pair, id. If form, pair, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-12750


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-12750/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-12750/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-12750/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-12750/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-12000] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: form, pair, id. If form, pair, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-13500


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-13500/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-13500/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-13500/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-13500/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-9000] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: form, pair, id. If form, pair, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-14250


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-14250/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-14250/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-14250/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-14250/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-12750] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: form, pair, id. If form, pair, id are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-15000


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-15000/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-15000/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-15000/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-15000/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-14250] due to args.save_total_limit




Training completed. Do not forget to share your model on huggingface.co/models =)




Loading best model from snunlp_kr_electra_discriminator_uncleaned_v2/checkpoint-13500 (score: 0.837884833537545).


Saving model checkpoint to /tmp/tmp7s6d2ufv


Configuration saved in /tmp/tmp7s6d2ufv/config.json


Model weights saved in /tmp/tmp7s6d2ufv/pytorch_model.bin


tokenizer config file saved in /tmp/tmp7s6d2ufv/tokenizer_config.json


Special tokens file saved in /tmp/tmp7s6d2ufv/special_tokens_map.json


0,1
eval/accuracy,▁▄▅▆▇▇▇█▆███▇███████
eval/f1_false,▁▄▄▆▇▇▇█▆▇██▇███████
eval/f1_macro,▁▅▆▇▇███████████████
eval/f1_micro,▁▄▅▆▇▇▇█▆███▇███████
eval/f1_true,▁▅▆▇▇███████████████
eval/loss,█▄▃▂▂▁▁▁▂▁▁▁▁▁▂▂▂▂▂▂
eval/runtime,▁▄▅▆▆▅▆▇▅▆▆█▆▆▇▃▅▆▇▅
eval/samples_per_second,█▄▄▃▃▄▃▂▄▃▃▁▃▃▂▆▄▃▂▄
eval/steps_per_second,█▅▄▃▃▄▃▂▄▃▃▁▃▃▂▆▄▃▂▄
train/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████

0,1
eval/accuracy,0.97359
eval/f1_false,0.98622
eval/f1_macro,0.83582
eval/f1_micro,0.97359
eval/f1_true,0.68543
eval/loss,0.08153
eval/runtime,238.0755
eval/samples_per_second,293.289
eval/steps_per_second,2.936
train/epoch,20.0


In [23]:
keep = [
    'added_tokens.json',
    'config.json',
    'pytorch_model.bin',
    'special_tokens_map.json',
    'tokenizer.json',
    'tokenizer_config.json',
    'vocab.txt'
]

ckpts = os.listdir(run_name)
for ckpt in ckpts:
    ckpt = os.path.join(run_name, ckpt)
    for item in os.listdir(ckpt):
        if item not in keep:
            os.remove(os.path.join(ckpt, item))

!mv wandb {run_name} {SAVE_PATH}/

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
