# Description


# Modules and Global Variables

In [1]:
from transformers import (
    AutoConfig, AutoTokenizer, AutoModelForSequenceClassification, 
    ElectraTokenizer, ElectraForSequenceClassification, 
    DefaultDataCollator, DataCollatorWithPadding, 
    TrainingArguments, Trainer,
)

from transformers.optimization import (
    AdamW, get_linear_schedule_with_warmup,
    Adafactor, AdafactorSchedule,
)

import torch
import wandb

import datasets
import evaluate

from sklearn.metrics import accuracy_score, f1_score

import numpy as np
import pandas as pd

import os
import re
import random

import demoji

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
print(f'torch.__version__: {torch.__version__}')
print(f'torch.cuda.is_available(): {torch.cuda.is_available()}')
NGPU = torch.cuda.device_count()
print(f'NGPU: {NGPU}')
# NGPU = torch.cuda.device_count()
# if NGPU > 1:
#     model = torch.nn.DataParallel(model, device_ids=list(range(NGPU)))

torch.__version__: 1.12.1
torch.cuda.is_available(): True
NGPU: 4


In [3]:
### labels

ce_labels = ['True', 'False']
pc_labels = ['positive', 'negative', 'neutral']
pc_binary_labels = ['True', 'False']

labels = ce_labels

label2id = {k: i for i, k in enumerate(labels)}
id2label = {i: k for i, k in enumerate(labels)}
num_labels = len(labels)

print(label2id)
print(id2label)

{'True': 0, 'False': 1}
{0: 'True', 1: 'False'}


In [4]:
### paths and names

PROJECT_NAME = 'aspect_category_detection'
RUN_ID = 'uncleaned_v6'

DATA_V = 'uncleaned_v6'
DATA_T = 'ce' # ce or pc or pc_binary
AUGMENTATION = False
AUG_NAME = 'balanced'

model_checkpoint = 'monologg/koelectra-base-v3-discriminator'

notebook_name = 'acd_binary_trainer.ipynb'

### fixed

model_name = re.sub(r'[/-]', r'_', model_checkpoint).lower()
run_name = f'{model_name}_{RUN_ID}'

ROOT_PATH = './'
SAVE_PATH = os.path.join(ROOT_PATH, 'training_results', run_name, 'acd')
NOTEBOOK_PATH = os.path.join('./', notebook_name)

augornot = f'_{AUG_NAME}' if AUGMENTATION is True else ''
TRAIN_DATA_PATH = os.path.join(ROOT_PATH, 'dataset', DATA_V, f'{DATA_T}_train{augornot}.csv')
EVAL_DATA_PATH = os.path.join(ROOT_PATH, 'dataset', DATA_V, f'{DATA_T}_dev.csv')

!mkdir -p {SAVE_PATH}

In [5]:
if os.path.exists(SAVE_PATH):
    print(f'{SAVE_PATH} exists.')
else:
    print(f'{SAVE_PATH} does not exist.')
if os.path.exists(NOTEBOOK_PATH):
    print(f'{NOTEBOOK_PATH} exists.')
else:
    print(f'{NOTEBOOK_PATH} does not exist.')
if os.path.exists(TRAIN_DATA_PATH):
    print(f'{TRAIN_DATA_PATH} exists.')
else:
    print(f'{TRAIN_DATA_PATH} does not exist.')
if os.path.exists(EVAL_DATA_PATH):
    print(f'{EVAL_DATA_PATH} exists.')
else:
    print(f'{EVAL_DATA_PATH} does not exist.')

./training_results/monologg_koelectra_base_v3_discriminator_uncleaned_v6/acd exists.
./acd_binary_trainer.ipynb exists.
./dataset/uncleaned_v6/ce_train.csv exists.
./dataset/uncleaned_v6/ce_dev.csv exists.


In [6]:
### rest of training args

report_to="wandb"

fp16 = False

num_train_epochs = 20
batch_size = 25 * 2
gradient_accumulation_steps = 1

optim = 'adamw_hf' # 'adamw_torch'

learning_rate = 3e-6 / 8 * batch_size * 4 # 5e-5
weight_decay = 0.01 # 0
adam_epsilon = 1e-8

lr_scheduler_type = 'cosine'
warmup_ratio = 0

save_total_limit = 2

load_best_model_at_end = True
metric_for_best_model ='eval_loss'

save_strategy = "epoch"
evaluation_strategy = "epoch"

logging_strategy = "steps"
logging_first_step = True 
logging_steps = 5

# WandB Configuration

In [7]:
%env WANDB_PROJECT={PROJECT_NAME}
%env WANDB_NOTEBOOK_NAME={NOTEBOOK_PATH}
%env WANDB_LOG_MODEL=true
%env WANDB_WATCH=all
wandb.login()

env: WANDB_PROJECT=aspect_category_detection
env: WANDB_NOTEBOOK_NAME=./acd_binary_trainer.ipynb
env: WANDB_LOG_MODEL=true
env: WANDB_WATCH=all


[34m[1mwandb[0m: Currently logged in as: [33mdotsnangles[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

# Load Model, Tokenizer, and Collator

In [8]:
tokenizer = ElectraTokenizer.from_pretrained(model_checkpoint)
model = ElectraForSequenceClassification.from_pretrained(
    model_checkpoint, label2id=label2id, id2label=id2label, num_labels=num_labels
)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Some weights of the model checkpoint at monologg/koelectra-base-v3-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-v3-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
train_path = f'./dataset/{DATA_V}/raw_train.csv'
dev_path = f'./dataset/{DATA_V}/raw_dev.csv'
test_path = f'./dataset/{DATA_V}/raw_test.csv'
train = pd.read_csv(train_path)
dev = pd.read_csv(dev_path)
test = pd.read_csv(test_path)

### new
entity_property_pair = [
    '본품#가격', '본품#다양성', '본품#디자인', '본품#인지도', '본품#일반', '본품#편의성', '본품#품질',
    '브랜드#가격', '브랜드#디자인', '브랜드#인지도', '브랜드#일반', '브랜드#품질',
    '제품 전체#가격', '제품 전체#다양성', '제품 전체#디자인', '제품 전체#인지도', '제품 전체#일반', '제품 전체#편의성', '제품 전체#품질',
    '패키지/구성품#가격', '패키지/구성품#다양성', '패키지/구성품#디자인', '패키지/구성품#일반', '패키지/구성품#편의성', '패키지/구성품#품질'
]
special_tokens = ['&name&', '&affiliation&', '&social-security-num&', '&tel-num&', '&card-num&', '&bank-account&', '&num&', '&online-account&']
emojis = pd.concat([train.sentence_form, dev.sentence_form, test.sentence_form], ignore_index=True, verify_integrity=True).to_frame()
emojis = list(set(demoji.findall(' '.join(emojis.sentence_form.to_list())).keys()))
ep_labels = pd.Series(entity_property_pair, name='sentence_form', copy=True)

tokens2add = special_tokens + emojis

tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
print(len(tokenizer))
tokenizer_train_data = pd.concat([train.sentence_form, dev.sentence_form, test.sentence_form], ignore_index=True, verify_integrity=True).to_frame().drop_duplicates()
tokenizer_train_data = tokenizer_train_data.sentence_form.to_list()
new_tokenizer = tokenizer.train_new_from_iterator(tokenizer_train_data, vocab_size=1)
new_tokens = set(list(new_tokenizer.vocab.keys()) + tokens2add) - set(tokenizer.vocab.keys())
tokenizer.add_tokens(list(new_tokens))
print(len(new_tokenizer))
print(len(tokenizer))
model.resize_token_embeddings(len(tokenizer))

35000





3060
35254


Embedding(35254, 768)

In [10]:
print(len(new_tokens))
print(new_tokens)

254
{'😴', '&bank-account&', '##🐥', '👨\u200d👧', 'ᴇ', '❔', 'ɴ', '🌝', '💡', '##ˇ', '‼️', '##ɴ', '☺️', '##펏', '͈', '##◍', '##♬', '👨', '##💞', '뜌', '📸', '&online-account&', '##💯', '😡', '##촥', '&social-security-num&', '❣️', '🤘🏻', 'ᴏ', '☝🏻', '앝', '🍎', '〰️', 'ᴠ', '🙋🏻\u200d♀️', '😮', '💝', '🍼', '🏃\u200d♀️', 'ᴜ', '죱', '웻', '##닠', '##쨕', '##ɢ', '##🧚', '##˃', '💯', '👦', '👋🏻', '##😮', '💆🏻\u200d♀️', '츌', '💇🏼\u200d♀️', '젔', '##읒', '💄', '##🍎', '˚', '♬', '닼', '🕺', '✌️', '##앝', '밪', '🐱', '##잍', '🧚\u200d♀️', '🔸', '🥤', '##듕', '꺠', '♥️', '##ᴜ', '🎵', 'ᴍ', '띡', '💇', '👉🏻', 'ꈍ', '##쫜', '##◡', '˃', '##💋', '🍷', '&affiliation&', 'ʜ', '##❔', '##➕', '🐥', '😬', '##ᴠ', '##👌', '🙋\u200d♀️', '##🎵', '##ꈍ', '##ᴡ', '👏🏻', '😯', 'ɢ', '&tel-num&', '챳', '##챦', '졓', 'ᵕ', '🖒', '➰', '똭', '➕', '𖤐', '퐉', '🧚', '곘', '##➰', '♩', '##ᴏ', '🙌🏻', '㉦', '##🌹', '🤘', '🌻', '⁉', '💆\u200d♀️', '##ᴘ', '챦', '귯', '👆', '👩\u200d👦', 'ᴗ', '🕸', '😲', '##귯', '💆', '컄', '##젔', '##뜌', '🎀', '##💇', '##🎂', '##♩', '✔️', '쨕', '👌', 'ღ', '💬', '##♪', '##😶', '##ᴀ', '##🕸', '핡',

In [11]:
model.config.label2id, model.config.id2label, model.num_labels

({'True': 0, 'False': 1}, {0: 'True', 1: 'False'}, 2)

In [12]:
# entity_property_pair = [
#     '본품#가격', '본품#다양성', '본품#디자인', '본품#인지도', '본품#일반', '본품#편의성', '본품#품질',
#     '브랜드#가격', '브랜드#디자인', '브랜드#인지도', '브랜드#일반', '브랜드#품질',
#     '제품 전체#가격', '제품 전체#다양성', '제품 전체#디자인', '제품 전체#인지도', '제품 전체#일반', '제품 전체#편의성', '제품 전체#품질',
#     '패키지/구성품#가격', '패키지/구성품#다양성', '패키지/구성품#디자인', '패키지/구성품#일반', '패키지/구성품#편의성', '패키지/구성품#품질'
# ]
# polarity_id_to_name = ['positive', 'negative', 'neutral']
# tokenizer_tester = []
# for pair in entity_property_pair:
#     for polarity in polarity_id_to_name:
#         tokenizer_tester.append('#'.join([pair, polarity]))
# for e in tokenizer_tester:
#     print(tokenizer.decode(tokenizer.encode(e)))
# for e in tokenizer_tester:
#     print(tokenizer.encode(e))

# Define Metric

In [13]:
accuracy_metric = evaluate.load('accuracy')
f1_metric = evaluate.load('f1')

In [14]:
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    
    accuracy = accuracy_metric.compute(references=labels, predictions=predictions)['accuracy']
    f1_true, f1_false = tuple(f1_metric.compute(references=labels, predictions=predictions, average=None, labels=[0,1])['f1'])
    f1_macro = f1_metric.compute(references=labels, predictions=predictions, average='macro')['f1']
    f1_micro = f1_metric.compute(references=labels, predictions=predictions, average='micro')['f1']
    
    return {'accuracy': accuracy, 'f1_true': f1_true, 'f1_false': f1_false, 'f1_macro': f1_macro, 'f1_micro': f1_micro}

# Load Data

In [15]:
def preprocess_function(examples):
    return tokenizer(examples["form"], examples["pair"], truncation=True)

In [16]:
train_dataset = pd.read_csv(TRAIN_DATA_PATH)
eval_dataset = pd.read_csv(EVAL_DATA_PATH)
# train_dataset = pd.concat([train_dataset, eval_dataset])
train_dataset = datasets.Dataset.from_pandas(train_dataset) #.shuffle(seed=42)
eval_dataset = datasets.Dataset.from_pandas(eval_dataset) #.shuffle(seed=42)
train_dataset = train_dataset.map(preprocess_function, batched=False)
eval_dataset = eval_dataset.map(preprocess_function, batched=False)


  0%|          | 0/75000 [00:00<?, ?ex/s]


  1%|          | 480/75000 [00:00<00:15, 4797.32ex/s]


  1%|▏         | 960/75000 [00:00<00:15, 4783.55ex/s]


  2%|▏         | 1439/75000 [00:00<00:17, 4220.87ex/s]


  3%|▎         | 1893/75000 [00:00<00:16, 4334.91ex/s]


  3%|▎         | 2332/75000 [00:00<00:17, 4208.82ex/s]


  4%|▎         | 2757/75000 [00:00<00:28, 2543.65ex/s]


  4%|▍         | 3141/75000 [00:00<00:25, 2823.97ex/s]


  5%|▍         | 3609/75000 [00:01<00:21, 3260.96ex/s]


  5%|▌         | 4000/75000 [00:01<00:20, 3387.84ex/s]


  6%|▌         | 4438/75000 [00:01<00:19, 3647.11ex/s]


  7%|▋         | 4901/75000 [00:01<00:17, 3912.55ex/s]


  7%|▋         | 5320/75000 [00:01<00:17, 3948.95ex/s]


  8%|▊         | 5844/75000 [00:01<00:16, 4314.69ex/s]


  8%|▊         | 6292/75000 [00:01<00:15, 4338.03ex/s]


  9%|▉         | 6763/75000 [00:01<00:15, 4443.01ex/s]


 10%|▉         | 7216/75000 [00:01<00:16, 4135.49ex/s]


 10%|█         | 7669/75000 [00:01<00:15, 4245.18ex/s]


 11%|█         | 8102/75000 [00:02<00:16, 4149.38ex/s]


 11%|█▏        | 8597/75000 [00:02<00:15, 4375.53ex/s]


 12%|█▏        | 9046/75000 [00:02<00:14, 4406.14ex/s]


 13%|█▎        | 9491/75000 [00:02<00:14, 4370.99ex/s]


 13%|█▎        | 9964/75000 [00:02<00:14, 4474.24ex/s]


 14%|█▍        | 10414/75000 [00:02<00:14, 4356.36ex/s]


 15%|█▍        | 10960/75000 [00:02<00:13, 4674.41ex/s]


 15%|█▌        | 11430/75000 [00:02<00:14, 4502.75ex/s]


 16%|█▌        | 11911/75000 [00:02<00:13, 4590.18ex/s]


 16%|█▋        | 12373/75000 [00:03<00:14, 4334.68ex/s]


 17%|█▋        | 12811/75000 [00:03<00:14, 4290.39ex/s]


 18%|█▊        | 13243/75000 [00:03<00:15, 4030.97ex/s]


 18%|█▊        | 13695/75000 [00:03<00:14, 4163.81ex/s]


 19%|█▉        | 14116/75000 [00:03<00:14, 4064.21ex/s]


 19%|█▉        | 14608/75000 [00:03<00:14, 4305.33ex/s]


 20%|██        | 15043/75000 [00:03<00:14, 4069.29ex/s]


 21%|██        | 15484/75000 [00:03<00:14, 4163.84ex/s]


 21%|██        | 15916/75000 [00:03<00:14, 4206.23ex/s]


 22%|██▏       | 16340/75000 [00:04<00:14, 3966.27ex/s]


 22%|██▏       | 16773/75000 [00:04<00:14, 4067.64ex/s]


 23%|██▎       | 17184/75000 [00:04<00:14, 4046.74ex/s]


 24%|██▎       | 17634/75000 [00:04<00:13, 4177.15ex/s]


 24%|██▍       | 18055/75000 [00:04<00:14, 4023.18ex/s]


 25%|██▍       | 18489/75000 [00:04<00:13, 4112.58ex/s]


 25%|██▌       | 18957/75000 [00:04<00:13, 4274.86ex/s]


 26%|██▌       | 19387/75000 [00:04<00:13, 4100.82ex/s]


 27%|██▋       | 19913/75000 [00:04<00:12, 4431.06ex/s]


 27%|██▋       | 20389/75000 [00:04<00:12, 4525.99ex/s]


 28%|██▊       | 20882/75000 [00:05<00:11, 4642.21ex/s]


 28%|██▊       | 21349/75000 [00:05<00:12, 4377.97ex/s]


 29%|██▉       | 21792/75000 [00:05<00:12, 4375.89ex/s]


 30%|██▉       | 22233/75000 [00:05<00:12, 4069.93ex/s]


 30%|███       | 22683/75000 [00:05<00:12, 4187.02ex/s]


 31%|███       | 23107/75000 [00:05<00:12, 4177.34ex/s]


 32%|███▏      | 23630/75000 [00:05<00:11, 4477.19ex/s]


 32%|███▏      | 24082/75000 [00:05<00:11, 4326.92ex/s]


 33%|███▎      | 24553/75000 [00:05<00:11, 4433.46ex/s]


 33%|███▎      | 25000/75000 [00:06<00:12, 4065.63ex/s]


 34%|███▍      | 25414/75000 [00:06<00:12, 3948.36ex/s]


 35%|███▍      | 25931/75000 [00:06<00:11, 4283.30ex/s]


 35%|███▌      | 26366/75000 [00:06<00:11, 4247.29ex/s]


 36%|███▌      | 26821/75000 [00:06<00:11, 4331.34ex/s]


 36%|███▋      | 27258/75000 [00:06<00:11, 4266.05ex/s]


 37%|███▋      | 27688/75000 [00:06<00:11, 4260.56ex/s]


 37%|███▋      | 28116/75000 [00:06<00:11, 4096.86ex/s]


 38%|███▊      | 28587/75000 [00:06<00:10, 4270.89ex/s]


 39%|███▊      | 29047/75000 [00:06<00:10, 4363.65ex/s]


 39%|███▉      | 29513/75000 [00:07<00:10, 4449.46ex/s]


 40%|███▉      | 29966/75000 [00:07<00:10, 4469.85ex/s]


 41%|████      | 30415/75000 [00:07<00:10, 4184.19ex/s]


 41%|████      | 30856/75000 [00:07<00:10, 4244.57ex/s]


 42%|████▏     | 31284/75000 [00:07<00:10, 3988.24ex/s]


 42%|████▏     | 31786/75000 [00:07<00:10, 4274.76ex/s]


 43%|████▎     | 32220/75000 [00:07<00:10, 4158.84ex/s]


 44%|████▎     | 32641/75000 [00:07<00:10, 4123.13ex/s]


 44%|████▍     | 33057/75000 [00:07<00:10, 3849.23ex/s]


 45%|████▍     | 33447/75000 [00:08<00:10, 3779.70ex/s]


 45%|████▌     | 33889/75000 [00:08<00:10, 3955.55ex/s]


 46%|████▌     | 34289/75000 [00:08<00:10, 3823.74ex/s]


 46%|████▋     | 34737/75000 [00:08<00:10, 4007.35ex/s]


 47%|████▋     | 35198/75000 [00:08<00:09, 4179.46ex/s]


 47%|████▋     | 35622/75000 [00:08<00:09, 4194.79ex/s]


 48%|████▊     | 36092/75000 [00:08<00:08, 4342.65ex/s]


 49%|████▉     | 36606/75000 [00:08<00:08, 4577.60ex/s]


 49%|████▉     | 37066/75000 [00:08<00:08, 4434.15ex/s]


 50%|█████     | 37583/75000 [00:09<00:08, 4646.71ex/s]


 51%|█████     | 38050/75000 [00:09<00:08, 4483.07ex/s]


 51%|█████▏    | 38534/75000 [00:09<00:07, 4585.19ex/s]


 52%|█████▏    | 38995/75000 [00:09<00:08, 4376.36ex/s]


 53%|█████▎    | 39436/75000 [00:09<00:08, 4089.31ex/s]


 53%|█████▎    | 39905/75000 [00:09<00:08, 4252.21ex/s]


 54%|█████▍    | 40336/75000 [00:09<00:08, 4134.66ex/s]


 54%|█████▍    | 40826/75000 [00:09<00:07, 4348.03ex/s]


 55%|█████▌    | 41316/75000 [00:09<00:07, 4503.77ex/s]


 56%|█████▌    | 41871/75000 [00:09<00:06, 4806.96ex/s]


 56%|█████▋    | 42363/75000 [00:10<00:06, 4837.85ex/s]


 57%|█████▋    | 42908/75000 [00:10<00:06, 5016.54ex/s]


 58%|█████▊    | 43412/75000 [00:10<00:06, 4903.51ex/s]


 59%|█████▊    | 43905/75000 [00:10<00:06, 4892.84ex/s]


 59%|█████▉    | 44396/75000 [00:10<00:07, 4311.08ex/s]


 60%|█████▉    | 44874/75000 [00:10<00:06, 4436.25ex/s]


 60%|██████    | 45329/75000 [00:10<00:06, 4371.15ex/s]


 61%|██████    | 45830/75000 [00:10<00:06, 4549.60ex/s]


 62%|██████▏   | 46292/75000 [00:10<00:06, 4565.33ex/s]


 62%|██████▏   | 46754/75000 [00:11<00:06, 4543.74ex/s]


 63%|██████▎   | 47225/75000 [00:11<00:06, 4590.42ex/s]


 64%|██████▎   | 47735/75000 [00:11<00:05, 4737.21ex/s]


 64%|██████▍   | 48211/75000 [00:11<00:05, 4575.14ex/s]


 65%|██████▍   | 48676/75000 [00:11<00:05, 4595.57ex/s]


 66%|██████▌   | 49138/75000 [00:11<00:06, 4300.57ex/s]


 66%|██████▌   | 49609/75000 [00:11<00:05, 4412.77ex/s]


 67%|██████▋   | 50055/75000 [00:11<00:05, 4276.16ex/s]


 67%|██████▋   | 50580/75000 [00:11<00:05, 4551.42ex/s]


 68%|██████▊   | 51101/75000 [00:12<00:05, 4739.42ex/s]


 69%|██████▉   | 51637/75000 [00:12<00:04, 4919.18ex/s]


 70%|██████▉   | 52132/75000 [00:12<00:04, 4614.13ex/s]


 70%|███████   | 52652/75000 [00:12<00:04, 4779.11ex/s]


 71%|███████   | 53135/75000 [00:12<00:04, 4691.87ex/s]


 71%|███████▏  | 53608/75000 [00:12<00:04, 4661.76ex/s]


 72%|███████▏  | 54077/75000 [00:12<00:04, 4518.99ex/s]


 73%|███████▎  | 54589/75000 [00:12<00:04, 4685.28ex/s]


 73%|███████▎  | 55060/75000 [00:12<00:05, 3749.77ex/s]


 74%|███████▍  | 55564/75000 [00:13<00:04, 4068.64ex/s]


 75%|███████▍  | 56000/75000 [00:13<00:04, 4075.66ex/s]


 75%|███████▌  | 56428/75000 [00:13<00:04, 4024.44ex/s]


 76%|███████▌  | 56897/75000 [00:13<00:04, 4203.56ex/s]


 76%|███████▋  | 57329/75000 [00:13<00:04, 3751.75ex/s]


 77%|███████▋  | 57810/75000 [00:13<00:04, 4027.81ex/s]


 78%|███████▊  | 58239/75000 [00:13<00:04, 4097.81ex/s]


 78%|███████▊  | 58740/75000 [00:13<00:03, 4350.97ex/s]


 79%|███████▉  | 59185/75000 [00:13<00:03, 4232.84ex/s]


 79%|███████▉  | 59616/75000 [00:14<00:04, 3596.67ex/s]


 80%|████████  | 60000/75000 [00:14<00:04, 3527.21ex/s]


 81%|████████  | 60427/75000 [00:14<00:03, 3719.92ex/s]


 81%|████████  | 60820/75000 [00:14<00:03, 3775.10ex/s]


 82%|████████▏ | 61207/75000 [00:14<00:03, 3643.71ex/s]


 82%|████████▏ | 61653/75000 [00:14<00:03, 3866.40ex/s]


 83%|████████▎ | 62047/75000 [00:14<00:03, 3684.85ex/s]


 83%|████████▎ | 62486/75000 [00:14<00:03, 3879.32ex/s]


 84%|████████▍ | 62880/75000 [00:14<00:03, 3762.19ex/s]


 84%|████████▍ | 63261/75000 [00:15<00:03, 3500.55ex/s]


 85%|████████▍ | 63673/75000 [00:15<00:03, 3666.99ex/s]


 85%|████████▌ | 64046/75000 [00:15<00:03, 3607.65ex/s]


 86%|████████▌ | 64501/75000 [00:15<00:02, 3871.57ex/s]


 87%|████████▋ | 65000/75000 [00:15<00:02, 4065.17ex/s]


 87%|████████▋ | 65487/75000 [00:15<00:02, 4291.64ex/s]


 88%|████████▊ | 66000/75000 [00:15<00:02, 4351.25ex/s]


 89%|████████▊ | 66490/75000 [00:15<00:01, 4504.43ex/s]


 89%|████████▉ | 66963/75000 [00:15<00:01, 4569.14ex/s]


 90%|████████▉ | 67428/75000 [00:15<00:01, 4591.46ex/s]


 91%|█████████ | 67889/75000 [00:16<00:02, 2937.05ex/s]


 91%|█████████ | 68259/75000 [00:16<00:02, 3092.80ex/s]


 92%|█████████▏| 68721/75000 [00:16<00:01, 3446.64ex/s]


 92%|█████████▏| 69118/75000 [00:16<00:01, 3572.59ex/s]


 93%|█████████▎| 69644/75000 [00:16<00:01, 4013.06ex/s]


 93%|█████████▎| 70079/75000 [00:16<00:01, 4031.98ex/s]


 94%|█████████▍| 70556/75000 [00:16<00:01, 4234.77ex/s]


 95%|█████████▍| 71032/75000 [00:16<00:00, 4382.49ex/s]


 95%|█████████▌| 71494/75000 [00:17<00:00, 4448.38ex/s]


 96%|█████████▌| 71949/75000 [00:17<00:00, 4015.44ex/s]


 96%|█████████▋| 72366/75000 [00:17<00:00, 3775.05ex/s]


 97%|█████████▋| 72781/75000 [00:17<00:00, 3873.59ex/s]


 98%|█████████▊| 73178/75000 [00:17<00:00, 3446.71ex/s]


 98%|█████████▊| 73537/75000 [00:17<00:00, 3134.19ex/s]


 99%|█████████▊| 73992/75000 [00:17<00:00, 3485.83ex/s]


 99%|█████████▉| 74458/75000 [00:17<00:00, 3792.33ex/s]


100%|██████████| 75000/75000 [00:18<00:00, 4167.45ex/s]


100%|██████████| 75000/75000 [00:18<00:00, 4151.77ex/s]





  0%|          | 0/69825 [00:00<?, ?ex/s]


  0%|          | 330/69825 [00:00<00:21, 3297.03ex/s]


  1%|          | 783/69825 [00:00<00:17, 4020.52ex/s]


  2%|▏         | 1186/69825 [00:00<00:17, 3886.41ex/s]


  2%|▏         | 1659/69825 [00:00<00:16, 4207.88ex/s]


  3%|▎         | 2081/69825 [00:00<00:16, 4088.19ex/s]


  4%|▎         | 2524/69825 [00:00<00:16, 4198.69ex/s]


  4%|▍         | 3000/69825 [00:00<00:15, 4180.44ex/s]


  5%|▍         | 3464/69825 [00:00<00:15, 4318.77ex/s]


  6%|▌         | 3935/69825 [00:00<00:14, 4435.90ex/s]


  6%|▋         | 4380/69825 [00:01<00:15, 4286.48ex/s]


  7%|▋         | 4836/69825 [00:01<00:14, 4365.46ex/s]


  8%|▊         | 5274/69825 [00:01<00:15, 4166.72ex/s]


  8%|▊         | 5742/69825 [00:01<00:14, 4313.02ex/s]


  9%|▉         | 6176/69825 [00:01<00:14, 4296.69ex/s]


 10%|▉         | 6696/69825 [00:01<00:13, 4560.56ex/s]


 10%|█         | 7155/69825 [00:01<00:14, 4424.96ex/s]


 11%|█         | 7668/69825 [00:01<00:13, 4628.40ex/s]


 12%|█▏        | 8134/69825 [00:01<00:13, 4451.06ex/s]


 12%|█▏        | 8596/69825 [00:01<00:13, 4498.52ex/s]


 13%|█▎        | 9048/69825 [00:02<00:13, 4495.68ex/s]


 14%|█▎        | 9500/69825 [00:02<00:14, 4211.56ex/s]


 14%|█▍        | 9950/69825 [00:02<00:13, 4291.87ex/s]


 15%|█▍        | 10383/69825 [00:02<00:13, 4262.15ex/s]


 15%|█▌        | 10812/69825 [00:02<00:13, 4268.63ex/s]


 16%|█▌        | 11241/69825 [00:02<00:15, 3879.42ex/s]


 17%|█▋        | 11674/69825 [00:02<00:14, 4002.08ex/s]


 17%|█▋        | 12081/69825 [00:02<00:14, 3957.83ex/s]


 18%|█▊        | 12494/69825 [00:02<00:14, 4006.33ex/s]


 19%|█▊        | 12938/69825 [00:03<00:13, 4130.20ex/s]


 19%|█▉        | 13354/69825 [00:03<00:14, 3979.43ex/s]


 20%|█▉        | 13763/69825 [00:03<00:13, 4008.21ex/s]


 20%|██        | 14166/69825 [00:03<00:14, 3875.44ex/s]


 21%|██        | 14575/69825 [00:03<00:14, 3934.21ex/s]


 21%|██▏       | 15000/69825 [00:03<00:13, 3983.03ex/s]


 22%|██▏       | 15477/69825 [00:03<00:12, 4204.32ex/s]


 23%|██▎       | 15922/69825 [00:03<00:12, 4274.46ex/s]


 23%|██▎       | 16351/69825 [00:03<00:13, 4050.83ex/s]


 24%|██▍       | 16760/69825 [00:04<00:13, 4058.74ex/s]


 25%|██▍       | 17176/69825 [00:04<00:12, 4087.76ex/s]


 25%|██▌       | 17647/69825 [00:04<00:12, 4269.22ex/s]


 26%|██▌       | 18076/69825 [00:04<00:13, 3917.76ex/s]


 27%|██▋       | 18546/69825 [00:04<00:12, 4133.21ex/s]


 27%|██▋       | 18976/69825 [00:04<00:12, 4163.07ex/s]


 28%|██▊       | 19397/69825 [00:04<00:12, 4014.08ex/s]


 28%|██▊       | 19885/69825 [00:04<00:11, 4258.58ex/s]


 29%|██▉       | 20315/69825 [00:04<00:11, 4152.97ex/s]


 30%|██▉       | 20774/69825 [00:04<00:11, 4276.02ex/s]


 30%|███       | 21205/69825 [00:05<00:11, 4247.05ex/s]


 31%|███       | 21703/69825 [00:05<00:10, 4458.48ex/s]


 32%|███▏      | 22151/69825 [00:05<00:10, 4397.42ex/s]


 32%|███▏      | 22593/69825 [00:05<00:10, 4384.04ex/s]


 33%|███▎      | 23033/69825 [00:05<00:10, 4366.18ex/s]


 34%|███▎      | 23525/69825 [00:05<00:10, 4527.53ex/s]


 34%|███▍      | 24000/69825 [00:05<00:10, 4531.33ex/s]


 35%|███▌      | 24518/69825 [00:05<00:09, 4721.25ex/s]


 36%|███▌      | 24991/69825 [00:05<00:09, 4630.05ex/s]


 36%|███▋      | 25455/69825 [00:06<00:10, 4379.56ex/s]


 37%|███▋      | 25905/69825 [00:06<00:09, 4412.65ex/s]


 38%|███▊      | 26349/69825 [00:06<00:10, 4207.06ex/s]


 38%|███▊      | 26831/69825 [00:06<00:09, 4376.58ex/s]


 39%|███▉      | 27272/69825 [00:06<00:09, 4281.08ex/s]


 40%|███▉      | 27723/69825 [00:06<00:09, 4344.53ex/s]


 40%|████      | 28160/69825 [00:06<00:10, 4099.85ex/s]


 41%|████      | 28590/69825 [00:06<00:09, 4153.75ex/s]


 42%|████▏     | 29009/69825 [00:06<00:10, 3890.10ex/s]


 42%|████▏     | 29449/69825 [00:06<00:10, 4030.15ex/s]


 43%|████▎     | 29905/69825 [00:07<00:09, 4177.78ex/s]


 43%|████▎     | 30327/69825 [00:07<00:09, 3979.94ex/s]


 44%|████▍     | 30840/69825 [00:07<00:09, 4300.35ex/s]


 45%|████▍     | 31276/69825 [00:07<00:09, 4224.49ex/s]


 45%|████▌     | 31754/69825 [00:07<00:08, 4382.10ex/s]


 46%|████▌     | 32196/69825 [00:07<00:08, 4265.47ex/s]


 47%|████▋     | 32679/69825 [00:07<00:08, 4426.50ex/s]


 47%|████▋     | 33125/69825 [00:07<00:08, 4325.39ex/s]


 48%|████▊     | 33579/69825 [00:07<00:08, 4387.00ex/s]


 49%|████▊     | 34020/69825 [00:08<00:08, 4195.19ex/s]


 49%|████▉     | 34484/69825 [00:08<00:08, 4319.13ex/s]


 50%|█████     | 34919/69825 [00:08<00:08, 4323.43ex/s]


 51%|█████     | 35354/69825 [00:08<00:08, 4191.76ex/s]


 51%|█████▏    | 35872/69825 [00:08<00:07, 4474.89ex/s]


 52%|█████▏    | 36322/69825 [00:08<00:07, 4376.78ex/s]


 53%|█████▎    | 36762/69825 [00:08<00:08, 3912.98ex/s]


 53%|█████▎    | 37163/69825 [00:08<00:08, 3857.70ex/s]


 54%|█████▍    | 37711/69825 [00:08<00:07, 4301.04ex/s]


 55%|█████▍    | 38214/69825 [00:09<00:07, 4505.03ex/s]


 56%|█████▌    | 38768/69825 [00:09<00:06, 4801.12ex/s]


 56%|█████▌    | 39255/69825 [00:09<00:07, 4239.12ex/s]


 57%|█████▋    | 39695/69825 [00:09<00:07, 4249.00ex/s]


 57%|█████▋    | 40149/69825 [00:09<00:06, 4325.44ex/s]


 58%|█████▊    | 40663/69825 [00:09<00:06, 4553.00ex/s]


 59%|█████▉    | 41126/69825 [00:09<00:06, 4130.73ex/s]


 60%|█████▉    | 41573/69825 [00:09<00:06, 4220.15ex/s]


 60%|██████    | 42005/69825 [00:09<00:06, 4079.57ex/s]


 61%|██████    | 42555/69825 [00:10<00:06, 4469.95ex/s]


 62%|██████▏   | 43068/69825 [00:10<00:05, 4654.19ex/s]


 62%|██████▏   | 43634/69825 [00:10<00:05, 4943.18ex/s]


 63%|██████▎   | 44135/69825 [00:10<00:05, 4893.35ex/s]


 64%|██████▍   | 44650/69825 [00:10<00:05, 4967.66ex/s]


 65%|██████▍   | 45150/69825 [00:10<00:04, 4971.30ex/s]


 65%|██████▌   | 45697/69825 [00:10<00:04, 5117.97ex/s]


 66%|██████▌   | 46211/69825 [00:10<00:04, 4965.69ex/s]


 67%|██████▋   | 46713/69825 [00:10<00:04, 4980.34ex/s]


 68%|██████▊   | 47213/69825 [00:10<00:04, 4825.68ex/s]


 68%|██████▊   | 47762/69825 [00:11<00:04, 5016.48ex/s]


 69%|██████▉   | 48266/69825 [00:11<00:04, 4915.72ex/s]


 70%|██████▉   | 48760/69825 [00:11<00:04, 4739.74ex/s]


 71%|███████   | 49237/69825 [00:11<00:04, 4677.66ex/s]


 71%|███████   | 49707/69825 [00:11<00:04, 4539.23ex/s]


 72%|███████▏  | 50163/69825 [00:11<00:04, 4419.50ex/s]


 73%|███████▎  | 50648/69825 [00:11<00:04, 4539.59ex/s]


 73%|███████▎  | 51104/69825 [00:11<00:04, 4160.63ex/s]


 74%|███████▍  | 51614/69825 [00:11<00:04, 4417.04ex/s]


 75%|███████▍  | 52086/69825 [00:12<00:03, 4501.90ex/s]


 75%|███████▌  | 52575/69825 [00:12<00:03, 4612.49ex/s]


 76%|███████▌  | 53041/69825 [00:12<00:03, 4328.64ex/s]


 77%|███████▋  | 53480/69825 [00:12<00:03, 4313.44ex/s]


 77%|███████▋  | 53995/69825 [00:12<00:03, 4548.28ex/s]


 78%|███████▊  | 54455/69825 [00:12<00:03, 4446.03ex/s]


 79%|███████▊  | 54943/69825 [00:12<00:03, 4567.71ex/s]


 79%|███████▉  | 55403/69825 [00:12<00:03, 4533.18ex/s]


 80%|████████  | 55895/69825 [00:12<00:03, 4642.27ex/s]


 81%|████████  | 56361/69825 [00:12<00:03, 4456.29ex/s]


 81%|████████▏ | 56842/69825 [00:13<00:02, 4555.82ex/s]


 82%|████████▏ | 57300/69825 [00:13<00:04, 2800.36ex/s]


 83%|████████▎ | 57801/69825 [00:13<00:03, 3248.65ex/s]


 83%|████████▎ | 58242/69825 [00:13<00:03, 3508.21ex/s]


 84%|████████▍ | 58737/69825 [00:13<00:02, 3857.06ex/s]


 85%|████████▍ | 59177/69825 [00:13<00:02, 3990.23ex/s]


 85%|████████▌ | 59616/69825 [00:13<00:02, 4023.38ex/s]


 86%|████████▌ | 60047/69825 [00:14<00:02, 3987.61ex/s]


 87%|████████▋ | 60562/69825 [00:14<00:02, 4307.06ex/s]


 87%|████████▋ | 61010/69825 [00:14<00:02, 4034.16ex/s]


 88%|████████▊ | 61428/69825 [00:14<00:02, 4015.55ex/s]


 89%|████████▊ | 61961/69825 [00:14<00:01, 4378.05ex/s]


 89%|████████▉ | 62409/69825 [00:14<00:01, 4241.49ex/s]


 90%|████████▉ | 62841/69825 [00:14<00:01, 4262.96ex/s]


 91%|█████████ | 63273/69825 [00:14<00:01, 3807.40ex/s]


 91%|█████████ | 63672/69825 [00:14<00:01, 3854.70ex/s]


 92%|█████████▏| 64067/69825 [00:15<00:01, 3316.95ex/s]


 92%|█████████▏| 64493/69825 [00:15<00:01, 3553.95ex/s]


 93%|█████████▎| 64944/69825 [00:15<00:01, 3806.62ex/s]


 94%|█████████▎| 65379/69825 [00:15<00:01, 3954.10ex/s]


 94%|█████████▍| 65876/69825 [00:15<00:00, 4238.51ex/s]


 95%|█████████▍| 66311/69825 [00:15<00:00, 4094.69ex/s]


 96%|█████████▌| 66733/69825 [00:15<00:00, 4128.97ex/s]


 96%|█████████▌| 67152/69825 [00:15<00:00, 3884.54ex/s]


 97%|█████████▋| 67584/69825 [00:15<00:00, 4002.92ex/s]


 97%|█████████▋| 67990/69825 [00:16<00:00, 3870.73ex/s]


 98%|█████████▊| 68408/69825 [00:16<00:00, 3956.12ex/s]


 99%|█████████▉| 68973/69825 [00:16<00:00, 4440.49ex/s]


 99%|█████████▉| 69422/69825 [00:16<00:00, 4304.67ex/s]


100%|██████████| 69825/69825 [00:16<00:00, 4254.48ex/s]




In [17]:
len(train_dataset), len(eval_dataset)

(75000, 69825)

In [18]:
k = random.randrange(len(train_dataset))
print(tokenizer.decode(train_dataset['input_ids'][k]), train_dataset['labels'][k])
k = random.randrange(len(eval_dataset))
print(tokenizer.decode(eval_dataset['input_ids'][k]), eval_dataset['labels'][k])

[CLS] 향도좋고 # 버블버블 거품이 풍성하니 # 목욕시간 이 더 신난 아드님 [SEP] 패키지 / 구성품 # 편의성 [SEP] 1


[CLS] 스판이 안 들어간 것은 너무 뻣뻣하고, 스판이 들어간 건 좀 더 낫지만 뒷태를 비롯 어딘가 이쁘지가 않다. [SEP] 패키지 / 구성품 # 편의성 [SEP] 1


# Load Trainer

In [19]:
args = TrainingArguments(
    output_dir=run_name,
    run_name=run_name,
    report_to=report_to,

    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,

    optim=optim,

    learning_rate=learning_rate,
    weight_decay=weight_decay,
    adam_epsilon=adam_epsilon,

    lr_scheduler_type=lr_scheduler_type,
    warmup_ratio=warmup_ratio,

    save_total_limit=save_total_limit,

    load_best_model_at_end=load_best_model_at_end,
    metric_for_best_model=metric_for_best_model,
    
    save_strategy=save_strategy,
    evaluation_strategy=evaluation_strategy,

    logging_strategy=logging_strategy,
    logging_first_step=logging_first_step, 
    logging_steps=logging_steps,
    
    fp16=fp16,
)

In [20]:
# es = EarlyStoppingCallback(early_stopping_patience=early_stopping_patience)

In [21]:
trainer = Trainer(
    model,
    args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    data_collator=data_collator,
    # callbacks=[es],
)

# Run Trainer

In [22]:
trainer.train()
wandb.finish()

The following columns in the training set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, id, form. If pair, id, form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running training *****


  Num examples = 75000


  Num Epochs = 20


  Instantaneous batch size per device = 50


  Total train batch size (w. parallel, distributed & accumulation) = 200


  Gradient Accumulation steps = 1


  Total optimization steps = 7500


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"




Epoch,Training Loss,Validation Loss,Accuracy,F1 True,F1 False,F1 Macro,F1 Micro
1,0.185,0.177536,0.957007,0.0,0.978031,0.489016,0.957007
2,0.1594,0.177339,0.957007,0.0,0.978031,0.489016,0.957007
3,0.1342,0.178203,0.957007,0.0,0.978031,0.489016,0.957007
4,0.2131,0.177374,0.957007,0.0,0.978031,0.489016,0.957007
5,0.172,0.178067,0.957007,0.0,0.978031,0.489016,0.957007
6,0.1997,0.178341,0.957007,0.0,0.978031,0.489016,0.957007
7,0.1709,0.177444,0.957007,0.0,0.978031,0.489016,0.957007
8,0.1947,0.178176,0.957007,0.0,0.978031,0.489016,0.957007


The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, id, form. If pair, id, form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 200


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-375


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-375/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-375/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-375/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-375/special_tokens_map.json




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, id, form. If pair, id, form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 200


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-750


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-750/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-750/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-750/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-750/special_tokens_map.json




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, id, form. If pair, id, form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 200


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-1125


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-1125/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-1125/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-1125/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-1125/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-375] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, id, form. If pair, id, form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 200


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-1500


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-1500/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-1500/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-1500/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-1500/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-1125] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, id, form. If pair, id, form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 200


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-1875


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-1875/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-1875/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-1875/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-1875/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-1500] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, id, form. If pair, id, form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 200


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-2250


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-2250/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-2250/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-2250/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-2250/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-1875] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, id, form. If pair, id, form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 200


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-2625


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-2625/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-2625/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-2625/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-2625/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-2250] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: pair, id, form. If pair, id, form are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 200


Saving model checkpoint to monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-3000


Configuration saved in monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-3000/config.json


Model weights saved in monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-3000/pytorch_model.bin


tokenizer config file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-3000/tokenizer_config.json


Special tokens file saved in monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-3000/special_tokens_map.json


Deleting older checkpoint [monologg_koelectra_base_v3_discriminator_uncleaned_v6/checkpoint-2625] due to args.save_total_limit




In [None]:
keep = [
    'added_tokens.json',
    'config.json',
    'pytorch_model.bin',
    'special_tokens_map.json',
    'tokenizer.json',
    'tokenizer_config.json',
    'vocab.txt'
]

ckpts = os.listdir(run_name)
for ckpt in ckpts:
    ckpt = os.path.join(run_name, ckpt)
    for item in os.listdir(ckpt):
        if item not in keep:
            os.remove(os.path.join(ckpt, item))

!mv wandb {run_name} {SAVE_PATH}/