# Description


# Modules and Global Variables

In [1]:
from transformers import (
    AutoConfig, AutoTokenizer, AutoModelForSequenceClassification, 
    DefaultDataCollator, DataCollatorWithPadding, 
    TrainingArguments, Trainer,
)

from transformers.optimization import (
    AdamW, get_linear_schedule_with_warmup,
    Adafactor, AdafactorSchedule,
)

import torch
import wandb

import datasets
import evaluate

from sklearn.metrics import accuracy_score, f1_score

import numpy as np
import pandas as pd

import os
import re
import random

import demoji

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
print(f'torch.__version__: {torch.__version__}')
print(f'torch.cuda.is_available(): {torch.cuda.is_available()}')
NGPU = torch.cuda.device_count()
print(f'NGPU: {NGPU}')
# NGPU = torch.cuda.device_count()
# if NGPU > 1:
#     model = torch.nn.DataParallel(model, device_ids=list(range(NGPU)))

torch.__version__: 1.12.1
torch.cuda.is_available(): True
NGPU: 4


In [3]:
### labels

ce_labels = ['True', 'False']
pc_labels = ['positive', 'negative', 'neutral']
pc_binary_labels = ['True', 'False']

labels = ce_labels

label2id = {k: i for i, k in enumerate(labels)}
id2label = {i: k for i, k in enumerate(labels)}
num_labels = len(labels)

print(label2id)
print(id2label)

{'True': 0, 'False': 1}
{0: 'True', 1: 'False'}


In [4]:
### paths and names

PROJECT_NAME = 'aspect_category_detection'
RUN_ID = 'uncleaned_v1'

DATA_V = 'uncleaned_v1'
DATA_T = 'ce' # ce or pc or pc_binary
AUGMENTATION = False
AUG_NAME = 'balanced'

model_checkpoint = 'snunlp/KR-ELECTRA-discriminator'

notebook_name = 'acd_binary_trainer.ipynb'

### fixed

model_name = re.sub(r'[/-]', r'_', model_checkpoint).lower()
run_name = f'{model_name}_{RUN_ID}'

ROOT_PATH = './'
SAVE_PATH = os.path.join(ROOT_PATH, 'training_results', run_name, 'acd')
NOTEBOOK_PATH = os.path.join('./', notebook_name)

augornot = f'_{AUG_NAME}' if AUGMENTATION is True else ''
TRAIN_DATA_PATH = os.path.join(ROOT_PATH, 'dataset', DATA_V, f'{DATA_T}_train{augornot}.csv')
EVAL_DATA_PATH = os.path.join(ROOT_PATH, 'dataset', DATA_V, f'{DATA_T}_dev.csv')

!mkdir -p {SAVE_PATH}

In [5]:
if os.path.exists(SAVE_PATH):
    print(f'{SAVE_PATH} exists.')
else:
    print(f'{SAVE_PATH} does not exist.')
if os.path.exists(NOTEBOOK_PATH):
    print(f'{NOTEBOOK_PATH} exists.')
else:
    print(f'{NOTEBOOK_PATH} does not exist.')
if os.path.exists(TRAIN_DATA_PATH):
    print(f'{TRAIN_DATA_PATH} exists.')
else:
    print(f'{TRAIN_DATA_PATH} does not exist.')
if os.path.exists(EVAL_DATA_PATH):
    print(f'{EVAL_DATA_PATH} exists.')
else:
    print(f'{EVAL_DATA_PATH} does not exist.')

./training_results/snunlp_kr_electra_discriminator_uncleaned_v1/acd exists.
./acd_binary_trainer.ipynb exists.
./dataset/uncleaned_v1/ce_train.csv exists.
./dataset/uncleaned_v1/ce_dev.csv exists.


In [6]:
### rest of training args

report_to="wandb"

fp16 = False

num_train_epochs = 10
batch_size = 25
gradient_accumulation_steps = 1

optim = 'adamw_torch' # 'adamw_hf'

learning_rate = 3e-6 # 5e-5
weight_decay = 0.01 # 0
adam_epsilon = 1e-8

lr_scheduler_type = 'cosine'
warmup_ratio = 0

save_total_limit = 1

load_best_model_at_end = True
metric_for_best_model ='f1_macro'

save_strategy = "epoch"
evaluation_strategy = "epoch"

logging_strategy = "steps"
logging_first_step = True 
logging_steps = 500

# WandB Configuration

In [7]:
%env WANDB_PROJECT={PROJECT_NAME}
%env WANDB_NOTEBOOK_NAME={NOTEBOOK_PATH}
%env WANDB_LOG_MODEL=true
%env WANDB_WATCH=all
wandb.login()

env: WANDB_PROJECT=aspect_category_detection
env: WANDB_NOTEBOOK_NAME=./acd_binary_trainer.ipynb
env: WANDB_LOG_MODEL=true
env: WANDB_WATCH=all


[34m[1mwandb[0m: Currently logged in as: [33mdotsnangles[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

# Load Model, Tokenizer, and Collator

In [8]:
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint, label2id=label2id, id2label=id2label, num_labels=num_labels
)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Some weights of the model checkpoint at snunlp/KR-ELECTRA-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at snunlp/KR-ELECTRA-discriminator and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
train_path = './dataset/uncleaned_v1/raw_train.csv'
dev_path = './dataset/uncleaned_v1/raw_dev.csv'
test_path = './dataset/uncleaned_v1/raw_test.csv'
train = pd.read_csv(train_path)
dev = pd.read_csv(dev_path)
test = pd.read_csv(test_path)

In [10]:
### new
entity_property_pair = [
    'Î≥∏Ìíà#Í∞ÄÍ≤©', 'Î≥∏Ìíà#Îã§ÏñëÏÑ±', 'Î≥∏Ìíà#ÎîîÏûêÏù∏', 'Î≥∏Ìíà#Ïù∏ÏßÄÎèÑ', 'Î≥∏Ìíà#ÏùºÎ∞ò', 'Î≥∏Ìíà#Ìé∏ÏùòÏÑ±', 'Î≥∏Ìíà#ÌíàÏßà',
    'Î∏åÎûúÎìú#Í∞ÄÍ≤©', 'Î∏åÎûúÎìú#ÎîîÏûêÏù∏', 'Î∏åÎûúÎìú#Ïù∏ÏßÄÎèÑ', 'Î∏åÎûúÎìú#ÏùºÎ∞ò', 'Î∏åÎûúÎìú#ÌíàÏßà',
    'Ï†úÌíà Ï†ÑÏ≤¥#Í∞ÄÍ≤©', 'Ï†úÌíà Ï†ÑÏ≤¥#Îã§ÏñëÏÑ±', 'Ï†úÌíà Ï†ÑÏ≤¥#ÎîîÏûêÏù∏', 'Ï†úÌíà Ï†ÑÏ≤¥#Ïù∏ÏßÄÎèÑ', 'Ï†úÌíà Ï†ÑÏ≤¥#ÏùºÎ∞ò', 'Ï†úÌíà Ï†ÑÏ≤¥#Ìé∏ÏùòÏÑ±', 'Ï†úÌíà Ï†ÑÏ≤¥#ÌíàÏßà',
    'Ìå®ÌÇ§ÏßÄ/Íµ¨ÏÑ±Ìíà#Í∞ÄÍ≤©', 'Ìå®ÌÇ§ÏßÄ/Íµ¨ÏÑ±Ìíà#Îã§ÏñëÏÑ±', 'Ìå®ÌÇ§ÏßÄ/Íµ¨ÏÑ±Ìíà#ÎîîÏûêÏù∏', 'Ìå®ÌÇ§ÏßÄ/Íµ¨ÏÑ±Ìíà#ÏùºÎ∞ò', 'Ìå®ÌÇ§ÏßÄ/Íµ¨ÏÑ±Ìíà#Ìé∏ÏùòÏÑ±', 'Ìå®ÌÇ§ÏßÄ/Íµ¨ÏÑ±Ìíà#ÌíàÏßà'
]


more_tokens = ['&name&', '&affiliation&', '&social-security-num&', '&tel-num&', '&card-num&', '&bank-account&', '&num&', '&online-account&']

emojis = pd.concat([train.sentence_form, dev.sentence_form, test.sentence_form], ignore_index=True, verify_integrity=True).to_frame()
emojis = list(set(demoji.findall(' '.join(emojis.sentence_form.to_list())).keys()))

tokensToAdd = more_tokens + emojis
ep_labels = pd.Series(entity_property_pair, name='sentence_form', copy=True)

In [11]:
data = pd.concat([train.sentence_form, dev.sentence_form, test.sentence_form], ignore_index=True, verify_integrity=True).to_frame()
print(len(data))
data = data.drop_duplicates()
print(len(data.drop_duplicates()))

7920
7915


In [12]:
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
print(len(tokenizer))

tokenizerTrainData = data.sentence_form.to_list()
newTokenizer = tokenizer.train_new_from_iterator(tokenizerTrainData, vocab_size=1)

new_tokens = set(list(newTokenizer.vocab.keys())) - set(tokenizer.vocab.keys())
tokenizer.add_tokens(list(new_tokens) + tokensToAdd)
print(len(newTokenizer))
print(len(tokenizer))

model.resize_token_embeddings(len(tokenizer))

30000





3060
30117


Embedding(30117, 768)

In [13]:
len(new_tokens), new_tokens

(76,
 {'##…¢',
  '##…™',
  '##…¥',
  '## Ä',
  '##Àá',
  '##·¥ç',
  '##·¥ò',
  '##·¥õ',
  '##·¥ú',
  '##·¥†',
  '##·¥°',
  '##·µï',
  '##‚óç',
  '##‚ùî',
  '##‚ûï',
  '##„â¶',
  '##Íàç',
  '##Îúå',
  '##Ïùí',
  '##Ï£±',
  '##Ï®ï',
  '##Ï´ú',
  '##üë†',
  '##üíÑ',
  '##üíÜ',
  '##üíá',
  '##üï∑',
  '##üï∏',
  '##üöó',
  '##ü§°',
  '##ü•§',
  '…¢',
  '…™',
  '…¥',
  ' Ä',
  ' ú',
  'Àá',
  '“ì',
  '·¥ç',
  '·¥ò',
  '·¥õ',
  '·¥ú',
  '·¥†',
  '·¥°',
  '·µï',
  '‚è∞',
  '‚óç',
  '‚ùî',
  '‚ûï',
  '„â¶',
  'Íàç',
  'Îúå',
  'Îø§',
  'Ïì©',
  'Ïùí',
  'Ï£±',
  'Ï®ï',
  'Ï´ú',
  'Ï±≥',
  'üç∑',
  'üçº',
  'üêÑ',
  'üë†',
  'üíÑ',
  'üíÜ',
  'üíá',
  'üí°',
  'üí¨',
  'üï∑',
  'üï∏',
  'üï∫',
  'üòØ',
  'üò∫',
  'üöó',
  'ü§°',
  'ü•§'})

In [14]:
model.config.label2id, model.config.id2label, model.num_labels

({'True': 0, 'False': 1}, {0: 'True', 1: 'False'}, 2)

In [15]:
# entity_property_pair = [
#     'Î≥∏Ìíà#Í∞ÄÍ≤©', 'Î≥∏Ìíà#Îã§ÏñëÏÑ±', 'Î≥∏Ìíà#ÎîîÏûêÏù∏', 'Î≥∏Ìíà#Ïù∏ÏßÄÎèÑ', 'Î≥∏Ìíà#ÏùºÎ∞ò', 'Î≥∏Ìíà#Ìé∏ÏùòÏÑ±', 'Î≥∏Ìíà#ÌíàÏßà',
#     'Î∏åÎûúÎìú#Í∞ÄÍ≤©', 'Î∏åÎûúÎìú#ÎîîÏûêÏù∏', 'Î∏åÎûúÎìú#Ïù∏ÏßÄÎèÑ', 'Î∏åÎûúÎìú#ÏùºÎ∞ò', 'Î∏åÎûúÎìú#ÌíàÏßà',
#     'Ï†úÌíà Ï†ÑÏ≤¥#Í∞ÄÍ≤©', 'Ï†úÌíà Ï†ÑÏ≤¥#Îã§ÏñëÏÑ±', 'Ï†úÌíà Ï†ÑÏ≤¥#ÎîîÏûêÏù∏', 'Ï†úÌíà Ï†ÑÏ≤¥#Ïù∏ÏßÄÎèÑ', 'Ï†úÌíà Ï†ÑÏ≤¥#ÏùºÎ∞ò', 'Ï†úÌíà Ï†ÑÏ≤¥#Ìé∏ÏùòÏÑ±', 'Ï†úÌíà Ï†ÑÏ≤¥#ÌíàÏßà',
#     'Ìå®ÌÇ§ÏßÄ/Íµ¨ÏÑ±Ìíà#Í∞ÄÍ≤©', 'Ìå®ÌÇ§ÏßÄ/Íµ¨ÏÑ±Ìíà#Îã§ÏñëÏÑ±', 'Ìå®ÌÇ§ÏßÄ/Íµ¨ÏÑ±Ìíà#ÎîîÏûêÏù∏', 'Ìå®ÌÇ§ÏßÄ/Íµ¨ÏÑ±Ìíà#ÏùºÎ∞ò', 'Ìå®ÌÇ§ÏßÄ/Íµ¨ÏÑ±Ìíà#Ìé∏ÏùòÏÑ±', 'Ìå®ÌÇ§ÏßÄ/Íµ¨ÏÑ±Ìíà#ÌíàÏßà'
# ]

# polarity_id_to_name = ['positive', 'negative', 'neutral']

# tokenizer_tester = []
# for pair in entity_property_pair:
#     for polarity in polarity_id_to_name:
#         tokenizer_tester.append('#'.join([pair, polarity]))

# for e in tokenizer_tester:
#     print(tokenizer.decode(tokenizer.encode(e)))

# for e in tokenizer_tester:
#     print(tokenizer.encode(e))

# Define Metric

In [16]:
accuracy_metric = evaluate.load('accuracy')
f1_metric = evaluate.load('f1')

In [17]:
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    
    accuracy = accuracy_metric.compute(references=labels, predictions=predictions)['accuracy']
    f1_true, f1_false = tuple(f1_metric.compute(references=labels, predictions=predictions, average=None, labels=[0,1])['f1'])
    f1_macro = f1_metric.compute(references=labels, predictions=predictions, average='macro')['f1']
    f1_micro = f1_metric.compute(references=labels, predictions=predictions, average='micro')['f1']
    
    return {'accuracy': accuracy, 'f1_true': f1_true, 'f1_false': f1_false, 'f1_macro': f1_macro, 'f1_micro': f1_micro}

# Load Data

In [18]:
def preprocess_function(examples):
    return tokenizer(examples["sentence_form"], examples["entity_property"], truncation=True)

In [19]:
train_dataset = pd.read_csv(TRAIN_DATA_PATH)
eval_dataset = pd.read_csv(EVAL_DATA_PATH)
# train_dataset = pd.concat([train_dataset, eval_dataset])
train_dataset = datasets.Dataset.from_pandas(train_dataset) #.shuffle(seed=42)
eval_dataset = datasets.Dataset.from_pandas(eval_dataset) #.shuffle(seed=42)
train_dataset = train_dataset.map(preprocess_function, batched=False)
eval_dataset = eval_dataset.map(preprocess_function, batched=False)

  0%|          | 0/75000 [00:00<?, ?ex/s]

  1%|          | 410/75000 [00:00<00:18, 4098.01ex/s]

  1%|          | 826/75000 [00:00<00:17, 4129.51ex/s]

  2%|‚ñè         | 1239/75000 [00:00<00:29, 2528.01ex/s]

  2%|‚ñè         | 1658/75000 [00:00<00:24, 3001.60ex/s]

  3%|‚ñé         | 2014/75000 [00:00<00:23, 3162.78ex/s]

  3%|‚ñé         | 2434/75000 [00:00<00:20, 3465.84ex/s]

  4%|‚ñç         | 2844/75000 [00:00<00:19, 3651.36ex/s]

  4%|‚ñç         | 3231/75000 [00:00<00:19, 3589.58ex/s]

  5%|‚ñç         | 3666/75000 [00:01<00:18, 3808.44ex/s]

  5%|‚ñå         | 4059/75000 [00:01<00:18, 3737.88ex/s]

  6%|‚ñå         | 4464/75000 [00:01<00:18, 3826.51ex/s]

  7%|‚ñã         | 4893/75000 [00:01<00:17, 3960.80ex/s]

  7%|‚ñã         | 5294/75000 [00:01<00:18, 3858.04ex/s]

  8%|‚ñä         | 5741/75000 [00:01<00:17, 4034.36ex/s]

  8%|‚ñä         | 6148/75000 [00:01<00:17, 3949.52ex/s]

  9%|‚ñâ         | 6601/75000 [00:01<00:16, 4114.75ex/s]

  9%|‚ñâ         | 7015/75000 [00:01<00:17, 3965.61ex/s]

 10%|‚ñâ         | 7436/75000 [00:01<00:16, 4034.41ex/s]

 11%|‚ñà         | 7880/75000 [00:02<00:16, 4149.77ex/s]

 11%|‚ñà         | 8297/75000 [00:02<00:16, 3982.28ex/s]

 12%|‚ñà‚ñè        | 8727/75000 [00:02<00:16, 4072.26ex/s]

 12%|‚ñà‚ñè        | 9137/75000 [00:02<00:16, 3936.74ex/s]

 13%|‚ñà‚ñé        | 9544/75000 [00:02<00:16, 3972.77ex/s]

 13%|‚ñà‚ñé        | 9978/75000 [00:02<00:15, 4075.80ex/s]

 14%|‚ñà‚ñç        | 10388/75000 [00:02<00:16, 3941.16ex/s]

 15%|‚ñà‚ñç        | 10877/75000 [00:02<00:15, 4213.82ex/s]

 15%|‚ñà‚ñå        | 11301/75000 [00:02<00:15, 4073.01ex/s]

 16%|‚ñà‚ñå        | 11711/75000 [00:03<00:15, 4042.25ex/s]

 16%|‚ñà‚ñå        | 12117/75000 [00:03<00:15, 3935.43ex/s]

 17%|‚ñà‚ñã        | 12512/75000 [00:03<00:15, 3936.74ex/s]

 17%|‚ñà‚ñã        | 12932/75000 [00:03<00:15, 4012.53ex/s]

 18%|‚ñà‚ñä        | 13335/75000 [00:03<00:15, 3880.55ex/s]

 18%|‚ñà‚ñä        | 13744/75000 [00:03<00:15, 3940.44ex/s]

 19%|‚ñà‚ñâ        | 14140/75000 [00:03<00:15, 3823.79ex/s]

 19%|‚ñà‚ñâ        | 14566/75000 [00:03<00:15, 3947.35ex/s]

 20%|‚ñà‚ñâ        | 14975/75000 [00:03<00:15, 3986.10ex/s]

 20%|‚ñà‚ñà        | 15375/75000 [00:03<00:15, 3830.92ex/s]

 21%|‚ñà‚ñà        | 15760/75000 [00:04<00:16, 3572.59ex/s]

 21%|‚ñà‚ñà‚ñè       | 16122/75000 [00:04<00:17, 3401.50ex/s]

 22%|‚ñà‚ñà‚ñè       | 16521/75000 [00:04<00:16, 3559.65ex/s]

 23%|‚ñà‚ñà‚ñé       | 16946/75000 [00:04<00:15, 3752.61ex/s]

 23%|‚ñà‚ñà‚ñé       | 17326/75000 [00:04<00:15, 3645.25ex/s]

 24%|‚ñà‚ñà‚ñé       | 17744/75000 [00:04<00:15, 3795.23ex/s]

 24%|‚ñà‚ñà‚ñç       | 18127/75000 [00:04<00:15, 3729.89ex/s]

 25%|‚ñà‚ñà‚ñç       | 18548/75000 [00:04<00:14, 3865.95ex/s]

 25%|‚ñà‚ñà‚ñå       | 19000/75000 [00:04<00:14, 3883.66ex/s]

 26%|‚ñà‚ñà‚ñå       | 19431/75000 [00:05<00:13, 4004.26ex/s]

 27%|‚ñà‚ñà‚ñã       | 19885/75000 [00:05<00:13, 4158.04ex/s]

 27%|‚ñà‚ñà‚ñã       | 20303/75000 [00:05<00:13, 4087.40ex/s]

 28%|‚ñà‚ñà‚ñä       | 20756/75000 [00:05<00:12, 4215.00ex/s]

 28%|‚ñà‚ñà‚ñä       | 21179/75000 [00:05<00:13, 4080.75ex/s]

 29%|‚ñà‚ñà‚ñâ       | 21619/75000 [00:05<00:12, 4171.18ex/s]

 29%|‚ñà‚ñà‚ñâ       | 22038/75000 [00:05<00:13, 3926.67ex/s]

 30%|‚ñà‚ñà‚ñâ       | 22440/75000 [00:05<00:13, 3950.31ex/s]

 30%|‚ñà‚ñà‚ñà       | 22867/75000 [00:05<00:12, 4041.70ex/s]

 31%|‚ñà‚ñà‚ñà       | 23274/75000 [00:06<00:13, 3913.95ex/s]

 32%|‚ñà‚ñà‚ñà‚ñè      | 23730/75000 [00:06<00:12, 4097.23ex/s]

 32%|‚ñà‚ñà‚ñà‚ñè      | 24143/75000 [00:06<00:12, 3990.11ex/s]

 33%|‚ñà‚ñà‚ñà‚ñé      | 24589/75000 [00:06<00:12, 4123.08ex/s]

 33%|‚ñà‚ñà‚ñà‚ñé      | 25004/75000 [00:06<00:12, 3934.49ex/s]

 34%|‚ñà‚ñà‚ñà‚ñç      | 25451/75000 [00:06<00:12, 4084.07ex/s]

 35%|‚ñà‚ñà‚ñà‚ñç      | 25898/75000 [00:06<00:11, 4194.95ex/s]

 35%|‚ñà‚ñà‚ñà‚ñå      | 26320/75000 [00:06<00:12, 3998.50ex/s]

 36%|‚ñà‚ñà‚ñà‚ñå      | 26750/75000 [00:06<00:11, 4083.20ex/s]

 36%|‚ñà‚ñà‚ñà‚ñå      | 27162/75000 [00:06<00:12, 3983.70ex/s]

 37%|‚ñà‚ñà‚ñà‚ñã      | 27590/75000 [00:07<00:11, 4066.11ex/s]

 37%|‚ñà‚ñà‚ñà‚ñã      | 28000/75000 [00:07<00:11, 3985.81ex/s]

 38%|‚ñà‚ñà‚ñà‚ñä      | 28452/75000 [00:07<00:11, 4138.80ex/s]

 39%|‚ñà‚ñà‚ñà‚ñä      | 28915/75000 [00:07<00:10, 4279.02ex/s]

 39%|‚ñà‚ñà‚ñà‚ñâ      | 29345/75000 [00:07<00:10, 4157.70ex/s]

 40%|‚ñà‚ñà‚ñà‚ñâ      | 29781/75000 [00:07<00:10, 4216.13ex/s]

 40%|‚ñà‚ñà‚ñà‚ñà      | 30204/75000 [00:07<00:11, 4066.16ex/s]

 41%|‚ñà‚ñà‚ñà‚ñà      | 30641/75000 [00:07<00:10, 4151.74ex/s]

 41%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 31058/75000 [00:07<00:11, 3943.47ex/s]

 42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 31518/75000 [00:08<00:10, 4128.72ex/s]

 43%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 31943/75000 [00:08<00:10, 4161.37ex/s]

 43%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 32362/75000 [00:08<00:10, 3946.37ex/s]

 44%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 32776/75000 [00:08<00:10, 3999.79ex/s]

 44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 33179/75000 [00:08<00:10, 3809.23ex/s]

 45%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 33568/75000 [00:08<00:10, 3831.80ex/s]

 45%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 34000/75000 [00:08<00:10, 3816.49ex/s]

 46%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 34433/75000 [00:08<00:10, 3960.89ex/s]

 47%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 34888/75000 [00:08<00:09, 4128.89ex/s]

 47%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 35304/75000 [00:09<00:10, 3849.81ex/s]

 48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 35739/75000 [00:09<00:09, 3987.75ex/s]

 48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 36143/75000 [00:09<00:09, 3938.66ex/s]

 49%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 36565/75000 [00:09<00:09, 4016.43ex/s]

 49%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 37000/75000 [00:09<00:09, 3951.07ex/s]

 50%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 37448/75000 [00:09<00:09, 4099.87ex/s]

 51%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 37899/75000 [00:09<00:08, 4217.98ex/s]

 51%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 38323/75000 [00:09<00:08, 4090.34ex/s]

 52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 38758/75000 [00:09<00:08, 4162.58ex/s]

 52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 39176/75000 [00:09<00:09, 3901.00ex/s]

 53%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 39589/75000 [00:10<00:08, 3963.41ex/s]

 53%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 40000/75000 [00:10<00:08, 3905.76ex/s]

 54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 40450/75000 [00:10<00:08, 4071.72ex/s]

 55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 40900/75000 [00:10<00:08, 4194.12ex/s]

 55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 41322/75000 [00:10<00:08, 4126.58ex/s]

 56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 41775/75000 [00:10<00:07, 4241.98ex/s]

 56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 42201/75000 [00:10<00:07, 4165.58ex/s]

 57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 42649/75000 [00:10<00:07, 4254.61ex/s]

 57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 43076/75000 [00:10<00:07, 4160.12ex/s]

 58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 43527/75000 [00:11<00:07, 4260.61ex/s]

 59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 43967/75000 [00:11<00:07, 4301.12ex/s]

 59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 44398/75000 [00:11<00:07, 4080.28ex/s]

 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 44823/75000 [00:11<00:07, 4128.34ex/s]

 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 45238/75000 [00:11<00:07, 3878.34ex/s]

 61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 45710/75000 [00:11<00:07, 4111.42ex/s]

 62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 46126/75000 [00:11<00:07, 4047.50ex/s]

 62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 46534/75000 [00:11<00:07, 4011.01ex/s]

 63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 46938/75000 [00:11<00:07, 3895.28ex/s]

 63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 47330/75000 [00:11<00:07, 3633.71ex/s]

 64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 47728/75000 [00:12<00:07, 3727.87ex/s]

 64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 48105/75000 [00:12<00:07, 3596.69ex/s]

 65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 48482/75000 [00:12<00:07, 3643.25ex/s]

 65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 48862/75000 [00:12<00:07, 3686.00ex/s]

 66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 49233/75000 [00:12<00:07, 3481.97ex/s]

 66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 49627/75000 [00:12<00:07, 3609.72ex/s]

 67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 50000/75000 [00:12<00:07, 3514.32ex/s]

 67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 50405/75000 [00:12<00:06, 3665.57ex/s]

 68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 50824/75000 [00:12<00:06, 3814.32ex/s]

 68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 51208/75000 [00:13<00:06, 3636.64ex/s]

 69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 51602/75000 [00:13<00:06, 3719.79ex/s]

 69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 52000/75000 [00:13<00:06, 3581.86ex/s]

 70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 52386/75000 [00:13<00:06, 3659.41ex/s]

 70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 52771/75000 [00:13<00:05, 3711.55ex/s]

 71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 53145/75000 [00:13<00:06, 3632.16ex/s]

 71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 53529/75000 [00:13<00:05, 3690.10ex/s]

 72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 53921/75000 [00:13<00:05, 3755.28ex/s]

 72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 54298/75000 [00:13<00:05, 3556.64ex/s]

 73%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 54688/75000 [00:14<00:05, 3652.32ex/s]

 73%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 55056/75000 [00:14<00:05, 3505.39ex/s]

 74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 55480/75000 [00:14<00:05, 3712.91ex/s]

 75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 55888/75000 [00:14<00:05, 3818.52ex/s]

 75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 56273/75000 [00:14<00:05, 3607.08ex/s]

 76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 56661/75000 [00:14<00:04, 3681.55ex/s]

 76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 57033/75000 [00:14<00:05, 3425.11ex/s]

 77%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 57443/75000 [00:14<00:04, 3610.88ex/s]

 77%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 57853/75000 [00:14<00:04, 3747.02ex/s]

 78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 58233/75000 [00:14<00:04, 3692.29ex/s]

 78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 58632/75000 [00:15<00:04, 3775.70ex/s]

 79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 59013/75000 [00:15<00:04, 3583.82ex/s]

 79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 59388/75000 [00:15<00:04, 3628.79ex/s]

 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 59754/75000 [00:15<00:04, 3601.19ex/s]

 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 60116/75000 [00:15<00:04, 3398.25ex/s]

 81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 60504/75000 [00:15<00:04, 3533.02ex/s]

 81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 60861/75000 [00:15<00:04, 3513.10ex/s]

 82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 61215/75000 [00:15<00:04, 3165.72ex/s]

 82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 61570/75000 [00:15<00:04, 3269.34ex/s]

 83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 61904/75000 [00:16<00:04, 3220.08ex/s]

 83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 62231/75000 [00:16<00:04, 3042.86ex/s]

 83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 62622/75000 [00:16<00:03, 3279.46ex/s]

 84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 62980/75000 [00:16<00:03, 3363.81ex/s]

 84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 63321/75000 [00:16<00:03, 3056.91ex/s]

 85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 63658/75000 [00:16<00:03, 3141.37ex/s]

 85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 64000/75000 [00:16<00:03, 3014.19ex/s]

 86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 64356/75000 [00:16<00:03, 3160.70ex/s]

 86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 64740/75000 [00:16<00:03, 3349.60ex/s]

 87%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 65081/75000 [00:17<00:02, 3316.03ex/s]

 87%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 65448/75000 [00:17<00:02, 3416.53ex/s]

 88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 65830/75000 [00:17<00:02, 3531.51ex/s]

 88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 66186/75000 [00:17<00:02, 3399.00ex/s]

 89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 66573/75000 [00:17<00:02, 3531.20ex/s]

 89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 66956/75000 [00:17<00:02, 3615.66ex/s]

 90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 67320/75000 [00:17<00:02, 3573.42ex/s]

 90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 67750/75000 [00:17<00:01, 3784.64ex/s]

 91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 68130/75000 [00:17<00:01, 3549.14ex/s]

 91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 68489/75000 [00:18<00:02, 2626.13ex/s]

 92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 68924/75000 [00:18<00:02, 3020.26ex/s]

 92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 69305/75000 [00:18<00:01, 3214.89ex/s]

 93%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 69752/75000 [00:18<00:01, 3541.29ex/s]

 94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 70133/75000 [00:18<00:01, 3400.84ex/s]

 94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 70561/75000 [00:18<00:01, 3634.89ex/s]

 95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 71000/75000 [00:18<00:01, 3734.88ex/s]

 95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 71439/75000 [00:18<00:00, 3916.07ex/s]

 96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 71871/75000 [00:18<00:00, 4028.46ex/s]

 96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 72282/75000 [00:19<00:00, 3855.60ex/s]

 97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 72683/75000 [00:19<00:00, 3898.27ex/s]

 97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 73078/75000 [00:19<00:00, 3619.08ex/s]

 98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 73447/75000 [00:19<00:00, 3152.33ex/s]

 98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 73776/75000 [00:19<00:00, 3179.53ex/s]

 99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 74104/75000 [00:19<00:00, 3130.83ex/s]

 99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 74511/75000 [00:19<00:00, 3381.83ex/s]

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 74925/75000 [00:19<00:00, 3591.74ex/s]

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 75000/75000 [00:19<00:00, 3763.87ex/s]




  0%|          | 0/69825 [00:00<?, ?ex/s]

  1%|          | 357/69825 [00:00<00:19, 3569.31ex/s]

  1%|          | 772/69825 [00:00<00:17, 3909.62ex/s]

  2%|‚ñè         | 1163/69825 [00:00<00:18, 3794.41ex/s]

  2%|‚ñè         | 1601/69825 [00:00<00:16, 4019.06ex/s]

  3%|‚ñé         | 2004/69825 [00:00<00:17, 3878.45ex/s]

  3%|‚ñé         | 2438/69825 [00:00<00:16, 4028.94ex/s]

  4%|‚ñç         | 2871/69825 [00:00<00:16, 4124.33ex/s]

  5%|‚ñç         | 3285/69825 [00:00<00:17, 3891.01ex/s]

  5%|‚ñå         | 3712/69825 [00:00<00:16, 4002.00ex/s]

  6%|‚ñå         | 4115/69825 [00:01<00:16, 3905.40ex/s]

  7%|‚ñã         | 4555/69825 [00:01<00:16, 4047.66ex/s]

  7%|‚ñã         | 4979/69825 [00:01<00:15, 4103.78ex/s]

  8%|‚ñä         | 5391/69825 [00:01<00:16, 3920.06ex/s]

  8%|‚ñä         | 5803/69825 [00:01<00:16, 3977.10ex/s]

  9%|‚ñâ         | 6203/69825 [00:01<00:16, 3860.25ex/s]

  9%|‚ñâ         | 6632/69825 [00:01<00:15, 3981.93ex/s]

 10%|‚ñà         | 7033/69825 [00:01<00:16, 3904.70ex/s]

 11%|‚ñà         | 7478/69825 [00:01<00:15, 4059.42ex/s]

 11%|‚ñà‚ñè        | 7899/69825 [00:01<00:15, 4102.61ex/s]

 12%|‚ñà‚ñè        | 8311/69825 [00:02<00:15, 3986.07ex/s]

 13%|‚ñà‚ñé        | 8753/69825 [00:02<00:14, 4111.15ex/s]

 13%|‚ñà‚ñé        | 9166/69825 [00:02<00:15, 4012.13ex/s]

 14%|‚ñà‚ñç        | 9637/69825 [00:02<00:14, 4213.04ex/s]

 14%|‚ñà‚ñç        | 10060/69825 [00:02<00:14, 4064.67ex/s]

 15%|‚ñà‚ñç        | 10469/69825 [00:02<00:16, 3579.37ex/s]

 16%|‚ñà‚ñå        | 10838/69825 [00:02<00:17, 3351.21ex/s]

 16%|‚ñà‚ñå        | 11183/69825 [00:02<00:18, 3194.95ex/s]

 16%|‚ñà‚ñã        | 11509/69825 [00:03<00:18, 3204.39ex/s]

 17%|‚ñà‚ñã        | 11869/69825 [00:03<00:17, 3309.47ex/s]

 17%|‚ñà‚ñã        | 12205/69825 [00:03<00:17, 3242.15ex/s]

 18%|‚ñà‚ñä        | 12551/69825 [00:03<00:17, 3301.86ex/s]

 18%|‚ñà‚ñä        | 12912/69825 [00:03<00:16, 3389.25ex/s]

 19%|‚ñà‚ñâ        | 13254/69825 [00:03<00:17, 3176.51ex/s]

 19%|‚ñà‚ñâ        | 13599/69825 [00:03<00:17, 3251.92ex/s]

 20%|‚ñà‚ñâ        | 13928/69825 [00:03<00:17, 3245.46ex/s]

 20%|‚ñà‚ñà        | 14255/69825 [00:03<00:17, 3191.04ex/s]

 21%|‚ñà‚ñà        | 14613/69825 [00:03<00:16, 3301.30ex/s]

 21%|‚ñà‚ñà‚ñè       | 14991/69825 [00:04<00:15, 3440.01ex/s]

 22%|‚ñà‚ñà‚ñè       | 15337/69825 [00:04<00:17, 3166.07ex/s]

 22%|‚ñà‚ñà‚ñè       | 15691/69825 [00:04<00:16, 3269.69ex/s]

 23%|‚ñà‚ñà‚ñé       | 16023/69825 [00:04<00:17, 3147.06ex/s]

 23%|‚ñà‚ñà‚ñé       | 16404/69825 [00:04<00:16, 3332.26ex/s]

 24%|‚ñà‚ñà‚ñç       | 16781/69825 [00:04<00:15, 3456.59ex/s]

 25%|‚ñà‚ñà‚ñç       | 17130/69825 [00:04<00:15, 3434.98ex/s]

 25%|‚ñà‚ñà‚ñå       | 17547/69825 [00:04<00:14, 3648.59ex/s]

 26%|‚ñà‚ñà‚ñå       | 17945/69825 [00:04<00:13, 3744.09ex/s]

 26%|‚ñà‚ñà‚ñå       | 18322/69825 [00:05<00:14, 3514.74ex/s]

 27%|‚ñà‚ñà‚ñã       | 18704/69825 [00:05<00:14, 3598.49ex/s]

 27%|‚ñà‚ñà‚ñã       | 19068/69825 [00:05<00:15, 3276.19ex/s]

 28%|‚ñà‚ñà‚ñä       | 19452/69825 [00:05<00:14, 3426.95ex/s]

 28%|‚ñà‚ñà‚ñä       | 19875/69825 [00:05<00:13, 3651.35ex/s]

 29%|‚ñà‚ñà‚ñâ       | 20247/69825 [00:05<00:14, 3388.71ex/s]

 29%|‚ñà‚ñà‚ñâ       | 20594/69825 [00:05<00:14, 3365.75ex/s]

 30%|‚ñà‚ñà‚ñà       | 20984/69825 [00:05<00:13, 3513.98ex/s]

 31%|‚ñà‚ñà‚ñà       | 21341/69825 [00:05<00:14, 3349.86ex/s]

 31%|‚ñà‚ñà‚ñà       | 21735/69825 [00:06<00:13, 3512.30ex/s]

 32%|‚ñà‚ñà‚ñà‚ñè      | 22091/69825 [00:06<00:13, 3446.27ex/s]

 32%|‚ñà‚ñà‚ñà‚ñè      | 22439/69825 [00:06<00:13, 3424.23ex/s]

 33%|‚ñà‚ñà‚ñà‚ñé      | 22820/69825 [00:06<00:13, 3533.24ex/s]

 33%|‚ñà‚ñà‚ñà‚ñé      | 23176/69825 [00:06<00:13, 3393.26ex/s]

 34%|‚ñà‚ñà‚ñà‚ñé      | 23534/69825 [00:06<00:13, 3444.92ex/s]

 34%|‚ñà‚ñà‚ñà‚ñç      | 23889/69825 [00:06<00:13, 3472.90ex/s]

 35%|‚ñà‚ñà‚ñà‚ñç      | 24238/69825 [00:06<00:13, 3353.07ex/s]

 35%|‚ñà‚ñà‚ñà‚ñå      | 24676/69825 [00:06<00:12, 3646.42ex/s]

 36%|‚ñà‚ñà‚ñà‚ñå      | 25044/69825 [00:06<00:12, 3577.82ex/s]

 36%|‚ñà‚ñà‚ñà‚ñã      | 25459/69825 [00:07<00:11, 3741.11ex/s]

 37%|‚ñà‚ñà‚ñà‚ñã      | 25870/69825 [00:07<00:11, 3847.24ex/s]

 38%|‚ñà‚ñà‚ñà‚ñä      | 26257/69825 [00:07<00:12, 3575.06ex/s]

 38%|‚ñà‚ñà‚ñà‚ñä      | 26620/69825 [00:07<00:12, 3567.69ex/s]

 39%|‚ñà‚ñà‚ñà‚ñä      | 27000/69825 [00:07<00:12, 3429.47ex/s]

 39%|‚ñà‚ñà‚ñà‚ñâ      | 27360/69825 [00:07<00:12, 3476.48ex/s]

 40%|‚ñà‚ñà‚ñà‚ñâ      | 27711/69825 [00:07<00:13, 3138.01ex/s]

 40%|‚ñà‚ñà‚ñà‚ñà      | 28032/69825 [00:07<00:13, 3024.05ex/s]

 41%|‚ñà‚ñà‚ñà‚ñà      | 28390/69825 [00:07<00:13, 3171.70ex/s]

 41%|‚ñà‚ñà‚ñà‚ñà      | 28738/69825 [00:08<00:12, 3254.70ex/s]

 42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 29068/69825 [00:08<00:12, 3147.25ex/s]

 42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 29428/69825 [00:08<00:12, 3273.83ex/s]

 43%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 29778/69825 [00:08<00:12, 3335.25ex/s]

 43%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 30115/69825 [00:08<00:12, 3117.54ex/s]

 44%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 30483/69825 [00:08<00:12, 3273.06ex/s]

 44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 30865/69825 [00:08<00:11, 3426.68ex/s]

 45%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 31212/69825 [00:08<00:11, 3273.79ex/s]

 45%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 31611/69825 [00:08<00:10, 3474.84ex/s]

 46%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 32000/69825 [00:09<00:11, 3378.85ex/s]

 46%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 32352/69825 [00:09<00:10, 3417.59ex/s]

 47%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 32714/69825 [00:09<00:10, 3475.04ex/s]

 47%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 33064/69825 [00:09<00:11, 3290.07ex/s]

 48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 33424/69825 [00:09<00:10, 3375.84ex/s]

 48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 33800/69825 [00:09<00:10, 3483.91ex/s]

 49%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 34151/69825 [00:09<00:11, 3202.98ex/s]

 49%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 34507/69825 [00:09<00:10, 3300.06ex/s]

 50%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 34851/69825 [00:09<00:10, 3338.41ex/s]

 50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 35189/69825 [00:10<00:10, 3166.34ex/s]

 51%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 35554/69825 [00:10<00:10, 3301.22ex/s]

 51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 35941/69825 [00:10<00:09, 3460.96ex/s]

 52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 36291/69825 [00:10<00:09, 3393.22ex/s]

 53%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 36667/69825 [00:10<00:09, 3497.49ex/s]

 53%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 37019/69825 [00:10<00:09, 3328.17ex/s]

 54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 37381/69825 [00:10<00:09, 3409.10ex/s]

 54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 37759/69825 [00:10<00:09, 3513.19ex/s]

 55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 38113/69825 [00:10<00:09, 3361.91ex/s]

 55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 38566/69825 [00:10<00:08, 3692.50ex/s]

 56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 39000/69825 [00:11<00:08, 3772.12ex/s]

 56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 39449/69825 [00:11<00:07, 3975.72ex/s]

 57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 39876/69825 [00:11<00:07, 4060.54ex/s]

 58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 40285/69825 [00:11<00:07, 3936.25ex/s]

 58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 40724/69825 [00:11<00:07, 4066.86ex/s]

 59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 41133/69825 [00:11<00:07, 3878.84ex/s]

 59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 41524/69825 [00:11<00:07, 3867.95ex/s]

 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 41913/69825 [00:11<00:07, 3863.65ex/s]

 61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 42301/69825 [00:11<00:07, 3755.09ex/s]

 61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 42755/69825 [00:12<00:06, 3979.08ex/s]

 62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 43155/69825 [00:12<00:06, 3939.13ex/s]

 62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 43606/69825 [00:12<00:06, 4104.99ex/s]

 63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 44018/69825 [00:12<00:06, 3979.10ex/s]

 64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 44418/69825 [00:12<00:07, 3436.81ex/s]

 64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 44810/69825 [00:12<00:07, 3563.35ex/s]

 65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 45199/69825 [00:12<00:06, 3651.61ex/s]

 65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 45644/69825 [00:12<00:06, 3874.31ex/s]

 66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 46040/69825 [00:12<00:06, 3845.78ex/s]

 67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 46467/69825 [00:12<00:05, 3966.90ex/s]

 67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 46882/69825 [00:13<00:05, 4018.41ex/s]

 68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 47287/69825 [00:13<00:05, 3932.54ex/s]

 68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 47718/69825 [00:13<00:05, 4039.89ex/s]

 69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 48125/69825 [00:13<00:05, 3913.87ex/s]

 70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 48582/69825 [00:13<00:05, 4101.17ex/s]

 70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 49000/69825 [00:13<00:05, 4041.55ex/s]

 71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 49413/69825 [00:13<00:05, 4064.83ex/s]

 71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 49849/69825 [00:13<00:04, 4149.98ex/s]

 72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 50266/69825 [00:13<00:04, 4057.48ex/s]

 73%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 50735/69825 [00:14<00:04, 4241.73ex/s]

 73%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 51161/69825 [00:14<00:04, 3769.59ex/s]

 74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 51549/69825 [00:14<00:05, 3601.94ex/s]

 74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 52000/69825 [00:14<00:04, 3700.63ex/s]

 75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 52447/69825 [00:14<00:04, 3905.70ex/s]

 76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 52861/69825 [00:14<00:04, 3969.22ex/s]

 76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 53263/69825 [00:14<00:04, 3841.98ex/s]

 77%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 53684/69825 [00:14<00:04, 3943.48ex/s]

 77%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 54082/69825 [00:14<00:04, 3925.98ex/s]

 78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 54534/69825 [00:15<00:03, 4096.10ex/s]

 79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 54985/69825 [00:15<00:03, 4214.86ex/s]

 79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 55409/69825 [00:15<00:03, 4124.02ex/s]

 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 55853/69825 [00:15<00:03, 4213.07ex/s]

 81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 56276/69825 [00:15<00:03, 4006.23ex/s]

 81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 56703/69825 [00:15<00:03, 4080.09ex/s]

 82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 57114/69825 [00:15<00:03, 3973.73ex/s]

 82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 57537/69825 [00:15<00:03, 4046.85ex/s]

 83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 57990/69825 [00:15<00:02, 4185.22ex/s]

 84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 58411/69825 [00:15<00:02, 3982.22ex/s]

 84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 58835/69825 [00:16<00:02, 4055.03ex/s]

 85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 59243/69825 [00:16<00:02, 3896.34ex/s]

 85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 59641/69825 [00:16<00:02, 3918.92ex/s]

 86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 60035/69825 [00:16<00:03, 2912.15ex/s]

 87%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 60481/69825 [00:16<00:02, 3273.49ex/s]

 87%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 60908/69825 [00:16<00:02, 3522.95ex/s]

 88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 61291/69825 [00:16<00:02, 3559.83ex/s]

 88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 61735/69825 [00:16<00:02, 3795.68ex/s]

 89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 62133/69825 [00:17<00:02, 3765.39ex/s]

 90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 62591/69825 [00:17<00:01, 3991.90ex/s]

 90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 63001/69825 [00:17<00:01, 3870.65ex/s]

 91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 63422/69825 [00:17<00:01, 3965.70ex/s]

 91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 63850/69825 [00:17<00:01, 4055.43ex/s]

 92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 64261/69825 [00:17<00:01, 3869.30ex/s]

 93%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 64664/69825 [00:17<00:01, 3912.33ex/s]

 93%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 65059/69825 [00:17<00:01, 3831.47ex/s]

 94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 65470/69825 [00:17<00:01, 3909.89ex/s]

 94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 65864/69825 [00:17<00:01, 3767.45ex/s]

 95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 66244/69825 [00:18<00:01, 3522.86ex/s]

 95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 66650/69825 [00:18<00:00, 3670.68ex/s]

 96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 67022/69825 [00:18<00:00, 3326.71ex/s]

 97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 67408/69825 [00:18<00:00, 3468.02ex/s]

 97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 67763/69825 [00:18<00:00, 3435.64ex/s]

 98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 68112/69825 [00:18<00:00, 3077.21ex/s]

 98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 68552/69825 [00:18<00:00, 3424.20ex/s]

 99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 69000/69825 [00:18<00:00, 3559.59ex/s]

 99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 69434/69825 [00:19<00:00, 3771.08ex/s]

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 69825/69825 [00:19<00:00, 3656.02ex/s]




In [20]:
len(train_dataset), len(eval_dataset)

(75000, 69825)

In [21]:
k = random.randrange(len(train_dataset))
print(tokenizer.decode(train_dataset['input_ids'][k]), train_dataset['labels'][k])
k = random.randrange(len(eval_dataset))
print(tokenizer.decode(eval_dataset['input_ids'][k]), eval_dataset['labels'][k])

[CLS] < < ÌîºÎ∂ÄÏû¨ÏÉùÏóê Ï¢ãÏùÄ ÏÑ±Î∂ÑÎì§Îßå ÏèôÏèô! > > [SEP] ÏÉÅÌíàÌèâ Î¨∏Ïû•Ïùò Î≤îÏ£º Ïú†ÌòïÏùÄ < < Ï†úÌíà Ï†ÑÏ≤¥ # Ïù∏ÏßÄÎèÑ > > Ïù¥Îã§. [SEP] 1
[CLS] < < Ïã†Ï≤¥Ïùò Í≥°ÏÑ†ÏùÑ ÎπàÌãàÏóÜÏù¥ ÏßÄÏßÄÌï¥ Ï£ºÎäî ÏÑºÏä§! > > [SEP] ÏÉÅÌíàÌèâ Î¨∏Ïû•Ïùò Î≤îÏ£º Ïú†ÌòïÏùÄ < < Î≥∏Ìíà # Ìé∏ÏùòÏÑ± > > Ïù¥Îã§. [SEP] 1


# Load Trainer

In [22]:
args = TrainingArguments(
    output_dir=run_name,
    run_name=run_name,
    report_to=report_to,

    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,

    optim=optim,

    learning_rate=learning_rate,
    weight_decay=weight_decay,
    adam_epsilon=adam_epsilon,

    lr_scheduler_type=lr_scheduler_type,
    warmup_ratio=warmup_ratio,

    save_total_limit=save_total_limit,

    load_best_model_at_end=load_best_model_at_end,
    metric_for_best_model=metric_for_best_model,
    
    save_strategy=save_strategy,
    evaluation_strategy=evaluation_strategy,

    logging_strategy=logging_strategy,
    logging_first_step=logging_first_step, 
    logging_steps=logging_steps,
    
    fp16=fp16,
)

In [23]:
# es = EarlyStoppingCallback(early_stopping_patience=early_stopping_patience)

In [24]:
trainer = Trainer(
    model,
    args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    data_collator=data_collator,
    # callbacks=[es],
)

# Run Trainer

In [25]:
trainer.train()
wandb.finish()

The following columns in the training set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, sentence_form, entity_property. If id, sentence_form, entity_property are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running training *****


  Num examples = 75000


  Num Epochs = 10


  Instantaneous batch size per device = 25


  Total train batch size (w. parallel, distributed & accumulation) = 100


  Gradient Accumulation steps = 1


  Total optimization steps = 7500


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


You're using a ElectraTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.




Epoch,Training Loss,Validation Loss,Accuracy,F1 True,F1 False,F1 Macro,F1 Micro
1,0.1841,0.113365,0.963122,0.403797,0.980973,0.692385,0.963122
2,0.1045,0.087828,0.966473,0.566562,0.982562,0.774562,0.966473
3,0.0921,0.080214,0.968865,0.614402,0.983778,0.79909,0.968865
4,0.0801,0.07907,0.969338,0.66225,0.98394,0.823095,0.969338
5,0.0756,0.076188,0.973348,0.670211,0.986113,0.828162,0.973348
6,0.0706,0.072567,0.973477,0.678025,0.986169,0.832097,0.973477
7,0.0686,0.073722,0.973262,0.684682,0.986039,0.83536,0.973262
8,0.0664,0.073315,0.974006,0.685605,0.986443,0.836024,0.974006
9,0.0638,0.073531,0.97382,0.687521,0.986338,0.83693,0.97382
10,0.0645,0.073687,0.973992,0.688187,0.98643,0.837308,0.973992


The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, sentence_form, entity_property. If id, sentence_form, entity_property are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-750


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-750/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-750/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-750/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-750/special_tokens_map.json




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, sentence_form, entity_property. If id, sentence_form, entity_property are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-1500


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-1500/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-1500/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-1500/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-1500/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-750] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, sentence_form, entity_property. If id, sentence_form, entity_property are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-2250


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-2250/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-2250/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-2250/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-2250/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-1500] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, sentence_form, entity_property. If id, sentence_form, entity_property are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-3000


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-3000/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-3000/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-3000/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-3000/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-2250] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, sentence_form, entity_property. If id, sentence_form, entity_property are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-3750


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-3750/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-3750/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-3750/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-3750/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-3000] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, sentence_form, entity_property. If id, sentence_form, entity_property are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-4500


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-4500/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-4500/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-4500/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-4500/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-3750] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, sentence_form, entity_property. If id, sentence_form, entity_property are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-5250


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-5250/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-5250/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-5250/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-5250/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-4500] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, sentence_form, entity_property. If id, sentence_form, entity_property are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-6000


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-6000/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-6000/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-6000/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-6000/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-5250] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, sentence_form, entity_property. If id, sentence_form, entity_property are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-6750


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-6750/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-6750/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-6750/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-6750/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-6000] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: id, sentence_form, entity_property. If id, sentence_form, entity_property are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 69825


  Batch size = 100


Saving model checkpoint to snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-7500


Configuration saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-7500/config.json


Model weights saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-7500/pytorch_model.bin


tokenizer config file saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-7500/tokenizer_config.json


Special tokens file saved in snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-7500/special_tokens_map.json


Deleting older checkpoint [snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-6750] due to args.save_total_limit




Training completed. Do not forget to share your model on huggingface.co/models =)




Loading best model from snunlp_kr_electra_discriminator_uncleaned_v1/checkpoint-7500 (score: 0.8373084769085921).


Saving model checkpoint to /tmp/tmp_du6zcam


Configuration saved in /tmp/tmp_du6zcam/config.json


Model weights saved in /tmp/tmp_du6zcam/pytorch_model.bin


tokenizer config file saved in /tmp/tmp_du6zcam/tokenizer_config.json


Special tokens file saved in /tmp/tmp_du6zcam/special_tokens_map.json


0,1
eval/accuracy,‚ñÅ‚ñÉ‚ñÖ‚ñÖ‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà
eval/f1_false,‚ñÅ‚ñÉ‚ñÖ‚ñÖ‚ñà‚ñà‚ñá‚ñà‚ñà‚ñà
eval/f1_macro,‚ñÅ‚ñÖ‚ñÜ‚ñá‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà
eval/f1_micro,‚ñÅ‚ñÉ‚ñÖ‚ñÖ‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà
eval/f1_true,‚ñÅ‚ñÖ‚ñÜ‚ñá‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà
eval/loss,‚ñà‚ñÑ‚ñÇ‚ñÇ‚ñÇ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ
eval/runtime,‚ñÖ‚ñÜ‚ñÖ‚ñÅ‚ñÑ‚ñÜ‚ñÜ‚ñà‚ñÜ‚ñá
eval/samples_per_second,‚ñÑ‚ñÉ‚ñÑ‚ñà‚ñÖ‚ñÉ‚ñÉ‚ñÅ‚ñÉ‚ñÇ
eval/steps_per_second,‚ñÑ‚ñÉ‚ñÑ‚ñà‚ñÖ‚ñÉ‚ñÉ‚ñÅ‚ñÉ‚ñÇ
train/epoch,‚ñÅ‚ñÅ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÉ‚ñÉ‚ñÉ‚ñÑ‚ñÑ‚ñÑ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÜ‚ñÜ‚ñÜ‚ñá‚ñá‚ñá‚ñá‚ñà‚ñà‚ñà‚ñà

0,1
eval/accuracy,0.97399
eval/f1_false,0.98643
eval/f1_macro,0.83731
eval/f1_micro,0.97399
eval/f1_true,0.68819
eval/loss,0.07369
eval/runtime,236.4697
eval/samples_per_second,295.281
eval/steps_per_second,2.956
train/epoch,10.0


In [26]:
keep = [
    'added_tokens.json',
    'config.json',
    'pytorch_model.bin',
    'special_tokens_map.json',
    'tokenizer.json',
    'tokenizer_config.json',
    'vocab.txt'
]

ckpts = os.listdir(run_name)
for ckpt in ckpts:
    ckpt = os.path.join(run_name, ckpt)
    for item in os.listdir(ckpt):
        if item not in keep:
            os.remove(os.path.join(ckpt, item))

!mv wandb {run_name} {SAVE_PATH}/

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
