In [1]:
# !git clone https://github.com/SKTBrain/KoBERT.git
# %cd KoBERT
# !pip install -r requirements.txt
# !pip install .

# Import Libraries and Set Path

In [1]:
from __future__ import absolute_import, division, print_function

import csv
import os
import sys
import logging

import pandas as pd
import numpy as np

import pickle

from sklearn.metrics import f1_score, confusion_matrix
from tqdm import tqdm

import torch
from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler, TensorDataset)
from torch.nn import CrossEntropyLoss, MSELoss
import torch.nn.functional as F
import torch.nn as nn
from torch.optim import Adam, lr_scheduler

from tqdm import tqdm_notebook, trange

from transformers import AdamW, get_linear_schedule_with_warmup

""" KoBERT Model and Tokenizer """
from kobert.pytorch_kobert import get_pytorch_kobert_model
from gluonnlp.data import SentencepieceTokenizer
from kobert.utils import get_tokenizer

from multiprocessing import Pool, cpu_count

import logging
logger = logging.getLogger()

# Increase CSV reader's field limit incase we have long text.
csv.field_size_limit(2147483647)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

# Load Kobert model and Vocab.
kobert, vocab = get_pytorch_kobert_model()

cuda
using cached model
using cached model


In [2]:
# File Paths
DATA_DIR = './'
TRAIN_FILE = os.path.join(DATA_DIR, 'train.csv')
TEST_FILE = os.path.join(DATA_DIR, 'public_test.csv')

In [3]:
# Bert Hyper Parameters
NUM_CLASSES = 2

MAX_SEQ_LEN = 512

EPOCHS = 2
BATCH_SIZE = 6
ACCUMUL_STEPS =4

LEARNING_RATE = 2e-5

RANDOM_SEED = 1234
LOG_INTERVAL = 200

# LSTM hyperparameters
INPUT_SIZE = 768
HIDDEN_SIZE = 768
NUM_LAYERS = 1

In [4]:
def clean_text(df):
    df['text'] = df['text'].str.replace('XX+', '', regex=True)
    # df['text'] = df['text'].str.replace(r'[,.():!?]', ' ')
    df['text'] = df['text'].str.replace('[', '')
    df['text'] = df['text'].str.replace(']', '')
    df['text'] = df['text'].str.replace('(', '')
    df['text'] = df['text'].str.replace(')', '')
    df['text'] = df['text'].str.replace('?', '')
    df['text'] = df['text'].str.replace('!', '')
    df['text'] = df['text'].str.replace('광고', '')
    return df

def convert_lines(lines, max_seq_len):
    max_seq_len -= 2
    longer = 0
    all_token_ids = []
    for line in lines:
        tokens = tokenizer(line)
        if len(tokens) > max_seq_len:
            tokens = tokens[:max_seq_len]
            longer += 1
        tokens = ['[CLS]'] + tokens + ['[SEP]'] + [0] * (max_seq_len - len(tokens))
        token_ids = [vocab.token_to_idx[token] for token in tokens]
        all_token_ids.append(token_ids)
    
    print("{:d} setences are trimmed".format(longer))
    return np.array(all_token_ids)

In [5]:
df = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))
df =  clean_text(df)
# de
train_df = df 
# sms_df = df[df['smishing'] == 1]
# nsms_df = df[df['smishing'] == 0]

# train_sms_df = sms_df.sample(frac=0.8, random_state=1234)
# dev_sms_df = sms_df.drop(train_sms_df.index)

# train_nsms_df = nsms_df.sample(frac=0.8, random_state=1234)
# dev_nsms_df = nsms_df.drop(train_nsms_df.index)

# train_df = pd.concat([train_sms_df, train_nsms_df])
# dev_df = pd.concat([dev_sms_df, dev_nsms_df])

In [6]:
# Load pre-trained model tokenizer (vocabulary)
""" From KoBert Documents.
tok_path = get_tokenizer()
sp = SentencepieceTokenizeer(tok_path)
sp('한국어 모델을 공유합니다.')
"""

tokenizer = SentencepieceTokenizer(get_tokenizer())

using cached model


In [7]:
test_df = pd.read_csv(os.path.join(DATA_DIR, 'public_test.csv'))
test_df = clean_text(test_df)
test_df['smishing'] = 2

In [11]:
test_x = convert_lines(test_df['text'].tolist(),  MAX_SEQ_LEN)
np.save(os.path.join(DATA_DIR, 'new_test_sequences.npy'), test_x)

test_y = test_df['smishing'].values
np.save(os.path.join(DATA_DIR, 'new_test_labels.npy'), test_y)

39 setences are trimmed


In [9]:
train_x = convert_lines(train_df['text'].tolist(),  MAX_SEQ_LEN)
np.save(os.path.join(DATA_DIR, 'new_train_sequences.npy'), train_x)

train_y = train_df['smishing'].values
np.save(os.path.join(DATA_DIR, 'new_train_labels.npy'), train_y)

4752 setences are trimmed


In [8]:
train_x = np.load(os.path.join(DATA_DIR, 'new_train_sequences.npy'))
train_y = np.load(os.path.join(DATA_DIR, 'new_train_labels.npy'))

In [11]:
# dev_x = convert_lines(dev_df['text'].tolist(),  MAX_SEQ_LEN)
# np.save(os.path.join(DATA_DIR, 'new_dev_sequences.npy'), dev_x)

# dev_y = dev_df['smishing'].values
# np.save(os.path.join(DATA_DIR, 'new_dev_labels.npy'), dev_y)

In [12]:
# dev_x = np.load(os.path.join(DATA_DIR, 'new_dev_sequences.npy'))
# dev_y = np.load(os.path.join(DATA_DIR, 'new_dev_labels.npy'))

In [9]:
train_dataset = torch.utils.data.TensorDataset(torch.tensor(train_x, dtype=torch.long), torch.tensor(train_y, dtype=torch.float))
# dev_dataset = torch.utils.data.TensorDataset(torch.tensor(dev_x, dtype=torch.long), torch.tensor(dev_y, dtype=torch.float))

In [12]:
test_dataset = torch.utils.data.TensorDataset(torch.tensor(test_x, dtype=torch.long), torch.tensor(test_y, dtype=torch.float))

# Define Models

In [13]:
class KobertLinear(nn.Module):
    def __init__(self, kobert):
        super(KobertLinear, self).__init__()
        self.kobert = kobert
        self.dropout = nn.Dropout(kobert.config.hidden_dropout_prob)
        self.classifier = nn.Linear(in_features=768, out_features=NUM_CLASSES)
    
    def forward(self, input_ids, token_type_ids=None, attention_mask=None):
        hidden_states, pooler_output = self.kobert(input_ids, token_type_ids, attention_mask)
        last_hidden_state = hidden_states[-1]
#         print(last_hidden_state.shape)
        pooler_output = self.dropout(pooler_output)
#         print(pooler_output.shape)
        logits = self.classifier(pooler_output)
        return logits, last_hidden_state

class KobertLinearPool(nn.Module):
    def __init__(self, kobert):
        super(KobertLinearPool, self).__init__()
        self.kobert = kobert
        self.dropout = nn.Dropout(kobert.config.hidden_dropout_prob)
        self.classifier = nn.Linear(in_features=768, out_features=NUM_CLASSES) 
    
    def forward(self, input_ids, token_type_ids=None, attention_mask=None):
        hidden_states, pooler_output = self.kobert(input_ids, token_type_ids, attention_mask)
        last_hidden_state = hidden_states[-1]
        last_hidden_state = last_hidden_state.permute(0, 2, 1)
        last_hidden_state_pooled = F.max_pool1d(last_hidden_state, kernel_size=last_hidden_state.size()[-1])
        last_hidden_state_pooled = last_hidden_state_pooled.squeeze()
        last_hidden_state_pooled = self.dropout(last_hidden_state_pooled)
        logits = self.classifier(last_hidden_state_pooled)
        return logits, last_hidden_state

class KobertLinearAvg(nn.Module):
    def __init__(self, kobert):
        super(KobertLinearAvg, self).__init__()
        self.kobert = kobert
        self.dropout = nn.Dropout(kobert.config.hidden_dropout_prob)
        self.classifier = nn.Linear(in_features=768, out_features=NUM_CLASSES)
    
    def forward(self, input_ids, token_type_ids=None, attention_mask=None):
        last_hidden_states, pooler_output = self.kobert(input_ids, token_type_ids, attention_mask)
        last_hidden_state_avg = torch.mean(last_hidden_states, 1)
        last_hidden_state_avg = self.dropout(last_hidden_state_avg)
        logits = self.classifier(last_hidden_state_avg)
        return logits, last_hidden_states
    
class KobertLSTM(nn.Module):
    def __init__(self, kobert):
        super(KobertLSTM, self).__init__()
        self.kobert = kobert
        self.lstm = nn.LSTM(input_size=INPUT_SIZE, hidden_size=HIDDEN_SIZE, num_layers=NUM_LAYERS,
                            batch_first=True, bidirectional=True)
        self.dropout = nn.Dropout(kobert.config.hidden_dropout_prob)
        self.classifier = nn.Linear(in_features=HIDDEN_SIZE * 2, out_features=NUM_CLASSES)
    
    def forward(self, input_ids, token_type_ids=None, attention_mask=None):
        hidden_states, pooler_output = self.kobert(input_ids, token_type_ids, attention_mask)
        last_hidden_state = hidden_states[-1]
        h_t, _ = self.lstm(last_hidden_state)
        h_t = self.dropout(h_t.view(-1, MAX_SEQ_LENGTH, 2 * HIDDEN_SIZE)[:, -1])
        logits = self.classifier(h_t)
        return logits, h_t

def calc_accuracy(X,Y):
    max_vals, max_indices = torch.max(X, 1)
    train_acc = (max_indices == Y).sum().data.cpu().numpy()/max_indices.size()[0]
    return train_acc

def get_eval_report(labels, preds):
    f1 = f1_score(labels, preds)
    prc = get_precision(labels, preds)
    tn, fp, fn, tp = confusion_matrix(labels, preds).ravel()
    return {
        'f1': f1,
        'prc': prc,
        'tp': tp,
        'tn': tn,
        'fp': fp,
        'fn': fn
    }

In [14]:
MODELS = {'kobert_linear': KobertLinear(kobert),
          'kobert_linear_pool': KobertLinearPool(kobert),
          'kobert_linear_avg': KobertLinearAvg(kobert),
          'kobert_lstm': KobertLSTM(kobert)}

# Fine-tune Classifier

## Set Model Name and Related Path

In [15]:
##### ***** IMPORTANT ***** #####
######## SET MODEL NAME #########
# 'kobert_linear_simple', 'kobert_linear_avg',
# 'kobert_linear_pool', 'kobert_lstm'
MODEL_NAME = 'kobert_linear'
# MODEL_NAME = 'kobert_linear_avg'


# Weights name to load: 'xxxx.bin' 
# xxxx: floating part of the val loss
# WEIGHTS_NAME = '4851.bin'
# CONFIG_NAME = 'config.json'
ROOT_DIR ='./'
OUTPUT_DIR = os.path.join(ROOT_DIR, 'output')
REPORT_DIR = os.path.join(ROOT_DIR, 'report')

In [16]:
# Model is already loaded above

"""
Bert SenetenceCLassfication added below layers on the head
(dropout): Dropout(p=0.1)
(classifier): Linear(in_features=768, out_features=2, bias=True)
"""
model = MODELS[MODEL_NAME]

# Load weights for continuous training
# model.load_state_dict(torch.load(os.path.join(OUTPUTS_DIR, '5414.bin')))

model.to(device)

KobertLinear(
  (kobert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(8002, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True

## Set Trainable Parameters and Set Optimizer

In [17]:
num_training_steps = int(EPOCHS*len(train_dataset)/BATCH_SIZE/ACCUMUL_STEPS)
num_warmup_steps = num_training_steps * 0.05
max_grad_norm = 1.0

In [18]:
param_optimizer = list(model.named_parameters())
no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
     'weight_decay': 0.01},
    {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
     'weight_decay': 0.0}
]

In [19]:
optimizer = AdamW(optimizer_grouped_parameters, lr=LEARNING_RATE)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=num_warmup_steps, num_training_steps=num_training_steps)
loss_fn = CrossEntropyLoss()

## Training

In [None]:
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

TOTAL_STEPS = len(train_dataloader)
lowest_dev_loss = 987654321
global_step = 0

model.train()
for epoch in range(EPOCHS):
    train_acc = 0.0
    tot_loss = 0
    for step, (input_ids, labels) in enumerate(tqdm(train_dataloader)):
        input_ids = input_ids.to(device)
        labels = labels.to(torch.long).to(device)
        logits, _ = model(input_ids, torch.zeros_like(input_ids).to(device), (input_ids > 0).to(torch.long).to(device))

        loss = loss_fn(logits, labels)
        tot_loss += loss.item()
        
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()
        
        global_step += 1
        
        train_acc += calc_accuracy(logits, labels.to(torch.long))
        if (step + 1) % LOG_INTERVAL == 0:
#             torch.save(model.state_dict(), os.path.join('./output/', MODEL_NAME +'epoch_{:d}_step_{:d}.bin'.format(epoch + 1, step + 1)))
            print("epoch {}, step {}/{} loss {} train acc {}".format(epoch + 1, step + 1, TOTAL_STEPS, tot_loss / step, train_acc / (step + 1)))
            
    torch.save(model.state_dict(), os.path.join('./output/', MODEL_NAME + 'epoch_{:d}_step_{:d}.bin'.format(epoch + 1, step + 1)))
    print("epoch {} train acc {}".format(epoch + 1, train_acc / (step + 1)))    

  0%|▎                                                                           | 200/49325 [01:14<5:01:53,  2.71it/s]

epoch 1, step 200/49325 loss 0.3019466791021165 train acc 0.944166666666667


  1%|▌                                                                           | 400/49325 [02:26<4:55:40,  2.76it/s]

epoch 1, step 400/49325 loss 0.1954012405510086 train acc 0.9599999999999995


  1%|▉                                                                           | 600/49325 [03:38<4:55:45,  2.75it/s]

epoch 1, step 600/49325 loss 0.16039529500664437 train acc 0.9641666666666666


  2%|█▏                                                                          | 800/49325 [04:50<4:46:45,  2.82it/s]

epoch 1, step 800/49325 loss 0.1278242988324548 train acc 0.9708333333333339


  2%|█▌                                                                         | 1000/49325 [06:02<4:51:45,  2.76it/s]

epoch 1, step 1000/49325 loss 0.10675301128858328 train acc 0.9751666666666674


  2%|█▊                                                                         | 1200/49325 [07:13<4:48:16,  2.78it/s]

epoch 1, step 1200/49325 loss 0.0951068500467375 train acc 0.9780555555555558


  3%|██▏                                                                        | 1400/49325 [08:26<4:46:58,  2.78it/s]

epoch 1, step 1400/49325 loss 0.0870804102628319 train acc 0.9801190476190473


  3%|██▍                                                                        | 1600/49325 [09:38<4:45:30,  2.79it/s]

epoch 1, step 1600/49325 loss 0.07869423413996846 train acc 0.9821874999999995


  4%|██▋                                                                        | 1800/49325 [10:49<4:45:17,  2.78it/s]

epoch 1, step 1800/49325 loss 0.07283644254836152 train acc 0.9836111111111104


  4%|███                                                                        | 2000/49325 [12:02<4:43:19,  2.78it/s]

epoch 1, step 2000/49325 loss 0.06840921731124798 train acc 0.9846666666666658


  4%|███▎                                                                       | 2200/49325 [13:14<4:47:20,  2.73it/s]

epoch 1, step 2200/49325 loss 0.06250795826109863 train acc 0.9859848484848478


  5%|███▋                                                                       | 2400/49325 [14:26<4:45:12,  2.74it/s]

epoch 1, step 2400/49325 loss 0.05854860255593405 train acc 0.986944444444444


  5%|███▉                                                                       | 2600/49325 [15:37<4:42:09,  2.76it/s]

epoch 1, step 2600/49325 loss 0.05487357389567335 train acc 0.9878205128205125


  6%|████▎                                                                      | 2800/49325 [16:49<4:38:22,  2.79it/s]

epoch 1, step 2800/49325 loss 0.050965586316551535 train acc 0.9886904761904759


  6%|████▌                                                                      | 3000/49325 [18:01<4:38:32,  2.77it/s]

epoch 1, step 3000/49325 loss 0.04969982413041174 train acc 0.9891111111111112


  6%|████▊                                                                      | 3200/49325 [19:13<4:34:30,  2.80it/s]

epoch 1, step 3200/49325 loss 0.0472116135100681 train acc 0.9896875000000002


  7%|█████▏                                                                     | 3400/49325 [20:25<4:37:42,  2.76it/s]

epoch 1, step 3400/49325 loss 0.044439382212247 train acc 0.990294117647059


  7%|█████▍                                                                     | 3600/49325 [21:37<4:31:30,  2.81it/s]

epoch 1, step 3600/49325 loss 0.04250248358410168 train acc 0.9907407407407409


  8%|█████▊                                                                     | 3800/49325 [22:49<4:31:41,  2.79it/s]

epoch 1, step 3800/49325 loss 0.04026837075901866 train acc 0.9912280701754388


  8%|██████                                                                     | 4000/49325 [24:01<4:29:24,  2.80it/s]

epoch 1, step 4000/49325 loss 0.039049712564608874 train acc 0.991541666666667


  9%|██████▍                                                                    | 4200/49325 [25:13<4:28:53,  2.80it/s]

epoch 1, step 4200/49325 loss 0.03759544774020335 train acc 0.9918253968253969


  9%|██████▋                                                                    | 4400/49325 [26:24<4:22:47,  2.85it/s]

epoch 1, step 4400/49325 loss 0.03604500283555786 train acc 0.9921212121212121


  9%|██████▉                                                                    | 4600/49325 [27:36<4:26:41,  2.79it/s]

epoch 1, step 4600/49325 loss 0.034782466881777996 train acc 0.9923913043478259


 10%|███████▎                                                                   | 4800/49325 [28:48<4:26:26,  2.79it/s]

epoch 1, step 4800/49325 loss 0.03333405249390769 train acc 0.9927083333333332


 10%|███████▌                                                                   | 5000/49325 [30:00<4:26:40,  2.77it/s]

epoch 1, step 5000/49325 loss 0.032696341171077316 train acc 0.9928666666666662


 11%|███████▉                                                                   | 5200/49325 [31:12<4:22:05,  2.81it/s]

epoch 1, step 5200/49325 loss 0.03191830442762502 train acc 0.9930448717948714


 11%|████████▏                                                                  | 5400/49325 [32:23<4:25:59,  2.75it/s]

epoch 1, step 5400/49325 loss 0.030736840903294094 train acc 0.9933024691358021


 11%|████████▌                                                                  | 5600/49325 [33:35<4:22:07,  2.78it/s]

epoch 1, step 5600/49325 loss 0.029639483924027436 train acc 0.9935416666666663


 12%|████████▊                                                                  | 5800/49325 [34:47<4:17:56,  2.81it/s]

epoch 1, step 5800/49325 loss 0.02905408760577427 train acc 0.9937068965517236


 12%|█████████                                                                  | 6000/49325 [35:59<4:18:27,  2.79it/s]

epoch 1, step 6000/49325 loss 0.02831183347791415 train acc 0.9938888888888884


 13%|█████████▍                                                                 | 6200/49325 [37:11<4:18:35,  2.78it/s]

epoch 1, step 6200/49325 loss 0.027468654080622663 train acc 0.9940591397849456


 13%|█████████▋                                                                 | 6400/49325 [38:23<4:17:26,  2.78it/s]

epoch 1, step 6400/49325 loss 0.027076068217772885 train acc 0.9941927083333327


 13%|██████████                                                                 | 6600/49325 [39:35<4:14:39,  2.80it/s]

epoch 1, step 6600/49325 loss 0.026387716244505664 train acc 0.9943434343434338


 14%|██████████▎                                                                | 6800/49325 [40:47<4:13:14,  2.80it/s]

epoch 1, step 6800/49325 loss 0.025611998641708388 train acc 0.994509803921568


 14%|██████████▋                                                                | 7000/49325 [41:59<4:15:26,  2.76it/s]

epoch 1, step 7000/49325 loss 0.024880520914153 train acc 0.994666666666666


 15%|██████████▉                                                                | 7200/49325 [43:11<4:10:54,  2.80it/s]

epoch 1, step 7200/49325 loss 0.024189581244141445 train acc 0.9948148148148143


 15%|███████████▎                                                               | 7400/49325 [44:23<4:11:21,  2.78it/s]

epoch 1, step 7400/49325 loss 0.023972044828404027 train acc 0.9949099099099092


 15%|███████████▌                                                               | 7600/49325 [45:35<4:07:37,  2.81it/s]

epoch 1, step 7600/49325 loss 0.023517202313757066 train acc 0.9950219298245607


 16%|███████████▊                                                               | 7800/49325 [46:47<4:07:59,  2.79it/s]

epoch 1, step 7800/49325 loss 0.02291479059394324 train acc 0.9951495726495719


 16%|████████████▏                                                              | 8000/49325 [47:58<4:06:53,  2.79it/s]

epoch 1, step 8000/49325 loss 0.02275194267939031 train acc 0.9952083333333326


 17%|████████████▍                                                              | 8200/49325 [49:10<4:08:02,  2.76it/s]

epoch 1, step 8200/49325 loss 0.02219763515157946 train acc 0.9953252032520318


 17%|████████████▊                                                              | 8400/49325 [50:22<4:05:44,  2.78it/s]

epoch 1, step 8400/49325 loss 0.021984042800532767 train acc 0.9953968253968248


 17%|█████████████                                                              | 8600/49325 [51:34<4:05:06,  2.77it/s]

epoch 1, step 8600/49325 loss 0.02155321194899505 train acc 0.9954844961240306


 18%|█████████████▍                                                             | 8800/49325 [52:46<4:03:47,  2.77it/s]

epoch 1, step 8800/49325 loss 0.021253232939008087 train acc 0.9955681818181814


 18%|█████████████▋                                                             | 9000/49325 [53:58<3:54:25,  2.87it/s]

epoch 1, step 9000/49325 loss 0.020783847205201916 train acc 0.9956666666666663


 19%|█████████████▉                                                             | 9200/49325 [55:10<4:01:50,  2.77it/s]

epoch 1, step 9200/49325 loss 0.02059662799798703 train acc 0.9957246376811592


 19%|██████████████▎                                                            | 9400/49325 [56:22<3:56:18,  2.82it/s]

epoch 1, step 9400/49325 loss 0.020366293729601935 train acc 0.9957801418439715


 19%|██████████████▌                                                            | 9600/49325 [57:34<3:59:48,  2.76it/s]

epoch 1, step 9600/49325 loss 0.020047224452444074 train acc 0.9958159722222223


 20%|██████████████▉                                                            | 9800/49325 [58:45<3:56:17,  2.79it/s]

epoch 1, step 9800/49325 loss 0.01982950292088515 train acc 0.9958673469387757


 20%|███████████████                                                           | 10000/49325 [59:57<3:55:22,  2.78it/s]

epoch 1, step 10000/49325 loss 0.019504633025018165 train acc 0.995916666666667


 21%|██████████████▉                                                         | 10200/49325 [1:01:09<3:53:51,  2.79it/s]

epoch 1, step 10200/49325 loss 0.019122893635508127 train acc 0.9959967320261441


 21%|███████████████▏                                                        | 10400/49325 [1:02:21<3:49:50,  2.82it/s]

epoch 1, step 10400/49325 loss 0.01900028904256819 train acc 0.996041666666667


 21%|███████████████▍                                                        | 10600/49325 [1:03:33<3:53:38,  2.76it/s]

epoch 1, step 10600/49325 loss 0.01864254454211877 train acc 0.9961163522012583


 22%|███████████████▊                                                        | 10800/49325 [1:04:45<3:53:11,  2.75it/s]

epoch 1, step 10800/49325 loss 0.01829758041508479 train acc 0.9961882716049386


 22%|████████████████                                                        | 11000/49325 [1:05:57<3:48:13,  2.80it/s]

epoch 1, step 11000/49325 loss 0.01796510777058777 train acc 0.9962575757575761


 23%|████████████████▎                                                       | 11200/49325 [1:07:08<3:49:51,  2.76it/s]

epoch 1, step 11200/49325 loss 0.017761878249945365 train acc 0.9963095238095242


 23%|████████████████▋                                                       | 11400/49325 [1:08:20<3:48:57,  2.76it/s]

epoch 1, step 11400/49325 loss 0.01745131740718811 train acc 0.9963742690058484


 24%|████████████████▉                                                       | 11600/49325 [1:09:32<3:48:08,  2.76it/s]

epoch 1, step 11600/49325 loss 0.017310971785122087 train acc 0.9964224137931039


 24%|█████████████████▏                                                      | 11800/49325 [1:10:44<3:42:51,  2.81it/s]

epoch 1, step 11800/49325 loss 0.017123016216047122 train acc 0.9964689265536728


 24%|█████████████████▌                                                      | 12000/49325 [1:11:56<3:42:15,  2.80it/s]

epoch 1, step 12000/49325 loss 0.016839633919370146 train acc 0.9965277777777782


 25%|█████████████████▊                                                      | 12200/49325 [1:13:08<3:39:41,  2.82it/s]

epoch 1, step 12200/49325 loss 0.01656392189893159 train acc 0.9965846994535524


 25%|██████████████████                                                      | 12400/49325 [1:14:20<3:41:34,  2.78it/s]

epoch 1, step 12400/49325 loss 0.016391506543647277 train acc 0.996626344086022


 26%|██████████████████▍                                                     | 12600/49325 [1:15:31<3:40:00,  2.78it/s]

epoch 1, step 12600/49325 loss 0.01635523684604772 train acc 0.9966534391534397


 26%|██████████████████▋                                                     | 12800/49325 [1:16:46<3:38:56,  2.78it/s]

epoch 1, step 12800/49325 loss 0.016099935744133455 train acc 0.9967057291666673


 26%|██████████████████▉                                                     | 13000/49325 [1:17:58<3:37:01,  2.79it/s]

epoch 1, step 13000/49325 loss 0.01593030895287678 train acc 0.9967435897435903


 27%|███████████████████▎                                                    | 13200/49325 [1:19:10<3:34:09,  2.81it/s]

epoch 1, step 13200/49325 loss 0.01590726377509298 train acc 0.9967550505050513


 27%|███████████████████▌                                                    | 13400/49325 [1:20:22<3:36:22,  2.77it/s]

epoch 1, step 13400/49325 loss 0.01592260130271959 train acc 0.9967412935323393


 28%|███████████████████▊                                                    | 13600/49325 [1:21:34<3:31:34,  2.81it/s]

epoch 1, step 13600/49325 loss 0.01570787545368984 train acc 0.9967769607843148


 28%|████████████████████▏                                                   | 13800/49325 [1:22:46<3:33:41,  2.77it/s]

epoch 1, step 13800/49325 loss 0.015495678246916791 train acc 0.9968115942028996


 28%|████████████████████▍                                                   | 14000/49325 [1:23:57<3:32:04,  2.78it/s]

epoch 1, step 14000/49325 loss 0.01527449722746503 train acc 0.9968571428571439


 29%|████████████████████▋                                                   | 14200/49325 [1:25:09<3:30:21,  2.78it/s]

epoch 1, step 14200/49325 loss 0.015059516129447947 train acc 0.9969014084507053


 29%|█████████████████████                                                   | 14400/49325 [1:26:21<3:27:47,  2.80it/s]

epoch 1, step 14400/49325 loss 0.014850489845742096 train acc 0.9969444444444454


 30%|█████████████████████▎                                                  | 14600/49325 [1:27:32<3:28:30,  2.78it/s]

epoch 1, step 14600/49325 loss 0.014696053848424168 train acc 0.9969634703196358


 30%|█████████████████████▌                                                  | 14800/49325 [1:28:44<3:26:29,  2.79it/s]

epoch 1, step 14800/49325 loss 0.014735193405019791 train acc 0.9969819819819832


 30%|█████████████████████▉                                                  | 15000/49325 [1:29:56<3:24:42,  2.79it/s]

epoch 1, step 15000/49325 loss 0.014644117757448435 train acc 0.9970111111111123


 31%|██████████████████████▏                                                 | 15200/49325 [1:31:08<3:24:44,  2.78it/s]

epoch 1, step 15200/49325 loss 0.014451709380988652 train acc 0.9970504385964923


 31%|██████████████████████▍                                                 | 15400/49325 [1:32:19<3:22:32,  2.79it/s]

epoch 1, step 15400/49325 loss 0.014339010768425017 train acc 0.9970779220779232


 32%|██████████████████████▊                                                 | 15600/49325 [1:33:31<3:22:37,  2.77it/s]

epoch 1, step 15600/49325 loss 0.014155456052406862 train acc 0.9971153846153857


 32%|███████████████████████                                                 | 15800/49325 [1:34:43<3:19:38,  2.80it/s]

epoch 1, step 15800/49325 loss 0.01404797312155824 train acc 0.9971413502109716


 32%|███████████████████████▎                                                | 16000/49325 [1:35:55<3:20:46,  2.77it/s]

epoch 1, step 16000/49325 loss 0.013872640885253178 train acc 0.9971770833333345


 33%|███████████████████████▋                                                | 16200/49325 [1:37:06<3:17:26,  2.80it/s]

epoch 1, step 16200/49325 loss 0.013701585600376442 train acc 0.9972119341563798


 33%|███████████████████████▉                                                | 16400/49325 [1:38:18<3:18:35,  2.76it/s]

epoch 1, step 16400/49325 loss 0.013534616639645102 train acc 0.9972459349593508


 34%|████████████████████████▏                                               | 16600/49325 [1:39:30<3:16:39,  2.77it/s]

epoch 1, step 16600/49325 loss 0.013490336098813414 train acc 0.9972489959839368


 34%|████████████████████████▌                                               | 16800/49325 [1:40:42<3:15:19,  2.78it/s]

epoch 1, step 16800/49325 loss 0.013377496202990573 train acc 0.9972718253968264


 34%|████████████████████████▊                                               | 17000/49325 [1:41:53<3:12:55,  2.79it/s]

epoch 1, step 17000/49325 loss 0.01340222145954863 train acc 0.997284313725491


 35%|█████████████████████████                                               | 17200/49325 [1:43:05<3:09:06,  2.83it/s]

epoch 1, step 17200/49325 loss 0.013248017711361268 train acc 0.997315891472869


 35%|█████████████████████████▍                                              | 17400/49325 [1:44:17<3:10:23,  2.79it/s]

epoch 1, step 17400/49325 loss 0.01309603596535143 train acc 0.9973467432950199


 36%|█████████████████████████▋                                              | 17600/49325 [1:45:29<3:09:03,  2.80it/s]

epoch 1, step 17600/49325 loss 0.01295130483243105 train acc 0.9973768939393947


 36%|█████████████████████████▉                                              | 17800/49325 [1:46:40<3:06:52,  2.81it/s]

epoch 1, step 17800/49325 loss 0.012805901255289262 train acc 0.9974063670411992


 36%|██████████████████████████▎                                             | 18000/49325 [1:47:52<3:07:32,  2.78it/s]

epoch 1, step 18000/49325 loss 0.012748636892990875 train acc 0.9974259259259266


 37%|██████████████████████████▌                                             | 18200/49325 [1:49:04<3:05:40,  2.79it/s]

epoch 1, step 18200/49325 loss 0.012608792209568504 train acc 0.9974542124542132


 37%|██████████████████████████▊                                             | 18400/49325 [1:50:16<3:04:50,  2.79it/s]

epoch 1, step 18400/49325 loss 0.012530587486256937 train acc 0.9974728260869571


 38%|███████████████████████████▏                                            | 18600/49325 [1:51:28<3:03:11,  2.80it/s]

epoch 1, step 18600/49325 loss 0.012395959118376498 train acc 0.9975000000000006


 38%|███████████████████████████▍                                            | 18800/49325 [1:52:40<3:06:12,  2.73it/s]

epoch 1, step 18800/49325 loss 0.012264205615404616 train acc 0.9975265957446814


 39%|███████████████████████████▋                                            | 19000/49325 [1:53:52<3:00:09,  2.81it/s]

epoch 1, step 19000/49325 loss 0.01213521661926954 train acc 0.9975526315789479


 39%|████████████████████████████                                            | 19200/49325 [1:55:03<3:00:45,  2.78it/s]

epoch 1, step 19200/49325 loss 0.012008859118478333 train acc 0.9975781250000005


 39%|████████████████████████████▎                                           | 19400/49325 [1:56:15<3:01:26,  2.75it/s]

epoch 1, step 19400/49325 loss 0.01197232252101704 train acc 0.9975945017182135


 40%|████████████████████████████▌                                           | 19600/49325 [1:57:27<2:59:44,  2.76it/s]

epoch 1, step 19600/49325 loss 0.01185030306099606 train acc 0.9976190476190481


 40%|████████████████████████████▉                                           | 19800/49325 [1:58:39<2:55:35,  2.80it/s]

epoch 1, step 19800/49325 loss 0.011806789292538108 train acc 0.9976346801346806


 41%|█████████████████████████████▏                                          | 20000/49325 [1:59:51<2:55:10,  2.79it/s]

epoch 1, step 20000/49325 loss 0.011688868186815384 train acc 0.9976583333333338


 41%|█████████████████████████████▍                                          | 20200/49325 [2:01:03<2:53:28,  2.80it/s]

epoch 1, step 20200/49325 loss 0.01163654374246957 train acc 0.997673267326733


 41%|█████████████████████████████▊                                          | 20400/49325 [2:02:15<2:53:19,  2.78it/s]

epoch 1, step 20400/49325 loss 0.011523338312374762 train acc 0.9976960784313729


 42%|██████████████████████████████                                          | 20600/49325 [2:03:26<2:54:46,  2.74it/s]

epoch 1, step 20600/49325 loss 0.011412637268705098 train acc 0.9977184466019421


 42%|██████████████████████████████▎                                         | 20800/49325 [2:04:39<2:48:03,  2.83it/s]

epoch 1, step 20800/49325 loss 0.011305228117129008 train acc 0.997740384615385


 43%|██████████████████████████████▋                                         | 21000/49325 [2:05:51<2:48:07,  2.81it/s]

epoch 1, step 21000/49325 loss 0.011197625106199103 train acc 0.9977619047619051


 43%|██████████████████████████████▉                                         | 21200/49325 [2:07:04<2:53:50,  2.70it/s]

epoch 1, step 21200/49325 loss 0.011188616221684723 train acc 0.9977751572327047


 43%|███████████████████████████████▏                                        | 21400/49325 [2:08:16<2:45:07,  2.82it/s]

epoch 1, step 21400/49325 loss 0.01108415881603019 train acc 0.9977959501557635


 44%|███████████████████████████████▌                                        | 21600/49325 [2:09:28<3:00:35,  2.56it/s]

epoch 1, step 21600/49325 loss 0.011095000306422718 train acc 0.9978009259259261


 44%|███████████████████████████████▊                                        | 21800/49325 [2:10:41<2:46:48,  2.75it/s]

epoch 1, step 21800/49325 loss 0.011129283474086794 train acc 0.9978058103975536


 45%|████████████████████████████████                                        | 22000/49325 [2:11:53<2:40:46,  2.83it/s]

epoch 1, step 22000/49325 loss 0.011028694940668203 train acc 0.9978257575757576


 45%|████████████████████████████████▍                                       | 22200/49325 [2:13:03<2:41:24,  2.80it/s]

epoch 1, step 22200/49325 loss 0.010929652630929083 train acc 0.9978453453453454


 45%|████████████████████████████████▋                                       | 22400/49325 [2:14:14<2:37:54,  2.84it/s]

epoch 1, step 22400/49325 loss 0.010832321656757921 train acc 0.9978645833333334


 46%|████████████████████████████████▉                                       | 22600/49325 [2:15:25<2:38:02,  2.82it/s]

epoch 1, step 22600/49325 loss 0.010736642695190316 train acc 0.9978834808259588


 46%|█████████████████████████████████▎                                      | 22800/49325 [2:16:36<2:35:37,  2.84it/s]

epoch 1, step 22800/49325 loss 0.010642593075329143 train acc 0.9979020467836258


 47%|█████████████████████████████████▌                                      | 23000/49325 [2:17:47<2:38:37,  2.77it/s]

epoch 1, step 23000/49325 loss 0.010550193044220049 train acc 0.9979202898550725


 47%|█████████████████████████████████▊                                      | 23200/49325 [2:18:57<2:31:39,  2.87it/s]

epoch 1, step 23200/49325 loss 0.01045957001321057 train acc 0.9979382183908047


 47%|██████████████████████████████████▏                                     | 23400/49325 [2:20:08<2:35:17,  2.78it/s]

epoch 1, step 23400/49325 loss 0.010432359806175654 train acc 0.997948717948718


 48%|██████████████████████████████████▍                                     | 23600/49325 [2:21:19<2:33:11,  2.80it/s]

epoch 1, step 23600/49325 loss 0.010344539808928868 train acc 0.9979661016949153


 48%|██████████████████████████████████▋                                     | 23800/49325 [2:22:30<2:29:15,  2.85it/s]

epoch 1, step 23800/49325 loss 0.010278750749373717 train acc 0.9979761904761905


 49%|███████████████████████████████████                                     | 24000/49325 [2:23:41<2:30:49,  2.80it/s]

epoch 1, step 24000/49325 loss 0.010193203751984502 train acc 0.9979930555555555


 49%|███████████████████████████████████▎                                    | 24200/49325 [2:24:52<2:27:57,  2.83it/s]

epoch 1, step 24200/49325 loss 0.010109067872038543 train acc 0.9980096418732782


 49%|███████████████████████████████████▌                                    | 24400/49325 [2:26:03<2:27:04,  2.82it/s]

epoch 1, step 24400/49325 loss 0.010100253282464668 train acc 0.9980191256830601


 50%|███████████████████████████████████▉                                    | 24600/49325 [2:27:15<2:25:54,  2.82it/s]

epoch 1, step 24600/49325 loss 0.010018246330378944 train acc 0.9980352303523035


 50%|████████████████████████████████████▏                                   | 24800/49325 [2:28:26<2:25:19,  2.81it/s]

epoch 1, step 24800/49325 loss 0.009947351474268723 train acc 0.9980443548387096


 51%|████████████████████████████████████▍                                   | 25000/49325 [2:29:37<2:23:43,  2.82it/s]

epoch 1, step 25000/49325 loss 0.009868393385967632 train acc 0.9980599999999998


 51%|████████████████████████████████████▊                                   | 25200/49325 [2:30:48<2:24:19,  2.79it/s]

epoch 1, step 25200/49325 loss 0.00992929678040968 train acc 0.9980555555555553


 51%|█████████████████████████████████████                                   | 25400/49325 [2:31:59<2:19:46,  2.85it/s]

epoch 1, step 25400/49325 loss 0.009970331085940087 train acc 0.9980577427821519


 52%|█████████████████████████████████████▎                                  | 25600/49325 [2:33:10<2:20:45,  2.81it/s]

epoch 1, step 25600/49325 loss 0.009892532571422315 train acc 0.9980729166666663


 52%|█████████████████████████████████████▋                                  | 25800/49325 [2:34:21<2:18:28,  2.83it/s]

epoch 1, step 25800/49325 loss 0.00981593249630706 train acc 0.9980878552971573


 53%|█████████████████████████████████████▉                                  | 26000/49325 [2:35:32<2:19:13,  2.79it/s]

epoch 1, step 26000/49325 loss 0.009741055961292851 train acc 0.9981025641025637


 53%|██████████████████████████████████████▏                                 | 26200/49325 [2:36:43<2:17:53,  2.80it/s]

epoch 1, step 26200/49325 loss 0.009666788889411087 train acc 0.9981170483460556


 54%|██████████████████████████████████████▌                                 | 26400/49325 [2:37:55<2:16:19,  2.80it/s]

epoch 1, step 26400/49325 loss 0.009593716806214016 train acc 0.9981313131313128


 54%|██████████████████████████████████████▊                                 | 26600/49325 [2:39:06<2:14:41,  2.81it/s]

epoch 1, step 26600/49325 loss 0.009579501395005805 train acc 0.9981390977443605


 54%|███████████████████████████████████████                                 | 26800/49325 [2:40:17<2:12:00,  2.84it/s]

epoch 1, step 26800/49325 loss 0.009508149824070757 train acc 0.9981529850746265


 55%|███████████████████████████████████████▍                                | 27000/49325 [2:41:28<2:12:54,  2.80it/s]

epoch 1, step 27000/49325 loss 0.009437811237640384 train acc 0.9981666666666663


 55%|███████████████████████████████████████▋                                | 27200/49325 [2:42:39<2:10:33,  2.82it/s]

epoch 1, step 27200/49325 loss 0.009373327491817799 train acc 0.9981740196078427


 56%|███████████████████████████████████████▉                                | 27400/49325 [2:43:50<2:09:20,  2.83it/s]

epoch 1, step 27400/49325 loss 0.009352870361522363 train acc 0.9981812652068122


 56%|████████████████████████████████████████▎                               | 27600/49325 [2:45:01<2:10:11,  2.78it/s]

epoch 1, step 27600/49325 loss 0.009286473253706197 train acc 0.9981944444444439


 56%|████████████████████████████████████████▌                               | 27800/49325 [2:46:13<2:07:25,  2.82it/s]

epoch 1, step 27800/49325 loss 0.009273157413971682 train acc 0.9982014388489203


 57%|████████████████████████████████████████▊                               | 28000/49325 [2:47:24<2:06:25,  2.81it/s]

epoch 1, step 28000/49325 loss 0.009219024367683177 train acc 0.9982083333333328


 57%|█████████████████████████████████████████▏                              | 28200/49325 [2:48:35<2:04:47,  2.82it/s]

epoch 1, step 28200/49325 loss 0.0091537476016322 train acc 0.9982210401891247


 58%|█████████████████████████████████████████▍                              | 28400/49325 [2:49:46<2:04:01,  2.81it/s]

epoch 1, step 28400/49325 loss 0.009089410992658618 train acc 0.9982335680751168


 58%|█████████████████████████████████████████▋                              | 28600/49325 [2:50:57<2:01:42,  2.84it/s]

epoch 1, step 28600/49325 loss 0.009025953555858312 train acc 0.9982459207459202


 58%|██████████████████████████████████████████                              | 28800/49325 [2:52:08<2:00:04,  2.85it/s]

epoch 1, step 28800/49325 loss 0.009001350466774218 train acc 0.9982523148148142


 59%|██████████████████████████████████████████▎                             | 29000/49325 [2:53:19<1:59:12,  2.84it/s]

epoch 1, step 29000/49325 loss 0.008939375053439516 train acc 0.9982643678160914


 59%|██████████████████████████████████████████▌                             | 29200/49325 [2:54:30<1:58:07,  2.84it/s]

epoch 1, step 29200/49325 loss 0.008885706944336264 train acc 0.9982705479452049


 60%|██████████████████████████████████████████▉                             | 29400/49325 [2:55:41<1:59:38,  2.78it/s]

epoch 1, step 29400/49325 loss 0.008855446863770584 train acc 0.9982766439909291


 60%|███████████████████████████████████████████▏                            | 29600/49325 [2:56:53<1:57:21,  2.80it/s]

epoch 1, step 29600/49325 loss 0.008795696523080187 train acc 0.9982882882882876


 60%|███████████████████████████████████████████▍                            | 29800/49325 [2:58:04<1:56:17,  2.80it/s]

epoch 1, step 29800/49325 loss 0.008748958303155812 train acc 0.9982941834451895


 61%|███████████████████████████████████████████▊                            | 30000/49325 [2:59:15<1:54:32,  2.81it/s]

epoch 1, step 30000/49325 loss 0.00874655251596417 train acc 0.9982999999999993


 61%|████████████████████████████████████████████                            | 30200/49325 [3:00:26<1:52:49,  2.83it/s]

epoch 1, step 30200/49325 loss 0.008688742791942667 train acc 0.9983112582781449


 62%|████████████████████████████████████████████▍                           | 30400/49325 [3:01:37<1:52:09,  2.81it/s]

epoch 1, step 30400/49325 loss 0.008668930209357195 train acc 0.9983168859649115


 62%|████████████████████████████████████████████▋                           | 30600/49325 [3:02:48<1:51:51,  2.79it/s]

epoch 1, step 30600/49325 loss 0.008652108624540498 train acc 0.9983224400871452


 62%|████████████████████████████████████████████▉                           | 30800/49325 [3:03:59<1:51:47,  2.76it/s]

epoch 1, step 30800/49325 loss 0.008596012929368272 train acc 0.9983333333333325


 63%|█████████████████████████████████████████████▎                          | 31000/49325 [3:05:11<1:47:37,  2.84it/s]

epoch 1, step 31000/49325 loss 0.008556751284485707 train acc 0.9983387096774186


 63%|█████████████████████████████████████████████▌                          | 31200/49325 [3:06:22<1:47:13,  2.82it/s]

epoch 1, step 31200/49325 loss 0.008502004467781836 train acc 0.9983493589743582


 64%|█████████████████████████████████████████████▊                          | 31400/49325 [3:07:33<1:45:44,  2.83it/s]

epoch 1, step 31400/49325 loss 0.008447943275025262 train acc 0.9983598726114642


 64%|██████████████████████████████████████████████▏                         | 31600/49325 [3:08:47<1:45:03,  2.81it/s]

epoch 1, step 31600/49325 loss 0.008394570975048962 train acc 0.9983702531645562


 64%|██████████████████████████████████████████████▍                         | 31800/49325 [3:09:58<1:43:41,  2.82it/s]

epoch 1, step 31800/49325 loss 0.008341849264813461 train acc 0.9983805031446533


 65%|██████████████████████████████████████████████▋                         | 32000/49325 [3:11:09<1:41:51,  2.83it/s]

epoch 1, step 32000/49325 loss 0.008307843801861428 train acc 0.9983854166666658


 65%|███████████████████████████████████████████████                         | 32200/49325 [3:12:20<1:41:21,  2.82it/s]

epoch 1, step 32200/49325 loss 0.008256304567597102 train acc 0.9983954451345748


 66%|███████████████████████████████████████████████▎                        | 32400/49325 [3:13:31<1:40:18,  2.81it/s]

epoch 1, step 32400/49325 loss 0.0082054059827478 train acc 0.9984053497942379


 66%|███████████████████████████████████████████████▌                        | 32600/49325 [3:14:43<1:38:41,  2.82it/s]

epoch 1, step 32600/49325 loss 0.00815516093830159 train acc 0.9984151329243346


 66%|███████████████████████████████████████████████▉                        | 32800/49325 [3:15:54<1:38:39,  2.79it/s]

epoch 1, step 32800/49325 loss 0.008105522392245507 train acc 0.9984247967479667


 67%|████████████████████████████████████████████████▏                       | 33000/49325 [3:17:05<1:35:52,  2.84it/s]

epoch 1, step 33000/49325 loss 0.008069400550280064 train acc 0.9984292929292922


 67%|████████████████████████████████████████████████▍                       | 33200/49325 [3:18:16<1:35:47,  2.81it/s]

epoch 1, step 33200/49325 loss 0.008020860243466406 train acc 0.9984387550200796


 68%|████████████████████████████████████████████████▊                       | 33400/49325 [3:19:27<1:34:22,  2.81it/s]

epoch 1, step 33400/49325 loss 0.007972905027566222 train acc 0.9984481037924144


 68%|█████████████████████████████████████████████████                       | 33600/49325 [3:20:38<1:32:50,  2.82it/s]

epoch 1, step 33600/49325 loss 0.007978006686018795 train acc 0.9984523809523803


 69%|█████████████████████████████████████████████████▎                      | 33800/49325 [3:21:49<1:32:37,  2.79it/s]

epoch 1, step 33800/49325 loss 0.007930876509839936 train acc 0.9984615384615378


 69%|█████████████████████████████████████████████████▋                      | 34000/49325 [3:23:00<1:31:15,  2.80it/s]

epoch 1, step 34000/49325 loss 0.007884313837862424 train acc 0.9984705882352934


 69%|█████████████████████████████████████████████████▉                      | 34200/49325 [3:24:12<1:29:43,  2.81it/s]

epoch 1, step 34200/49325 loss 0.007838304323855606 train acc 0.9984795321637421


 70%|██████████████████████████████████████████████████▏                     | 34400/49325 [3:25:23<1:28:18,  2.82it/s]

epoch 1, step 34400/49325 loss 0.007792796998122695 train acc 0.9984883720930227


 70%|██████████████████████████████████████████████████▌                     | 34600/49325 [3:26:34<1:27:11,  2.81it/s]

epoch 1, step 34600/49325 loss 0.007747814205828346 train acc 0.998497109826589


 71%|██████████████████████████████████████████████████▊                     | 34800/49325 [3:27:45<1:26:11,  2.81it/s]

epoch 1, step 34800/49325 loss 0.00770336900305953 train acc 0.9985057471264361


 71%|███████████████████████████████████████████████████                     | 35000/49325 [3:28:56<1:25:32,  2.79it/s]

epoch 1, step 35000/49325 loss 0.007671743328500913 train acc 0.9985095238095233


 71%|███████████████████████████████████████████████████▍                    | 35200/49325 [3:30:07<1:24:11,  2.80it/s]

epoch 1, step 35200/49325 loss 0.00762826475124426 train acc 0.9985179924242419


 72%|███████████████████████████████████████████████████▋                    | 35400/49325 [3:31:19<1:22:43,  2.81it/s]

epoch 1, step 35400/49325 loss 0.007585461969641143 train acc 0.9985263653483987


 72%|███████████████████████████████████████████████████▉                    | 35600/49325 [3:32:30<1:20:28,  2.84it/s]

epoch 1, step 35600/49325 loss 0.007542918989452337 train acc 0.998534644194756


 73%|████████████████████████████████████████████████████▎                   | 35800/49325 [3:33:41<1:19:49,  2.82it/s]

epoch 1, step 35800/49325 loss 0.007500865449949208 train acc 0.9985428305400367


 73%|████████████████████████████████████████████████████▌                   | 36000/49325 [3:34:52<1:18:32,  2.83it/s]

epoch 1, step 36000/49325 loss 0.007459255967217884 train acc 0.9985509259259254


 73%|████████████████████████████████████████████████████▊                   | 36200/49325 [3:36:03<1:17:52,  2.81it/s]

epoch 1, step 36200/49325 loss 0.0074181043165330455 train acc 0.9985589318600363


 74%|█████████████████████████████████████████████████████▏                  | 36400/49325 [3:37:14<1:16:29,  2.82it/s]

epoch 1, step 36400/49325 loss 0.007377435499747272 train acc 0.9985668498168493


 74%|█████████████████████████████████████████████████████▍                  | 36600/49325 [3:38:25<1:15:49,  2.80it/s]

epoch 1, step 36600/49325 loss 0.007337202730227296 train acc 0.9985746812386151


 75%|█████████████████████████████████████████████████████▋                  | 36800/49325 [3:39:37<1:14:48,  2.79it/s]

epoch 1, step 36800/49325 loss 0.007297580543540085 train acc 0.9985824275362314


 75%|██████████████████████████████████████████████████████                  | 37000/49325 [3:40:48<1:13:42,  2.79it/s]

epoch 1, step 37000/49325 loss 0.0072582214014017266 train acc 0.9985900900900896


 75%|██████████████████████████████████████████████████████▎                 | 37200/49325 [3:41:59<1:11:08,  2.84it/s]

epoch 1, step 37200/49325 loss 0.007219259611915988 train acc 0.9985976702508955


 76%|██████████████████████████████████████████████████████▌                 | 37400/49325 [3:43:10<1:10:52,  2.80it/s]

epoch 1, step 37400/49325 loss 0.007181006945639631 train acc 0.998605169340463


 76%|██████████████████████████████████████████████████████▉                 | 37600/49325 [3:44:22<1:09:42,  2.80it/s]

epoch 1, step 37600/49325 loss 0.007153217214781953 train acc 0.9986081560283684


 77%|███████████████████████████████████████████████████████▏                | 37800/49325 [3:45:33<1:07:13,  2.86it/s]

epoch 1, step 37800/49325 loss 0.007115438172207489 train acc 0.9986155202821865


 77%|███████████████████████████████████████████████████████▍                | 38000/49325 [3:46:44<1:06:38,  2.83it/s]

epoch 1, step 38000/49325 loss 0.007129371450903728 train acc 0.9986184210526312


 77%|███████████████████████████████████████████████████████▊                | 38200/49325 [3:47:55<1:06:12,  2.80it/s]

epoch 1, step 38200/49325 loss 0.007092110137584923 train acc 0.9986256544502614


 78%|████████████████████████████████████████████████████████                | 38400/49325 [3:49:06<1:05:47,  2.77it/s]

epoch 1, step 38400/49325 loss 0.007055245595256247 train acc 0.9986328124999996


 78%|████████████████████████████████████████████████████████▎               | 38600/49325 [3:50:17<1:02:24,  2.86it/s]

epoch 1, step 38600/49325 loss 0.007019069783018691 train acc 0.9986398963730566


 79%|████████████████████████████████████████████████████████▋               | 38800/49325 [3:51:28<1:02:58,  2.79it/s]

epoch 1, step 38800/49325 loss 0.006982962510361854 train acc 0.9986469072164945


 79%|████████████████████████████████████████████████████████▉               | 39000/49325 [3:52:39<1:00:54,  2.83it/s]

epoch 1, step 39000/49325 loss 0.006953512178747988 train acc 0.9986495726495723


 79%|█████████████████████████████████████████████████████████▏              | 39200/49325 [3:53:50<1:00:02,  2.81it/s]

epoch 1, step 39200/49325 loss 0.006918092931474968 train acc 0.9986564625850337


 80%|███████████████████████████████████████████████████████████               | 39400/49325 [3:55:01<58:29,  2.83it/s]

epoch 1, step 39400/49325 loss 0.006883040100516393 train acc 0.9986632825719117


 80%|███████████████████████████████████████████████████████████▍              | 39600/49325 [3:56:13<58:32,  2.77it/s]

epoch 1, step 39600/49325 loss 0.00684841727108383 train acc 0.9986700336700334


 81%|███████████████████████████████████████████████████████████▋              | 39800/49325 [3:57:24<56:18,  2.82it/s]

epoch 1, step 39800/49325 loss 0.006814071757793265 train acc 0.9986767169179226


 81%|████████████████████████████████████████████████████████████              | 40000/49325 [3:58:35<55:21,  2.81it/s]

epoch 1, step 40000/49325 loss 0.006780150310121355 train acc 0.998683333333333


 82%|████████████████████████████████████████████████████████████▎             | 40200/49325 [3:59:46<54:41,  2.78it/s]

epoch 1, step 40200/49325 loss 0.006773477409207268 train acc 0.9986857379767825


 82%|████████████████████████████████████████████████████████████▌             | 40400/49325 [4:00:57<52:53,  2.81it/s]

epoch 1, step 40400/49325 loss 0.006788767193383251 train acc 0.998688118811881


 82%|████████████████████████████████████████████████████████████▉             | 40600/49325 [4:02:09<52:01,  2.80it/s]

epoch 1, step 40600/49325 loss 0.006755379622936692 train acc 0.998694581280788


 83%|█████████████████████████████████████████████████████████████▏            | 40800/49325 [4:03:20<50:29,  2.81it/s]

epoch 1, step 40800/49325 loss 0.006768470912003089 train acc 0.9986968954248365


 83%|█████████████████████████████████████████████████████████████▌            | 41000/49325 [4:04:31<49:15,  2.82it/s]

epoch 1, step 41000/49325 loss 0.0067355275143962815 train acc 0.9987032520325202


 84%|█████████████████████████████████████████████████████████████▊            | 41200/49325 [4:05:42<48:10,  2.81it/s]

epoch 1, step 41200/49325 loss 0.0067028947302067736 train acc 0.9987095469255662


 84%|██████████████████████████████████████████████████████████████            | 41400/49325 [4:06:55<47:31,  2.78it/s]

epoch 1, step 41400/49325 loss 0.006670571898963705 train acc 0.9987157809983895


 84%|██████████████████████████████████████████████████████████████▍           | 41600/49325 [4:08:07<47:54,  2.69it/s]

epoch 1, step 41600/49325 loss 0.006646207666927324 train acc 0.9987179487179486


 85%|██████████████████████████████████████████████████████████████▋           | 41800/49325 [4:09:20<44:13,  2.84it/s]

epoch 1, step 41800/49325 loss 0.006639195813645941 train acc 0.99872009569378


 85%|███████████████████████████████████████████████████████████████           | 42000/49325 [4:10:32<45:30,  2.68it/s]

epoch 1, step 42000/49325 loss 0.006607653773166241 train acc 0.9987261904761905


 86%|███████████████████████████████████████████████████████████████▎          | 42200/49325 [4:11:44<41:56,  2.83it/s]

epoch 1, step 42200/49325 loss 0.006576397149253662 train acc 0.9987322274881517


 86%|███████████████████████████████████████████████████████████████▌          | 42400/49325 [4:12:56<40:57,  2.82it/s]

epoch 1, step 42400/49325 loss 0.00655100582571627 train acc 0.9987342767295598


 86%|███████████████████████████████████████████████████████████████▉          | 42600/49325 [4:14:10<40:05,  2.80it/s]

epoch 1, step 42600/49325 loss 0.006520319311631296 train acc 0.9987402190923318


 87%|████████████████████████████████████████████████████████████████▏         | 42800/49325 [4:15:22<38:48,  2.80it/s]

epoch 1, step 42800/49325 loss 0.006489926280744435 train acc 0.9987461059190031


 87%|████████████████████████████████████████████████████████████████▌         | 43000/49325 [4:16:34<37:27,  2.81it/s]

epoch 1, step 43000/49325 loss 0.006459819488429458 train acc 0.9987519379844961


 88%|████████████████████████████████████████████████████████████████▊         | 43200/49325 [4:17:46<36:16,  2.81it/s]

epoch 1, step 43200/49325 loss 0.0064299743811836885 train acc 0.9987577160493828


 88%|█████████████████████████████████████████████████████████████████         | 43400/49325 [4:18:59<36:40,  2.69it/s]

epoch 1, step 43400/49325 loss 0.006403923085118389 train acc 0.9987596006144395


 88%|█████████████████████████████████████████████████████████████████▍        | 43600/49325 [4:20:11<33:43,  2.83it/s]

epoch 1, step 43600/49325 loss 0.0063760573107866 train acc 0.9987652905198778


 89%|█████████████████████████████████████████████████████████████████▋        | 43800/49325 [4:21:23<32:59,  2.79it/s]

epoch 1, step 43800/49325 loss 0.006347233356828453 train acc 0.9987709284627094


 89%|██████████████████████████████████████████████████████████████████        | 44000/49325 [4:22:34<32:39,  2.72it/s]

epoch 1, step 44000/49325 loss 0.006318455811932941 train acc 0.9987765151515152


 90%|██████████████████████████████████████████████████████████████████▎       | 44200/49325 [4:23:46<30:47,  2.77it/s]

epoch 1, step 44200/49325 loss 0.006289922965600014 train acc 0.9987820512820514


 90%|██████████████████████████████████████████████████████████████████▌       | 44400/49325 [4:24:58<28:51,  2.84it/s]

epoch 1, step 44400/49325 loss 0.006305305413937089 train acc 0.998783783783784


 90%|██████████████████████████████████████████████████████████████████▉       | 44600/49325 [4:26:08<27:44,  2.84it/s]

epoch 1, step 44600/49325 loss 0.006306487830650198 train acc 0.9987855007473844


 91%|███████████████████████████████████████████████████████████████████▏      | 44800/49325 [4:27:20<26:46,  2.82it/s]

epoch 1, step 44800/49325 loss 0.0062784538893259885 train acc 0.9987909226190478


 91%|███████████████████████████████████████████████████████████████████▌      | 45000/49325 [4:28:32<26:52,  2.68it/s]

epoch 1, step 45000/49325 loss 0.006274465464842906 train acc 0.9987925925925929


 92%|███████████████████████████████████████████████████████████████████▊      | 45200/49325 [4:29:44<24:51,  2.77it/s]

epoch 1, step 45200/49325 loss 0.0062711023744308645 train acc 0.998794247787611


 92%|████████████████████████████████████████████████████████████████████      | 45400/49325 [4:30:56<23:14,  2.81it/s]

epoch 1, step 45400/49325 loss 0.006265700971185839 train acc 0.998795888399413


 92%|████████████████████████████████████████████████████████████████████▍     | 45600/49325 [4:32:08<22:08,  2.80it/s]

epoch 1, step 45600/49325 loss 0.006260841563210373 train acc 0.9987975146198834


 93%|████████████████████████████████████████████████████████████████████▋     | 45800/49325 [4:33:19<21:03,  2.79it/s]

epoch 1, step 45800/49325 loss 0.00623356542986381 train acc 0.9988027656477443


 93%|█████████████████████████████████████████████████████████████████████     | 46000/49325 [4:34:33<20:34,  2.69it/s]

epoch 1, step 46000/49325 loss 0.006206735785229702 train acc 0.9988079710144931


 94%|█████████████████████████████████████████████████████████████████████▎    | 46200/49325 [4:35:45<18:21,  2.84it/s]

epoch 1, step 46200/49325 loss 0.006183499378300806 train acc 0.9988095238095243


 94%|█████████████████████████████████████████████████████████████████████▌    | 46400/49325 [4:36:57<17:29,  2.79it/s]

epoch 1, step 46400/49325 loss 0.006198104733584303 train acc 0.9988110632183913


 94%|█████████████████████████████████████████████████████████████████████▊    | 46573/49325 [4:37:59<16:15,  2.82it/s]

# Evaluation

## Define Classes for Metrics

In [21]:
def get_eval_report(labels, preds):
    f1 = f1_score(labels, preds)
    prc = get_precision(labels, preds)
    tn, fp, fn, tp = confusion_matrix(labels, preds).ravel()
    return {
        'f1': f1,
        'prc': prc,
        'tp': tp,
        'tn': tn,
        'fp': fp,
        'fn': fn
    }

In [22]:
def compute_metrics(labels, preds):
    assert len(preds) == len(labels)
    return get_eval_report(labels, preds)

In [23]:
def get_precision(labels, preds):
    assert len(preds) == len(labels)
    return np.sum(labels == preds) / len(labels)

## Load Model and Weights

In [0]:
# ##### ***** IMPORTANT ***** #####
# ######## SET MODEL NAME #########
# MODEL_NAME = 'kobert_liner'

# # Weights name to load: 'xxxx.bin' 
# # xxxx: floating part of the val loss
# WEIGHTS_NAME = '6790.bin'
# CONFIG_NAME = 'config.json'

# OUTPUTS_DIR = os.path.join(TASK_DIR, f'outputs/{MODEL_NAME}')
# REPORTS_DIR = os.path.join(TASK_DIR, f'reports/{MODEL_NAME}/')

# # The directory where BERT will look for pre-trained models to load parameters from.
# CACHE_DIR = os.path.join(TASK_DIR, f'cache/{MODEL_NAME}')

In [0]:
# model = MODELS[MODEL_NAME]
# model.load_state_dict(torch.load(os.path.join(OUTPUTS_DIR, WEIGHTS_NAME)))

In [29]:
# model.to(device)

# inference public_test

In [53]:
# WEIGHTS_NAME = '/epoch_1_step_49325'
# OUTPUTS_DIR =''
MODEL_NAME = 'kobert_linear'
model = MODELS[MODEL_NAME]
model.load_state_dict(torch.load('./output/epoch_1_step_49325.bin'))

<All keys matched successfully>

In [54]:
for step, (input_ids, labels_ids) in enumerate(test_dataloader):
    print(step)
    print(labels_ids)
    break

0
tensor([2.])


In [55]:
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False)
loss_fn = CrossEntropyLoss()

model.eval()
eval_loss = 0
preds = []
TOTAL_STEPS = len(test_dataloader)
for step, (input_ids, label_ids) in enumerate(test_dataloader):
    input_ids = input_ids.to(device)
    label_ids = label_ids.to(device)

    with torch.no_grad():
        logits, _ = model(input_ids, torch.zeros_like(input_ids).to(device), (input_ids > 0).to(torch.long).to(device))
        
#         # create eval loss and other metric required by the task  
#         tmp_eval_loss = loss_fn(logits.view(-1, num_labels), label_ids.view(-1))
#         eval_loss += tmp_eval_loss.mean().item()

        if len(preds) == 0:
            preds.append(logits.detach().cpu().numpy())
        else:
            preds[0] = np.append(preds[0], logits.detach().cpu().numpy(), axis=0)

        if (step + 1) % 100 == 0:
            print('STEPS: {}/{}, Loss: {:.4f}'.format(step, TOTAL_STEPS, eval_loss / step))

# eval_loss = eval_loss / nb_eval_steps
preds = preds[0]
preds = np.argmax(preds, axis=1)

STEPS: 99/1626, Loss: 0.0000
STEPS: 199/1626, Loss: 0.0000
STEPS: 299/1626, Loss: 0.0000
STEPS: 399/1626, Loss: 0.0000
STEPS: 499/1626, Loss: 0.0000
STEPS: 599/1626, Loss: 0.0000
STEPS: 699/1626, Loss: 0.0000
STEPS: 799/1626, Loss: 0.0000
STEPS: 899/1626, Loss: 0.0000
STEPS: 999/1626, Loss: 0.0000
STEPS: 1099/1626, Loss: 0.0000
STEPS: 1199/1626, Loss: 0.0000
STEPS: 1299/1626, Loss: 0.0000
STEPS: 1399/1626, Loss: 0.0000
STEPS: 1499/1626, Loss: 0.0000
STEPS: 1599/1626, Loss: 0.0000


In [57]:
import pandas as pd
sub = pd.read_csv('./public_test.csv')
sub.index = sub['id']
sub = sub.drop(['id', 'year_month', 'text'],axis=1)
sub['smishing'] = preds
sub['smishing'] = sub.apply(lambda x : 1 if x['smishing'] >=0.5 else 0, axis=1)
print(len(sub[sub['smishing']==1]))
sub_lst = sub[sub['smishing']==1].index.values
# 채점 
moai = pd.read_csv('./submission_2.csv')
moai['smishing'] = moai.apply(lambda x : 1 if x['smishing'] >=0.5 else 0, axis=1)
mo_lst = moai[moai['smishing']==1]['id'].values
correct = 0
wrong = 0
for i in sub_lst:
    if i in mo_lst:
        correct+=1
    else:
        wrong+=1
        
print('correct',correct)
print('wrong', wrong)

62
correct 44
wrong 18


## Evaluate Model

In [0]:
dev_dataloader = torch.utils.data.DataLoader(dev_dataset, batch_size=BATCH_SIZE, shuffle=False)
loss_fn = CrossEntropyLoss()

model.eval()
eval_loss = 0
preds = []
TOTAL_STEPS = len(dev_dataloader)
for step, (input_ids, labels_ids) in enumerate(dev_dataloader):
    input_ids = input_ids.to(device)
    label_ids = label_ids.to(device)

    with torch.no_grad():
        logits, _ = model(input_ids, torch.zeros_like(input_ids).to(device), (input_ids > 0).to(torch.long).to(device))

        # create eval loss and other metric required by the task  
        tmp_eval_loss = loss_fn(logits.view(-1, num_labels), label_ids.view(-1))

        eval_loss += tmp_eval_loss.mean().item()
        if len(preds) == 0:
            preds.append(logits.detach().cpu().numpy())
        else:
            preds[0] = np.append(preds[0], logits.detach().cpu().numpy(), axis=0)

        if (step + 1) % 100 == 0:
            print('STEPS: {}/{}, Loss: {:.4f}'.format(step, TOTAL_STEPS, eval_loss / step))

eval_loss = eval_loss / nb_eval_steps
preds = preds[0]
preds = np.argmax(preds, axis=1)
result = compute_metrics(eval_all_label_ids.numpy(), preds)

result['eval_loss'] = eval_loss

output_eval_file = os.path.join(REPORTS_DIR, "eval_results_" + WEIGHTS_NAME[:-4] + ".txt")
    
with open(output_eval_file, "w") as writer:
    logger.info("***** Eval results *****")
    for key in (result.keys()):
        logger.info("   %s = %s", key, str(result[key]))
        writer.write("%s = %s\n" % (key, str(result[key])))