Last Update @ 2020.12.04

- Huggingface Transformers 4.0.0  버전 반영

# Package 설치 & 데이터 받기

In [None]:
!pip install -q transformers pytorch_lightning emoji soynlp
!pip install koco

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
#!git clone https://github.com/e9t/nsmc
import koco
train_dev = koco.load_dataset('korean-hate-speech', mode = 'train_dev')

In [3]:
#!head nsmc/ratings_train.txt

# 패키지 import & 기본 Args 설정

In [3]:
import os
import pandas as pd

from pprint import pprint

import torch
from torch.utils.data import Dataset, DataLoader, TensorDataset
from torch.optim.lr_scheduler import ExponentialLR

from pytorch_lightning import LightningModule, Trainer, seed_everything

from transformers import BertForSequenceClassification, BertTokenizer, AdamW

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

import re
import emoji
from soynlp.normalizer import repeat_normalize

## 기본 학습 Arguments

In [4]:
class Arg:
    random_seed: int = 42  # Random Seed
    pretrained_model: str = 'beomi/kcbert-base'  # Transformers PLM name
    pretrained_tokenizer: str = ''  # Optional, Transformers Tokenizer Name. Overrides `pretrained_model`
    auto_batch_size: str = 'power'  # Let PyTorch Lightening find the best batch size 
    batch_size: int = 0  # Optional, Train/Eval Batch Size. Overrides `auto_batch_size` 
    lr: float = 5e-6  # Starting Learning Rate
    epochs: int = 20  # Max Epochs
    max_length: int = 150  # Max Length input size
    report_cycle: int = 100  # Report (Train Metrics) Cycle
    train_data_path: str = "nsmc/ratings_train.txt"  # Train Dataset file 
    val_data_path: str = "nsmc/ratings_test.txt"  # Validation Dataset file 
    cpu_workers: int = os.cpu_count()  # Multi cpu workers
    test_mode: bool = False  # Test Mode enables `fast_dev_run`
    optimizer: str = 'AdamW'  # AdamW vs AdamP
    lr_scheduler: str = 'exp'  # ExponentialLR vs CosineAnnealingWarmRestarts
    fp16: bool = False  # Enable train on FP16
    tpu_cores: int = 0  # Enable TPU with 1 core or 8 cores

args = Arg()

## 기본값을 Override 하고싶은 경우 아래와 같이 수정

In [None]:
!nvidia-smi

Wed Mar  3 05:19:22 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.39       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   37C    P0    26W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

위에서 GPU가 V100/P100이면 아래 `batch_size`  를 32 이상으로 하셔도 됩니다.

In [5]:
# args.tpu_cores = 8  # Enables TPU
args.fp16 = True  # Enables GPU FP16
args.batch_size = 32  # Force setup batch_size

# Model 만들기 with Pytorch Lightning

In [6]:
class Model(LightningModule):
    def __init__(self, options):
        super().__init__()
        self.args = options
        self.bert = BertForSequenceClassification.from_pretrained(self.args.pretrained_model)
        self.tokenizer = BertTokenizer.from_pretrained(
            self.args.pretrained_tokenizer
            if self.args.pretrained_tokenizer
            else self.args.pretrained_model
        )
        self.validation_step_outputs = []

    def forward(self, **kwargs):
        return self.bert(**kwargs)

    def training_step(self, batch, batch_idx):
        data, labels = batch
        output = self(input_ids=data, labels=labels)

        # Transformers 4.0.0+
        loss = output.loss
        logits = output.logits
        
        preds = logits.argmax(dim=-1)

        y_true = labels.cpu().numpy()
        y_pred = preds.cpu().numpy()

        # Acc, Precision, Recall, F1
        metrics = [
            metric(y_true=y_true, y_pred=y_pred)
            for metric in
            (accuracy_score, precision_score, recall_score, f1_score)
        ]

        tensorboard_logs = {
            'train_loss': loss.cpu().detach().numpy().tolist(),
            'train_acc': metrics[0],
            'train_precision': metrics[1],
            'train_recall': metrics[2],
            'train_f1': metrics[3],
        }
        if (batch_idx % self.args.report_cycle) == 0:
            print()
            pprint(tensorboard_logs)
        return {'loss': loss, 'log': tensorboard_logs}

    def validation_step(self, batch, batch_idx):
        data, labels = batch
        output = self(input_ids=data, labels=labels)

        # Transformers 4.0.0+
        loss = output.loss
        self.validation_step_outputs.append(loss)
        logits = output.logits

        preds = logits.argmax(dim=-1)

        y_true = list(labels.cpu().numpy())
        y_pred = list(preds.cpu().numpy())

        return {
            'loss': loss,
            'y_true': y_true,
            'y_pred': y_pred,
        }
    def test_step(self, batch, batch_idx):
        data, labels = batch
        output = self(input_ids=data, labels=labels)

        # Transformers 4.0.0+
        #loss = output.loss
        #self.validation_step_outputs.append(loss)
        logits = output.logits
        #print(logits)
        preds = logits.argmax(dim=-1)

        y_true = list(labels.cpu().numpy())
        #print(y_true)
        y_pred = list(preds.cpu().numpy())
        metrics = [
            metric(y_true=y_true, y_pred=y_pred)
            for metric in
            (accuracy_score, precision_score, recall_score, f1_score)
        ]
        acc = metrics[0]
        tensorboard_logs = {
            'test_acc': metrics[0],
            'test_precision': metrics[1],
            'test_recall': metrics[2],
            'test_f1': metrics[3],
        }
        if (batch_idx % self.args.report_cycle) == 0:
            print()
            pprint(tensorboard_logs)
        return {'acc':acc}
    def on_validation_epoch_end(self):
        epoch_average = torch.stack(self.validation_step_outputs).mean()
        self.log("validation_epoch_average", epoch_average)
        self.validation_step_outputs.clear()

    def configure_optimizers(self):
        if self.args.optimizer == 'AdamW':
            optimizer = AdamW(self.parameters(), lr=self.args.lr)
        elif self.args.optimizer == 'AdamP':
            from adamp import AdamP
            optimizer = AdamP(self.parameters(), lr=self.args.lr)
        else:
            raise NotImplementedError('Only AdamW and AdamP is Supported!')
        if self.args.lr_scheduler == 'cos':
            scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=1, T_mult=2)
        elif self.args.lr_scheduler == 'exp':
            scheduler = ExponentialLR(optimizer, gamma=0.5)
        else:
            raise NotImplementedError('Only cos and exp lr scheduler is Supported!')
        return {
            'optimizer': optimizer,
            'scheduler': scheduler,
        }

    def read_data(self, is_train):
        col = 'train' if is_train else 'dev'
        
        df = pd.DataFrame(train_dev[col])
        df.loc[(df['contain_gender_bias']==False) & (df['bias']=='none') & (df['hate']=='none') == True , 'label'] = False
        df.loc[(df['label'] != False), 'label'] = True
        # df['id'] = [ i for i in range(len(df))]
        # df.rename(columns = {'comments' : 'document'}, inplace = True)
        # df['label'] = df["contain_gender_bias"].replace({"False": 0, "True": 1})
        # df= df.loc[:,~df.columns.isin(["contain_gender_bias", "bias", "hate", "news_title"])]
        return df

    def preprocess_dataframe(self, df):
        emojis = ''.join(emoji.EMOJI_DATA.keys())
        pattern = re.compile(f'[^ .,?!/@$%~％·∼()\x00-\x7Fㄱ-힣{emojis}]+')
        url_pattern = re.compile(
            r'https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)')

        def clean(x):
            x = pattern.sub(' ', x)
            x = url_pattern.sub('', x)
            x = x.strip()
            x = repeat_normalize(x, num_repeats=2)
            return x

        df['comments'] = df['comments'].map(lambda x: self.tokenizer.encode(
            clean(str(x)),
            padding='max_length',
            max_length=self.args.max_length,
            truncation=True,
        ))
        return df

    def train_dataloader(self):
        df = self.read_data(True)
        df = self.preprocess_dataframe(df)

        dataset = TensorDataset(
            torch.tensor(df['comments'].to_list(), dtype=torch.long),
            torch.tensor(df['label'].to_list(), dtype=torch.long),
        )
        return DataLoader(
            dataset,
            batch_size=self.args.batch_size or self.batch_size,
            shuffle=True,
            num_workers=self.args.cpu_workers,
        )

    def val_dataloader(self):
        df = self.read_data(False)
        df = self.preprocess_dataframe(df)

        dataset = TensorDataset(
            torch.tensor(df['comments'].to_list(), dtype=torch.long),
            torch.tensor(df['label'].to_list(), dtype=torch.long),
        )
        return DataLoader(
            dataset,
            batch_size=self.args.batch_size or self.batch_size,
            shuffle=False,
            num_workers=self.args.cpu_workers,
        )
    def test_dataloader(self):
        df = self.read_data(False)
        df = self.preprocess_dataframe(df)

        dataset = TensorDataset(
            torch.tensor(df['comments'].to_list(), dtype=torch.long),
            torch.tensor(df['label'].to_list(), dtype=torch.long),
        )
        return DataLoader(
            dataset,
            batch_size=self.args.batch_size or self.batch_size,
            shuffle=False,
            num_workers=self.args.cpu_workers,
        )


In [7]:
print("Using PyTorch Ver", torch.__version__)
print("Fix Seed:", args.random_seed)
seed_everything(args.random_seed)
model = Model(args)

print(":: Start Training ::")
trainer = Trainer(
    max_epochs=20,
    fast_dev_run=args.test_mode,
    num_sanity_val_steps=None if args.test_mode else 0,
    ### auto_scale_batch_size=args.auto_batch_size if args.auto_batch_size and not args.batch_size else False,
    # For GPU Setup
    deterministic=torch.cuda.is_available(),
    ### gpus=-1 if torch.cuda.is_available() else None,
    precision=16 if args.fp16 else 32,
    # For TPU Setup
    # tpu_cores=args.tpu_cores if args.tpu_cores else None,
)
trainer.fit(model)




INFO:lightning_fabric.utilities.seed:Global seed set to 42


Using PyTorch Ver 2.0.1+cu118
Fix Seed: 42


Some weights of the model checkpoint at beomi/kcbert-base were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initiali

:: Start Training ::


INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(
INFO:pytorch_lightning.callbacks.model_summary:
  | Name | Type                          | Params
-------------------------------------------------------
0 | bert | BertForSequenceClassification | 108 M 
-------------------------------------------------------
108 M     Trainable params
0         Non-trainable params
108 M     Total params
435.680   Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]


{'train_acc': 0.375,
 'train_f1': 0.0909090909090909,
 'train_loss': 0.7497329711914062,
 'train_precision': 1.0,
 'train_recall': 0.047619047619047616}


  _warn_prf(average, modifier, msg_start, len(result))



{'train_acc': 0.75,
 'train_f1': 0.8000000000000002,
 'train_loss': 0.5343780517578125,
 'train_precision': 0.8,
 'train_recall': 0.8}

{'train_acc': 0.78125,
 'train_f1': 0.8108108108108109,
 'train_loss': 0.4834480285644531,
 'train_precision': 0.8333333333333334,
 'train_recall': 0.7894736842105263}


Validation: 0it [00:00, ?it/s]


{'train_acc': 0.8125,
 'train_f1': 0.8571428571428571,
 'train_loss': 0.3699932098388672,
 'train_precision': 0.782608695652174,
 'train_recall': 0.9473684210526315}

{'train_acc': 0.75,
 'train_f1': 0.7777777777777778,
 'train_loss': 0.5001382827758789,
 'train_precision': 0.7777777777777778,
 'train_recall': 0.7777777777777778}

{'train_acc': 0.6875,
 'train_f1': 0.7368421052631579,
 'train_loss': 0.5329961776733398,
 'train_precision': 0.7368421052631579,
 'train_recall': 0.7368421052631579}


Validation: 0it [00:00, ?it/s]


{'train_acc': 0.84375,
 'train_f1': 0.8275862068965517,
 'train_loss': 0.33103132247924805,
 'train_precision': 0.75,
 'train_recall': 0.9230769230769231}

{'train_acc': 0.71875,
 'train_f1': 0.7567567567567567,
 'train_loss': 0.45322322845458984,
 'train_precision': 0.8235294117647058,
 'train_recall': 0.7}

{'train_acc': 0.90625,
 'train_f1': 0.918918918918919,
 'train_loss': 0.22497057914733887,
 'train_precision': 0.8947368421052632,
 'train_recall': 0.9444444444444444}


Validation: 0it [00:00, ?it/s]


{'train_acc': 0.84375,
 'train_f1': 0.8387096774193549,
 'train_loss': 0.2816026210784912,
 'train_precision': 0.8125,
 'train_recall': 0.8666666666666667}

{'train_acc': 0.96875,
 'train_f1': 0.975609756097561,
 'train_loss': 0.1753554344177246,
 'train_precision': 1.0,
 'train_recall': 0.9523809523809523}

{'train_acc': 0.90625,
 'train_f1': 0.918918918918919,
 'train_loss': 0.26060986518859863,
 'train_precision': 0.9444444444444444,
 'train_recall': 0.8947368421052632}


Validation: 0it [00:00, ?it/s]


{'train_acc': 0.9375,
 'train_f1': 0.9523809523809523,
 'train_loss': 0.18336844444274902,
 'train_precision': 0.9523809523809523,
 'train_recall': 0.9523809523809523}

{'train_acc': 0.90625,
 'train_f1': 0.918918918918919,
 'train_loss': 0.15556633472442627,
 'train_precision': 0.9444444444444444,
 'train_recall': 0.8947368421052632}

{'train_acc': 0.875,
 'train_f1': 0.9047619047619048,
 'train_loss': 0.2212435007095337,
 'train_precision': 0.9047619047619048,
 'train_recall': 0.9047619047619048}


Validation: 0it [00:00, ?it/s]


{'train_acc': 0.96875,
 'train_f1': 0.9696969696969697,
 'train_loss': 0.10260045528411865,
 'train_precision': 1.0,
 'train_recall': 0.9411764705882353}

{'train_acc': 0.90625,
 'train_f1': 0.918918918918919,
 'train_loss': 0.23162028193473816,
 'train_precision': 0.9444444444444444,
 'train_recall': 0.8947368421052632}

{'train_acc': 0.96875,
 'train_f1': 0.9787234042553191,
 'train_loss': 0.06679743528366089,
 'train_precision': 1.0,
 'train_recall': 0.9583333333333334}


Validation: 0it [00:00, ?it/s]


{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.07790935039520264,
 'train_precision': 1.0,
 'train_recall': 1.0}

{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.0925440788269043,
 'train_precision': 1.0,
 'train_recall': 1.0}

{'train_acc': 0.9375,
 'train_f1': 0.9444444444444444,
 'train_loss': 0.13988229632377625,
 'train_precision': 0.9444444444444444,
 'train_recall': 0.9444444444444444}


Validation: 0it [00:00, ?it/s]


{'train_acc': 0.96875,
 'train_f1': 0.975609756097561,
 'train_loss': 0.08044779300689697,
 'train_precision': 1.0,
 'train_recall': 0.9523809523809523}

{'train_acc': 0.96875,
 'train_f1': 0.967741935483871,
 'train_loss': 0.04793384671211243,
 'train_precision': 0.9375,
 'train_recall': 1.0}

{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.02037845551967621,
 'train_precision': 1.0,
 'train_recall': 1.0}


Validation: 0it [00:00, ?it/s]


{'train_acc': 0.96875,
 'train_f1': 0.967741935483871,
 'train_loss': 0.10312189161777496,
 'train_precision': 0.9375,
 'train_recall': 1.0}

{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.016741350293159485,
 'train_precision': 1.0,
 'train_recall': 1.0}

{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.0216953307390213,
 'train_precision': 1.0,
 'train_recall': 1.0}


Validation: 0it [00:00, ?it/s]


{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.004731312394142151,
 'train_precision': 1.0,
 'train_recall': 1.0}

{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.012099437415599823,
 'train_precision': 1.0,
 'train_recall': 1.0}

{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.00192999467253685,
 'train_precision': 1.0,
 'train_recall': 1.0}


Validation: 0it [00:00, ?it/s]


{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.004361536353826523,
 'train_precision': 1.0,
 'train_recall': 1.0}

{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.004633180797100067,
 'train_precision': 1.0,
 'train_recall': 1.0}

{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.0021402910351753235,
 'train_precision': 1.0,
 'train_recall': 1.0}


Validation: 0it [00:00, ?it/s]


{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.003343377262353897,
 'train_precision': 1.0,
 'train_recall': 1.0}

{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.009666714817285538,
 'train_precision': 1.0,
 'train_recall': 1.0}

{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.0019460879266262054,
 'train_precision': 1.0,
 'train_recall': 1.0}


Validation: 0it [00:00, ?it/s]


{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.0034547559916973114,
 'train_precision': 1.0,
 'train_recall': 1.0}

{'train_acc': 0.96875,
 'train_f1': 0.9787234042553191,
 'train_loss': 0.1924510896205902,
 'train_precision': 0.9583333333333334,
 'train_recall': 1.0}

{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.00505702942609787,
 'train_precision': 1.0,
 'train_recall': 1.0}


Validation: 0it [00:00, ?it/s]


{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.0016290545463562012,
 'train_precision': 1.0,
 'train_recall': 1.0}

{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.006265193223953247,
 'train_precision': 1.0,
 'train_recall': 1.0}

{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.0024589821696281433,
 'train_precision': 1.0,
 'train_recall': 1.0}


Validation: 0it [00:00, ?it/s]


{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.0029694028198719025,
 'train_precision': 1.0,
 'train_recall': 1.0}

{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.004285924136638641,
 'train_precision': 1.0,
 'train_recall': 1.0}

{'train_acc': 0.96875,
 'train_f1': 0.975609756097561,
 'train_loss': 0.030284028500318527,
 'train_precision': 0.9523809523809523,
 'train_recall': 1.0}


Validation: 0it [00:00, ?it/s]


{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.005454998463392258,
 'train_precision': 1.0,
 'train_recall': 1.0}

{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.0018515661358833313,
 'train_precision': 1.0,
 'train_recall': 1.0}

{'train_acc': 0.96875,
 'train_f1': 0.975609756097561,
 'train_loss': 0.08031083643436432,
 'train_precision': 0.9523809523809523,
 'train_recall': 1.0}


Validation: 0it [00:00, ?it/s]


{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.0011128894984722137,
 'train_precision': 1.0,
 'train_recall': 1.0}

{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.0007745139300823212,
 'train_precision': 1.0,
 'train_recall': 1.0}

{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.0038710683584213257,
 'train_precision': 1.0,
 'train_recall': 1.0}


Validation: 0it [00:00, ?it/s]


{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.009010117501020432,
 'train_precision': 1.0,
 'train_recall': 1.0}

{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.00038614869117736816,
 'train_precision': 1.0,
 'train_recall': 1.0}

{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.00032414868474006653,
 'train_precision': 1.0,
 'train_recall': 1.0}


Validation: 0it [00:00, ?it/s]


{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.0006824620068073273,
 'train_precision': 1.0,
 'train_recall': 1.0}

{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.0013292767107486725,
 'train_precision': 1.0,
 'train_recall': 1.0}

{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.0016157813370227814,
 'train_precision': 1.0,
 'train_recall': 1.0}


Validation: 0it [00:00, ?it/s]


{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.000833313912153244,
 'train_precision': 1.0,
 'train_recall': 1.0}

{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.0011816620826721191,
 'train_precision': 1.0,
 'train_recall': 1.0}

{'train_acc': 1.0,
 'train_f1': 1.0,
 'train_loss': 0.008055359125137329,
 'train_precision': 1.0,
 'train_recall': 1.0}


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


In [8]:
args.test_mode = True
trainer = Trainer(
    max_epochs=5,
    fast_dev_run=args.test_mode,
    num_sanity_val_steps=None if args.test_mode else 0,
    ### auto_scale_batch_size=args.auto_batch_size if args.auto_batch_size and not args.batch_size else False,
    # For GPU Setup
    deterministic=torch.cuda.is_available(),
    ### gpus=-1 if torch.cuda.is_available() else None,
    precision=16 if args.fp16 else 32,
    # For TPU Setup
    # tpu_cores=args.tpu_cores if args.tpu_cores else None,
)
trainer.test(model)

  rank_zero_warn(
INFO:pytorch_lightning.utilities.rank_zero:Using 16bit Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.utilities.rank_zero:Running in `fast_dev_run` mode: will run the requested loop using 1 batch(es). Logging and checkpointing is suppressed.
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]


{'test_acc': 0.84375,
 'test_f1': 0.8936170212765957,
 'test_precision': 1.0,
 'test_recall': 0.8076923076923077}


[{}]

# 학습!

> 주의: 1epoch별로 GPU-P100기준 약 1-2시간, GPU V100기준 ~30분이 걸립니다.

> Update @ 2020.09.01
> 최근 Colab Pro에서 V100이 배정됩니다.

```python
# 1epoch 기준 아래 score가 나옵니다.
{'val_acc': 0.90522,
 'val_f1': 0.9049023739289227,
 'val_loss': 0.23429009318351746,
 'val_precision': 0.9143146796431468,
 'val_recall': 0.8956818813808446}
```

In [None]:
main()

Global seed set to 42


Using PyTorch Ver 1.7.1+cu101
Fix Seed: 42


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=619.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=438218004.0, style=ProgressStyle(descri…




Some weights of the model checkpoint at beomi/kcbert-base were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initiali

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=249928.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=49.0, style=ProgressStyle(description_w…




GPU available: True, used: True
TPU available: None, using: 0 TPU cores
Using native 16bit precision.


:: Start Training ::



  | Name | Type                          | Params
-------------------------------------------------------
0 | bert | BertForSequenceClassification | 108 M 
-------------------------------------------------------
108 M     Trainable params
0         Non-trainable params
108 M     Total params
435.680   Total estimated model params size (MB)


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…


{'train_acc': 0.59375,
 'train_f1': 0.7450980392156863,
 'train_loss': 0.6852923631668091,
 'train_precision': 0.59375,
 'train_recall': 1.0}


Please use self.log(...) inside the lightningModule instead.
# log on a step or aggregate epoch metric to the logger and/or progress bar (inside LightningModule)
self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True)
  _warn_prf(average, modifier, msg_start, len(result))



{'train_acc': 0.75,
 'train_f1': 0.75,
 'train_loss': 0.5262309908866882,
 'train_precision': 0.8571428571428571,
 'train_recall': 0.6666666666666666}

{'train_acc': 0.8125,
 'train_f1': 0.7857142857142856,
 'train_loss': 0.3342723548412323,
 'train_precision': 0.7333333333333333,
 'train_recall': 0.8461538461538461}

{'train_acc': 0.875,
 'train_f1': 0.8823529411764706,
 'train_loss': 0.2689765691757202,
 'train_precision': 0.8823529411764706,
 'train_recall': 0.8823529411764706}

{'train_acc': 0.875,
 'train_f1': 0.8461538461538461,
 'train_loss': 0.3174639940261841,
 'train_precision': 0.7857142857142857,
 'train_recall': 0.9166666666666666}

{'train_acc': 0.75,
 'train_f1': 0.5555555555555556,
 'train_loss': 0.38240528106689453,
 'train_precision': 0.8333333333333334,
 'train_recall': 0.4166666666666667}
