# Model Extra

- 목적 : 가진 데이터셋 dktc를 dktc_imp, dktc_exp 로 분류해서 저장한다.  
- 학습 데이터 : 일상대화문, 비윤리대화문 중 intensity 2.0 이상을 추출한 데이터

- 참고사항 : dktc를 분류하기 위한 모델이므로 학습에 dktc는 사용하지 않는다. 구조는 model_1과 같다. (not, exp을 분류하고 ood를 imp로 처리한다.) 

In [None]:
!pip install keras-bert
!pip install keras-radam
!pip install wandb

# !pip install tensorflow==2.12.0

In [34]:
# 라이브러리 import 및 주요 라이브러리 버전 확인 
import wandb
from wandb.keras import WandbCallback

import tensorflow as tf

import pandas as pd
import numpy as np  
import re
import pickle
from sklearn.model_selection import train_test_split

import keras as keras
from keras.models import load_model
from keras import backend as K
from keras import Input, Model
from keras import optimizers

import codecs
from tqdm import tqdm
import shutil

import json

import os
import warnings

warnings.filterwarnings(action='ignore')
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
# tf.logging.set_verbosity(tf.logging.ERROR)

from keras_bert import load_trained_model_from_checkpoint, load_vocabulary
from keras_bert import Tokenizer
from keras_bert import AdamWarmup, calc_train_steps

from keras_radam import RAdam
from keras_radam.training import RAdamOptimizer

In [2]:
import encoding_korbert as enc



## 학습에 쓸 데이터 로딩

In [3]:
# Load the merged daily-conversation splits, keeping only the label and text columns.
daily_train_df = pd.read_csv(
    "./data/daily_train.csv",
    usecols=['class', 'conversation'],
)  # merged daily-conversation train data
daily_val_df = pd.read_csv(
    "./data/daily_val.csv",
    usecols=['class', 'conversation'],
)  # merged daily-conversation validation data

In [4]:
# Load the unethical-conversation data (label + text columns only).
immoral_df = pd.read_csv("./data/immoral.csv", usecols=['class','conversation'])
# Alternative source files (different filter variants):
# immoral_df = pd.read_csv("./data/immoral_over_1_5.csv", usecols=['class','conversation'])
# immoral_df = pd.read_csv("./data/immoral_over_1_7.csv", usecols=['class','conversation'])
# immoral_df = pd.read_csv("./data/immoral_over_2_v.csv", usecols=['class','conversation'])

# Shuffle the rows. The seed is fixed so that re-running the notebook yields
# the same row order and therefore the same train/validation split.
immoral_df = immoral_df.sample(frac=1, random_state=1004).reset_index(drop=True)

# 80/20 train/validation split, seeded for reproducibility.
model_extra_exp_train_df, model_extra_exp_val_df = train_test_split(
    immoral_df, test_size=0.2, random_state=1004
)


In [5]:
# 샘플링 하고 싶을 때

# daily_train_df =daily_train_df.sample(n=50, random_state=1004)
# daily_val_df =daily_val_df.sample(n=10, random_state=1004)

# model_extra_exp_train_df =model_extra_exp_train_df.sample(n=50, random_state=1004)
# model_extra_exp_val_df =model_extra_exp_val_df.sample(n=10, random_state=1004)

In [5]:
# Stack the daily and unethical training rows (columns common to both frames)
# and renumber the rows 0..n-1 in the same step.
model_extra_train_df = pd.concat(
    [daily_train_df, model_extra_exp_train_df],
    join="inner",
    ignore_index=True,
)

In [6]:
# Stack the daily and unethical validation rows (columns common to both frames)
# and renumber the rows 0..n-1 in the same step.
model_extra_val_df = pd.concat(
    [daily_val_df, model_extra_exp_val_df],
    join="inner",
    ignore_index=True,
)

In [7]:
# Shuffle each split once. A single sample(frac=1) already produces a uniformly
# random permutation, so repeating it adds no randomness and only copies the
# frame again each time.
model_extra_train_df = model_extra_train_df.sample(frac=1).reset_index(drop=True)

model_extra_val_df = model_extra_val_df.sample(frac=1).reset_index(drop=True)

In [8]:
# Encode the string labels as integer codes. The category order is derived once
# from the training split and reused for validation, so a given label always
# maps to the same code in both frames (encoding each frame independently can
# disagree when the two splits do not contain exactly the same label set).
# Original note on the mapping: 0 = exp, 1 = not. This depends on how the label
# strings sort -- verify against the CSV contents.
class_dtype = pd.CategoricalDtype(
    categories=model_extra_train_df["class"].astype('category').cat.categories
)
model_extra_train_df["class"] = model_extra_train_df["class"].astype(class_dtype).cat.codes

model_extra_val_df["class"] = model_extra_val_df["class"].astype(class_dtype).cat.codes
if (model_extra_val_df["class"] < 0).any():
    # cat.codes yields -1 for values that are not in the category set.
    raise ValueError(
        "validation split contains class labels that are missing from the training split"
    )

## Encoding

In [9]:
# Alias the tokenizer object exposed by the project's encoding_korbert module.
tokenizer = enc.tokenizer

In [10]:
# Convert each split into (inputs, labels) with the project's encoding helper.
# NOTE(review): the structure of the returned inputs is defined in
# encoding_korbert.load_data, which is not visible here -- confirm before
# slicing or indexing train_x / val_x.
train_x, train_y = enc.load_data(model_extra_train_df)
val_x, val_y = enc.load_data(model_extra_val_df)

100%|██████████| 90926/90926 [01:07<00:00, 1343.50it/s]
100%|██████████| 11771/11771 [00:11<00:00, 1009.68it/s]


### 모델 파인튜닝

In [11]:
# Authenticate with Weights & Biases without embedding the secret in the notebook.
# With no explicit key, wandb.login() resolves credentials from the WANDB_API_KEY
# environment variable or the local netrc file, prompting if neither is present.
# SECURITY: an API key was previously hard-coded on this line; that key should be
# revoked and regenerated because it has been committed with the notebook.
wandb.login()

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /aiffel/.netrc


True

### init_config

In [12]:
# Baseline hyperparameters handed to wandb.init(config=...) by train().
default_config = dict(
    learning_rate=1e-4,
    epochs=5,
    batch_size=16,
    weight_decay=0.025,
    optimizer="radam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

### sweep_config

In [41]:
# W&B sweep definition: random search that minimizes validation loss.
sweep_config = dict(
    metric=dict(name="val_loss", goal="minimize"),
    method="random",
    parameters=dict(
        # Only one optimizer is offered to the sweep.
        optimizer=dict(value='radam'),
        # Change these candidates if an out-of-memory error occurs.
        batch_size=dict(values=[8, 16]),
        # Learning rate is drawn from the range [min, max].
        learning_rate=dict(min=1e-5, max=1e-4),
        weight_decay=dict(values=[0.025, 0.001]),
        # Integer number of epochs drawn uniformly from 3..5.
        epochs=dict(distribution="int_uniform", min=3, max=5),
    ),
)

In [14]:
# Sequence length is taken from the encoding module so that the model and the
# encoded inputs use the same value.
SEQ_LEN = enc.SEQ_LEN

# Directory holding the pretrained BERT files (relative path).
pretrained_path = "./bert"

# Config and checkpoint locations inside that directory.
config_path, checkpoint_path = (
    os.path.join(pretrained_path, file_name)
    for file_name in ('bert_config.json', 'bert_model.ckpt')
)

In [15]:
def build_model():
    """Build the binary classifier on top of the pretrained BERT checkpoint.

    Loads the checkpoint given by ``config_path`` / ``checkpoint_path`` with
    all layers trainable, then attaches a single sigmoid unit named
    ``'real_output'`` to the output of the third-from-last layer.

    Returns:
        An uncompiled Keras model that takes the first two inputs of the
        loaded BERT model and outputs one sigmoid value per example.
    """
    pretrained = load_trained_model_from_checkpoint(
        config_path,
        checkpoint_path,
        training=True,
        trainable=True,
        seq_len=SEQ_LEN,
    )

    # NOTE(review): presumably the token and segment inputs -- confirm against keras-bert.
    bert_inputs = pretrained.inputs[:2]
    # NOTE(review): which layer sits at index -3 depends on the keras-bert graph
    # built with training=True -- confirm it is the intended feature layer.
    features = pretrained.layers[-3].output

    classifier_head = keras.layers.Dense(
        1,
        activation='sigmoid',
        kernel_initializer=keras.initializers.TruncatedNormal(stddev=0.02),
        name='real_output',
    )
    return keras.models.Model(bert_inputs, classifier_head(features))

In [16]:
# def set_optimizer():
#     if config.optimizer == "radam":
#         return RAdamOptimizer(learning_rate=config.learning_rate, 
#                               weight_decay=config.weight_decay)

In [17]:
# WandbCallback's labels
# Display labels passed to WandbCallback(labels=...); list position is the class code.
CLASS_NAMES = ["위협 대화", "일상 대화"] # 0 = exp (threatening conversation), 1 = not (everyday conversation)

In [18]:
# Directory that keras_callbacks() writes checkpoint weight files into.
path= './saved'

# Number of leading layers to freeze; only referenced by the commented-out
# freezing cell at the end of this notebook.
layer_num = 12

In [19]:
def keras_callbacks():
    """Create the checkpoint and early-stopping callbacks for ``model.fit``.

    Returns:
        A ``(checkpoint, early_stopping)`` tuple. The checkpoint callback
        monitors ``val_loss`` with ``save_best_only`` and
        ``save_weights_only`` enabled, writing ``.h5`` files under ``path``.
        The early-stopping callback uses ``patience=1``.
    """
    # Update the date tag (0420) in the file name for each experiment.
    weights_pattern = os.path.join(
        path, "model_extra_weights_0420.{epoch:02d}-{val_loss:.2f}.h5"
    )
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        filepath=weights_pattern,
        monitor='val_loss',
        save_best_only=True,
        save_weights_only=True,
    )
    early_stopping = tf.keras.callbacks.EarlyStopping(patience=1)

    return checkpoint, early_stopping

In [39]:
def train():
    """Run a single W&B-tracked fine-tuning trial.

    Starts a W&B run with ``default_config`` as its config, reads the
    hyperparameters back from ``wandb.config``, builds and compiles the model
    from ``build_model()``, fits it on the module-level ``train_x`` /
    ``train_y`` with ``val_x`` / ``val_y`` as validation data, and saves the
    trained model to ``'model_extra'``.

    The run is used as a context manager so that it is always closed, and is
    closed with a failure status when an exception escapes the block.

    Raises:
        ValueError: if ``config.optimizer`` is anything other than ``"radam"``.
    """
    with wandb.init(project='model_extra_0420',  # change only the date suffix
                    entity="m05",
                    config=default_config):
        config = wandb.config

        # Validate before loading the model: previously an unsupported value
        # left the optimizer variable unbound and failed later with NameError.
        if config.optimizer != "radam":
            raise ValueError(f"Unsupported optimizer: {config.optimizer!r}")
        optimizer = RAdamOptimizer(learning_rate=config.learning_rate,
                                   weight_decay=config.weight_decay)

        model = build_model()
        model.compile(optimizer=optimizer,
                      loss=config.loss,
                      metrics=config.metrics)

        checkpoint_cb, early_stop_cb = keras_callbacks()

        # NOTE(review): train_x[:30] / val_x[:30] take the first 30 samples only
        # if the inputs are a single array; if enc.load_data returns a list of
        # input arrays, this slices the list instead -- confirm.
        wandb_cb = WandbCallback(training_data=(train_x[:30], train_y[:30]),
                                 validation_data=(val_x[:30], val_y[:30]),
                                 labels=CLASS_NAMES)

        model.fit(train_x, train_y,
                  epochs=config.epochs,
                  batch_size=config.batch_size,
                  validation_data=(val_x, val_y),
                  callbacks=[checkpoint_cb, early_stop_cb, wandb_cb])

        # Save the full model. The target path is fixed, so each call to
        # train() overwrites the model saved by the previous call.
        tf.keras.models.save_model(model, 'model_extra')

    #     # evaluate test
    #     test_loss, test_accuracy = model.evaluate(test_x, test_y, verbose=2)

    #     # add columns to the W&B table
    #     wandb.log({"Test Accuracy Rate: " : round(test_accuracy * 100, 2),
    #                "Test Error Rate: " : round((1 - test_accuracy) * 100, 2)})

### model.fit 실행

In [42]:
# Register the sweep with W&B (change only the date suffix of the project name).
sweep_id = wandb.sweep(
    sweep_config,
    entity='m05',
    project='model_extra_0420',
)

# Launch the sweep agent; count is the number of train() trials to run.
wandb.agent(
    sweep_id,
    function=train,
    count=5,
)

Create sweep with ID: n7lg4n94
Sweep URL: https://wandb.ai/m05/model_extra_0419/sweeps/n7lg4n94


[34m[1mwandb[0m: Agent Starting Run: qjtorvab with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	learning_rate: 1.0954053526013697e-05
[34m[1mwandb[0m: 	optimizer: radam
[34m[1mwandb[0m: 	weight_decay: 0.001


Epoch 1/3
  15/5683 [..............................] - ETA: 1:39:21 - loss: 0.6968 - accuracy: 0.4125

[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


---
---

In [None]:
# model_extra= build_model()
# model_extra.summary()

In [24]:
# #레이어의 trainable 출력해서 확인하기

# for layer in model_extra.layers:
#     if 'Encoder' in layer.name:
#         if 'layer_0' in layer.name:
#             if layer.trainable == False:
#                 print(f'{layer.name} is frozen, 고정됨')
#             else:
#                 print(f'{layer.name} is not frozen, 고정안됨')
#         elif layer.trainable == False:
#             print(f'{layer.name} is frozen, 고정됨')
#         else:
#             print(f'{layer.name} is not frozen, 고정안됨')

Encoder-1-MultiHeadSelfAttention is not frozen, 고정안됨
Encoder-1-MultiHeadSelfAttention-Dropout is not frozen, 고정안됨
Encoder-1-MultiHeadSelfAttention-Add is not frozen, 고정안됨
Encoder-1-MultiHeadSelfAttention-Norm is not frozen, 고정안됨
Encoder-1-FeedForward is not frozen, 고정안됨
Encoder-1-FeedForward-Dropout is not frozen, 고정안됨
Encoder-1-FeedForward-Add is not frozen, 고정안됨
Encoder-1-FeedForward-Norm is not frozen, 고정안됨
Encoder-2-MultiHeadSelfAttention is not frozen, 고정안됨
Encoder-2-MultiHeadSelfAttention-Dropout is not frozen, 고정안됨
Encoder-2-MultiHeadSelfAttention-Add is not frozen, 고정안됨
Encoder-2-MultiHeadSelfAttention-Norm is not frozen, 고정안됨
Encoder-2-FeedForward is not frozen, 고정안됨
Encoder-2-FeedForward-Dropout is not frozen, 고정안됨
Encoder-2-FeedForward-Add is not frozen, 고정안됨
Encoder-2-FeedForward-Norm is not frozen, 고정안됨
Encoder-3-MultiHeadSelfAttention is not frozen, 고정안됨
Encoder-3-MultiHeadSelfAttention-Dropout is not frozen, 고정안됨
Encoder-3-MultiHeadSelfAttention-Add is not frozen, 고정안됨
E

In [20]:
# for layer in model_extra.layers[:layer_num]:
#     layer.trainable = False # 층을 선택하여 freeze 하기.