# NSMC 데이터 - PLM + 그룹 Custom Classifiers 만들기

이 노트북은 크게 2개의 모델을 생성하여 파이프라인으로 연결를 결과로 만듧니다.
- 첫 번재 모델
    - Electra Pretrained Model (PLM)
- 두 번째 모델
    - 4 개의 Classifiers 로 구성된 모델 
        - Classifiers_01, Classifiers02, Classifiers_03, Classifiers_04
- 추론
    - PLM --> Classifiers 로 이루어 지며, 최종 4개의 Classifier 의 모델 결과가 제공 됨.




---

### 참조: 
- 딥러닝으로 리뷰에서 제품 속성 정보 추출하기
    * http://blog.hwahae.co.kr/all/tech/tech-tech/5967/
- A Visual Guide to Using BERT for the First Time
    - http://jalammar.github.io/a-visual-guide-to-using-bert-for-the-first-time/
- PyTorch 101, Part 3: Going Deep with PyTorch
    - https://blog.paperspace.com/pytorch-101-advanced/
- Pytorch freeze part of the layers
    - https://jimmy-shen.medium.com/pytorch-freeze-part-of-the-layers-4554105e03a6
- BERT Fine-Tuning Tutorial with PyTorch
    - https://mccormickml.com/2019/07/22/BERT-fine-tuning/
- How many layers of my BERT model should I freeze?
    - https://raphaelb.org/posts/freezing-bert/
- Add dense layer on top of Huggingface BERT model
    - https://pyquestions.com/add-dense-layer-on-top-of-huggingface-bert-model
    

# 0. 환경 셋업

## 0.1. 변수 로딩 및 라이브러리 로딩

In [1]:
%store -r local_train_output_path
%store -r local_test_output_path

In [2]:
%load_ext autoreload
%autoreload 2

# src 폴더 경로 설정
import sys
sys.path.append('./src')
import config
from  data_util import read_nsmc_split

In [3]:
import logging
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
# logger.setLevel(logging.WARNING)
logger.addHandler(logging.StreamHandler(sys.stdout))

In [4]:
from transformers import AutoModel

from datasets import load_dataset,Dataset,DatasetDict
from transformers import DataCollatorWithPadding,AutoModelForSequenceClassification, Trainer, TrainingArguments,AutoTokenizer,AutoModel,AutoConfig
from transformers.modeling_outputs import TokenClassifierOutput
import torch
import torch.nn as nn
import pandas as pd
import numpy as np


## 0.2. Pre-trained model_id, tokenizer_id 지정
- [KoElectra Git](https://github.com/monologg/KoELECTRA)
- KoElectra Model
    - Small:
        - "monologg/koelectra-small-v3-discriminator
    - Base: 
        - monologg/koelectra-base-v3-discriminator
        


In [5]:
# from datasets import load_dataset
from transformers import (
    ElectraModel, 
    ElectraTokenizer, 
)

tokenizer_id = 'monologg/koelectra-small-v3-discriminator'
model_id = "monologg/koelectra-small-v3-discriminator"



# 1. 데이터 준비

## 1.1 데이터 로딩

In [6]:
train_texts, train_labels = read_nsmc_split(local_train_output_path)
test_texts, test_labels = read_nsmc_split(local_test_output_path)

In [7]:
logger.info(f"len: {len(train_texts)} \nSample: {train_texts[0:5]}")
logger.info(f"len: {len(train_labels)} \nSample: {train_labels[0:5]}")

len: 149552 
Sample: ['흠   포스터보고 초딩영화줄    오버연기조차 가볍지 않구나', '너무재밓었다그래서보는것을추천한다', '교도소 이야기구먼   솔직히 재미는 없다  평점 조정', '사이몬페그의 익살스런 연기가 돋보였던 영화 스파이더맨에서 늙어보이기만 했던 커스틴 던스트가 너무나도 이뻐보였다', '막 걸음마 뗀 세부터 초등학교 학년생인 살용영화 ㅋㅋㅋ   별반개도 아까움']
len: 149552 
Sample: [1, 0, 0, 1, 0]


## 1.2. 훈련 데이타를 분리하여 검증 데이터 세트 생성

In [8]:
from sklearn.model_selection import train_test_split
train_texts, val_texts, train_labels, val_labels = train_test_split(train_texts, train_labels, test_size=.2)

# 1.3. Electra Model 입력 인코딩 변환 및 torch custome Dataset 생성 

### 1.3.1. 토큰나이저 로딩 

In [9]:
tokenizer = ElectraTokenizer.from_pretrained(tokenizer_id)

### 1.3.2. Electra Model 입력 인코딩 생성

In [10]:
%%time 

tokenizer = ElectraTokenizer.from_pretrained(tokenizer_id)

train_encodings = tokenizer(train_texts, return_token_type_ids = False, truncation=True, padding=True)
val_encodings = tokenizer(val_texts, return_token_type_ids = False, truncation=True, padding=True)
test_encodings = tokenizer(test_texts, return_token_type_ids = False, truncation=True, padding=True)

CPU times: user 42.5 s, sys: 272 ms, total: 42.8 s
Wall time: 42.9 s


### 1.3.3. torch custome dataset 생성

In [11]:
from data_util import NSMCDataset

train_dataset = NSMCDataset(train_encodings, train_labels)
val_dataset = NSMCDataset(val_encodings, val_labels)
test_dataset = NSMCDataset(test_encodings, test_labels)

In [12]:
logger.info(f"len(train_dataset) : {len(train_dataset)}")
logger.info(f"len(val_dataset) : {len(val_dataset)}")
logger.info(f"len(test_dataset) : {len(test_dataset)}")


len(train_dataset) : 119641
len(val_dataset) : 29911
len(test_dataset) : 49832


### 1.3.4. 데이터 로더 생성

In [13]:
from torch.utils.data import DataLoader, SubsetRandomSampler


from train_util import create_random_sampler
    
subset_train_sampler = create_random_sampler(train_dataset, frac=0.01, is_shuffle=True, logger=logger)
train_sampler = create_random_sampler(train_dataset, frac=1, is_shuffle=True, logger=logger)

subset_eval_sampler = create_random_sampler(val_dataset, frac=0.001, is_shuffle=False, logger=logger)
# eval_sampler = create_random_sampler(val_dataset, frac=1, is_shuffle=False, logger=logger)



dataset size with frac: 0.01 ==> 1196
dataset size with frac: 1 ==> 119641
dataset size with frac: 0.001 ==> 29
dataset size with frac: 1 ==> 29911


In [14]:
train_batch_size = 16
eval_batch_size = 32
test_batch_size = 32


train_sample_loader = DataLoader(dataset=train_dataset, 
                          shuffle=False, 
                          batch_size=train_batch_size, 
                          sampler=subset_train_sampler)    

train_dataloader = DataLoader(dataset=train_dataset, 
                          shuffle=False, 
                          batch_size=train_batch_size, 
                          sampler=train_sampler)    

eval_sample_loader = DataLoader(dataset=val_dataset, 
                          shuffle=False, 
                          batch_size=eval_batch_size, 
                          sampler=subset_eval_sampler)    

eval_dataloader = DataLoader(dataset=val_dataset, 
                          shuffle=False, 
                          batch_size=eval_batch_size)    


test_dataloader = DataLoader(dataset=test_dataset, 
                          shuffle=False, 
                          batch_size=test_batch_size
                            )    




# 2.모델 정의 및 생성

## 2.1. Pre-Trained Model 로딩

In [24]:
plm = AutoModel.from_pretrained(model_id)

Some weights of the model checkpoint at monologg/koelectra-small-v3-discriminator were not used when initializing ElectraModel: ['discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


## 2.2. Electra Model 아키텍쳐 확인

In [25]:
def show_module(model):
    for name, child in model.named_children():
        print("name :", name)
        #print("child: \n", child)


In [26]:
show_module(plm)

name : embeddings
name : embeddings_project
name : encoder


In [27]:
# print(plm)

## 2.3. Custom Classifier 추가 하여 Custom Model 생성 하기
- PLM + Classifier 로 구성됨.
- PLM 레이어는 훈련을 안하기 위해 파라이터 Freezing 을 함.

In [28]:
class CustomBERTModel(nn.Module):
    '''
    plm 파라미터는 freezing 하여 훈련을 하지 않음.
    '''
    def __init__(self,model ,num_labels): 
        super(CustomBERTModel,self).__init__() 
        self.num_labels = num_labels 

        self.plm = model
        # self.dropout = nn.Dropout(0.1) 
        # self.classifier = nn.Linear(768,num_labels) # load and initialize weights
        self.classifier = nn.Sequential(
            nn.Dropout(0.1),            
            nn.Linear(256,128),        
            nn.Dropout(0.1),                        
            nn.Linear(128,num_labels) 
        )
                
        self.freeze_plm()        

    def forward(self, input_ids=None, attention_mask=None,labels=None):
        #Extract outputs from the body
        outputs = self.plm(input_ids=input_ids, attention_mask=attention_mask)
        #print("outputs shape: ", outputs[0].shape)
        
        #sequence_output = self.dropout(outputs[0]) # outputs[0]=last hidden state
        # print("sequence_output shape: ", sequence_output.shape)
        
        cls_vector = outputs[0][:,0,:].view(-1,256) # outputs[0] 은 last_hidden_state, outputs[1] 은 pooled_output_state
        # print("cls_vector shape: ", cls_vector.shape)        
        
        logits = self.classifier(cls_vector) 

        loss = None
        if labels is not None:
            loss_fct = nn.CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

        return TokenClassifierOutput(loss=loss, logits=logits, hidden_states=cls_vector ,attentions=outputs.attentions)

    def freeze_plm(self):
        """
        Freezes the parameters of BERT so when BertWithCustomNNClassifier is trained
        only the wieghts of the custom classifier are modified.
        """
        for param in self.plm.parameters():
            param.requires_grad=False
        #     print(param.requires_grad)
        
    
    def unfreeze_plm(self):
        """
        Unfreezes the parameters of BERT so when BertWithCustomNNClassifier is trained
        both the wieghts of the custom classifier and of the underlying BERT are modified.
        """
        for param in self.plm.parameters():
            param.requires_grad=True



In [29]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
custom_model=CustomBERTModel(model = plm ,num_labels=2).to(device)

최상위 모듈(레이어)  확인

In [30]:
show_module(custom_model)

name : plm
name : classifier


파라미터 Freezing 여부 확인. plm == freezing , classifier == trainable

In [31]:
def show_trainable_layer(model):
    # requires_grad == true 만 출력
    for name, param in model.named_parameters():
        if param.requires_grad: print(name) 

show_trainable_layer(custom_model)

classifier.1.weight
classifier.1.bias
classifier.3.weight
classifier.3.bias


## 2.4. Custome Model 아키텍쳐 확인

In [32]:
# print(custom_model)

# 3. 훈련 준비

## 3.1. 모델 평가 지표 정의

In [33]:
from datasets import load_metric
# metric = load_metric("f1")
metric = load_metric("accuracy")

## 3.2. 옵티마이저, 스케줄러, 훈련 루프 정의

In [34]:
from transformers import AdamW,get_scheduler

def create_optimizer_scheduler(num_epochs, model, train_dataloader):
    # plm freezing optimizer
    optimizer = AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=5e-5)
    # optimizer withoud frezzing
    # optimizer = AdamW(model.parameters(), lr=5e-5)

    
    def get_lr_scheduler(num_epochs, train_dataloader, optimizer):
        num_training_steps = num_epochs * len(train_dataloader)
        lr_scheduler = get_scheduler(
            "linear",
            optimizer=optimizer,
            num_warmup_steps=0,
            num_training_steps=num_training_steps,
        )

        print(num_training_steps)

        return lr_scheduler, num_training_steps
    
    lr_scheduler, num_training_steps = get_lr_scheduler(num_epochs, train_dataloader, optimizer)
    
    return optimizer, lr_scheduler, num_training_steps


In [35]:
def train_loop(num_epochs, model, train_dataloader, progress_bar_train, \
               eval_dataloader, progress_bar_eval, metric, optimizer, lr_scheduler):
    for epoch in range(num_epochs):
        model.train()
        for batch in train_dataloader:
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**batch)
            loss = outputs.loss
            loss.backward()

            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()
            progress_bar_train.update(1)
                
        # 모델 평가
        model.eval()
        for batch in eval_dataloader:
            batch = {k: v.to(device) for k, v in batch.items()}
            with torch.no_grad():
                outputs = custom_model(**batch)

            logits = outputs.logits
            predictions = torch.argmax(logits, dim=-1)
            metric.add_batch(predictions=predictions, references=batch["labels"])
            progress_bar_eval.update(1)
            

        print(metric.compute())
    
    return model



# 3.3. 훈련 루프 실행 및 평가

훈련 루프에 입력이 될 Batch 확인 함. 'eval_dataloader' 를 'train_dataloader' 로 바꾸어서 보시면 됩니다.
레코드가 많이 출력이 되어서 eval_dataloader 로 확인 함.

In [36]:
# for batch in eval_dataloader:
#     batch = {k: v.to(device) for k, v in batch.items()}
#     print(batch)
#     break

In [37]:
from tqdm.auto import tqdm

num_epochs = 1
optimizer, lr_scheduler, num_training_steps = create_optimizer_scheduler(num_epochs, custom_model, train_dataloader)

progress_bar_train = tqdm(range(num_training_steps))
progress_bar_eval = tqdm(range(num_epochs * len(eval_dataloader)))

custom_model_01 = train_loop(num_epochs, custom_model, train_dataloader, progress_bar_train, \
               eval_dataloader, progress_bar_eval, metric, optimizer, lr_scheduler)      

7478




  0%|          | 0/7478 [00:00<?, ?it/s]

  0%|          | 0/935 [00:00<?, ?it/s]

{'accuracy': 0.6572164086790813}


# 5. 테스트 데이터 로 모델 평가 

In [38]:

def evaL_model(model, test_dataloader, metric):
    model.eval()


    for i, batch in enumerate(test_dataloader):
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = model(**batch)

        logits = outputs.logits
        # print("outputs: \n", outputs)
        predictions = torch.argmax(logits, dim=-1)
        # print("batch: \n", batch)
        # print("hidden_states: ", outputs.hidden_states.shape)
        # print("Ground Truth: \n", batch["labels"])        
        # print("predictions: \n", predictions)
        metric.add_batch(predictions=predictions, references=batch["labels"])
        

    print(metric.compute())
    
evaL_model(custom_model_01, test_dataloader, metric)    

{'accuracy': 0.6574891635896613}


# 6. 모델 분리 후 추론

In [39]:
show_module(custom_model_01)

name : plm
name : classifier


## 6.1. 추론 PLM Model 생성 
- custom_model (PLM + Classifier) 에서 PLM 만을 분리 함.

In [40]:
class PLModel(nn.Module):
    def __init__(self, base_model, num_labels): 
        super(PLModel,self).__init__() 

        self.plm = base_model.plm
        

    def forward(self, input_ids=None, attention_mask=None,labels=None):
        #Extract outputs from the body

        #Add custom layers
        outputs = self.plm(input_ids=input_ids, attention_mask=attention_mask)
        
        
        cls_vector = outputs[0][:,0,:].view(-1,256)
        # print("cls_vector shape: ", cls_vector.shape)                

        return cls_vector

PL_Model=PLModel(base_model = custom_model_01 ,num_labels=2).to(device)
print("PLM: ")
show_module(PL_Model)



PLM: 
name : plm


## 6.2. 추천 이진 분류기 모델 
- custom_model (PLM + Classifier) 에서 Classifier 만을 분리 함.

In [41]:
class ClassifierModel(nn.Module):
    def __init__(self, base_model, num_labels): 
        super(ClassifierModel,self).__init__() 

        self.num_labels = num_labels 
        #self.dropout = nn.Dropout(0.1)     
        self.classifier = base_model.classifier    
        

    def forward(self, cls_vector=None, labels=None):

        #Add custom layers
        #cls_vector = self.dropout(cls_vector) #outputs[0]=last hidden state
        logits = self.classifier(cls_vector)
        
        # logits = self.classifier(sequence_output[:,0,:].view(-1,768)) # calculate losses


        return logits


classifier_01 = ClassifierModel(base_model = custom_model_01 ,num_labels=2).to(device)
print("\nClassifier: \n")
show_module(classifier_01)


Classifier: 

name : classifier


## 6.3. PLM 모델 추론
- BERT Encoding 을 입력하여 PLM 모델을 통해서 (Batch_Size, 25, 768) 벡터를 제공

In [42]:
def inference_plm(model, sample_dataloader):
    model.eval()

    output_list = []
    for i, batch in enumerate(sample_dataloader):
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = model(**batch)
            output_list.append(outputs)
        
        # if i == 10:
        #     break
            
    return output_list

    
plm_vector = inference_plm(PL_Model, test_dataloader)
print("batch size: " , len(plm_vector))
print("one batch shape: " , plm_vector[0].shape)

batch size:  1558
one batch shape:  torch.Size([32, 256])


## 6.4. 이진 분류기 추론
PLM 모델을 통해서 (Batch_Size, 25, 768) 벡터를 입력으로 하여 Classifier 로 추론

In [43]:
from datasets import load_metric

In [44]:
def inference_classifier(model, plm_vector, test_loader):
    model.eval()

    output_list = []
    for i , (batch, reference) in enumerate(zip(plm_vector, test_loader)):
        # batch = batch[0].to(device)

        #print("batch shape: ", batch.shape)
        # batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = model(batch)
            # print("outputs: ", outputs)
            predictions = torch.argmax(outputs, dim=-1)
            # print("batch: \n", batch)      
            # print("Ground Truth : \n", reference['labels'])            
            # print("Predictions: ", predictions)
            metric.add_batch(predictions=predictions, references=reference["labels"])
            
            output_list.append(outputs)

        
    print(metric.compute())        
                
    return output_list

output_list = inference_classifier(classifier_01, plm_vector, test_dataloader)

{'accuracy': 0.6574891635896613}


# 7. 두번째 모델 

## 7.1. plm plus custom classifier 로 두번째 모델 생성

In [45]:
custom_model =CustomBERTModel(model = plm ,num_labels=2).to(device)

In [46]:
show_trainable_layer(custom_model)

classifier.1.weight
classifier.1.bias
classifier.3.weight
classifier.3.bias


## 7.2. 모델 훈련

In [47]:
num_epochs = 2

optimizer, lr_scheduler, num_training_steps = create_optimizer_scheduler(num_epochs, custom_model, train_dataloader)

14956




In [48]:
progress_bar_train = tqdm(range(num_training_steps))
progress_bar_eval = tqdm(range(num_epochs * len(eval_dataloader)))

custom_model_02 = train_loop(num_epochs, custom_model, train_dataloader, progress_bar_train, \
               eval_dataloader, progress_bar_eval, metric, optimizer, lr_scheduler)      

  0%|          | 0/14956 [00:00<?, ?it/s]

  0%|          | 0/1870 [00:00<?, ?it/s]

{'accuracy': 0.663501721774598}
{'accuracy': 0.6717261208251145}


## 7.3. 테스트 데이터로 평가

In [49]:
evaL_model(custom_model_02, test_dataloader, metric)    

{'accuracy': 0.6708139348209986}


## 7.4. 두번째 모델에서 Classifier 추출

In [50]:
classifier_02 = ClassifierModel(base_model = custom_model_02 ,num_labels=2).to(device)
show_module(classifier_02)

name : classifier


## 7.6. 이진 분류기로 추론

In [51]:
output_list = inference_classifier(classifier_02, plm_vector, test_dataloader)

{'accuracy': 0.6708139348209986}


# 8. 첫번째, 두번째의 모델을 한개의 모델로 통합
- Classifier_01 , Classifier_02 를 한개의 모델 안으로 포함 시킴 (Combine_Classifier_02)

In [52]:
class CombineClassifier(nn.Module):
    def __init__(self, base_classifier, add_classifier): 
        super(CombineClassifier,self).__init__() 

        # self.dropout = nn.Dropout(0.1)     
        
        self.base_classifier = base_classifier
        self.add_classifier = add_classifier

    def forward(self, cls_vector=None, labels=None):

        #Add custom layers
        #print("cls_vector shape: ", cls_vector.shape)
        x = cls_vector
        base_logits = self.base_classifier(x) 
        add_logits = self.add_classifier(x)


        return base_logits, add_logits


Combine_Classifier_02 = CombineClassifier(base_classifier = classifier_01 ,add_classifier = classifier_02).to(device)
show_module(Combine_Classifier_02)

name : base_classifier
name : add_classifier


In [53]:
print(Combine_Classifier_02)

CombineClassifier(
  (base_classifier): ClassifierModel(
    (classifier): Sequential(
      (0): Dropout(p=0.1, inplace=False)
      (1): Linear(in_features=256, out_features=128, bias=True)
      (2): Dropout(p=0.1, inplace=False)
      (3): Linear(in_features=128, out_features=2, bias=True)
    )
  )
  (add_classifier): ClassifierModel(
    (classifier): Sequential(
      (0): Dropout(p=0.1, inplace=False)
      (1): Linear(in_features=256, out_features=128, bias=True)
      (2): Dropout(p=0.1, inplace=False)
      (3): Linear(in_features=128, out_features=2, bias=True)
    )
  )
)


복수개의 Classifier 가 있는 Combine_Classifier 모델의 추론을 및 평가를 하는 함수 정의

In [54]:
def inference_classifier2(model, plm_vector, test_dataloader, test_batch_size, verbose=False):
    def get_depth(l):
        if isinstance(l, list):
            return 1 + max(get_depth(item) for item in l)
        elif isinstance(l, tuple):
            return 1 + max(get_depth(item) for item in l)

        else:
            return 0

    def unflatten_tuple(t, depth):
        e_list = []
        while True:
            if depth ==0:
                e_list.append(x)
                break
            x, y = t
            e_list.append(y)

            t = x
            #print("x: ", x)

            depth -= 1

        e_list.reverse()

        return e_list

    def get_num_model():
        model.eval()

        test_batch_num = len(test_dataloader)
        total_correct, correct = 0 , 0

        output_list = []
        for batch, reference in zip(plm_vector, test_dataloader):
            # print(reference['labels'])
            batch = batch[0].to(device)
            # batch = {k: v.to(device) for k, v in batch.items()}
            with torch.no_grad():
                probs = model(batch)
                # print("outputs: ", probs)   
                depth = get_depth(probs)    
                probs_list = unflatten_tuple(probs, depth)            
            break
        return len(probs_list), depth
    
    def eval_model():
        #############################
        # 정확도 계산 위한 변수 정의
        #############################        
        test_batch_num = len(test_dataloader) # 총 배치 숫자
        num_models , depth = get_num_model() # 총 모델안의 분류기 수
        print("# of Moddels: ", num_models)
        
        total_correct = np.zeros((num_models,1)) # 통계를 내기 위해 각 모델마다 할당
        
        correct = 0 
        output_list = []

        model.eval()

        for batch, reference in zip(plm_vector, test_dataloader):
            # print(reference['labels'])
            # batch = batch[0].to(device)
            # batch = {k: v.to(device) for k, v in batch.items()}
            with torch.no_grad():
                probs = model(batch)
                # print("outputs: ", probs)   
                #depth = get_depth(probs)    
                probs_list = unflatten_tuple(probs, depth)            
                # print("probs_list: ", probs_list)

                ground_truth = reference["labels"].to(device)
                
                if verbose:
                    print("Ground_Truth: \n", ground_truth, "\n")            
                # print("outputs: ", outputs.shape)   

                # 각 모델 마다 correct 수를 구함.
                for i, pred in enumerate(probs_list):

                    correct += (pred.argmax(1) == ground_truth).type(torch.float).sum().item()
                    total_correct[i] +=correct                
                    correct /= test_batch_size

                    correct = 0

                    if verbose:
                        print(f"From model_0{i+1} - Predicted Label:")                
                        print(pred.argmax(1))

                        print(f"From model_0{i+1} Accuracy: {(100*correct):>0.2f}% \n")


        # 전체 배치에 대한 모델 마다 정확도 구함.
        num_total_payload = test_batch_num * test_batch_size                
        for i in range(num_models):
            total_correct[i] /= num_total_payload    
            #print(" total_correct[i] : ",         total_correct[i])
            print(f"From model_0{i+1} Accuracy: {(100*total_correct[i][0]):>0.2f}% \n")    
    
    
    eval_model()
    

In [55]:
output_list = inference_classifier2(Combine_Classifier_02, plm_vector, test_dataloader, test_batch_size)

# of Moddels:  2
From model_01 Accuracy: 65.72% 

From model_02 Accuracy: 67.05% 



# 9. 세번째 모델 생성 및 통합 추론

## 9.1. 세번째 모델 생성

In [56]:
num_epochs = 2
# 모델 훈련
custom_model =CustomBERTModel(model = plm ,num_labels=2).to(device)
optimizer, lr_scheduler, num_training_steps = create_optimizer_scheduler(num_epochs, custom_model, train_dataloader)
progress_bar_train = tqdm(range(num_training_steps))
progress_bar_eval = tqdm(range(num_epochs * len(eval_dataloader)))


def create_bert_model(num_epochs, custom_model):
    print("eval dataset accuracy in training loop")
    custom_model = train_loop(num_epochs, custom_model, train_dataloader, progress_bar_train, \
                   eval_dataloader, progress_bar_eval, metric, optimizer, lr_scheduler)      

    # 모델 평가
    print("inference accuracy for plm plus classifer")    
    evaL_model(custom_model, test_dataloader, metric)    

    # Classifier 추출
    classifier = ClassifierModel(base_model = custom_model ,num_labels=2).to(device)

    # Classifier 모델 구조 확인
    print("classifier architecture: ")        
    show_module(classifier)

    # plm vector 추출
    plm_vector = inference_plm(PL_Model, test_dataloader)
    # print("batch size: " , len(plm_vector))
    # print("one batch shape: " , plm_vector[0][0].shape)

    # Classifier 로 추론
    print("inference accuracy for classifer")
    output_list = inference_classifier(classifier, plm_vector, test_dataloader)
    
    return classifier

    
classifier_03 = create_bert_model(num_epochs, custom_model)


14956




  0%|          | 0/14956 [00:00<?, ?it/s]

  0%|          | 0/1870 [00:00<?, ?it/s]

eval dataset accuracy in training loop
{'accuracy': 0.6650061850155461}
{'accuracy': 0.6750359399552004}
inference accuracy for plm plus classifer
{'accuracy': 0.6743257344678119}
classifier architecture: 
name : classifier
inference accuracy for classifer
{'accuracy': 0.6743257344678119}


## 9.2. 세 번째 모델(classifier_03) 을 기존의 모델 (classifier_01, classifier_02) 로 병합

In [57]:
Combine_Classifier_03 = CombineClassifier(base_classifier=Combine_Classifier_02 ,
                                          add_classifier=classifier_03).to(device)
show_module(Combine_Classifier_03)

name : base_classifier
name : add_classifier


In [58]:
print(Combine_Classifier_03)

CombineClassifier(
  (base_classifier): CombineClassifier(
    (base_classifier): ClassifierModel(
      (classifier): Sequential(
        (0): Dropout(p=0.1, inplace=False)
        (1): Linear(in_features=256, out_features=128, bias=True)
        (2): Dropout(p=0.1, inplace=False)
        (3): Linear(in_features=128, out_features=2, bias=True)
      )
    )
    (add_classifier): ClassifierModel(
      (classifier): Sequential(
        (0): Dropout(p=0.1, inplace=False)
        (1): Linear(in_features=256, out_features=128, bias=True)
        (2): Dropout(p=0.1, inplace=False)
        (3): Linear(in_features=128, out_features=2, bias=True)
      )
    )
  )
  (add_classifier): ClassifierModel(
    (classifier): Sequential(
      (0): Dropout(p=0.1, inplace=False)
      (1): Linear(in_features=256, out_features=128, bias=True)
      (2): Dropout(p=0.1, inplace=False)
      (3): Linear(in_features=128, out_features=2, bias=True)
    )
  )
)


## 9.3. 통합 모델 (classifier_01, classifier_02, classifier_03) 을 추론

In [59]:
output_list = inference_classifier2(Combine_Classifier_03, plm_vector, test_dataloader, test_batch_size, verbose=False)

# of Moddels:  3
From model_01 Accuracy: 65.72% 

From model_02 Accuracy: 67.05% 

From model_03 Accuracy: 67.40% 



# 10. 4번째 모델 생성 및 통합 모델 (classifier_01, classifier_02, classifier_03, classifier_04) 을 추론

In [60]:
print("(1) Create Bert MOdel (plm + classifier)")

num_epochs = 4
custom_model =CustomBERTModel(model = plm ,num_labels=2).to(device)
optimizer, lr_scheduler, num_training_steps = create_optimizer_scheduler(num_epochs, custom_model, train_dataloader)
progress_bar_train = tqdm(range(num_training_steps))
progress_bar_eval = tqdm(range(num_epochs * len(eval_dataloader)))
classifier_04 = create_bert_model(num_epochs, custom_model)

print("\n(2) Create a group of four classifiers")
Combine_Classifier_04 = CombineClassifier(base_classifier=Combine_Classifier_03 ,
                                          add_classifier=classifier_04).to(device)
print("\n(3) Look at the architecture")
show_module(Combine_Classifier_04)
print(Combine_Classifier_04)
print("\n(4) Inference the group of 4 classifier")
output_list = inference_classifier2(Combine_Classifier_04, plm_vector, test_dataloader, test_batch_size, verbose=False)

(1) Create Bert MOdel (plm + classifier)
29912




  0%|          | 0/29912 [00:00<?, ?it/s]

  0%|          | 0/3740 [00:00<?, ?it/s]

eval dataset accuracy in training loop
{'accuracy': 0.6545418073618401}
{'accuracy': 0.6624653137641671}
{'accuracy': 0.6824245260940791}
{'accuracy': 0.6810203604025274}
inference accuracy for plm plus classifer
{'accuracy': 0.6813493337614385}
classifier architecture: 
name : classifier
inference accuracy for classifer
{'accuracy': 0.6813493337614385}

(2) Create a group of four classifiers

(3) Look at the architecture
name : base_classifier
name : add_classifier
CombineClassifier(
  (base_classifier): CombineClassifier(
    (base_classifier): CombineClassifier(
      (base_classifier): ClassifierModel(
        (classifier): Sequential(
          (0): Dropout(p=0.1, inplace=False)
          (1): Linear(in_features=256, out_features=128, bias=True)
          (2): Dropout(p=0.1, inplace=False)
          (3): Linear(in_features=128, out_features=2, bias=True)
        )
      )
      (add_classifier): ClassifierModel(
        (classifier): Sequential(
          (0): Dropout(p=0.1, inpla

# E. 커널 리스타팅

In [61]:
import IPython

IPython.Application.instance().kernel.do_shutdown(True)

{'status': 'ok', 'restart': True}