## [0] Import

---------------------

In [1]:
# !pip install transformers datasets

In [1]:
import pandas as pd
import datetime
import time
# Model Save & Load 
import os
# GPU Reset
from numba import cuda 

import torch
import torch.nn as nn
import torch.optim as optim

from torch import functional as F
# from torchsummary import summary

# 모델, Tokenizer Load
from transformers import AutoModel, AutoTokenizer, LEDModel

# Dataset: load summarize_from_feedback from Hugging Face
from datasets import load_dataset


# Print the current wall-clock time, i.e. the moment this cell is executed.
print("This code is written at " + str(datetime.datetime.now()))

This code is written at 2023-06-10 11:35:35.356879


#### GPU Reset & Setting device

In [2]:
def GPU_reset():
    """Reset the current CUDA device (when one exists) and pick the torch device.

    Returns:
        torch.device: ``cuda`` when ``torch.cuda.is_available()`` is true,
        otherwise ``cpu``. The chosen device is also printed.

    NOTE(review): the reset goes through numba's handle for the current
    device; it is meant to run before any torch CUDA tensors or models are
    created in this process -- confirm that ordering is kept if this cell is
    re-run later in the session.
    """
    if torch.cuda.is_available():
        # `reset` is a method: it must be *called*, a bare attribute access
        # does nothing.
        cuda.get_current_device().reset()
        device = torch.device('cuda')
    else:
        # No GPU: do not touch the numba CUDA API at all, just fall back.
        device = torch.device('cpu')
    print(device)

    return device

In [3]:
# Select the compute device once; later cells move the model and the batches onto it.
device = GPU_reset()

cuda


In [4]:
!nvidia-smi

Sat Jun 10 11:35:35 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.105.01   Driver Version: 515.105.01   CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  On   | 00000000:1D:00.0 Off |                  N/A |
| 47%   51C    P2    95W / 350W |    505MiB / 24576MiB |      3%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

## [*] Hyper Parameter Setting

-------------------

In [16]:
# Number of (good, bad) text pairs per batch; used by both DataLoaders.
BATCH_SIZE = 2
# Number of passes over the training loader; handed to Reward_Trainer.
EPOCH = 1
# Learning rate; handed to Reward_Trainer, which passes it to AdamW.
LEARNING_RATE = 1e-5

## [1] Tokenizer
- Longformer의 Tokenizer 정의
- Global Attention Mask 함수 정의

-----------------------------------

In [6]:
# Load the tokenizer of the same checkpoint that RewardModel uses by default.
tokenizer = AutoTokenizer.from_pretrained("allenai/led-large-16384-arxiv")

In [7]:
def generate_global_attention_mask(tokenizer, input_ids):
    """Build a global-attention mask that marks the BOS and EOS tokens.

    Args:
        tokenizer: object exposing ``bos_token_id`` and ``eos_token_id``.
        input_ids: integer tensor of token ids (any shape).

    Returns:
        Tensor with the same shape, dtype and device as ``input_ids``:
        1 where the token id equals the BOS or EOS id, 0 everywhere else.
    """
    is_special = (input_ids == tokenizer.bos_token_id) | (input_ids == tokenizer.eos_token_id)
    mask = torch.zeros_like(input_ids)
    # Boolean-mask assignment; no need to materialise index tuples first.
    mask[is_special] = 1
    return mask

## [2] DataLoad
- 데이터 Load 
- 데이터 전처리
- 데이터 Loader

---------------------------------

### [2.1] Data Preprocessing

- Human Feedback 데이터 Dictionary를 Dataframe으로 변환
- 알맞은 문장 추출
- DataLoader 생성 

#### Text DataFrame 생성

In [8]:
class Data_Preprocessing():
    """Preprocessing for the Hugging Face ``openai/summarize_from_feedback`` dataset.

    Loads the ``comparisons`` configuration, converts the ``train`` and
    ``validation`` splits to DataFrames, and produces, for every comparison,
    the original post concatenated with the preferred summary and with the
    rejected summary.
    """

    def __init__(self):
        # Download (or read from the local cache) the comparison data.
        self.data_feedback = load_dataset("openai/summarize_from_feedback", 'comparisons')

        # Keep each split as a DataFrame.
        self.df_train = pd.DataFrame(self.data_feedback['train'])
        self.df_valid = pd.DataFrame(self.data_feedback['validation'])

    def Data_cleaning(self, df_feedback, separator=""):
        """Build the "post + good summary" / "post + bad summary" columns.

        Args:
            df_feedback: DataFrame with the columns ``info`` (mapping with a
                ``post`` entry), ``summaries`` (sequence of two mappings with
                a ``text`` entry) and ``choice``. ``choice == 0`` is treated
                as "the first summary was preferred"; any other value as
                "the second summary was preferred".
            separator: text inserted between the post and the summary. The
                default ``""`` concatenates them directly.

        Returns:
            A new DataFrame, indexed like ``df_feedback``, with the columns
            ``original_with_good_sum`` and ``original_with_bad_sum``.
            ``df_feedback`` itself is not modified.
        """
        posts = [info['post'] for info in df_feedback['info']]
        summary_pairs = [(pair[0]['text'], pair[1]['text']) for pair in df_feedback['summaries']]
        choices = list(df_feedback['choice'])

        good_texts = [first if choice == 0 else second
                      for (first, second), choice in zip(summary_pairs, choices)]
        bad_texts = [second if choice == 0 else first
                     for (first, second), choice in zip(summary_pairs, choices)]

        index = df_feedback.index
        original_text = pd.Series(posts, index=index, dtype=object)
        sum_good_text = pd.Series(good_texts, index=index, dtype=object)
        sum_bad_text = pd.Series(bad_texts, index=index, dtype=object)

        # NOTE(review): a row whose info['post'] is None would not produce a
        # valid string here -- confirm every split always carries a post.
        return pd.DataFrame(
            {
                'original_with_good_sum': original_text + separator + sum_good_text,
                'original_with_bad_sum': original_text + separator + sum_bad_text,
            },
            index=index,
        )

    def data_complete_form(self, separator=""):
        """Return the cleaned ``(train, validation)`` DataFrames."""
        df_train = self.Data_cleaning(self.df_train, separator)
        df_valid = self.Data_cleaning(self.df_valid, separator)

        return df_train, df_valid

# Run the preprocessing: load the dataset and build the train / validation text-pair DataFrames.
df_train, df_valid = Data_Preprocessing().data_complete_form()

Found cached dataset summarize_from_feedback (/root/.cache/huggingface/datasets/openai___summarize_from_feedback/comparisons/0.0.0/483f970ceb55b926b0a087ef4f678ab1b089bc8174a107a452c6152e88af7ff0)


  0%|          | 0/2 [00:00<?, ?it/s]

### [2.2] DataLoader
- 원본 Text와 Summarize가 합쳐진 데이터 형식의 DataFrame을 DataLoader로 처리
- 입력: DataFrame
- Feature: `original_with_good_sum`, `original_with_bad_sum`
- 내용: 원본 텍스트 + 긍정 Summary, 원본 텍스트 + 부정 Summary

In [9]:
class Reward_Dataset(torch.utils.data.Dataset):
    """Dataset of (post + good summary, post + bad summary) text pairs.

    Args:
        df_text: DataFrame with the columns ``original_with_good_sum`` and
            ``original_with_bad_sum``. Its index may be arbitrary; items are
            always addressed by position.
    """

    def __init__(self, df_text):
        self.sum_good_text = df_text['original_with_good_sum']
        self.sum_bad_text = df_text['original_with_bad_sum']

        print(f"My_dataset __init__ received : {self.sum_good_text.shape}, {self.sum_bad_text.shape} ")
        # Only peek at the first element when there is one.
        if len(self.sum_good_text) > 0:
            print(f"Data Type : {type(self.sum_good_text.iloc[0])}, {type(self.sum_bad_text.iloc[0])}")

    def __getitem__(self, index):
        """Return the ``index``-th pair as ``(good_text, bad_text)``."""
        # Positional access: a plain `series[index]` is a label lookup and
        # breaks as soon as the DataFrame index is not 0..n-1.
        sum_good_text = self.sum_good_text.iloc[index]
        sum_bad_text = self.sum_bad_text.iloc[index]

        return sum_good_text, sum_bad_text

    def __len__(self):
        """Number of text pairs."""
        return len(self.sum_good_text)


In [10]:
# Wrap the train / validation DataFrames in Reward_Dataset and batch them.
# Both loaders keep the DataFrame order (shuffle=False) and keep the last,
# possibly smaller, batch (drop_last=False).
print('====================================================================')
print('')
print("TRAIN LOADER")
train_loader = torch.utils.data.DataLoader(Reward_Dataset(df_train), batch_size=BATCH_SIZE, shuffle=False, drop_last = False)
print('')
print("====================================================================")
print('')
print("VALID LOADER")
valid_loader = torch.utils.data.DataLoader(Reward_Dataset(df_valid), batch_size=BATCH_SIZE, shuffle=False, drop_last = False)
print('')
print("====================================================================")

# test_loader = torch.utils.data.DataLoader(Reward_Dataset(df_feedback), batch_size=batch_size, shuffle=False, drop_last = False)


TRAIN LOADER
My_dataset __init__ received : (92858,), (92858,) 
Data Type : <class 'str'>, <class 'str'>


VALID LOADER
My_dataset __init__ received : (86086,), (86086,) 
Data Type : <class 'str'>, <class 'str'>



## [3] Reward Model
- Reward model : LED encoder + Linear Layer
- Input : (token, global_attention_mask) type: tensor
-------------------------

In [11]:
class RewardModel(nn.Module):
    """LED encoder followed by a small MLP head that outputs one scalar per sequence.

    Args:
        model: name or path of the LED checkpoint whose encoder is used.
        head_layer_size: width of the hidden layer of the scoring head.
    """

    def __init__(self, model="allenai/led-large-16384-arxiv", head_layer_size=32):
        super(RewardModel, self).__init__()
        self.led_encoder = LEDModel.from_pretrained(model).get_encoder()
        self._encoder_output_size = self.led_encoder.layernorm_embedding.weight.shape[0]
        # Id of the padding token; used to locate the last real token of each
        # sequence. A plain attribute, so it is not part of the state dict.
        self._pad_token_id = getattr(self.led_encoder.config, "pad_token_id", None)
        self.head = nn.Sequential(
            nn.Linear(self._encoder_output_size, head_layer_size, bias=False),
            nn.ReLU(),
            nn.Linear(head_layer_size, 1, bias=False)
        )

    def forward(self, input_ids, global_attention_mask, attention_mask=None):
        """Score each sequence of the batch.

        Args:
            input_ids: ``(batch, seq_len)`` integer tensor of token ids.
            global_attention_mask: tensor shaped like ``input_ids``; non-zero
                entries request global attention.
            attention_mask: optional tensor shaped like ``input_ids`` with 1
                for real tokens and 0 for padding. When omitted it is derived
                from ``input_ids`` using the encoder's padding token id.

        Returns:
            Tensor of shape ``(batch,)`` with one reward per sequence.
        """
        if attention_mask is None:
            if self._pad_token_id is None:
                attention_mask = torch.ones_like(input_ids)
            else:
                attention_mask = (input_ids != self._pad_token_id).to(input_ids.dtype)

        hidden_state = self.led_encoder(
            input_ids,
            attention_mask=attention_mask,
            global_attention_mask=global_attention_mask,
        ).last_hidden_state

        # Pool at the last *real* token of every sequence. Taking position -1
        # would read a padding position for every sequence shorter than the
        # longest one in the batch.
        positions = torch.arange(hidden_state.size(1), device=hidden_state.device)
        token_positions = attention_mask.to(device=positions.device, dtype=positions.dtype) * positions
        last_token_index = token_positions.argmax(dim=1)
        batch_index = torch.arange(hidden_state.size(0), device=hidden_state.device)
        pooled = hidden_state[batch_index, last_token_index]

        # Drop only the trailing feature dimension so that a batch of one
        # still yields a tensor of shape (1,).
        return self.head(pooled).squeeze(-1)

In [12]:
# Build the reward model with its default checkpoint and move it to the selected device.
reward_model= RewardModel()
reward_model.to(device)

Some weights of the model checkpoint at allenai/led-large-16384-arxiv were not used when initializing LEDModel: ['lm_head.weight', 'final_logits_bias']
- This IS expected if you are initializing LEDModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LEDModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


RewardModel(
  (led_encoder): LEDEncoder(
    (embed_tokens): Embedding(50265, 1024, padding_idx=1)
    (embed_positions): LEDLearnedPositionalEmbedding(16384, 1024)
    (layers): ModuleList(
      (0): LEDEncoderLayer(
        (self_attn): LEDEncoderAttention(
          (longformer_self_attn): LEDEncoderSelfAttention(
            (query): Linear(in_features=1024, out_features=1024, bias=True)
            (key): Linear(in_features=1024, out_features=1024, bias=True)
            (value): Linear(in_features=1024, out_features=1024, bias=True)
            (query_global): Linear(in_features=1024, out_features=1024, bias=True)
            (key_global): Linear(in_features=1024, out_features=1024, bias=True)
            (value_global): Linear(in_features=1024, out_features=1024, bias=True)
          )
          (output): Linear(in_features=1024, out_features=1024, bias=True)
        )
        (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (activation_fn

## [4] Reward model Training
- Input
  - Original + Summary_target
  - Original + Summary_predict 

- Output
  - Scalar

- Loss 
  - Log Sigmoid


In [13]:
class Reward_Trainer:
    """Trains the reward model on (preferred, rejected) text pairs.

    For every pair the model scores both texts and is optimised with the
    pairwise loss ``-logsigmoid(good_reward - bad_reward)``.

    Args:
        _epoch: number of passes over the training loader.
        _reward_model: model called as
            ``model(input_ids=..., global_attention_mask=...)``.
        _tokenizer: tokenizer used to encode the raw text batches.
        _lr: learning rate for AdamW.
        _train_loader: loader yielding ``(good_texts, bad_texts)`` batches.
        _valid_loader: loader of the same form, used for validation.
    """

    def __init__(self, _epoch= EPOCH, _reward_model = reward_model, _tokenizer = tokenizer, _lr = LEARNING_RATE, _train_loader = train_loader, _valid_loader = valid_loader):

        # Training-related state
        self.reward_model = _reward_model
        self.tokenizer = _tokenizer

        self.epoch = _epoch

        self.optimizer = optim.AdamW(self.reward_model.parameters(), lr=_lr)

        self.train_loader = _train_loader
        self.valid_loader = _valid_loader

    def loss(self, good_reward, bad_reward):
        """Pairwise loss: ``-logsigmoid(good_reward - bad_reward)``, element-wise."""
        return -nn.functional.logsigmoid(good_reward - bad_reward)

    def save_model_info(self, _model, _optimizer, _mean_loss=None, _valid_loss=None, _version="ver_1"):
        """Save model / optimizer state and the loss history.

        The checkpoint is written to ``./reward_model/reward_model_{_version}.pth``;
        the directory is created when missing.
        """
        os.makedirs("./reward_model", exist_ok=True)

        mean_loss = [] if _mean_loss is None else _mean_loss
        valid_loss = [] if _valid_loss is None else _valid_loss

        torch.save({'model_state_dict': _model.state_dict(),
                    'optimizer_state_dict': _optimizer.state_dict(),
                    'record_list' : {'mean_loss': mean_loss, 'valid_loss': valid_loss},
                    }, f"./reward_model/reward_model_{_version}.pth")  #reward_model_ver_1

        print(f"model_saved : reward_model_{_version}")

    def _pair_rewards(self, good_sum, bad_sum, device):
        """Encode one batch of text pairs and return ``(good_reward, bad_reward)``."""
        rewards = []
        for texts in (good_sum, bad_sum):
            token = self.tokenizer.batch_encode_plus(texts, padding=True, return_tensors='pt').input_ids
            global_mask = generate_global_attention_mask(self.tokenizer, token)
            rewards.append(self.reward_model(input_ids=token.to(device),
                                             global_attention_mask=global_mask.to(device)))
        return rewards[0], rewards[1]

    def _mean_valid_loss(self, device, max_batches):
        """Mean loss over the first ``max_batches`` validation batches (NaN if there are none)."""
        model = self.reward_model
        batch_losses = []

        model.eval()
        try:
            with torch.no_grad():
                for valid_index, (valid_good_sum, valid_bad_sum) in enumerate(self.valid_loader):
                    good_reward, bad_reward = self._pair_rewards(valid_good_sum, valid_bad_sum, device)
                    # Store plain floats so no device tensors end up in the history.
                    batch_losses.append(self.loss(good_reward, bad_reward).mean().item())

                    if valid_index + 1 >= max_batches:
                        break
        finally:
            # Very important: always go back to training mode, even when
            # validation is interrupted.
            model.train()

        if not batch_losses:
            return float('nan')
        return sum(batch_losses) / len(batch_losses)

    def train(self, log_every=500, valid_batches=30, save_every=10000):
        """Run the training loop.

        Args:
            log_every: every this many training batches, run validation and
                print / record the mean losses.
            valid_batches: maximum number of validation batches per validation run.
            save_every: every this many training batches, write a checkpoint
                named ``ver_{batch_number // save_every}``.

        Returns:
            ``(model, record_train_loss, record_valid_loss)`` where the two
            records are lists of floats, one entry per logging step.
        """
        train_loss_list = []

        record_train_loss = []
        record_valid_loss = []

        optimizer = self.optimizer
        train_loader = self.train_loader
        model = self.reward_model

        # Send batches to wherever the model lives.
        device = next(model.parameters()).device

        model.train()

        for _ in range(self.epoch):
            start_time = time.time()

            for index, (good_sum, bad_sum) in enumerate(train_loader):

                good_reward, bad_reward = self._pair_rewards(good_sum, bad_sum, device)

                # Log Sigmoid
                loss = self.loss(good_reward, bad_reward).mean()

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                end_time = time.time()
                train_loss_list.append(loss.item())

                if (index+1) % log_every == 0:
                    valid_loss_mean = self._mean_valid_loss(device, valid_batches)
                    train_loss_mean = sum(train_loss_list) / len(train_loss_list)

                    print("==================================================================================")
                    print(f"Batch {(index+1)}  ({((index+1)/len(train_loader))*100 :.3f} %) \t \
                            Train Loss : {train_loss_mean :.4f} \t \
                            Valid Loss : {valid_loss_mean :.4f} \t \
                            Elapsed Time: {(end_time - start_time) :.2f} sec")

                    # Start a fresh window for the next logging step.
                    train_loss_list = []
                    record_train_loss.append(train_loss_mean)
                    record_valid_loss.append(valid_loss_mean)

                if (index+1) % save_every == 0:
                    self.save_model_info(model, optimizer, record_train_loss, record_valid_loss, f"ver_{(index+1)//save_every}")

        return model, record_train_loss, record_valid_loss
        
    
    
    

In [14]:
# Build the trainer from the hyper-parameters, model, tokenizer and loaders defined above.
reward_trainer = Reward_Trainer(EPOCH, 
                                reward_model, 
                                tokenizer, 
                                LEARNING_RATE, 
                                train_loader, 
                                valid_loader)


In [15]:
# Run training; returns the trained model and the recorded train / validation loss histories.
reward_model, record_train_loss, record_valid_loss = reward_trainer.train()

Batch 20  (0.043 %) 	                             Train Loss : 0.6912 	                             Valid Loss : 0.6917 	                             Elapsed Time: 22.18 sec
Batch 40  (0.086 %) 	                             Train Loss : 0.6945 	                             Valid Loss : 0.6913 	                             Elapsed Time: 48.84 sec
Batch 60  (0.129 %) 	                             Train Loss : 0.6886 	                             Valid Loss : 0.6902 	                             Elapsed Time: 75.54 sec
Batch 80  (0.172 %) 	                             Train Loss : 0.6888 	                             Valid Loss : 0.6911 	                             Elapsed Time: 102.26 sec
Batch 100  (0.215 %) 	                             Train Loss : 0.6942 	                             Valid Loss : 0.6909 	                             Elapsed Time: 128.98 sec
Batch 120  (0.258 %) 	                             Train Loss : 0.6945 	                             Valid Loss : 0.6912 	     

KeyboardInterrupt: 

----------------------

## Load Model

In [48]:
def load_model_info(version, device):
    """Load a saved reward model checkpoint and return the model in eval mode.

    Args:
        version: version tag of the checkpoint; the file read is
            ``./reward_model/reward_model_{version}.pth``.
        device: device the checkpoint tensors are mapped to and the model is
            moved to.

    Returns:
        A ``RewardModel`` with the stored weights, on ``device``, in eval mode.

    Raises:
        FileNotFoundError: if the checkpoint file does not exist.
    """
    file_path = f"./reward_model/reward_model_{version}.pth"

    if not os.path.exists(file_path):
        # Stop here instead of printing and then failing inside torch.load.
        raise FileNotFoundError(f"FATAL ERROR : model path not exist ({file_path})")

    # map_location lets a checkpoint saved on one device be loaded on another
    # (e.g. saved on GPU, loaded on a CPU-only machine).
    model_info = torch.load(file_path, map_location=device)
    print(f"model_loaded : reward_model_{version}, You can cehck train loss and valid loss in dictionary form")

    model = RewardModel()
    model.load_state_dict(model_info['model_state_dict'])
    model.to(device)
    model.eval()

    return model

----
