In [None]:
!pip install datasets
!pip install peft
!pip install gdown

In [None]:
import os
import time

import gdown
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from datasets import Dataset
from peft import get_peft_model, LoraConfig, TaskType
from transformers import AutoTokenizer, DistilBertForSequenceClassification, get_scheduler, BertForSequenceClassification

In [None]:
# Google Drive URL the checkpoint file is downloaded from.
url = 'https://drive.google.com/uc?id=12MOGiCveDE8CTvtHKqmEhyJIXc3gEscd'

# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model_name = 'TwoStageDistilBERT_LoRA.pt'  # local file name the checkpoint is stored under
checkpoint = "distilbert/distilbert-base-uncased"
bert_checkpoint = 'skt/kobert-base-v1'

tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# Only download when the file is not already on disk, so re-running this cell
# does not fetch the checkpoint again.
if not os.path.exists(model_name):
  gdown.download(url, model_name, quiet = False)

# NOTE(security): torch.load unpickles the file, and the file comes from a
# remote URL. Only load checkpoints from a source you trust.
model_checkpoint = torch.load(model_name, map_location = device)

In [None]:
# Runtime configuration: target device, the two pretrained checkpoint names,
# and the tokenizer that belongs to the DistilBERT checkpoint.
# NOTE(review): the same four names are also assigned in the download cell.
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')

checkpoint = "distilbert/distilbert-base-uncased"
bert_checkpoint = 'skt/kobert-base-v1'

tokenizer = AutoTokenizer.from_pretrained(checkpoint)

In [None]:
class TwoStageDistilBERT_LoRA(nn.Module):
  """Two chained DistilBERT sequence classifiers with LoRA adapters.

  Stage 1 (``distilbert1``) classifies the token ids. Its last-layer hidden
  states are then passed to stage 2 (``distilbert2``) as ``inputs_embeds``.
  """

  def __init__(self, distilbert_checkpoint, num_labels_1stage = 2, num_labels_2stage = 3):
    """Build both stages from the same pretrained checkpoint.

    Args:
      distilbert_checkpoint: Name or path handed to ``from_pretrained``.
      num_labels_1stage: Number of classes of the stage-1 head.
      num_labels_2stage: Number of classes of the stage-2 head.
    """
    super(TwoStageDistilBERT_LoRA, self).__init__()

    # Stage 1. Hidden states are requested because forward() feeds the last
    # layer's hidden state into stage 2.
    self.distilbert1 = DistilBertForSequenceClassification.from_pretrained(distilbert_checkpoint,
                                                                           num_labels = num_labels_1stage, ignore_mismatched_sizes = True,
                                                                           output_hidden_states=True)

    lora_config1 = LoraConfig(task_type = TaskType.SEQ_CLS, r = 8, lora_alpha = 32, target_modules = ['q_lin', 'v_lin'], lora_dropout = 0.1 )
    self.distilbert1 = get_peft_model(self.distilbert1, lora_config1)

    # Stage 2.
    self.distilbert2 = DistilBertForSequenceClassification.from_pretrained(distilbert_checkpoint,
                                                                           num_labels = num_labels_2stage, ignore_mismatched_sizes = True)

    lora_config2 = LoraConfig(task_type = TaskType.SEQ_CLS, r = 8, lora_alpha = 32, target_modules = ['q_lin', 'v_lin'], lora_dropout = 0.1 )
    # NOTE(review): this wraps `distilbert1` a second time and never wraps
    # `distilbert2`, which looks like a copy-paste slip; the intended line is
    # probably `self.distilbert2 = get_peft_model(self.distilbert2, lora_config2)`.
    # It is deliberately left unchanged: a saved state_dict is loaded into this
    # class, and changing the wrapping would presumably change the parameter
    # names so that checkpoint no longer loads. Fix it together with the
    # training code and a re-exported checkpoint.
    self.distilbert1 = get_peft_model(self.distilbert1, lora_config2)


  def forward(self, input_ids,  attention_mask, labels1 = None, labels2 = None):
    """Run both stages.

    Args:
      input_ids: Token ids for stage 1.
      attention_mask: Attention mask, used by both stages.
      labels1: Optional stage-1 labels.
      labels2: Optional stage-2 labels.

    Returns:
      Tuple ``(total_loss, logits1, logits2)``. ``total_loss`` is the sum of
      the losses the stages returned, or ``None`` when neither stage returned
      a loss (no labels given).
    """
    output1 = self.distilbert1(input_ids = input_ids, attention_mask = attention_mask, labels = labels1)
    hidden1 = output1.hidden_states[-1] # last layer's hidden state
    logits1 = output1.logits

    output2 = self.distilbert2(inputs_embeds = hidden1, attention_mask = attention_mask, labels = labels2)
    logits2 = output2.logits

    # A stage's loss is None when it got no labels; adding it unconditionally
    # raised TypeError on label-free (inference) calls.
    total_loss = None
    for stage_loss in (output1.loss, output2.loss):
      if stage_loss is not None:
        total_loss = stage_loss if total_loss is None else total_loss + stage_loss

    return total_loss, logits1, logits2

In [None]:
def load_checkpoint(model, model_checkpoint):
  """Copy the weights stored in a checkpoint dict into ``model``.

  Args:
    model: Module whose ``load_state_dict`` is called.
    model_checkpoint: Mapping with a ``'model_state_dict'`` entry.

  Returns:
    The same ``model`` object, after loading.
  """
  state_dict = model_checkpoint['model_state_dict']
  model.load_state_dict(state_dict)
  print("Checkpoint loaded!")

  return model


# Build the two-stage DistilBERT model, then restore the downloaded weights into it.
model = load_checkpoint(
  TwoStageDistilBERT_LoRA(distilbert_checkpoint = checkpoint),
  model_checkpoint,
)

In [None]:
# Benchmark input: a one-element batch holding a single string of
# space-separated Korean keywords. It is passed to model_inference for both models.
text = ['서울대 캠퍼스 입학 교육 대학 교수 학생 공지 연구 대학원 서울대학교 지원 도서관 서비스 미디어 월 행정 캘린더 센터 학사 뉴스 프로그램 학습 수 제 인스타그램 성과 구지원 학술 사항 안내 생활 관악 소식 소개 기념 역사 맵 가을 일 년 단 부문 영상 모습 회 중앙 예술 메뉴 일반']

In [None]:
# https://seungseop.tistory.com/41

def model_inference(model, tokenizer, text, device = None):
  """Time one two-stage forward pass and return the elapsed milliseconds.

  The timed region covers moving the tokenized inputs to the device and the
  forward passes of ``model.distilbert1`` and ``model.distilbert2``.
  Tokenization itself is not timed.

  Args:
    model: Object exposing ``distilbert1`` and ``distilbert2`` sub-models.
    tokenizer: Callable that turns ``text`` into ``input_ids`` and
      ``attention_mask`` tensors.
    text: Input passed to the tokenizer.
    device: Device to run on. Defaults to CUDA when available, else CPU.

  Returns:
    Elapsed time in milliseconds (CUDA events on GPU, wall clock on CPU).
  """
  if device is None:
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
  else:
    device = torch.device(device)
  # CUDA events can only be recorded on a CUDA device; fall back to the host
  # clock otherwise so the function also works on CPU-only machines.
  use_cuda_timer = device.type == 'cuda'

  model.eval()
  model = model.to(device)

  # Inference only, so no gradients are needed.
  with torch.no_grad():
    # Tokenize the text.
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)

    if use_cuda_timer:
      start_event = torch.cuda.Event(enable_timing = True)
      end_event = torch.cuda.Event(enable_timing = True)
      start_event.record()
    else:
      start_time = time.perf_counter()

    input_ids = inputs['input_ids'].to(device)
    attention_mask = inputs['attention_mask'].to(device)

    # Stage 1, then stage 2 on the last layer's hidden state of stage 1.
    output1 = model.distilbert1(input_ids=input_ids, attention_mask=attention_mask)
    hidden1 = output1.hidden_states[-1]
    model.distilbert2(inputs_embeds=hidden1, attention_mask=attention_mask)

    if use_cuda_timer:
      end_event.record()
    else:
      end_time = time.perf_counter()

  if use_cuda_timer:
    # Wait for the recorded events to complete before reading the elapsed time.
    torch.cuda.synchronize()
    return start_event.elapsed_time(end_event)

  return (end_time - start_time) * 1000.0

In [None]:
# Time a single forward pass of the two-stage DistilBERT model (result in milliseconds).
distilbert_inference = model_inference(model, tokenizer, text)
print(f"Elapsed time on {device.type}: {distilbert_inference} milliseconds")

In [None]:
# Print the name of every sub-module of the BERT classifier, one per line
# (presumably to look up the layer names used as LoRA target_modules).
bert_module_names = [
  module_name
  for module_name, _ in BertForSequenceClassification.from_pretrained(
    bert_checkpoint,
    num_labels = 2,
    ignore_mismatched_sizes = True,
    output_hidden_states = True,
  ).named_modules()
]
print('\n'.join(bert_module_names))

In [None]:
class TwoStageBERT_LoRA(nn.Module):
  """Two chained BERT sequence classifiers, each with LoRA adapters.

  Stage 1 classifies the token ids. Its last-layer hidden states are then
  passed to stage 2 as ``inputs_embeds``. The attributes keep the names
  ``distilbert1`` / ``distilbert2`` because ``model_inference`` accesses the
  stages under those names.
  """

  def __init__(self, bert_checkpoint, num_labels_1stage = 2, num_labels_2stage = 3):
    """Build both stages from the same pretrained checkpoint.

    Args:
      bert_checkpoint: Name or path handed to ``from_pretrained``.
      num_labels_1stage: Number of classes of the stage-1 head.
      num_labels_2stage: Number of classes of the stage-2 head.
    """
    super(TwoStageBERT_LoRA, self).__init__()

    # Stage 1. Hidden states are requested because forward() feeds the last
    # layer's hidden state into stage 2.
    self.distilbert1 = BertForSequenceClassification.from_pretrained(bert_checkpoint,
                                                                           num_labels = num_labels_1stage, ignore_mismatched_sizes = True,
                                                                           output_hidden_states=True)

    lora_config1 = LoraConfig(task_type = TaskType.SEQ_CLS, r = 8, lora_alpha = 32, target_modules = ['query', 'value'], lora_dropout = 0.1 )
    self.distilbert1 = get_peft_model(self.distilbert1, lora_config1)

    # Stage 2.
    self.distilbert2 = BertForSequenceClassification.from_pretrained(bert_checkpoint,
                                                                           num_labels = num_labels_2stage, ignore_mismatched_sizes = True)

    lora_config2 = LoraConfig(task_type = TaskType.SEQ_CLS, r = 8, lora_alpha = 32, target_modules = ['query', 'value'], lora_dropout = 0.1 )
    # Wrap stage 2 with its own config. The previous code wrapped stage 1 a
    # second time here and left stage 2 without LoRA adapters.
    self.distilbert2 = get_peft_model(self.distilbert2, lora_config2)


  def forward(self, input_ids,  attention_mask, labels1 = None, labels2 = None):
    """Run both stages.

    Args:
      input_ids: Token ids for stage 1.
      attention_mask: Attention mask, used by both stages.
      labels1: Optional stage-1 labels.
      labels2: Optional stage-2 labels.

    Returns:
      Tuple ``(total_loss, logits1, logits2)``. ``total_loss`` is the sum of
      the losses the stages returned, or ``None`` when neither stage returned
      a loss (no labels given).
    """
    output1 = self.distilbert1(input_ids = input_ids, attention_mask = attention_mask, labels = labels1)
    hidden1 = output1.hidden_states[-1] # last layer's hidden state
    logits1 = output1.logits

    output2 = self.distilbert2(inputs_embeds = hidden1, attention_mask = attention_mask, labels = labels2)
    logits2 = output2.logits

    # A stage's loss is None when it got no labels; adding it unconditionally
    # raised TypeError on label-free (inference) calls.
    total_loss = None
    for stage_loss in (output1.loss, output2.loss):
      if stage_loss is not None:
        total_loss = stage_loss if total_loss is None else total_loss + stage_loss

    return total_loss, logits1, logits2

In [None]:
# Instantiate the two-stage BERT model from the pretrained `bert_checkpoint`.
# No saved state_dict is loaded into it in the visible cells; it is only
# passed to model_inference for timing.
bert_model = TwoStageBERT_LoRA(bert_checkpoint)

In [None]:
# `bert_checkpoint` is already assigned in the configuration cell, so it is
# not redefined here.
bert_tokenizer = AutoTokenizer.from_pretrained(bert_checkpoint)

# Time a single forward pass of the two-stage BERT model (result in milliseconds).
bert_inference = model_inference(bert_model, bert_tokenizer, text)
print(f"Elapsed time on {device.type}: {bert_inference} milliseconds")

In [None]:
# Single-run comparison: BERT time divided by DistilBERT time.
speedup = bert_inference / distilbert_inference
print(f'DistilBERT: {distilbert_inference:.3f}ms, BERT: {bert_inference:.3f}ms')
print(f'DistilBERT model is {speedup:.1f} times faster')

In [None]:
# Repeat both measurements 100 times.
# `a` collects the BERT timings, `b` the DistilBERT timings.
a, b = [], []
for _run in range(100):
  bert_inference = model_inference(bert_model, bert_tokenizer, text)
  distilbert_inference = model_inference(model, tokenizer, text)
  a += [bert_inference]
  b += [distilbert_inference]
  ratio = bert_inference / distilbert_inference
  print(f'DistilBERT: {distilbert_inference:.3f}ms, BERT: {bert_inference:.3f}ms')
  print(f'DistilBERT model is {ratio:.1f} times faster')

In [None]:
# `a` holds the BERT timings and `b` the DistilBERT timings.
# Means are taken over the actual number of samples; the loop collects 100
# runs, so dividing by a fixed 10 overstated both means tenfold.
print(sum(b) / len(b))  # mean DistilBERT time
print(sum(a) / len(a))  # mean BERT time

print(f'{sum(a) / sum(b):.3f}')  # BERT time as a multiple of DistilBERT time
print(f'{(sum(a) / sum(b) - 1.0) * 100 :.1f}')  # extra time BERT takes over DistilBERT, in percent
print(f'{(sum(b) / sum(a)) * 100 :.1f}')  # DistilBERT time as a percentage of BERT time
