# Deploy a Fine-tuned Korean ReRanker Model on AWS
 - 학습된 모델을 SageMaker를 이용하여 배포합니다.

## AutoReload

In [2]:
# Enable IPython's autoreload extension (mode 2) so edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## 1. Local inference

In [3]:
import os
import torch
import numpy as np
from transformers import AutoModelForSequenceClassification, AutoTokenizer

In [4]:
# Local working directory that receives everything synced from S3.
model_dir = "./model"

# S3 location of the trained model archive. To use a training checkpoint
# instead, point this at the checkpoint prefix, e.g.
#model_data_uri = "s3://sagemaker-us-east-1-419974056037/fine-tune-reranker-kr/training/checkpoints/g3_b1_gas32/checkpoint-10000/"
model_data_uri = "s3://sagemaker-us-east-1-419974056037/fine-tune-reranker-kr/training/model-output/huggingface-pytorch-training-2023-12-21-07-22-38-356/output/model.tar.gz"

# Reduce the URI to its containing prefix. str.replace leaves the string
# untouched when the archive name is absent, so no guard is required.
model_data_uri = model_data_uri.replace("model.tar.gz", "")

# Local path of the downloaded archive and the directory it is unpacked into.
model_tar_data, model_data = (
    os.path.join(model_dir, leaf) for leaf in ("model.tar.gz", "model_data")
)

### 1.1 Model source 선택
- `hf`: 허깅페이스에 등록된 모델
- `train`: SageMaker를 통해 학습한 모델
- `train_ckpt`: SageMaker를 통해 학습하는 과정에서 발생되는 checkpoint 모델

In [5]:
# Which model to load in the cells below.
# Accepted values: "hf", "train", "train_ckpt".
model_source = "train" #["hf", "train", "train_ckpt"]

In [6]:
if model_source != "hf":
    !rm -rf $model_dir
    !mkdir $model_dir
    !aws s3 sync $model_data_uri $model_dir
    if model_source == "train":
        !mkdir $model_data
        !tar -xvf $model_tar_data -C $model_data

download: s3://sagemaker-us-east-1-419974056037/fine-tune-reranker-kr/training/model-output/huggingface-pytorch-training-2023-12-21-07-22-38-356/output/model.tar.gz to model/model.tar.gz
tar: Ignoring unknown extended header keyword 'LIBARCHIVE.creationtime'
training_args.bin
tar: Ignoring unknown extended header keyword 'LIBARCHIVE.creationtime'
tokenizer_config.json
tar: Ignoring unknown extended header keyword 'LIBARCHIVE.creationtime'
special_tokens_map.json
tar: Ignoring unknown extended header keyword 'LIBARCHIVE.creationtime'
pytorch_model.bin
tar: Ignoring unknown extended header keyword 'LIBARCHIVE.creationtime'
checkpoint-11000/
tar: Ignoring unknown extended header keyword 'LIBARCHIVE.creationtime'
checkpoint-11000/rng_state_1.pth
tar: Ignoring unknown extended header keyword 'LIBARCHIVE.creationtime'
checkpoint-11000/scheduler.pt
tar: Ignoring unknown extended header keyword 'LIBARCHIVE.creationtime'
checkpoint-11000/rng_state_0.pth
tar: Ignoring unknown extended header key

In [7]:
# Resolve where to load the model from, based on the selected source.
if model_source == "hf":
    # Hugging Face Hub repo id (alternative: "BAAI/bge-reranker-large").
    model_path = "Dongjin-kr/ko-reranker"
elif model_source == "train":
    # Directory the model.tar.gz was unpacked into.
    model_path = model_data
elif model_source == "train_ckpt":
    # Directory the checkpoint files were synced into.
    model_path = model_dir
else:
    # Fail here with a clear message instead of a NameError on model_path below.
    raise ValueError(
        f"Unsupported model_source {model_source!r}; "
        "expected one of 'hf', 'train', 'train_ckpt'."
    )

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)

# Switch to evaluation mode for inference. Kept as the last expression so the
# cell still displays the model architecture.
model.eval()

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


XLMRobertaForSequenceClassification(
  (roberta): XLMRobertaModel(
    (embeddings): XLMRobertaEmbeddings(
      (word_embeddings): Embedding(250002, 1024, padding_idx=1)
      (position_embeddings): Embedding(514, 1024, padding_idx=1)
      (token_type_embeddings): Embedding(1, 1024)
      (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): XLMRobertaEncoder(
      (layer): ModuleList(
        (0-23): 24 x XLMRobertaLayer(
          (attention): XLMRobertaAttention(
            (self): XLMRobertaSelfAttention(
              (query): Linear(in_features=1024, out_features=1024, bias=True)
              (key): Linear(in_features=1024, out_features=1024, bias=True)
              (value): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): XLMRobertaSelfOutput(
              (dense): Linear(in_features=1024, out_fe

In [8]:
def exp_normalize(x):
    """Return the numerically stable softmax of ``x`` over all of its elements.

    The maximum is subtracted before exponentiating so that large scores do
    not overflow; the shift cancels out in the final ratio.

    Args:
        x: Array-like of scores (NumPy array, list or tuple).

    Returns:
        NumPy array with the same shape as ``x`` whose entries are
        non-negative and sum to 1. An empty input yields an empty array.
    """
    x = np.asarray(x)
    if x.size == 0:
        # max() of an empty array raises; there is nothing to normalize.
        # np.exp keeps the same result dtype as the normal path below.
        return np.exp(x)
    shifted = np.exp(x - x.max())
    return shifted / shifted.sum()

* [Tip] 스코어는 두 문장이 관련이 있을수록 1에 가까워진다.

In [9]:
# Two sentence pairs to score with the locally loaded model.
pairs = [
    ["나는 너를 싫어해", "나는 너를 사랑해"],
    ["나는 너를 좋아해", "너에 대한 나의 감정은 사랑 일 수도 있어"],
]

# Tokenize both pairs as one padded batch, truncated to 512 tokens.
inputs = tokenizer(pairs, max_length=512, truncation=True, padding=True, return_tensors='pt')

# Only the forward pass needs gradient tracking switched off.
with torch.no_grad():
    scores = model(**inputs, return_dict=True).logits

# Flatten the logits to one value per pair, then normalize them with exp_normalize.
scores = exp_normalize(scores.view(-1).float().numpy())
print(f'first: {scores[0]}, second: {scores[1]}')

first: 0.00018567717052064836, second: 0.9998143315315247


## 2. Cloud inference

### 2.1 Deploy model on AWS

In [10]:
import json
import boto3
import sagemaker
from sagemaker.huggingface import HuggingFaceModel

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


#### [확인] `0.setup.ipynb`에서 세팅한 custom container image를 활용합니다.

In [15]:
# URI of the custom serving image in ECR, passed as image_uri when the model is created.
# Replace with your own repository URI:
# ecr_repository_uri = "<your ecr repo uri>"
ecr_repository_uri = "419974056037.dkr.ecr.us-east-1.amazonaws.com/ko-reranker-serve" # example value from the author's account

#### Create model_data for deploy
> 이 단계는 약 3분 소요 됩니다.

In [16]:
#model_data_path = "s3://sagemaker-us-east-1-419974056037/fine-tune-reranker-kr/training/checkpoints/g3_b2_gas8/checkpoint-15000/"
model_data_path = "s3://sagemaker-us-east-1-419974056037/fine-tune-reranker-kr/training/model-output/huggingface-pytorch-training-2023-12-21-01-06-58-832/output/model.tar.gz"

if "model.tar.gz" not in model_data_path:
    model_tmp_dir = "./model"
    model_data_tmp_uri = "s3://sagemaker-us-east-1-419974056037/fine-tune-reranker-kr/training/model_data_for_depoly"

    !rm -rf $model_tmp_dir
    !mkdir $model_tmp_dir
    !aws s3 sync $model_data_path $model_tmp_dir
    !tar -zcvf ./model.tar.gz -C $model_tmp_dir .
    !aws s3 cp ./model.tar.gz $model_data_tmp_uri/model.tar.gz
    !rm -rf ./model.tar.gz

    model_data_path = os.path.join(model_data_tmp_uri, "model.tar.gz")

print (f'model_data_path: {model_data_path}')
print (f'ecr_repository_uri = {ecr_repository_uri}')

model_data_path: s3://sagemaker-us-east-1-419974056037/fine-tune-reranker-kr/training/model-output/huggingface-pytorch-training-2023-12-21-01-06-58-832/output/model.tar.gz
ecr_repository_uri = 419974056037.dkr.ecr.us-east-1.amazonaws.com/ko-reranker-serve


#### Deploy model on cloud

In [17]:
# Switch for the deployment cell: set to False to skip creating an endpoint.
# The name keeps its original spelling because the deployment cell reads `depoly`.
depoly = True

In [18]:
if depoly:

    print (f'model deloy from : {model_data_path}')

    # Inside SageMaker the execution role is available directly; elsewhere,
    # fall back to looking up a role named 'sagemaker_execution_role' via IAM.
    try:
        role = sagemaker.get_execution_role()
    except ValueError:
        iam = boto3.client('iam')
        role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

    # Hub Model configuration. https://huggingface.co/models
    # Not passed to the model below (env=hub is commented out); kept for reference.
    hub = {
        #'HF_MODEL_ID':'BAAI/bge-reranker-large',
        'HF_MODEL_ID':'Dongjin-kr/ko-reranker',
        'HF_TASK':'text-classification'
    }

    # create Hugging Face Model Class
    # The serving container comes from image_uri, so the framework version
    # arguments are left commented out.
    huggingface_model = HuggingFaceModel(
        model_data=model_data_path,
        #transformers_version='4.28.1',
        #pytorch_version='2.0.0',
        #py_version='py310',
        #env=hub,
        role=role,
        image_uri=ecr_repository_uri
    )

    # deploy model to SageMaker Inference (blocks until the endpoint is ready)
    predictor = huggingface_model.deploy(
        initial_instance_count=1,
        instance_type='ml.g5.xlarge',
        wait=True
    )

    print(f'Accept: {predictor.accept}')
    print(f'ContentType: {predictor.content_type}')
    # `predictor.endpoint` was renamed in sagemaker>=2; use `endpoint_name`.
    print(f'Endpoint: {predictor.endpoint_name}')

model deloy from : s3://sagemaker-us-east-1-419974056037/fine-tune-reranker-kr/training/model-output/huggingface-pytorch-training-2023-12-21-01-06-58-832/output/model.tar.gz
----------!

The endpoint attribute has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


Accept: ('application/json',)
ContentType: application/json
Endpoint: ko-reranker-serve-2023-12-27-02-01-53-780


### 2.2 Invocation

In [19]:
# Low-level client used to invoke the deployed endpoint.
runtime_client = boto3.Session().client(service_name='sagemaker-runtime')
print(f'runtime_client: {runtime_client}')

runtime_client: <botocore.client.SageMakerRuntime object at 0x7f74a2e921d0>


#### [중요] 아래 endpoint_name 변수를 Deploy model on cloud 의 실행 결과인 Endpoint 값으로 바꾸어 주세요.

In [21]:
# Placeholder: replace with the endpoint name printed by the deployment cell
# before running the invocation cells.
# example: endpoint_name = "ko-reranker-serve-2023-12-27-02-01-53-780"  
endpoint_name = "<Type Endpoint Name>"  
# MIME type sent as the Accept value of invoke_endpoint; despite the name,
# this is a plain string, not a deserializer object.
deserializer = "application/json"

In [22]:
# JSON request body: one {"text", "text_pair"} object per sentence pair.
payload = json.dumps({
    "inputs": [
        dict(text="나는 너를 싫어해", text_pair="나는 너를 사랑해"),
        dict(text="나는 너를 좋아해", text_pair="너에 대한 나의 감정은 사랑 일 수도 있어"),
    ]
})

In [23]:
%%time
response = runtime_client.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType="application/json",
    Accept=deserializer,
    Body=payload
)
## deserialization
out = json.loads(response['Body'].read().decode()) ## for json
print (f'Response: {out}')

Response: [{'label': 'LABEL_0', 'score': 0.0009399178670719266}, {'label': 'LABEL_0', 'score': 0.9089847803115845}]
CPU times: user 11.5 ms, sys: 641 µs, total: 12.1 ms
Wall time: 2.89 s


## 3. Comparison before/after finetuning

In [24]:
import numpy as np

In [25]:
# One question paired with two candidate passages.
pairs = [
    [
        "섬유소 용해제 또는 혈전 용해제의 작용 메커니즘은 무엇입니까?",
        "Bailliãre의 임상 혈액학. 6 혈전 용해제의 작용 메커니즘. 6 혈전 용해제의 작용 메커니즘 JEFFREY I. WEITZ 피브린은 지혈, 염증 또는 조직 복구 과정에서 형성되는 일시적인 역할을 하며 정상적인 조직 기능 및 구조를 복원하려면 분해되어야 합니다.",
    ],
    [
        "섬유소 용해제 또는 혈전 용해제의 작용 메커니즘은 무엇입니까?",
        "단백질 분해 효소는 점막의 팽창 감소, 모세 혈관 투과성 감소, 혈전 형성 섬유소 침전물 및 미세 혈전 용해 등 다양한 메커니즘에 의해 염증 과정을 조절합니다.효소는 혈액의 점도 (두께) 를 줄임으로써 혈액 순환을 개선합니다. 브로멜라인, 파파인, 판크레아틴, 트립신, 키모트립신, 루틴과 같은 단백질 분해 효소는 염증 반응의 필수 조절제 및 조절제입니다.이들의 중요한 작용 중에는 대식세포의 '식욕'이 7~10배 증가하고 자연 살해 (NK) 세포의 효능이 7~10배 증가한다는 것입니다.",
    ],
]

# Endpoint request body: one {"text", "text_pair"} object per pair.
payload = json.dumps(
    {"inputs": [{"text": text, "text_pair": text_pair} for text, text_pair in pairs]}
)

# --- Local model ---
inputs = tokenizer(pairs, max_length=512, truncation=True, padding=True, return_tensors='pt')
with torch.no_grad():
    scores = model(**inputs, return_dict=True).logits
# NOTE(review): exp_normalize divides by the sum over all pairs, so these
# values are relative to each other, while the endpoint returns one 'score'
# per item. The two printed results appear to be on different scales; confirm
# before comparing them directly.
scores = exp_normalize(scores.view(-1).float().numpy())
print(f'original: {scores}')

# --- Deployed endpoint ---
response = runtime_client.invoke_endpoint(
    EndpointName=endpoint_name,
    Body=payload,
    ContentType="application/json",
    Accept=deserializer,
)
# The response body is a stream: read it, decode it and parse the JSON.
out = json.loads(response['Body'].read().decode())
print(f'Fine-tune: {out}')

original: [0.9096387  0.09036128]
Fine-tune: [{'label': 'LABEL_0', 'score': 0.8999713659286499}, {'label': 'LABEL_0', 'score': 0.3249973654747009}]


## [Optional] Upload to HuggingFace

In [None]:
# from huggingface_hub import login
# login()

In [None]:
# hf_hub_path = "Dongjin-kr/ko-reranker"
# print(hf_hub_path)

In [None]:
# model.push_to_hub(
#     repo_id=hf_hub_path,
#     safe_serialization=False
# )
# tokenizer.push_to_hub(hf_hub_path, legacy_format='False')