# <B> # SageMaker deployment </B>
* Container: codna_python3

## AutoReload

In [None]:
%load_ext autoreload
%autoreload 2

## 0. Install packages

In [2]:
install_needed = False  # should only be True once
# install_needed = False

In [3]:
%%bash
#!/bin/bash

DAEMON_PATH="/etc/docker"
MEMORY_SIZE=10G

FLAG=$(cat $DAEMON_PATH/daemon.json | jq 'has("data-root")')
# echo $FLAG

if [ "$FLAG" == true ]; then
    echo "Already revised"
else
    echo "Add data-root and default-shm-size=$MEMORY_SIZE"
    sudo cp $DAEMON_PATH/daemon.json $DAEMON_PATH/daemon.json.bak
    sudo cat $DAEMON_PATH/daemon.json.bak | jq '. += {"data-root":"/home/ec2-user/SageMaker/.container/docker","default-shm-size":"'$MEMORY_SIZE'"}' | sudo tee $DAEMON_PATH/daemon.json > /dev/null
    sudo service docker restart
    echo "Docker Restart"
fi

Already revised


In [4]:
import sys
import IPython

if install_needed:
    print("installing deps and restarting kernel")
    !{sys.executable} -m pip install -U pip
    !{sys.executable} -m pip install -U smdebug sagemaker-experiments
    !{sys.executable} -m pip install -U sagemaker
    !{sys.executable} -m pip install -U xgboost==1.3.1

    IPython.Application.instance().kernel.do_shutdown(True)

installing deps and restarting kernel
Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com


## 1. parameter store 설정

In [1]:
import boto3
from utils.ssm import parameter_store

In [2]:
strRegionName=boto3.Session().region_name
pm = parameter_store(strRegionName)
strPrefix = pm.get_params(key="PREFIX")

In [3]:
strBucketName = pm.get_params(key="-".join([strPrefix, "BUCKET"]))
strExecutionRole = pm.get_params(key="-".join([strPrefix, "SAGEMAKER-ROLE-ARN"]))

In [4]:
print (f'strBucketName: {strBucketName}')
print (f'strExecutionRole: {strExecutionRole}')

strBucketName: sagemaker-us-east-1-419974056037
strExecutionRole: arn:aws:iam::419974056037:role/service-role/AmazonSageMaker-ExecutionRole-20221206T163436


## 2. Dataset

In [5]:
import os

In [6]:
strS3DataPath = f"s3://{strBucketName}/dataset" 
strLocalDataPath = os.path.join(os.getcwd(), "data")

## 3.Depoly

### 3.1 Check functions in local mode
[중요] inference.py를 만들어 주어야 함
* model_fn: 학습한 모델 로드
* input_fn: endpoint invocation시 전달 되는 input 처리 하는 함수
* predict_fn: forword propagation, input_fn의 이후 호출 
* output_fn: 유저에게 결과 전달

- 사용자 정의 inference 코드를 정의해서 사용하기 전에, 노트북에서 사전 테스트 및 디버깅을 하고 진행하면 빠르게 추론 개발을 할수 있습니다.
- 디폴트 inference code (input_fn, model_fn, predict_fn, output_fn) 을 사용해도 되지만, 상황에 따라서는 사용자 정의가 필요할 수 있습니다. 디폴트 코드는 아래 링크를 참고 하세요.
    - [Deploy PyTorch Models](https://sagemaker.readthedocs.io/en/stable/frameworks/pytorch/using_pytorch.html#deploy-pytorch-models)
    - [디폴트 inference Code](https://github.com/aws/sagemaker-pytorch-inference-toolkit/blob/master/src/sagemaker_pytorch_serving_container/default_pytorch_inference_handler.py)

### 로컬 모드 수행시, 새로운 로컬모드 수행을 위해서는 이전 사용했던 도커는 반드시 stop 해줘야 한다
* docker ps -a 로 현재 수행중인 contatiner ID 확인 후
* docker stop "<<contatiner ID>>"
* docker container prune -f

* 3.1.1 inference.py 생성

In [26]:
%%writefile source/deploy/inference.py
import io
import os
import csv
import time
import json
import pickle as pkl
import numpy as np
import pandas as pd
from io import BytesIO
import xgboost as xgb
import sagemaker_xgboost_container.encoder as xgb_encoders
from sagemaker.serializers import CSVSerializer
from io import StringIO

#For Gunicorn/Flask xgboost image, we need to ensure input and output encoding match exactly for model monitor (CSV or JSON)
from flask import Response 

NUM_FEATURES = 58
CSV_SERIALIZER = CSVSerializer(content_type='text/csv')

def model_fn(model_dir):
    """
    Deserialize and return fitted model.
    """
    model_file = "xgboost-model"
    model = xgb.Booster()
    model.load_model(os.path.join(model_dir, model_file))
    return model
                     

def input_fn(request_body, request_content_type):
    """
    The SageMaker XGBoost model server receives the request data body and the content type,
    and invokes the `input_fn`.
    Return a DMatrix (an object that can be passed to predict_fn).
    """

    print (f'Input, Content_type: {request_content_type}')
    if request_content_type == "application/x-npy":        
        stream = BytesIO(request_body)
        array = np.frombuffer(stream.getvalue())
        array = array.reshape(int(len(array)/NUM_FEATURES), NUM_FEATURES)
        return xgb.DMatrix(array)
    
    elif request_content_type == "text/csv":
        return xgb_encoders.csv_to_dmatrix(request_body.rstrip("\n"))
    
    elif request_content_type == "text/libsvm":
        return xgb_encoders.libsvm_to_dmatrix(request_body)
    
    else:
        raise ValueError(
            "Content type {} is not supported.".format(request_content_type)
        )

def predict_fn(input_data, model):
    """
    SageMaker XGBoost model server invokes `predict_fn` on the return value of `input_fn`.

    Return a two-dimensional NumPy array (predictions and scores)
    """
    start_time = time.time()
    y_probs = model.predict(input_data)
    print("--- Inference time: %s secs ---" % (time.time() - start_time))    
    y_preds = [1 if e >= 0.5 else 0 for e in y_probs] 
    #return np.vstack((y_preds, y_probs))
    y_probs = np.array(y_probs).reshape(1, -1)
    y_preds = np.array(y_preds).reshape(1, -1)   
    output = np.concatenate([y_probs, y_preds], axis=1)
    
    return output


def output_fn(predictions, content_type="text/csv"):
    """
    After invoking predict_fn, the model server invokes `output_fn`.
    """
    print (f'Output, Content_type: {content_type}')
    
    if content_type == "text/csv":
        outputs = CSV_SERIALIZER.serialize(predictions)
        print (outputs)
        return Response(outputs, mimetype=content_type)

    elif content_type == "application/json":

        outputs = json.dumps({
            'pred': predictions[0][0],
            'prob': predictions[0][1]
        })                
        #return outputs
        return Response(outputs, mimetype=content_type)
    else:
        raise ValueError("Content type {} is not supported.".format(content_type))

Overwriting source/deploy/inference.py


* 3.1.2 param setting

In [27]:
import time
import sagemaker

In [28]:
local_mode = False

if local_mode:
    
    from sagemaker.local import LocalSession
    
    strInstanceType = "local"
    sagemaker_session = LocalSession()
    sagemaker_session.config = {'local': {'local_code': True}}
    strDeployType = "local"
        
else:
    strInstanceType = "ml.m5.2xlarge"
    sagemaker_session = sagemaker.Session()
    strDeployType = "cloud"
    
strS3ModelPath = pm.get_params(key="-".join([strPrefix, "MODEL-PATH"]))
strEndpointName = f"endpoint-{strDeployType}-{strPrefix}-{int(time.time())}"


In [29]:
print (f'strInstanceType: {strInstanceType}')
print (f'sagemaker_session: {sagemaker_session}')
print (f'strS3ModelPath: {strS3ModelPath}')
print (f'strEndpointName: {strEndpointName}')

strInstanceType: ml.m5.2xlarge
sagemaker_session: <sagemaker.session.Session object at 0x7fc70b0c6110>
strS3ModelPath: s3://sagemaker-us-east-1-419974056037/DJ-SM-IMD/training/model-output/DJ-SM-IMD-experiments-0424-04371682311053/output/model.tar.gz
strEndpointName: endpoint-cloud-DJ-SM-IMD-1682313903


* Create model

In [30]:
from sagemaker.xgboost.model import XGBoostModel
from sagemaker.serializers import CSVSerializer, NumpySerializer, JSONSerializer
from sagemaker.deserializers import JSONDeserializer, NumpyDeserializer, CSVDeserializer

In [31]:
xgb_model = XGBoostModel(
    model_data=strS3ModelPath,
    role=strExecutionRole,
    source_dir="./source/deploy",
    entry_point="inference.py",
    framework_version="1.3-1",
    sagemaker_session=sagemaker_session,
)

* Create Endpoint
    * SageMaker SDK는 `deploy(...)` 메소드를 호출 시, `create-endpoint-config`와 `create-endpoint`를 같이 수행합니다. 좀 더 세분화된 파라메터 조정을 원하면 AWS CLI나 boto3 SDK client 활용을 권장 드립니다.

In [32]:
xgb_predictor = xgb_model.deploy(
    endpoint_name=strEndpointName,
    instance_type=strInstanceType, 
    initial_instance_count=1,
    serializer=CSVSerializer('text/csv'), ## 미적용 시 default: application/x-npy, boto3 기반 invocation시 무시
    deserializer=CSVDeserializer(), ## 미적용 시 default: application/x-npy, boto3 기반 invocation시 무시
    wait=True,
    log=True,
)

------!

* inference (based on **sagemaker SDK**)

In [34]:
import pandas as pd

In [35]:
pdTest = pd.read_csv(f'{strLocalDataPath}/test.csv')
pdLabel = pdTest.iloc[:, 0].astype('int')
pdTest = pdTest.drop('fraud', axis=1)

In [46]:
payload = pdTest.values[0, :]
print (type(payload))

<class 'numpy.ndarray'>


* model deploy 수행 시, **serializer**, **deserializer** 를 설정 했음을 기억해 주세요. <BR><BR>
xgb_predictor = xgb_model.deploy( <BR><BR>
    endpoint_name=strEndpointName, <BR>
    instance_type=strInstanceType, <BR>
    initial_instance_count=1, <BR>
    **serializer=CSVSerializer('text/csv')**, ## 미적용 시 default: application/x-npy, boto3 기반 invocation시 무시 <BR>
    **deserializer=CSVDeserializer('text/csv')**, ## 미적용 시 default: application/x-npy, boto3 기반 invocation시 무시 <BR>
    wait=True, <BR>
    log=True, <BR>
) <BR>
    
* **즉, sagemaker SDK기반 model deploy 수행 시 serialization, deserialization을 설정 했다면,<BR>
inference 할 때, 별도의 serialization, deserialization 과정이 필요 없음**

In [48]:
outputs = xgb_predictor.predict(payload) ## Auto serialization/deserialization
outputs

[['0.6454359889030457', '1.0']]

* inference (based on **boto3**)
    - **boto3 기반 invocation시 runtime_client가 필요**
    - deploy 시 설정했던 "serialization, deserialization"이 적용되지 않음, 즉, **serialization, deserialization을 manually 해 줘야 함**
        - 번거로울 수 있으나 de/serialization에 대한 자유도가 높음

In [38]:
import json
import boto3
import sagemaker

In [39]:
if "local" in strInstanceType: runtime_client = sagemaker.local.LocalSagemakerRuntimeClient()    
else: runtime_client = boto3.Session().client('sagemaker-runtime')
print (f'runtime_client: {runtime_client}')

runtime_client: <botocore.client.SageMakerRuntime object at 0x7fc70ad0ece0>


In [40]:
pdTest = pd.read_csv(f'{strLocalDataPath}/test.csv')
pdLabel = pdTest.iloc[:, 0].astype('int')
pdTest = pdTest.drop('fraud', axis=1)

* serialzaiton (numpy)

In [55]:
payload = pdTest.values[1, :].tobytes() ## numpy serialziation
print (type(payload))

<class 'bytes'>


In [68]:
#strDeserializer = "text/csv"
strDeserializer = "application/json"

response = runtime_client.invoke_endpoint(
    EndpointName=strEndpointName, 
    ContentType='application/x-npy',
    Accept=strDeserializer,
    Body=payload
)
## deserialization
if strDeserializer == "application/json":
    out = json.loads(response['Body'].read().decode()) ## for json
elif strDeserializer == "text/csv":
    out =response['Body'].read().decode().split(",") ## for csv

print (f'Response: {out}')


Response: {'pred': 0.6454359889030457, 'prob': 1.0}


* clean-up (local endpoint)

In [49]:
if "local" in strInstanceType:
    xgb_predictor.delete_endpoint(strEndpointName)

* save endpoint name

In [50]:
pm.put_params(key="-".join([strPrefix, "ENDPOINT-NAME-DEPLOY"]), value=strEndpointName, overwrite=True)

'Store suceess'

In [54]:
strEndpointName

'endpoint-cloud-DJ-SM-IMD-1682313903'