# [모듈 8.1] 모델 배포 스텝 개발

## 0. 기본 세이지 메이커 정보 및 기본 변수 로딩

In [22]:
import boto3
import sagemaker
import pandas as pd

# Region name of the default boto3 session.
region = boto3.Session().region_name
# SageMaker session and execution role reused by the later cells.
sagemaker_session = sagemaker.session.Session()
role = sagemaker.get_execution_role()

%store -r 
%store

Stored variables and their in-db values:
base_preproc_input_dir                 -> 'opt/ml/processing/input'
dataset_path                           -> 'opt/ml/processing/input/dataset.csv'
default_bucket                         -> 'sagemaker-ap-northeast-2-057716757052'
image_uri                              -> '366743142698.dkr.ecr.ap-northeast-2.amazonaws.com
input_data_uri                         -> 's3://sagemaker-ap-northeast-2-057716757052/fraud2
preprocessing_code_dir                 -> 'fraud/preprocessing.py'
processing_instance_count              -> ParameterInteger(name='ProcessingInstanceCount', p
project_prefix                         -> 'fraud2scratch'
s3_dataset_path                        -> 's3://sagemaker-ap-northeast-2-057716757052/fraud2
sagemaker_model                        -> 'pipelines-n5qxc409wxod-fraudscratchmodel-jdpbccud
test_preproc__dir_artifact             -> 's3://sagemaker-ap-northeast-2-057716757052/sklear
train_model_artifact                   -> 's3:

## 1. 환경 셋업

In [50]:
import os

def split_X_y(test_preproc_path):
    """Load a CSV and separate its first column from the remaining columns.

    Args:
        test_preproc_path: Location of the CSV; passed straight to
            ``pd.read_csv``.

    Returns:
        A ``(y_test, df)`` tuple: the first column as a NumPy array, and the
        DataFrame with that column removed.
    """
    frame = pd.read_csv(test_preproc_path)
    # pop() removes the leading column from the frame in place and returns it.
    first_column = frame.columns[0]
    labels = frame.pop(first_column).to_numpy()
    return labels, frame

# S3 prefix under which the batch test file is uploaded.
s3_batch_test_path = f"{s3_dataset_path}/batch"
print("s3_batch_test_path: ", s3_batch_test_path)


# Build the batch test data from the preprocessed test CSV: split_X_y returns
# the first column separately and the remaining columns as a DataFrame.
test_preproc_path = f"{test_preproc__dir_artifact}/test.csv"
y_test, test_batch_df = split_X_y(test_preproc_path)

# Create the local folder and file path for the batch transform test data.
base_preproc_input_batch_dir = 'opt/ml/processing/input/batch'
os.makedirs(base_preproc_input_batch_dir, exist_ok=True)
batch_test_path = f"{base_preproc_input_batch_dir}/batch.csv"
print("batch_test_path: ", batch_test_path)

# Save the batch transform file locally.
test_batch_df.to_csv(batch_test_path, index=None)

# Upload the local file to S3.
input_batch_data_uri = sagemaker.s3.S3Uploader.upload(
    local_path=batch_test_path, 
    desired_s3_uri=s3_batch_test_path,
)
print("input_batch_data_uri: ", input_batch_data_uri)

s3_batch_test_path:  s3://sagemaker-ap-northeast-2-057716757052/fraud2scratch/input/batch
batch_test_path:  opt/ml/processing/input/batch/batch.csv
input_batch_data_uri:  s3://sagemaker-ap-northeast-2-057716757052/fraud2scratch/input/batch/batch.csv


In [78]:
# Show the path variables used for processing.
# NOTE(review): these names are not assigned in the visible cells — presumably
# restored by `%store -r`; confirm before running this cell on its own.
print("base_dir: \n", base_dir)
print("base_model_path: \n", base_model_path)
print("base_test_path: \n", base_test_path)
print("output_evaluation_dir: \n", output_evaluation_dir)

base_dir: 
 opt/ml/processing
base_model_path: 
 opt/ml/processing/model/model.tar.gz
base_test_path: 
 s3://sagemaker-ap-northeast-2-057716757052/sklearn-fraud-process-2021-04-13-03-08-45-278/output/test/test.csv
output_evaluation_dir: 
 opt/ml/processing/evaluation


## 2. 로컬에서 스크립트 실행

In [9]:
import sagemaker
# Region reported by a fresh SageMaker session.
region = sagemaker.Session().boto_region_name
print("Using AWS Region: {}".format(region))

# Endpoint name and instance type passed to fraud/deploy_model.py below.
endpoint_name = 'fraud2scratch-0120'
endpoint_instance_type = 'ml.t2.xlarge'

Using AWS Region: ap-northeast-2


In [18]:
%%sh -s "$sagemaker_model" "$region" "$endpoint_instance_type" "$endpoint_name" 
# $1..$4 are the four values given on the %%sh line above, in that order.
python fraud/deploy_model.py \
--model_name $1 \
--region $2 \
--endpoint_instance_type $3 \
--endpoint_name $4



#############################################
args.model_name: pipelines-n5qxc409wxod-fraudscratchmodel-jdpbccudlk
args.region: ap-northeast-2
args.endpoint_instance_type: ml.t2.xlarge
args.endpoint_name: fraud2scratch-0120
Endpoint exists
Endpoint status is creating
Endpoint status: Creating
Endpoint status: InService


### 추론 테스트

In [116]:
import numpy as np

def get_predictor(endpoint_name, sagemaker_session):
    """Build a ``sagemaker.predictor.Predictor`` for the given endpoint.

    Args:
        endpoint_name: Name of the endpoint the predictor targets.
        sagemaker_session: SageMaker session handed to the predictor.

    Returns:
        The newly constructed ``Predictor``.
    """
    return sagemaker.predictor.Predictor(
        endpoint_name=endpoint_name,
        sagemaker_session=sagemaker_session,
    )

def predict(data_df, predictor, rows=5):
    """Send the leading rows of ``data_df`` to a predictor and print each reply.

    Every row is turned into one comma-separated string and submitted with
    ``ContentType: text/csv``; the response is decoded as UTF-8 and printed.

    Args:
        data_df: DataFrame whose rows are the samples to send.
        predictor: Object exposing ``predict(payload, initial_args=...)`` whose
            return value supports ``.decode('utf-8')``.
        rows: Upper bound of the ``data_df[0:rows]`` slice that is sent.

    Returns:
        None. Results are printed, not returned.
    """
    for _, row in data_df[0:rows].iterrows():
        # Use the row's values directly. Calling squeeze() on a one-element
        # row collapses it to a scalar, which cannot be iterated and joined,
        # so single-column frames used to raise TypeError here.
        payload = ','.join(str(value) for value in row.tolist())

        response = predictor.predict(payload, initial_args={"ContentType": "text/csv"})
        print(response.decode('utf-8'))
    
# Read the uploaded batch CSV back and send its first row to the endpoint
# named by endpoint_name.
test_df = pd.read_csv(input_batch_data_uri, )    
predictor = get_predictor(endpoint_name, sagemaker_session)    
predict(test_df, predictor, rows=1)

0.02895057201385498


## 3. 모델 빌딩 파이프라인에서  실행 
---



### 모델 빌딩 파이프라인 변수 생성



In [81]:
from sagemaker.workflow.parameters import (
    ParameterInteger,
    ParameterString,
)

# Pipeline parameter "ProcessingInstanceCount", defaulting to 1.
processing_instance_count = ParameterInteger(
    name="ProcessingInstanceCount",
    default_value=1
)
# Pipeline parameter "ProcessingInstanceType", defaulting to ml.m5.xlarge.
processing_instance_type = ParameterString(
    name="ProcessingInstanceType",
    default_value="ml.m5.xlarge"
)

## 배포 스텝 정의
[중요] `pipeline_endpoint_name` 에 '_' 언더바를 넣으면 에러가 납니다. '-' 대시는 가능합니다.

In [103]:

# Upload the local deploy script to the project's code prefix in S3 so the
# processing step can reference it by URI.
local_deploy_code_path = 'fraud/deploy_model.py'
s3_deploy_code_path = f"s3://{default_bucket}/{project_prefix}/code"
s3_deploy_code_uri = sagemaker.s3.S3Uploader.upload(
    local_path=local_deploy_code_path, 
    desired_s3_uri=s3_deploy_code_path,
)
print("s3_deploy_code_uri: ", s3_deploy_code_uri)

# Endpoint name handed to the deploy step; it uses dashes, not underscores.
pipeline_endpoint_name = 'pipeline-endpoint-0414'

s3_deploy_code_uri:  s3://sagemaker-ap-northeast-2-057716757052/fraud2scratch/code/deploy_model.py


In [104]:
from sagemaker.sklearn.processing import SKLearnProcessor
from sagemaker.workflow.steps import ProcessingStep

# Processor that runs the deploy script.
# NOTE(review): instance_type and instance_count are hard-coded here, while the
# ProcessingInstanceType / ProcessingInstanceCount pipeline parameters defined
# in this notebook are not referenced by this step — confirm that is intended.
deploy_model_processor = SKLearnProcessor(
    framework_version='0.23-1',
    role= role,
    instance_type="ml.t3.medium",
    instance_count=1,
    base_job_name='fraud-scratch-deploy-model',
    sagemaker_session=sagemaker_session)


# Pipeline step that runs the uploaded deploy script with the same four
# command-line options used in the local run.
deploy_step = ProcessingStep(
    name='DeployModel',
    processor=deploy_model_processor,
    job_arguments=[
        "--model_name", sagemaker_model, 
        "--region", region,
        "--endpoint_instance_type", endpoint_instance_type,
        "--endpoint_name", pipeline_endpoint_name
    ],
    code=s3_deploy_code_uri)

### 파라미터, 단계, 조건을 조합하여 최종 파이프라인 정의



In [105]:
from sagemaker.workflow.pipeline import Pipeline


# The pipeline is named after project_prefix, registers the two processing
# parameters, and contains only the deploy step.
pipeline_name = project_prefix
pipeline = Pipeline(
    name=pipeline_name,
    parameters=[
        processing_instance_type, 
        processing_instance_count,
    ],
    steps=[deploy_step],
)

#### (선택) 파이프라인 정의 확인 

파이프라인을 정의하는 JSON을 생성하고 파이프라인 내에서 사용하는 파라미터와 단계별 속성들이 잘 정의되었는지 확인할 수 있습니다.

In [106]:
import json


# Parse the pipeline definition JSON into a Python object for inspection.
definition = json.loads(pipeline.definition())
# definition

### 파이프라인을 SageMaker에 제출하고 실행하기 

파이프라인 정의를 파이프라인 서비스에 제출합니다. 함께 전달되는 역할(role)을 이용하여 AWS에서 파이프라인을 생성하고 작업의 각 단계를 실행할 것입니다.   

In [107]:
# Submit the pipeline definition using the execution role.
pipeline.upsert(role_arn=role)

{'PipelineArn': 'arn:aws:sagemaker:ap-northeast-2:057716757052:pipeline/fraud2scratch',
 'ResponseMetadata': {'RequestId': '43168427-a281-48ea-b335-c33a7f4ceeaa',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '43168427-a281-48ea-b335-c33a7f4ceeaa',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '86',
   'date': 'Wed, 14 Apr 2021 02:00:10 GMT'},
  'RetryAttempts': 0}}

In [108]:
# Start a pipeline run and keep the handle for the status checks below.
execution = pipeline.start()

### 파이프라인 운영: 파이프라인 대기 및 실행상태 확인

워크플로우의 실행상황을 살펴봅니다. 

In [109]:
# Show the current description of the pipeline execution.
execution.describe()
# Presumably blocks until the execution finishes when uncommented — verify.
# execution.wait()

{'PipelineArn': 'arn:aws:sagemaker:ap-northeast-2:057716757052:pipeline/fraud2scratch',
 'PipelineExecutionArn': 'arn:aws:sagemaker:ap-northeast-2:057716757052:pipeline/fraud2scratch/execution/ypachf1q2vwf',
 'PipelineExecutionDisplayName': 'execution-1618365613074',
 'PipelineExecutionStatus': 'Executing',
 'CreationTime': datetime.datetime(2021, 4, 14, 2, 0, 12, 975000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2021, 4, 14, 2, 0, 12, 975000, tzinfo=tzlocal()),
 'CreatedBy': {},
 'LastModifiedBy': {},
 'ResponseMetadata': {'RequestId': '36dae26d-a450-4986-9204-78a7f9d7ce74',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '36dae26d-a450-4986-9204-78a7f9d7ce74',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '401',
   'date': 'Wed, 14 Apr 2021 02:00:14 GMT'},
  'RetryAttempts': 0}}

In [115]:
# List the steps of this execution.
execution.list_steps()

[{'StepName': 'DeployModel',
  'StartTime': datetime.datetime(2021, 4, 14, 2, 0, 13, 387000, tzinfo=tzlocal()),
  'EndTime': datetime.datetime(2021, 4, 14, 2, 18, 13, 662000, tzinfo=tzlocal()),
  'StepStatus': 'Succeeded',
  'Metadata': {'ProcessingJob': {'Arn': 'arn:aws:sagemaker:ap-northeast-2:057716757052:processing-job/pipelines-ypachf1q2vwf-deploymodel-vgtaatgirp'}}}]

## 9. 배포 파이프라인 추론 테스트

In [117]:
# Repeat the one-row inference test against the endpoint named by
# pipeline_endpoint_name.
predictor = get_predictor(pipeline_endpoint_name, sagemaker_session)    
predict(test_df, predictor, rows=1)

0.02895057201385498
