# [모듈 9.1] 추론 및 결과 분석

## 0. 기본 SageMaker 정보 및 기본 변수 로딩

In [2]:
import boto3
import sagemaker
import pandas as pd

# AWS region name taken from the default boto3 session.
region = boto3.Session().region_name
# SageMaker SDK session object; handed to the Predictor built later in this notebook.
sagemaker_session = sagemaker.session.Session()
# Execution role as reported by the SageMaker SDK for the current environment.
role = sagemaker.get_execution_role()

%store -r 
%store

Stored variables and their in-db values:
base_preproc_input_dir                 -> 'opt/ml/processing/input'
dataset_path                           -> 'opt/ml/processing/input/dataset.csv'
default_bucket                         -> 'sagemaker-ap-northeast-2-057716757052'
image_uri                              -> '366743142698.dkr.ecr.ap-northeast-2.amazonaws.com
input_data_uri                         -> 's3://sagemaker-ap-northeast-2-057716757052/fraud2
pipeline_endpoint_name                 -> 'all-pipeline-endpoint-0414'
preprocessing_code_dir                 -> 'fraud/preprocessing.py'
processing_instance_count              -> ParameterInteger(name='ProcessingInstanceCount', p
project_prefix                         -> 'fraud2scratch'
s3_dataset_path                        -> 's3://sagemaker-ap-northeast-2-057716757052/fraud2
sagemaker_model                        -> 'pipelines-n5qxc409wxod-fraudscratchmodel-jdpbccud
test_preproc__dir_artifact             -> 's3://sagemaker-ap-northea

## 1. 환경 셋업

In [3]:
import os

def split_X_y(test_preproc_path):
    """Load a CSV and separate its first column from the remaining columns.

    Args:
        test_preproc_path: Location of the CSV file; anything accepted by
            ``pandas.read_csv`` (local path or URI).

    Returns:
        A ``(y_test, features)`` tuple where ``y_test`` is the first column's
        values as a NumPy array and ``features`` is a DataFrame holding every
        other column.
    """
    frame = pd.read_csv(test_preproc_path)

    # The first column is peeled off by position, whatever its header is.
    first_column = frame.columns[0]
    labels = frame.iloc[:, 0].to_numpy()
    features = frame.drop(columns=[first_column])

    return labels, features

# S3 prefix that will receive the batch test file; s3_dataset_path is one of
# the variables restored by `%store -r`.
s3_batch_test_path = f"{s3_dataset_path}/batch"
print("s3_batch_test_path: ", s3_batch_test_path)


# Load the preprocessed test data and split off its first column
# (test_preproc__dir_artifact is also a `%store`-restored variable).
test_preproc_path = f"{test_preproc__dir_artifact}/test.csv"
y_test, test_batch_df = split_X_y(test_preproc_path)

# Create the local folder and file path for the batch-transform test data
base_preproc_input_batch_dir = 'opt/ml/processing/input/batch'
os.makedirs(base_preproc_input_batch_dir, exist_ok=True)
batch_test_path = f"{base_preproc_input_batch_dir}/batch.csv"
print("batch_test_path: ", batch_test_path)

# Save the batch-transform file locally
# NOTE(review): index=None is falsy, so the row index is presumably omitted;
# index=False would be the explicit spelling — confirm before changing.
test_batch_df.to_csv(batch_test_path, index=None)

# Upload to S3
input_batch_data_uri = sagemaker.s3.S3Uploader.upload(
    local_path=batch_test_path, 
    desired_s3_uri=s3_batch_test_path,
)
print("input_batch_data_uri: ", input_batch_data_uri)

s3_batch_test_path:  s3://sagemaker-ap-northeast-2-057716757052/fraud2scratch/input/batch
batch_test_path:  opt/ml/processing/input/batch/batch.csv
input_batch_data_uri:  s3://sagemaker-ap-northeast-2-057716757052/fraud2scratch/input/batch/batch.csv


## 2. 로컬에서 엔드포인트 추론 실행

In [5]:
import numpy as np

def get_predictor(endpoint_name, sagemaker_session):
    """Create a SageMaker ``Predictor`` for the named endpoint.

    Args:
        endpoint_name: Name of the endpoint the predictor should target.
        sagemaker_session: SageMaker session object forwarded to the predictor.

    Returns:
        The constructed ``sagemaker.predictor.Predictor`` instance.
    """
    return sagemaker.predictor.Predictor(
        endpoint_name=endpoint_name,
        sagemaker_session=sagemaker_session,
    )

def predict(data_df,predictor, rows=5):
    """Send the leading rows of a DataFrame to an endpoint and print each reply.

    Every selected row is serialized as one comma-separated line and sent with
    a ``text/csv`` content type; the decoded response is printed.

    Args:
        data_df: DataFrame whose rows are the samples to score.
        predictor: Object exposing ``predict(data, initial_args=...)`` that
            returns bytes (e.g. the value returned by ``get_predictor``).
        rows: Number of leading rows to send. Defaults to 5.

    Returns:
        None. Results are written to stdout.
    """
    # .iloc makes the "first N rows" selection positional for every index
    # type, instead of relying on DataFrame.__getitem__ slice semantics.
    head_df = data_df.iloc[:rows]

    for _, row in head_df.iterrows():
        # Call tolist() on the row itself: squeeze() would collapse a
        # one-column row into a scalar, which cannot be iterated or joined.
        payload = ','.join(str(value) for value in row.tolist())

        response = predictor.predict(payload, initial_args={"ContentType": "text/csv"})
        print(response.decode('utf-8'))
    
# Read the uploaded batch CSV back from the URI returned by the S3 upload.
# NOTE(review): reading an s3:// URI with pandas presumably requires an S3
# filesystem backend such as s3fs — confirm it is installed.
test_df = pd.read_csv(input_batch_data_uri, )    
# Build a predictor for the endpoint named by the `%store`-restored variable.
predictor = get_predictor(pipeline_endpoint_name, sagemaker_session)    
# Send only the first row and print the endpoint's response.
predict(test_df, predictor, rows=1)

0.029452353715896606
