# SageMaker Pipeline (pipeline.py) 로컬에서 Test 하기

# 1. 환경 설정 및 컨피그 파일 로딩

In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to the project code take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Repository-relative locations of the two JSON config files used by this notebook.
code_pipeline_train_config_json_path = "pipelines/ncf/src/code_pipeline_train_config.json"
sm_pipeline_train_config_json_path = "pipelines/ncf/src/sm_pipeline_train_config.json"

In [None]:
import json

from pipelines.ncf.src.common_utils import load_json

# Load both config files into dicts; later cells read their settings from these.
code_pipeline_train_dict = load_json(code_pipeline_train_config_json_path)
sm_pipeline_train_dict = load_json(sm_pipeline_train_config_json_path)

# Pretty-print each config under its own heading.
for _heading, _config in (
    ("Code Pipeline Series Params: ", code_pipeline_train_dict),
    ("SageMaker Pipeline Series Params: ", sm_pipeline_train_dict),
):
    print(_heading)
    print(json.dumps(_config, indent=2))


### 필요한 설정 값 로딩

In [None]:
import boto3
import sagemaker
import os

# Settings taken from the code-pipeline config.
region = code_pipeline_train_dict["region"]
account_id = code_pipeline_train_dict["account_id"]
bucket = code_pipeline_train_dict["bucket"]
# NOTE(review): the value stored under "code_build_service_arn" is what this
# notebook later passes as the pipeline role — confirm that role has the
# permissions the pipeline needs.
role = code_pipeline_train_dict["code_build_service_arn"]
model_package_group_name = code_pipeline_train_dict["model_package_group_name"]

# Settings taken from the SageMaker-pipeline config.
pipeline_name = sm_pipeline_train_dict["sm_pipeline_name"]
s3_input_data_uri = sm_pipeline_train_dict["s3_input_data_uri"]


project_prefix = sm_pipeline_train_dict["project_prefix"]
inference_image_uri = sm_pipeline_train_dict["inference_image_uri"]
training_instance_type = sm_pipeline_train_dict["training_instance_type"]
training_instance_count = sm_pipeline_train_dict["training_instance_count"]
ModelApprovalStatus = sm_pipeline_train_dict["ModelApprovalStatus"]

# 2. src 코드 S3 업로딩 
- 리패키징 람다 스텝에서 사용.
- code_buildspec.yml 파일에서 upload.py 에 해당 함.

## 2.1. source.tar.gz 로 압축

In [None]:
# Directory whose files get packaged, and the name of the resulting archive.
code_data_dir = "pipelines/ncf/src"
code_artifact_name = "source.tar.gz"

In [None]:
%%sh -s {code_data_dir} {code_artifact_name}
# Stop at the first failing command: if `cd` fails, `rm -rf` and `tar` must not
# run in whatever directory the cell happened to start in.
set -e

# Positional arguments supplied by the `%%sh -s` line.
code_data_dir="$1"
code_artifact_name="$2"

cd "$code_data_dir"
# Remove any archive left from a previous run so the *.* glob below does not
# pack the old archive into the new one.
rm -rf -- "$code_artifact_name"
# Archive every entry in this directory whose name contains a dot.
tar -czvf "$code_artifact_name" *.*

## 2.2. S3 에 업로딩

In [None]:
source_code_prefix = 'code'
# Base S3 location (bucket + prefix) under which the source archive is stored
s3_code_uri = f"s3://{bucket}/{source_code_prefix}"

In [None]:
# List everything currently under s3_code_uri, then delete it recursively.
# CAUTION: the second command removes every object under that URI.
! aws s3 ls {s3_code_uri} --recursive
! aws s3 rm {s3_code_uri} --recursive

In [None]:
import os
# Local filesystem path of the archive: <code_data_dir>/<code_artifact_name>.
local_code = os.path.join(code_data_dir, code_artifact_name)

In [None]:
# Upload the local archive under the code prefix. The result is bound to a
# regular name instead of `_`, because IPython uses `_` for its last-output
# cache and stops updating it once user code assigns to it.
uploaded_code_uri = sagemaker.s3.S3Uploader.upload(
    local_path=local_code,
    desired_s3_uri=s3_code_uri,
)
print(s3_code_uri)
# Also show what the uploader reported, so it can be compared with the URI
# that is assembled by hand in the next step.
print("uploaded_code_uri: \n", uploaded_code_uri)

s3_code_uri 에 source.tar.gz 이름 붙임

In [None]:
# Append the archive name using a literal "/": an S3 URI is not a filesystem
# path, and os.path.join would insert "\" on Windows. The archive name comes
# from code_artifact_name rather than a second hard-coded copy, and the guard
# keeps the cell idempotent when it is re-run.
if not s3_code_uri.endswith(f"/{code_artifact_name}"):
    s3_code_uri = f"{s3_code_uri.rstrip('/')}/{code_artifact_name}"
print("s3_code_uri: \n", s3_code_uri)

## 2.3. code_location.json 파일에 위치 명시하고 저장

In [None]:
def store_s3_code_uri_json(s3_code_uri, json_file_name="code_location.json"):
    """Record the S3 location of the source archive in a JSON file.

    The file is overwritten with a single JSON object of the form
    ``{"s3_code_uri": <s3_code_uri>}``, indented by four spaces.

    Args:
        s3_code_uri: The S3 URI to store.
        json_file_name: Path of the JSON file to write. Defaults to
            ``"code_location.json"`` relative to the current working
            directory, which is what the function always used before this
            parameter existed.

    Returns:
        The path of the file that was written (``json_file_name``).
    """
    # Imported here so the function works even if the cell that imports json
    # at notebook level has not been run.
    import json

    payload = {
        "s3_code_uri": s3_code_uri,
    }
    print("dictionary: \n", payload)

    # Explicit encoding keeps the output independent of the platform default.
    with open(json_file_name, "w", encoding="utf-8") as outfile:
        outfile.write(json.dumps(payload, indent=4))

    return json_file_name

# Write the current s3_code_uri to the JSON file; the returned file name is
# displayed as the cell output.
store_s3_code_uri_json(s3_code_uri)

# 3. Pipeline 테스트

## 3.1. 컨피그 파일에서 설정 값 로딩

In [None]:
# Echo each setting used by the pipeline cells below, label first, then value.
for _label, _value in (
    ("s3_input_data_uri", s3_input_data_uri),
    ("project_prefix", project_prefix),
    ("region", region),
    ("inference_image_uri", inference_image_uri),
    ("role", role),
    ("bucket", bucket),
    ("model_package_group_name", model_package_group_name),
    ("ModelApprovalStatus", ModelApprovalStatus),
    ("pipeline_name", pipeline_name),
    ("training_instance_type", training_instance_type),
    ("training_instance_count", training_instance_count),
):
    print(f"{_label}: \n", _value)



In [None]:
from pipelines.ncf.pipeline import get_pipeline


# Build the pipeline object from the settings loaded from the config files.
pipeline = get_pipeline(
    project_prefix=project_prefix,
    region=region,
    # Original note: SAGEMAKER_PIPELINE_ROLE_ARN is what gets passed in here
    # (presumably when run from the build pipeline — confirm).
    role=role,
    default_bucket=bucket,
    model_package_group_name=model_package_group_name,
    pipeline_name=pipeline_name,
)

In [None]:
# Parse the pipeline's definition string as JSON into a Python object.
definition = json.loads(pipeline.definition())
# Evaluate `definition` on its own line to display the parsed structure.
# definition

In [None]:
# Upsert the pipeline definition under the configured role.
pipeline.upsert(role_arn=role)

# Start an execution, passing the values from the config file as the
# pipeline parameters.
execution = pipeline.start(
    parameters={
        "InputData": s3_input_data_uri,
        "training_instance_type": training_instance_type,
        "training_instance_count": training_instance_count,
        "ModelApprovalStatus": ModelApprovalStatus,
        "inference_image_uri": inference_image_uri,
    },
)

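The cell above registers the pipeline and starts an execution. Instead of accepting the pipeline's default parameters, it overrides them with the values loaded from the config file (input data URI, training instance type and count, model approval status, and inference image URI).

To run with the defaults instead, call `pipeline.start()` without the `parameters` argument.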

In [None]:
# Wait on the started execution (per the SageMaker SDK, `wait` blocks until the
# run reaches a terminal state), then display its description as the cell output.
execution.wait()
execution.describe()

In [None]:
# Display the steps of the execution as the cell output.
execution.list_steps()