In [1]:
LOCAL_MODE = False

# 0. 환경설정

In [2]:
import argparse
import os
import requests
import tempfile
import subprocess, sys

import pandas as pd
import numpy as np
from glob import glob
import copy
from collections import OrderedDict
from pathlib import Path
import joblib

from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder

import logging
import logging.handlers

import json
import base64
import boto3
import sagemaker
from botocore.client import Config
from botocore.exceptions import ClientError

import time
from datetime import datetime as dt
import datetime
from pytz import timezone
from dateutil.relativedelta import *

In [3]:
def get_secret():
    # See https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_GetSecretValue.html
    secret_name = "prod/sagemaker"
    region_name = "ap-northeast-2"
    
    # Create a Secrets Manager client
    session = boto3.session.Session()
    client = session.client(
        service_name='secretsmanager',
        region_name=region_name
    )
    try:
        get_secret_value_response = client.get_secret_value(
            SecretId='prod/sagemaker',
        )
    except ClientError as e:
        if e.response['Error']['Code'] == 'DecryptionFailureException': # Secrets Manager can't decrypt the protected secret text using the provided KMS key.
            raise e
        elif e.response['Error']['Code'] == 'InternalServiceErrorException': # An error occurred on the server side.
            raise e
        elif e.response['Error']['Code'] == 'InvalidParameterException': # You provided an invalid value for a parameter.
            raise e
        elif e.response['Error']['Code'] == 'InvalidRequestException': # You provided a parameter value that is not valid for the current state of the resource.
            raise e
        elif e.response['Error']['Code'] == 'ResourceNotFoundException': # We can't find the resource that you asked for.
            raise e
    else:
        if 'SecretString' in get_secret_value_response:
            secret = get_secret_value_response['SecretString']
            return secret
        else:
            decoded_binary_secret = base64.b64decode(get_secret_value_response['SecretBinary'])
            return decoded_binary_secret

keychain = json.loads(get_secret())
ACCESS_KEY_ID = keychain['ACCESS_KEY_ID_ent']
ACCESS_SECRET_KEY = keychain['ACCESS_SECRET_KEY_ent']

BUCKET_NAME_USECASE = keychain['BUCKET_NAME_USECASE_ent']
S3_PATH_STAGE = keychain['S3_PATH_STAGE']
S3_PATH_GOLDEN = keychain['S3_PATH_GOLDEN']
S3_PATH_TRAIN = keychain['S3_PATH_TRAIN']
S3_PATH_log = keychain['S3_PATH_LOG']
S3_PATH_FORECAST = keychain['S3_PATH_FORECAST']

boto3_session = boto3.Session(ACCESS_KEY_ID, ACCESS_SECRET_KEY)
sm_session = sagemaker.Session(boto_session = boto3_session)
region = boto3_session.region_name

s3_resource = boto3_session.resource('s3')
bucket = s3_resource.Bucket(BUCKET_NAME_USECASE)
s3_client = boto3_session.client('s3')
sm_client = boto3.client('sagemaker',
                         aws_access_key_id = ACCESS_KEY_ID,
                         aws_secret_access_key = ACCESS_SECRET_KEY,
                         region_name = 'ap-northeast-2')

In [23]:
%%writefile src/model_validation.py

import glob
import os
import pandas as pd
import time
from datetime import datetime as dt
import argparse
import json
import boto3
from io import StringIO, BytesIO
import joblib
import sys
import subprocess
import logging
import logging.handlers

import tarfile


###############################
######### util 함수 설정 ##########
###############################
def _get_logger():
    loglevel = logging.DEBUG
    l = logging.getLogger(__name__)
    if not l.hasHandlers():
        l.setLevel(loglevel)
        logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))        
        l.handler_set = True
    return l  
logger = _get_logger()

def get_secret():
    secret_name = "prod/sagemaker"
    region_name = "ap-northeast-2"

    session = boto3.session.Session()
    client = session.client(
        service_name='secretsmanager',
        region_name=region_name
    )
    try:
        get_secret_value_response = client.get_secret_value(
            SecretId='prod/sagemaker',
        )
    except ClientError as e:
        if e.response['Error']['Code'] == 'DecryptionFailureException': # Secrets Manager can't decrypt the protected secret text using the provided KMS key.
            raise e
        elif e.response['Error']['Code'] == 'InternalServiceErrorException': # An error occurred on the server side.
            raise e
        elif e.response['Error']['Code'] == 'InvalidParameterException': # You provided an invalid value for a parameter.
            raise e
        elif e.response['Error']['Code'] == 'InvalidRequestException': # You provided a parameter value that is not valid for the current state of the resource.
            raise e
        elif e.response['Error']['Code'] == 'ResourceNotFoundException': # We can't find the resource that you asked for.
            raise e
    else:
        if 'SecretString' in get_secret_value_response:
            secret = get_secret_value_response['SecretString']
            return secret
        else:
            decoded_binary_secret = base64.b64decode(get_secret_value_response['SecretBinary'])
            return decoded_binary_secret

def convert_series_to_description(leaderboard : pd.Series):
    return ','.join(leaderboard.loc[0,['model','score_test','score_val']].to_string().split())

def get_bucket_key_from_uri(uri):
    uri_aws_path = uri.split('//')[1]
    uri_bucket = uri_aws_path.rsplit('/')[0]
    uri_file_path = '/'.join(uri_aws_path.rsplit('/')[1:])
    return uri_bucket, uri_file_path

if __name__=='__main__':
    ################################
    ###### 커맨드 인자 파싱   ##########
    ################################    
    parser = argparse.ArgumentParser()
    parser.add_argument('--base_input_path', type=str, default="/opt/ml/processing/input")   
    parser.add_argument('--s3_model_uri', type=str, default="/opt/ml/processing/model")   
    parser.add_argument('--model_package_group_name', type=str, default='palm-oil-price-forecast')   
    args = parser.parse_args()     

    logger.info("######### Argument Info ####################################")
    logger.info(f"args.base_input_path: {args.base_input_path}")
    logger.info(f"args.s3_model_uri: {args.s3_model_uri}")
    logger.info(f"args.model_package_group_name: {args.model_package_group_name}")
    
    base_input_path = args.base_input_path
    s3_model_uri = args.s3_model_uri
    model_package_group_name = args.model_package_group_name
    
    ############################################
    ###### Secret Manager에서 키값 가져오기  #######
    ########################################### 
    logger.info(f"\n### Loading Key value from Secret Manager")
    
    keychain = json.loads(get_secret())
    ACCESS_KEY_ID = keychain['ACCESS_KEY_ID_ent']
    ACCESS_SECRET_KEY = keychain['ACCESS_SECRET_KEY_ent']

    BUCKET_NAME_USECASE = keychain['BUCKET_NAME_USECASE_ent']
    S3_PATH_STAGE = keychain['S3_PATH_STAGE']
    S3_PATH_GOLDEN = keychain['S3_PATH_GOLDEN']
    S3_PATH_TRAIN = keychain['S3_PATH_TRAIN']
    S3_PATH_log = keychain['S3_PATH_LOG']
    boto3_session = boto3.Session(ACCESS_KEY_ID, ACCESS_SECRET_KEY)

    region = boto3_session.region_name

    s3_resource = boto3_session.resource('s3')
    s3_client = boto3_session.client('s3')
    sm_client = boto3.client('sagemaker',
                             aws_access_key_id = ACCESS_KEY_ID,
                             aws_secret_access_key = ACCESS_SECRET_KEY,
                             region_name = 'ap-northeast-2')
    
    ############################################
    ##### Model, Leaderboard 파일 가져오기 #####
    ########################################### 
    logger.info(f"\n### Loading Model, Leaderboard zip files ")
    logger.info(f"\n#### Extract output.tar.gz and Read a Leaderboard ")
    ## 22.11.29 추가: 이전 step인, step_train에서 model.tar.gz의 uri는 가져올 수 있었지만, output.tar.gz는 못가져왔다. 이를 model.tar.gz에서 output.tar.gz으로 바꾸는방식으로 우회하자
    leaderboard_uri = s3_model_uri.replace('model.tar.gz','output.tar.gz')#,f'{base_input_path}/output.tar.gz'
    logger.info(f"\n#### output.tar.gz uri : {leaderboard_uri}")
    output_bucket, output_key = get_bucket_key_from_uri(leaderboard_uri)  
    output_obj = s3_client.get_object(Bucket = output_bucket, Key = output_key)
   
    logger.info("\n######### Model zip file extraction ####################################")
    with tarfile.open(fileobj=output_obj['Body'], mode='r|gz') as file:
        file.extractall(base_input_path)    
    logger.info(f"file list in {base_input_path}: {os.listdir(base_input_path)}")        
    
    # if leaderboard_path.endswith("tar.gz"):
    #     tar = tarfile.open(leaderboard_path, "r:gz")
    #     tar.extractall(base_input_path)
    #     tar.close()
    # elif leaderboard_path.endswith("tar"):
    #     tar = tarfile.open(leaderboard_path, "r:")
    #     tar.extractall(base_input_path)
    #     tar.close()

    leaderboard = pd.read_csv(f'{base_input_path}/leaderboard.csv').sort_values(by = ['score_val', 'score_test'],
                                                                                ascending = False)
    logger.info(f"leaderboard train sample: head(5) \n {leaderboard.head()}")
    logger.info(f"\n#### Set  ")
    model_package_group_name = model_package_group_name
    modelpackage_inference_specification =  {
        "InferenceSpecification": {
            "Containers": [
                {
                    "Image": '763104351884.dkr.ecr.ap-northeast-2.amazonaws.com/autogluon-inference:0.4-cpu-py38',
                    "ModelDataUrl": s3_model_uri#'#args.model_path_uri
                }
            ],
            "SupportedContentTypes": [ "text/csv" ],
            "SupportedResponseMIMETypes": [ "text/csv" ],
        }
    }
    if len(leaderboard[leaderboard['score_val'] > -0.13]) > 0:
        logger.info(f"\n#### Pass the first performance filtering")
        
        create_model_package_input_dict = {
            "ModelPackageGroupName" : model_package_group_name,
            "ModelPackageDescription" : convert_series_to_description(leaderboard),
            "ModelApprovalStatus" : "PendingManualApproval"
        }
        create_model_package_input_dict.update(modelpackage_inference_specification)
        create_model_package_response = sm_client.create_model_package(**create_model_package_input_dict)
        model_package_arn = create_model_package_response["ModelPackageArn"]
        logger.info('### Passed ModelPackage Version ARN : {}'.format(model_package_arn))
        
    else:
        logger.info(f"\n#### None of them passed the filtering")
        create_model_package_input_dict = {
            "ModelPackageGroupName" : model_package_group_name,
            "ModelPackageDescription" : convert_series_to_description(leaderboard),
            "ModelApprovalStatus" : "Rejected"
        }
        create_model_package_input_dict.update(modelpackage_inference_specification)
        create_model_package_response = sm_client.create_model_package(**create_model_package_input_dict)
        model_package_arn = create_model_package_response["ModelPackageArn"]
        logger.info('### Rejected ModelPackage Version ARN : {}'.format(model_package_arn))

Overwriting src/model_validation.py


In [5]:
model_validation_code = 'src/model_validation.py'
%store model_validation_code

Stored 'model_validation_code' (str)


In [6]:
%store 

Stored variables and their in-db values:
bucket                             -> 'palm-oil-price-forecast'
leaderboard_uri                    -> 's3://palm-oil-price-forecast/trained-model/2022/1
model_validation_code              -> 'src/model_validation.py'
preproc_data_dir                   -> 's3://palm-oil-price-forecast/golden-data/2022/11/
preprocessed_stage_uri             -> 's3://palm-oil-price-forecast/golden-data/2022/11/
preprocessed_test_uri              -> 's3://palm-oil-price-forecast/golden-data/2022/11/
preprocessed_train_uri             -> 's3://palm-oil-price-forecast/golden-data/2022/11/
preprocessing_code                 -> 'src/preprocessing.py'
project_prefix                     -> 'palm-oil-price-forecast'
stage_data_uri                     -> 's3://palm-oil-price-forecast/staged-data'
test_data_uri                      -> 's3://palm-oil-price-forecast/golden-data/2022/11/
train_data_uri                     -> 's3://palm-oil-price-forecast/golden-data/2022/11/
tr

In [7]:
%store -r

In [8]:
!aws s3 ls {train_model_uri} --recursive

2022-11-28 09:05:06    2984726 trained-model/2022/11/28/pipelines-5c2k5eti372r-Palm-oil-forecast-Tr-IcBejQCKws/output/model.tar.gz


In [9]:
!aws s3 ls {leaderboard_uri} --recursive

2022-11-28 09:05:07        442 trained-model/2022/11/28/pipelines-5c2k5eti372r-Palm-oil-forecast-Tr-IcBejQCKws/output/output.tar.gz


# 1. 모델 검증 파이프라인 의 스텝(Step) 생성
## 1) 모델 검증 파이프라인 변수 생성
파이프라인에서 사용할 파이프라인 파라미터를 정의합니다. 파이프라인을 스케줄하고 실행할 때 파라미터를 이용하여 실행조건을 커스마이징할 수 있습니다. 파라미터를 이용하면 파이프라인 실행시마다 매번 파이프라인 정의를 수정하지 않아도 됩니다.

지원되는 파라미터 타입은 다음과 같습니다:

- ParameterString - 파이썬 타입에서 str
- ParameterInteger - 파이썬 타입에서 int
- ParameterFloat - 파이썬 타입에서 float
이들 파라미터를 정의할 때 디폴트 값을 지정할 수 있으며 파이프라인 실행시 재지정할 수도 있습니다. 지정하는 디폴트 값은 파라미터 타입과 일치하여야 합니다.

본 노트북에서 사용하는 파라미터는 다음과 같습니다.

- processing_instance_type - 프로세싱 작업에서 사용할 ml.* 인스턴스 타입
- processing_instance_count - 프로세싱 작업에서 사용할 인스턴스 개수
- validation_instance_type - 학습작업에서 사용할 ml.* 인스턴스 타입
- model_approval_status - 학습된 모델을 CI/CD를 목적으로 등록할 때의 승인 상태 (디폴트는 "PendingManualApproval")
- input_data - 입력데이터에 대한 S3 버킷 URI
파이프라인의 각 스텝에서 사용할 변수를 파라미터 변수로서 정의 합니다.

In [11]:
from sagemaker.workflow.parameters import (
    ParameterInteger,
    ParameterString,
)
model_validation_instance_count = ParameterInteger(
    name="ModelValidationInstanceCount",
    default_value=1
)
model_validation_instance_type = ParameterString(
    name="ModelValidationInstanceType",
    default_value='ml.c5.xlarge'
)
input_model_uri = ParameterString(
    name="InputModelData",
    default_value = train_model_uri,
)
input_leaderboard_data = ParameterString(
    name="Input_Leaderboard_Data",
    default_value = leaderboard_uri,
)
split_date = '2022-10-31'

## 2) 로컬에서 테스트

In [None]:
if LOCAL_MODE:
    # 도커 컨테이너 입력 폴더: staged data가 들어가는 부분
    base_preproc_input_dir = 'opt/ml/processing/input'
    os.makedirs(base_preproc_input_dir, exist_ok=True)

    # 도커 컨테이너 기본 출력 폴더
    base_output_dir = 'opt/ml/processing/output'
    os.makedirs(base_output_dir, exist_ok=True)

    # 도커 컨테이너 출력 폴더: stage 데이터셋이 들어가는 부분
    base_preproc_output_stage_dir = f'{base_output_dir}/stage'
    os.makedirs(base_preproc_output_stage_dir, exist_ok=True)

    # 도커 컨테이너 출력 폴더: train 데이터셋이 들어가는 부분
    base_preproc_output_train_dir = f'{base_output_dir}/train'
    os.makedirs(base_preproc_output_train_dir, exist_ok=True)

    # 도커 컨테이너 출력 폴더: test 데이터셋이 들어가는 부분
    base_preproc_output_test_dir =  f'{base_output_dir}/test'
    os.makedirs(base_preproc_output_test_dir, exist_ok=True)


In [14]:
if LOCAL_MODE:
    # 도커 컨테이너 입력 폴더: staged data가 들어가는 부분
    base_input_dir = 'opt/ml/processing/input'
    os.makedirs(base_input_dir, exist_ok=True)

    # 도커 컨테이너 모델 폴더: model 데이터가 압축해제되고 실행되는곳
    base_model_dir = 'opt/ml/model' 
    os.makedirs(base_model_dir, exist_ok=True)
    
    model_package_group_name = '

In [15]:
!aws s3 cp {leaderboard_uri} {base_input_dir}

download: s3://palm-oil-price-forecast/trained-model/2022/11/26/pipelines-vrnf2bg3ujuy-Palm-oil-forecast-Tr-4suokL1ZzD/output/output.tar.gz to opt/ml/processing/input/output.tar.gz


In [16]:
!aws s3 cp {train_model_uri} {base_model_dir}

download: s3://palm-oil-price-forecast/trained-model/2022/11/26/pipelines-vrnf2bg3ujuy-Palm-oil-forecast-Tr-4suokL1ZzD/output/model.tar.gz to opt/ml/model/model.tar.gz


In [24]:
!python src/model_validation.py --base_input_path {base_input_dir} \
                                --s3_model_uri {train_model_uri} \
                                --model_package_group_name {bucket} \

######### Argument Info ####################################
args.base_input_path: opt/ml/processing/input
args.s3_model_uri: s3://palm-oil-price-forecast/trained-model/2022/11/26/pipelines-vrnf2bg3ujuy-Palm-oil-forecast-Tr-4suokL1ZzD/output/model.tar.gz
args.model_package_group_name: palm-oil-price-forecast

### Loading Key value from Secret Manager

### Loading Model, Leaderboard zip files 

#### Extract output.tar.gz and Read a Leaderboard 
leaderboard train sample: head(5) 
               model  score_test  ...  fit_time_marginal  fit_order
0  WeightedEnsemble         NaN  ...          76.943682          6
3             Naive         NaN  ...           0.000711          1
1             Theta         NaN  ...           0.000511          4
5             ARIMA         NaN  ...           0.000502          5
4               ETS         NaN  ...           0.000520          3

[5 rows x 7 columns]

#### Set  

#### Pass the first performance filtering
### Passed ModelPackage Version ARN :

In [34]:
def tewt(leaderboard : pd.Series):
    return ','.join(leaderboard.loc[0,['model','score_test','score_val']].to_string().split())


In [37]:
tewt(pd.read_csv('opt/ml/processing/input/leaderboard.csv'))

'model,WeightedEnsemble,score_test,NaN,score_val,-0.115708'

## 3) 모델 검증 프로세서 정의
전처리의 내장 SKLearnProcessor 를 통해서 sklearn_processor 오브젝트를 생성 합니다.

In [12]:
from sagemaker import get_execution_role
from sagemaker.sklearn.processing import SKLearnProcessor

framework_version = "0.23-1"

sklearn_processor = SKLearnProcessor(
    framework_version = framework_version,
    instance_type = model_validation_instance_type,
    instance_count = model_validation_instance_count,
    base_job_name = "Palm_oil_forecast-Autogluon052-sklearn0231",
    role = sagemaker.get_execution_role(),
)

The input argument instance_type of function (sagemaker.image_uris.retrieve) is a pipeline variable (<class 'sagemaker.workflow.parameters.ParameterString'>), which is not allowed. The default_value of this Parameter object will be used to override it. Please make sure the default_value is valid.


## 4) 모델 검증 단계 정의
처리 단계에서는 아래와 같은 주요 인자가 있습니다.
단계 이름
- processor 기술: 위에서 생성한 processor 오브젝트를 제공
- inputs: S3의 경로를 기술하고, 다커안에서의 다운로드 폴더(destination)을 기술 합니다.
- outputs: 처리 결과가 저장될 다커안에서의 폴더 경로를 기술합니다.

도커안의 결과 파일이 저장 후에 자동으로 S3로 업로딩을 합니다.
- job_arguments: 사용자 정의의 인자를 기술 합니다.
- code: 전처리 코드의 경로를 기술 합니다.
처리 단계의 상세한 사항은 여기를 보세요. --> 처리 단계, Processing Step

In [13]:
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.workflow.steps import ProcessingStep

step_model_validaion = ProcessingStep(
    name = "Palm_oil_forecast-Model_validation",
    processor = sklearn_processor,
    inputs=[
            ProcessingInput(
                source = leaderboard_uri,
                destination = "/opt/ml/processing/input")
        ],
    job_arguments=["--s3_model_uri", train_model_uri],    
    code = model_validation_code
)

## 5) 파리마터, 단계, 조건을 조합하여 최종 파이프라인 정의 및 실행
이제 지금까지 생성한 단계들을 하나의 파이프라인으로 조합하고 실행하도록 하겠습니다.

파이프라인은 name, parameters, steps 속성이 필수적으로 필요합니다. 여기서 파이프라인의 이름은 (account, region) 조합에 대하여 유일(unique))해야 합니다.

주의:

- 정의에 사용한 모든 파라미터가 존재해야 합니다.
- 파이프라인으로 전달된 단계(step)들은 실행순서와는 무관합니다. SageMaker Pipeline은 단계가 실행되고 완료될 수 있도록 의존관계를를 해석합니다.
- [알림] 정의한 stpes 이 복수개이면 복수개를 기술합니다. 만약에 step 간에 의존성이 있으면, 명시적으로 기술하지 않아도 같이 실행 됩니다.

### 5-1) 파이프라인 정의

In [14]:
from sagemaker.workflow.pipeline import Pipeline

pipeline_name = project_prefix
pipeline = Pipeline(name = pipeline_name,
                    parameters = [
                        model_validation_instance_type, 
                        model_validation_instance_count,
                        input_leaderboard_data
                    ],
                    steps = [step_model_validaion],
)

### 5-2) 파이프라인 정의 확인

In [15]:
import json

definition = json.loads(pipeline.definition())
definition

{'Version': '2020-12-01',
 'Metadata': {},
 'Parameters': [{'Name': 'ModelValidationInstanceType',
   'Type': 'String',
   'DefaultValue': 'ml.c5.xlarge'},
  {'Name': 'ModelValidationInstanceCount',
   'Type': 'Integer',
   'DefaultValue': 1},
  {'Name': 'Input_Leaderboard_Data',
   'Type': 'String',
   'DefaultValue': 's3://palm-oil-price-forecast/trained-model/2022/11/28/pipelines-5c2k5eti372r-Palm-oil-forecast-Tr-IcBejQCKws/output/output.tar.gz'}],
 'PipelineExperimentConfig': {'ExperimentName': {'Get': 'Execution.PipelineName'},
  'TrialName': {'Get': 'Execution.PipelineExecutionId'}},
 'Steps': [{'Name': 'Palm_oil_forecast-Model_validation',
   'Type': 'Processing',
   'Arguments': {'ProcessingResources': {'ClusterConfig': {'InstanceType': {'Get': 'Parameters.ModelValidationInstanceType'},
      'InstanceCount': {'Get': 'Parameters.ModelValidationInstanceCount'},
      'VolumeSizeInGB': 30}},
    'AppSpecification': {'ImageUri': '366743142698.dkr.ecr.ap-northeast-2.amazonaws.com/s

### 5-3) 파이프라인 정의를 제출하고 실행하기
파이프라인 정의를 파이프라인 서비스에 제출합니다. 함께 전달되는 역할(role)을 이용하여 AWS에서 파이프라인을 생성하고 작업의 각 단계를 실행할 것입니다.

In [16]:
pipeline.upsert(role_arn=sagemaker.get_execution_role())
execution = pipeline.start()

In [17]:
execution.describe()

{'PipelineArn': 'arn:aws:sagemaker:ap-northeast-2:276114397529:pipeline/palm-oil-price-forecast',
 'PipelineExecutionArn': 'arn:aws:sagemaker:ap-northeast-2:276114397529:pipeline/palm-oil-price-forecast/execution/8oygdfkxnz4d',
 'PipelineExecutionDisplayName': 'execution-1669626590683',
 'PipelineExecutionStatus': 'Executing',
 'CreationTime': datetime.datetime(2022, 11, 28, 9, 9, 50, 611000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2022, 11, 28, 9, 9, 50, 611000, tzinfo=tzlocal()),
 'CreatedBy': {},
 'LastModifiedBy': {},
 'ResponseMetadata': {'RequestId': 'af10743f-5776-43aa-866a-42fae25da9b0',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'af10743f-5776-43aa-866a-42fae25da9b0',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '421',
   'date': 'Mon, 28 Nov 2022 09:09:49 GMT'},
  'RetryAttempts': 0}}

In [18]:
%%time
start = time.time()

pipeline.upsert(role_arn = sagemaker.get_execution_role())
execution = pipeline.start()
#실행이 완료될 때까지 기다린다.
execution.wait() 
end = time.time()

CPU times: user 118 ms, sys: 18.3 ms, total: 137 ms
Wall time: 4min 32s


In [19]:
print(f"model validation 시간 : {end - start:.1f} sec")
print(f"model validation 시간 : {((end - start)/60):.1f} min")

model validation 시간 : 272.1 sec
model validation 시간 : 4.5 min


- 2022년 11월 26일 Model validation : 4.5min

In [20]:
execution.describe()

{'PipelineArn': 'arn:aws:sagemaker:ap-northeast-2:276114397529:pipeline/palm-oil-price-forecast',
 'PipelineExecutionArn': 'arn:aws:sagemaker:ap-northeast-2:276114397529:pipeline/palm-oil-price-forecast/execution/ihllruxjmyz4',
 'PipelineExecutionDisplayName': 'execution-1669626592798',
 'PipelineExecutionStatus': 'Succeeded',
 'PipelineExperimentConfig': {'ExperimentName': 'palm-oil-price-forecast',
  'TrialName': 'ihllruxjmyz4'},
 'CreationTime': datetime.datetime(2022, 11, 28, 9, 9, 52, 696000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2022, 11, 28, 9, 14, 10, 690000, tzinfo=tzlocal()),
 'CreatedBy': {},
 'LastModifiedBy': {},
 'ResponseMetadata': {'RequestId': '53a4b375-bbba-4667-978a-d56c0ab0870f',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '53a4b375-bbba-4667-978a-d56c0ab0870f',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '519',
   'date': 'Mon, 28 Nov 2022 09:14:23 GMT'},
  'RetryAttempts': 0}}

In [21]:
#실행된 단계들을 리스트업. 파이프라인의 단계실행 서비스에 의해 시작되거나 완료된 단계를 보여준다.
execution.list_steps()

[{'StepName': 'Palm_oil_forecast-Model_validation',
  'StartTime': datetime.datetime(2022, 11, 28, 9, 9, 54, 843000, tzinfo=tzlocal()),
  'EndTime': datetime.datetime(2022, 11, 28, 9, 14, 10, 445000, tzinfo=tzlocal()),
  'StepStatus': 'Succeeded',
  'AttemptCount': 0,
  'Metadata': {'ProcessingJob': {'Arn': 'arn:aws:sagemaker:ap-northeast-2:276114397529:processing-job/pipelines-ihllruxjmyz4-palm-oil-forecast-mo-slyyvl4vmm'}}}]

In [22]:
response = execution.list_steps()
proc_arn = response[-1]['Metadata']['ProcessingJob']['Arn'] # index -1은 가장 처음 실행 step
proc_job_name = proc_arn.split('/')[-1] # Processing job name만 추출
response = sm_client.describe_processing_job(ProcessingJobName = proc_job_name)
response

{'ProcessingInputs': [{'InputName': 'input-1',
   'AppManaged': False,
   'S3Input': {'S3Uri': 's3://palm-oil-price-forecast/trained-model/2022/11/28/pipelines-5c2k5eti372r-Palm-oil-forecast-Tr-IcBejQCKws/output/output.tar.gz',
    'LocalPath': '/opt/ml/processing/input',
    'S3DataType': 'S3Prefix',
    'S3InputMode': 'File',
    'S3DataDistributionType': 'FullyReplicated',
    'S3CompressionType': 'None'}},
  {'InputName': 'code',
   'AppManaged': False,
   'S3Input': {'S3Uri': 's3://sagemaker-ap-northeast-2-276114397529/Palm_oil_forecast-Model_validation-0e5c405ff69c8ee99ba2d955b0810166/input/code/model_validation.py',
    'LocalPath': '/opt/ml/processing/input/code',
    'S3DataType': 'S3Prefix',
    'S3InputMode': 'File',
    'S3DataDistributionType': 'FullyReplicated',
    'S3CompressionType': 'None'}}],
 'ProcessingJobName': 'pipelines-ihllruxjmyz4-Palm-oil-forecast-Mo-SlYYVl4VmM',
 'ProcessingResources': {'ClusterConfig': {'InstanceCount': 1,
   'InstanceType': 'ml.c5.xlarge',