In [2]:
# 필요한 package 들의 kernel 이 시작될 때, 미리 작성해둔 LifeCycle Configurations 의 Script 로 설치될 수 있도록 설정함. 
%load_ext autoreload
%autoreload 2
import sys
import IPython

#install_needed = True
install_needed = False

if install_needed:
    print("===> Installing deps and restarting kernel. Please change 'install_needed = False' and run this code cell again.")
    !{sys.executable} -m pip install -U "nbformat" "argparse" "torchvision==0.14.1"  "awscli==1.27.68" "boto3==1.26.68" "botocore==1.29.68" "datasets==1.18.4" "sagemaker==2.143.0" "s3fs==0.4.2" "s3transfer==0.6.0" "transformers==4.17.0" "nvidia-cublas-cu11==11.10.3.66" "nvidia-cuda-nvrtc-cu11==11.7.99" "nvidia-cuda-runtime-cu11==11.7.99" "nvidia-cudnn-cu11==8.5.0.96"  
    IPython.Application.instance().kernel.do_shutdown(True)

# MLOps with SageMaker Pipelines


## Prerequisites

Reference: 

https://github.com/gonsoomoon-ml/SageMaker-Pipelines-Step-By-Step
https://github.com/gonsoomoon-ml/SageMaker-Pipelines-Step-By-Step/tree/main/phase01
https://github.com/gonsoomoon-ml/SageMaker-Pipelines-Step-By-Step/tree/main/phase02

- SageMaker Pipelines SDK: https://docs.aws.amazon.com/sagemaker/latest/dg/pipelines-sdk.html
- Caching Pipeline Steps: https://docs.aws.amazon.com/sagemaker/latest/dg/pipelines-caching.html
- AWS AIML Blog: Use a SageMaker Pipeline Lambda step for lightweight model deployments: https://aws.amazon.com/de/blogs/machine-learning/use-a-sagemaker-pipeline-lambda-step-for-lightweight-model-deployments/

[Check]
- ROLE :  sagemaker role arn: arn:aws:iam::353411055907:role/service-role/AmazonSageMaker-ExecutionRole-20230315T235247
- `AmazonSageMakerFullAccess`와 `AmazonSageMakerPipelinesIntegrations` policy 필수 

In [3]:
import boto3
import os
import numpy as np
import sagemaker
import sys
import time

import sagemaker
import sagemaker.huggingface
from sagemaker.huggingface import HuggingFace, HuggingFaceModel

from sagemaker.workflow.parameters import ParameterInteger, ParameterFloat, ParameterString

from sagemaker.lambda_helper import Lambda

from sagemaker.sklearn.processing import SKLearnProcessor
from sagemaker.huggingface.processing import HuggingFaceProcessor

from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.workflow.steps import CacheConfig, ProcessingStep

from sagemaker.inputs import TrainingInput
from sagemaker.workflow.steps import TrainingStep

from sagemaker.processing import ScriptProcessor
from sagemaker.workflow.properties import PropertyFile
from sagemaker.workflow.step_collections import CreateModelStep, RegisterModel

from sagemaker.workflow.conditions import ConditionLessThanOrEqualTo,ConditionGreaterThanOrEqualTo
from sagemaker.workflow.condition_step import ConditionStep
from sagemaker.workflow.functions import JsonGet

from sagemaker.workflow.pipeline import Pipeline, PipelineExperimentConfig
from sagemaker.workflow.execution_variables import ExecutionVariables

In [4]:
sess = sagemaker.Session()
region = sess.boto_region_name

# sagemaker session bucket -> used for uploading data, models and logs
# sagemaker will automatically create this bucket if it not exists
sagemaker_session_bucket=None
if sagemaker_session_bucket is None and sess is not None:
    # set to default bucket if a bucket name is not given
    sagemaker_session_bucket = sess.default_bucket()

role = sagemaker.get_execution_role()
sagemaker_session = sagemaker.Session(default_bucket=sagemaker_session_bucket)

print(f"sagemaker role arn: {role}")
print(f"sagemaker bucket: {sagemaker_session.default_bucket()}")
print(f"sagemaker session region: {sagemaker_session.boto_region_name}")

sagemaker role arn: arn:aws:iam::353411055907:role/service-role/AmazonSageMaker-ExecutionRole-20230315T235247
sagemaker bucket: sagemaker-us-east-1-353411055907
sagemaker session region: us-east-1


### S3 bucket 에 올려둔 필요한 src , utils 현재 작업 중인 directory 로 가져오기 (for the Scheduling Job by Sagemaker Studio Notebook Jobs)

In [5]:
import boto3
import os
import zipfile

# S3 URI 설정
s3_uri = 's3://sagemaker-us-east-1-353411055907/GP-LJP-mlops/LJP_MLops.zip'

# 현재 작업 중인 디렉토리 경로 가져오기
current_dir = os.getcwd()

# S3에서 파일 다운로드
s3 = boto3.client('s3')
bucket, key = s3_uri.split('//')[1].split('/', 1)
s3.download_file(bucket, key, os.path.join(current_dir, os.path.basename(key)))

# 압축 해제
zip_file = os.path.join(current_dir, os.path.basename(key))
with zipfile.ZipFile(zip_file, 'r') as zip_ref:
    zip_ref.extractall(current_dir)


## 1. Defining the Pipeline
---

### 1.1. Pipeline parameters

References:  https://docs.aws.amazon.com/sagemaker/latest/dg/build-and-manage-parameters.html

In [6]:
# S3 prefix where every assets will be stored
s3_prefix = "GP-LJP-mlops"

# s3 bucket used for storing assets and artifacts
bucket = sagemaker_session.default_bucket()

# aws region used
region = sagemaker_session.boto_region_name

# base name prefix for sagemaker jobs (training, processing, inference)
base_job_prefix = s3_prefix

# Cache configuration for workflow
cache_config = CacheConfig(enable_caching=True, expire_after="7d")

# package versions
transformers_version = '4.17.0'
pytorch_version = '1.10.2'
py_version = "py38"

model_id_ = "lawcompany/KLAID_LJP_base"
tokenizer_id_ = "lawcompany/KLAID_LJP_base"
dataset_name_ = "lawcompany/KLAID"

model_id = ParameterString(name="ModelId", default_value=model_id_)
tokenizer_id = ParameterString(name="TokenizerId", default_value=tokenizer_id_)
dataset_name = ParameterString(name="DatasetName", default_value=dataset_name_)

### 1.2. Processing Step

빌트인 `SKLearnProcessor`를 통해 전처리 스텝을 정의

References: 
- AWS AIML Blog: https://aws.amazon.com/ko/blogs/machine-learning/use-deep-learning-frameworks-natively-in-amazon-sagemaker-processing/
- 개발자 가이드: https://docs.aws.amazon.com/ko_kr/sagemaker/latest/dg/build-and-manage-steps.html#step-type-processing

In [7]:
from sagemaker.processing import ProcessingInput, ProcessingOutput, ScriptProcessor
from sagemaker.workflow.parameters import ParameterString, ParameterInteger
from sagemaker.workflow.steps import ProcessingStep
import os


data_processing_script_py = "./src/collecting_data.py"  

# S3 버킷 이름과 경로 설정
s3_bucket = bucket  # S3 버킷 이름
s3_prefix = "GP-LJP-mlops"  # S3에 저장할 폴더 이름


# 데이터 전처리 스크립트 파일과 출력 경로 설정
data_processing_script = "./src/collecting_data.py"  # 데이터 전처리를 위한 Python 스크립트 파일
output_data_path = f"s3://{s3_bucket}/{s3_prefix}/data/collected_data"  # 전처리된 데이터를 저장할 S3 경


# file_name 파라미터 정의
from datetime import datetime 
# 현재 날짜와 시간을 문자열로 변환 (예: '2023-08-04 12:00:00')
current_date_time_str = datetime.now().strftime('%Y-%m-%d %H')
file_name = ParameterString(name='FileName', default_value='data_{current_date_time_str}.csv')

# SageMaker SKLearnProcessor 생성
sklearn_processor = SKLearnProcessor(
    framework_version='0.23-1',  # 사전 정의된 scikit-learn 버전 지정
    role=role,  # 미리 생성한 IAM 역할 ARN을 사용
    instance_count=1,
    instance_type='ml.m5.xlarge',
)


# SageMaker Processing Job 정의
step_data_collection = ProcessingStep(
    name='DataProcessing',
    processor=sklearn_processor,
    inputs=[
        ProcessingInput(
            source=f"s3://{s3_bucket}/{s3_prefix}/labels.csv",
            destination="/opt/ml/processing/input",
        ),
        # 다른 입력 데이터에 대한 설정 추가 (필요에 따라)
    ],
    outputs=[
        ProcessingOutput(
            source="/opt/ml/processing/output",
            destination=output_data_path,
            output_name='file_name',
        ),
        ProcessingOutput(
            source="/opt/ml/processing/processed_output",
            destination=output_data_path,
        )
    ],
    code=data_processing_script,
)



In [8]:
processing_instance_type = ParameterString(name="ProcessingInstanceType", default_value="ml.m5.xlarge")
processing_instance_count = ParameterInteger(name="ProcessingInstanceCount", default_value=1)
processing_script = ParameterString(name="ProcessingScript", default_value="./src/processing_sklearn.py")

In [9]:
processing_output_destination = f"s3://{bucket}/{s3_prefix}/data"

sklearn_processor = SKLearnProcessor(
    instance_type="ml.m5.xlarge", 
    instance_count=processing_instance_count,
    framework_version="1.0-1",    
    base_job_name=base_job_prefix + "-preprocessing",
    sagemaker_session=sagemaker_session,    
    role=role
)

step_process = ProcessingStep(
    name="ProcessDataForTraining",
    #cache_config=cache_config,
    processor=sklearn_processor,
    inputs=[
        # 여기에 step_data_collection의 file_name을 전달해줍니다.
        ProcessingInput(
            input_name='file_name',
            source=step_data_collection.properties.ProcessingOutputConfig.Outputs['file_name'].S3Output.S3Uri,
            destination="/opt/ml/processing/file_name",
        ),
    ],
        
    job_arguments=["--model_id", model_id_,
                   "--tokenizer_id", tokenizer_id_,
                   "--dataset_name", dataset_name_,
                   "--transformers_version", transformers_version,
                   "--pytorch_version", pytorch_version,
                   #"--file_name", file_name  # file_name을 job_arguments로 추가
                   
                  ],
    outputs=[
        ProcessingOutput(
            output_name="train",
            destination=f"{processing_output_destination}/train",
            source="/opt/ml/processing/train",
        ),
        ProcessingOutput(
            output_name="validation",
            destination=f"{processing_output_destination}/test",
            source="/opt/ml/processing/validation",
        ),
        ProcessingOutput(
            output_name="test",
            destination=f"{processing_output_destination}/test",
            source="/opt/ml/processing/test",
        )        
    ],
    code="./src/processing_sklearn.py"
)

In [10]:
import datetime
file_name = f"data_{datetime.datetime.now().strftime('%Y-%m-%d %H')}.csv"
print(file_name)

data_2023-08-07 12.csv


### 1.3. Model Training Step

이전 랩에서 진행한 훈련 스크립트를 그대로 활용하여 훈련 스텝을 정의합니다. SageMaker Pipelines에 적용하기 위해 워크플로 파라메터(`ParameterInteger, ParameterFloat, ParameterString`)도 같이 정의합니다.

훈련, 검증 및 테스트 데이터에 대한 S3 경로는 이전 랩처럼 수동으로 지정하는 것이 아니라 체인으로 연결되는 개념이기에, 아래 예시처럼 전처리 스텝 결괏값(`step_process`)의 프로퍼티(`properties`)를 참조하여 지정해야 합니다.
```python
"train": TrainingInput(
    s3_data=step_process.properties.ProcessingOutputConfig.Outputs["train"].S3Output.S3Uri
)
```

#### Training Parameter

In [11]:
# training step parameters
training_entry_point = ParameterString(name="TrainingEntryPoint", default_value="train.py")
training_source_dir = ParameterString(name="TrainingSourceDir", default_value="./src")
training_instance_type = ParameterString(name="TrainingInstanceType", default_value="ml.p3.8xlarge")
training_instance_count = ParameterInteger(name="TrainingInstanceCount", default_value=1)

# hyperparameters, which are passed into the training job
n_gpus = ParameterString(name="NumGPUs", default_value="1")
epochs = ParameterString(name="Epochs", default_value="1")
seed = ParameterString(name="Seed", default_value="42")
train_batch_size = ParameterString(name="TrainBatchSize", default_value="1")
eval_batch_size = ParameterString(name="EvalBatchSize", default_value="2")           
learning_rate = ParameterString(name="LearningRate", default_value="5e-5") 

model_id = ParameterString(name="ModelId", default_value=model_id_)
tokenizer_id = ParameterString(name="TokenizerId", default_value=tokenizer_id_)
dataset_name = ParameterString(name="DatasetName", default_value=dataset_name_)

In [12]:
hyperparameters = {
    'n_gpus': n_gpus,                       # number of GPUs per instance
    'epochs': epochs,                       # number of training epochs
    'seed': seed,                           # seed
    'train_batch_size': train_batch_size,   # batch size for training
    'eval_batch_size': eval_batch_size,     # batch size for evaluation
    'warmup_steps': 0,                      # warmup steps
    'learning_rate': learning_rate,         # learning rate used during training
    'tokenizer_id': model_id,               # pre-trained tokenizer
    'model_id': tokenizer_id                # pre-trained model
} 

chkpt_s3_path = f's3://{bucket}/{s3_prefix}/processing/checkpoints'

In [13]:
huggingface_estimator = HuggingFace(
    entry_point="train.py",
    source_dir="./src",
    base_job_name=base_job_prefix + "-training",
    instance_type="ml.p3.8xlarge",
    instance_count=training_instance_count,
    role=role,
    transformers_version=transformers_version,
    pytorch_version=pytorch_version,
    py_version=py_version,
    hyperparameters=hyperparameters,
    sagemaker_session=sagemaker_session,    
    disable_profiler=True,
    debugger_hook_config=False,
    checkpoint_s3_uri=chkpt_s3_path,
    checkpoint_local_path='/opt/ml/checkpoints'
)

step_train = TrainingStep(
    name="TrainModel",
    estimator=huggingface_estimator,
    inputs={
        "train": TrainingInput(
            s3_data=step_process.properties.ProcessingOutputConfig.Outputs[
                "train"
            ].S3Output.S3Uri
        ),
        "test": TrainingInput(
            s3_data=step_process.properties.ProcessingOutputConfig.Outputs[
                "test"
            ].S3Output.S3Uri
        ),
    },
    cache_config=cache_config,
)

### 1.4. Model evaluation Step

훈련된 모델의 성능을 평가하기 위해 추가 `ProcessingStep`을 정의합니다. 평가 결과에 따라 모델이 생성, 등록 및 배포되거나 파이프라인이 중단됩니다.
평가 결과는 `PropertyFile`에 복사되며, 이는 이후 `ConditionStep`에서 사용됩니다.

#### Evaluation Parameter

In [14]:
evaluation_script = ParameterString(name="EvaluationScript", default_value="./src/evaluate.py")
evaluation_instance_type = ParameterString(name="EvaluationInstanceType", default_value="ml.m5.xlarge")
evaluation_instance_count = ParameterInteger(name="EvaluationInstanceCount", default_value=1)

#### Evaluator

In [15]:
!pygmentize ./src/evaluate.py

[34mimport[39;49;00m [04m[36msubprocess[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36msys[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36mjson[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36mlogging[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36mpathlib[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36mtarfile[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36mos[39;49;00m[37m[39;49;00m
[37m[39;49;00m
[34mimport[39;49;00m [04m[36mnumpy[39;49;00m [34mas[39;49;00m [04m[36mnp[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36mpandas[39;49;00m [34mas[39;49;00m [04m[36mpd[39;49;00m[37m[39;49;00m
[37m[39;49;00m
[37m[39;49;00m
logger = logging.getLogger()[37m[39;49;00m
logger.setLevel(logging.INFO)[37m[39;49;00m
logger.addHandler(logging.StreamHandler())[37m[39;49;00m
[37m[39;49;00m
[34mif[39;49;00m [31m__name__[39;49;00m == [33m"[39;49;00m[33m__main__[39;49;00m[33m"[

In [16]:
script_eval = SKLearnProcessor(
    framework_version="1.0-1",
    instance_type="ml.m5.xlarge",
    instance_count=evaluation_instance_count,
    base_job_name=base_job_prefix + "-evaluation",
    sagemaker_session=sagemaker_session,
    role=role,
)

evaluation_report = PropertyFile(
    name="EvaluationReport",
    output_name="evaluation",
    path="evaluation.json",
)

step_eval = ProcessingStep(
    name="EvalLoss",
    processor=script_eval,
    inputs=[
        ProcessingInput(
            source=step_train.properties.ModelArtifacts.S3ModelArtifacts,
            destination="/opt/ml/processing/model",
        )
    ],
    outputs=[
        ProcessingOutput(
            output_name="evaluation",
            source="/opt/ml/processing/evaluation",
            destination=f"s3://{bucket}/{s3_prefix}/evaluation_report",
        ),
    ],
    code="./src/evaluate.py",
    property_files=[evaluation_report],
    cache_config=cache_config,
)

### 1.5. Register the model

훈련된 모델은 모델 패키지 그룹(Model Package Group)의 모델 레지스트리(Model Registry)에 등록됩니다. 모델 레지스트리는 SageMaker Pipelines에서 소개된 개념으로, 기존 SageMaker 모델과 다르게 모델 버전 관리가 가능하며 승인 여부를 지정할 수 있습니다. 모델 승인은 `ConditionStep`의 조건을 만족할 때에만 가능하게 할 수 있습니다. (예: 정확도가 95% 이상인 경우에만 모델 배포)

In [17]:
model = HuggingFaceModel(
    model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
    role=role,
    transformers_version=transformers_version,
    pytorch_version=pytorch_version,
    py_version=py_version,
    sagemaker_session=sagemaker_session,
)
model_package_group_name = "LJPModelPackageGroup"
step_register = RegisterModel(
    name="RegisterModel",
    model=model,
    content_types=["application/json"],
    response_types=["application/json"],
    inference_instances=["ml.m5.xlarge", "ml.g4dn.xlarge"],
    transform_instances=["ml.m5.xlarge", "ml.g4dn.xlarge"],
    model_package_group_name=model_package_group_name,
    approval_status="Approved",
)

### 1.6. Model Deployment


`LambdaStep`에서 파생된 커스텀 단계 `ModelDeployment`를 생성합니다. LambdaStep에서 정의한 Lambda 함수를 통해 호스팅 리얼타임 엔드포인트를 배포합니다.

In [18]:
!pygmentize utils/deploy_step.py

[34mimport[39;49;00m [04m[36mtime[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36mjson[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36mboto3[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36mos[39;49;00m[37m[39;49;00m
[34mfrom[39;49;00m [04m[36msagemaker[39;49;00m[04m[36m.[39;49;00m[04m[36mworkflow[39;49;00m[04m[36m.[39;49;00m[04m[36mstep_collections[39;49;00m [34mimport[39;49;00m StepCollection[37m[39;49;00m
[34mfrom[39;49;00m [04m[36msagemaker[39;49;00m[04m[36m.[39;49;00m[04m[36mworkflow[39;49;00m[04m[36m.[39;49;00m[04m[36m_utils[39;49;00m [34mimport[39;49;00m _RegisterModelStep[37m[39;49;00m
[34mfrom[39;49;00m [04m[36msagemaker[39;49;00m[04m[36m.[39;49;00m[04m[36mlambda_helper[39;49;00m [34mimport[39;49;00m Lambda[37m[39;49;00m
[34mfrom[39;49;00m [04m[36msagemaker[39;49;00m[04m[36m.[39;49;00m[04m[36mworkflow[39;49;00m[04m[36m.[39;49;00m[04m[36mlambda_step[39;49;00m [34

In [19]:
# custom Helper Step for ModelDeployment
from utils.deploy_step import ModelDeployment

# we will use the iam role from the notebook session for the created endpoint
# this role will be attached to our endpoint and need permissions, e.g. to download assets from s3
sagemaker_endpoint_role=sagemaker.get_execution_role()
model_n_ = "lawcompany/LJP"
model_name = f"{model_n_.split('/')[-1]}-{time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime())}"

step_deployment = ModelDeployment(
    model_name=model_name,
    registered_model=step_register.steps[0],
    endpoint_instance_type="ml.m5.xlarge",
    sagemaker_endpoint_role=sagemaker_endpoint_role,
    autoscaling_policy=None,
)

Using ARN from existing role: sagemaker-pipelines-model-deployment-role


### 1.7. Condition for deployment

`ConditionStep`을 통해 모델 평가 결과를 검사합니다. 정확도가 일정 이상일 때(accuracy > 0.95) 모델 등록 및 배포 파이프라인을 진행합니다.

#### Condition Parameter

In [20]:
threshold_accuracy = ParameterFloat(name="ThresholdAccuracy", default_value=0.95)

#### Condition

In [21]:
cond_gte = ConditionGreaterThanOrEqualTo(
    left=JsonGet(
        step_name=step_eval.name,
        property_file=evaluation_report,
        json_path="eval_accuracy",
    ),
    right=threshold_accuracy,
)

step_cond = ConditionStep(
    name="CheckEvalAccuracy",
    conditions=[cond_gte],
    if_steps=[step_register, step_deployment],
    else_steps=[],
)

<br>

## 2. Pipeline definition and execution

---

모든 스텝을 정의하였다면 파이프라인을 정의합니다. 

파이프라인 인스턴스는 이름(`name`), 파라메터(`parameters`), 및 스텝(`steps`)으로 구성됩니다. 
- 파이프라인 이름: (AWS 계정, 리전) 쌍 내에서 고유해야 합니다 
- 파라메터: 스텝 정의에 사용했던 모든 파라메터들을 파이프라인에서 정의해야 합니다. 
- 스텝: 리스트 형태로 이전 스텝들을 정의합니다. 내부적으로 데이터 종속성을 사용하여 각 스텝 간의 관계를 DAG으로 정의하기 때문에 실행 순서대로 나열할 필요는 없습니다.

In [22]:
pipeline = Pipeline(
    name=f"LJP-Pipeline",
    parameters=[
        file_name,
        model_id,
        tokenizer_id,        
        dataset_name,
        processing_instance_type,
        processing_instance_count,
        processing_script,
        training_entry_point,
        training_source_dir,
        training_instance_type,
        training_instance_count,
        evaluation_script,
        evaluation_instance_type,
        evaluation_instance_count,
        threshold_accuracy,
        n_gpus,
        epochs,
        seed,
        eval_batch_size,
        train_batch_size,
        learning_rate,
    ],
    steps=[step_data_collection, step_process, step_train, step_eval, step_cond],
    sagemaker_session=sagemaker_session,
)

#### Check the pipeline definition


In [23]:
import json

definition = json.loads(pipeline.definition())
definition

Popping out 'CertifyForMarketplace' from the pipeline definition since it will be overridden in pipeline execution time.


{'Version': '2020-12-01',
 'Metadata': {},
 'Parameters': [{'Name': 'ModelId',
   'Type': 'String',
   'DefaultValue': 'lawcompany/KLAID_LJP_base'},
  {'Name': 'TokenizerId',
   'Type': 'String',
   'DefaultValue': 'lawcompany/KLAID_LJP_base'},
  {'Name': 'DatasetName',
   'Type': 'String',
   'DefaultValue': 'lawcompany/KLAID'},
  {'Name': 'ProcessingInstanceType',
   'Type': 'String',
   'DefaultValue': 'ml.m5.xlarge'},
  {'Name': 'ProcessingInstanceCount', 'Type': 'Integer', 'DefaultValue': 1},
  {'Name': 'ProcessingScript',
   'Type': 'String',
   'DefaultValue': './src/processing_sklearn.py'},
  {'Name': 'TrainingEntryPoint', 'Type': 'String', 'DefaultValue': 'train.py'},
  {'Name': 'TrainingSourceDir', 'Type': 'String', 'DefaultValue': './src'},
  {'Name': 'TrainingInstanceType',
   'Type': 'String',
   'DefaultValue': 'ml.p3.8xlarge'},
  {'Name': 'TrainingInstanceCount', 'Type': 'Integer', 'DefaultValue': 1},
  {'Name': 'EvaluationScript',
   'Type': 'String',
   'DefaultValue':

In [24]:
pipeline.upsert(role_arn=role)

Popping out 'CertifyForMarketplace' from the pipeline definition since it will be overridden in pipeline execution time.
Popping out 'CertifyForMarketplace' from the pipeline definition since it will be overridden in pipeline execution time.


{'PipelineArn': 'arn:aws:sagemaker:us-east-1:353411055907:pipeline/ljp-pipeline',
 'ResponseMetadata': {'RequestId': '785a4a05-4d39-4fac-b8e9-ec7e749bb769',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '785a4a05-4d39-4fac-b8e9-ec7e749bb769',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '80',
   'date': 'Mon, 07 Aug 2023 12:59:19 GMT'},
  'RetryAttempts': 0}}

### Run the pipeline

파이프라인을 실행합니다.

In [25]:
execution = pipeline.start()

In [26]:
execution.describe()

{'PipelineArn': 'arn:aws:sagemaker:us-east-1:353411055907:pipeline/ljp-pipeline',
 'PipelineExecutionArn': 'arn:aws:sagemaker:us-east-1:353411055907:pipeline/ljp-pipeline/execution/anu8oatvmh7v',
 'PipelineExecutionDisplayName': 'execution-1691413160174',
 'PipelineExecutionStatus': 'Executing',
 'CreationTime': datetime.datetime(2023, 8, 7, 12, 59, 20, 68000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2023, 8, 7, 12, 59, 20, 68000, tzinfo=tzlocal()),
 'CreatedBy': {'UserProfileArn': 'arn:aws:sagemaker:us-east-1:353411055907:user-profile/d-irdpcbrlhtyb/default-1680148488767',
  'UserProfileName': 'default-1680148488767',
  'DomainId': 'd-irdpcbrlhtyb'},
 'LastModifiedBy': {'UserProfileArn': 'arn:aws:sagemaker:us-east-1:353411055907:user-profile/d-irdpcbrlhtyb/default-1680148488767',
  'UserProfileName': 'default-1680148488767',
  'DomainId': 'd-irdpcbrlhtyb'},
 'ResponseMetadata': {'RequestId': 'b6b44c9d-8310-4057-a941-b1b7d13b6264',
  'HTTPStatusCode': 200,
  'HTTPHeade

In [27]:
execution.wait()

WaiterError: Waiter PipelineExecutionComplete failed: Waiter encountered a terminal failure state: For expression "PipelineExecutionStatus" we matched expected path: "Failed"

실행된 스텝들을 리스트업합니다.

In [None]:
execution.list_steps()

<br>

## Clean up
---

과금을 방지하기 위해 사용하지 않는 리소스를 삭제합니다. 아래 코드셀은 Lambda 함수와 엔드포인트를 삭제합니다. 

In [None]:
sm_client = boto3.client("sagemaker")

# Delete the Lambda function
step_deployment.func.delete()

# Endpoint 는 그냥 추후에 미사용시 삭제 
# Delete the endpoint
#hf_predictor.delete_endpoint()