_API Reference:_ [SageMaker Pipelines – Steps](https://sagemaker.readthedocs.io/en/stable/workflows/pipelines/sagemaker.workflow.pipelines.html#steps)

In [1]:
!pip install sagemaker --upgrade

Collecting sagemaker
  Using cached sagemaker-2.231.0-py3-none-any.whl.metadata (16 kB)
Collecting boto3<2.0,>=1.34.142 (from sagemaker)
  Downloading boto3-1.35.14-py3-none-any.whl.metadata (6.6 kB)
Collecting sagemaker-core<2.0.0,>=1.0.0 (from sagemaker)
  Downloading sagemaker_core-1.0.3-py3-none-any.whl.metadata (4.9 kB)
Collecting botocore<1.36.0,>=1.35.14 (from boto3<2.0,>=1.34.142->sagemaker)
  Downloading botocore-1.35.14-py3-none-any.whl.metadata (5.7 kB)
Collecting mock<5.0,>4.0 (from sagemaker-core<2.0.0,>=1.0.0->sagemaker)
  Using cached mock-4.0.3-py3-none-any.whl.metadata (2.8 kB)
Using cached sagemaker-2.231.0-py3-none-any.whl (1.6 MB)
Downloading boto3-1.35.14-py3-none-any.whl (139 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.2/139.2 kB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sagemaker_core-1.0.3-py3-none-any.whl (377 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m377.3/377.3 kB[0m [31m36.5 MB/s[0m eta

In [14]:
import sagemaker
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.steps import TrainingStep, ProcessingStep, CreateModelStep
from sagemaker.workflow.parameters import ParameterString
from sagemaker.inputs import TrainingInput
from sagemaker.workflow.execution_variables import ExecutionVariables
from sagemaker.workflow.model_step import ModelStep
from sagemaker import get_execution_role
from sagemaker.estimator import Estimator
import boto3

# Resolve the SageMaker session, the AWS region and the execution role used by
# the rest of the notebook.
sess = sagemaker.Session()
region = boto3.Session().region_name
role = get_execution_role()

# Echo the role and region on separate lines; the trailing bare expression
# displays the session object's repr as the cell output.
print(role, region, sep="\n")
sess

arn:aws:iam::070576557102:role/service-role/AmazonSageMaker-ExecutionRole-20240512T164029
ap-northeast-1


<sagemaker.session.Session at 0x7fc376c12320>

In [7]:
# Pipeline parameters. The S3 defaults below are placeholders
# ("your-bucket") and must point at real locations before an execution can succeed.
s3_input_data = ParameterString(
    name="InputData",
    default_value="s3://your-bucket/input-data/",
)
instance_type = ParameterString(
    name="InstanceType",
    default_value="ml.m5.xlarge",
)
model_output_path = ParameterString(
    name="ModelOutputPath",
    default_value="s3://your-bucket/model-output/",
)


In [8]:
# Training container image.
# Resolve the built-in XGBoost 1.2-1 image for the region this notebook runs in
# rather than hard-coding a registry URI: the previous literal pointed at a
# us-east-1 registry even though `region` (and the pipeline) live elsewhere.
xgboost_image_uri = sagemaker.image_uris.retrieve(
    framework="xgboost",
    region=region,
    version="1.2-1",
)

# Estimator describing the training job: one instance, instance type and
# output location supplied by the pipeline parameters.
estimator = Estimator(
    image_uri=xgboost_image_uri,
    role=role,
    instance_count=1,
    instance_type=instance_type,
    output_path=model_output_path,
)

# Training step: feeds the "train" channel from the InputData parameter as CSV.
train_step = TrainingStep(
    name="TrainModel",
    estimator=estimator,
    inputs={
        "train": TrainingInput(s3_input_data, content_type="csv"),
    },
)


In [26]:
from sagemaker.model import Model
from sagemaker.workflow.pipeline_context import PipelineSession

# Model definition built on the training step's output.
# A PipelineSession is passed as the session so that the model is used to
# build pipeline step arguments (see SageMaker Pipelines docs) — the model
# artifact location is a step property, not a concrete S3 URI.
pipeline_session = PipelineSession()
trained_artifacts = train_step.properties.ModelArtifacts.S3ModelArtifacts

model = Model(
    image_uri=estimator.image_uri,   # same container image as training
    model_data=trained_artifacts,
    role=role,
    sagemaker_session=pipeline_session,
)

# Display the model object's repr as the cell output.
model

<sagemaker.model.Model at 0x7fc37435ab00>

In [27]:
from sagemaker.workflow.model_step import ModelStep

# Model step. Despite the "ModelDeployment" name, this step wraps
# model.create(...), i.e. it defines the SageMaker model on the given instance
# type; nothing in this cell creates an endpoint.
create_model_args = model.create(instance_type="ml.m5.large")

deploy_step = ModelStep(
    name="ModelDeployment",
    step_args=create_model_args,
)



In [29]:
# Assemble the pipeline from the three parameters and the two steps.
pipeline_parameters = [s3_input_data, instance_type, model_output_path]
pipeline_steps = [train_step, deploy_step]

pipeline = Pipeline(
    name="TrainingAndDeploymentPipeline",
    parameters=pipeline_parameters,
    steps=pipeline_steps,
)

# Create or update the pipeline definition under the execution role.
# This does not start an execution; that is a separate call:
#   execution = pipeline.start()
# The response dict is the last expression, so it is shown as the cell output.
pipeline.upsert(role_arn=role)




{'PipelineArn': 'arn:aws:sagemaker:ap-northeast-1:070576557102:pipeline/TrainingAndDeploymentPipeline',
 'ResponseMetadata': {'RequestId': '516e9c96-7cc7-4fdd-a51c-2edd86858855',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '516e9c96-7cc7-4fdd-a51c-2edd86858855',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '102',
   'date': 'Mon, 09 Sep 2024 23:26:39 GMT'},
  'RetryAttempts': 0}}

In [None]:
# Start an execution of the upserted pipeline. `execution` was previously never
# assigned (the only assignment was commented out), so this cell failed with a
# NameError on a fresh kernel.
# NOTE: the parameter defaults are placeholder S3 locations; pass real values
# via pipeline.start(parameters={...}) for the run to succeed.
execution = pipeline.start()

# Show the status right after start; a bare describe() in the middle of a cell
# would produce no visible output.
print(execution.describe()["PipelineExecutionStatus"])

# Block until the pipeline execution finishes.
execution.wait()
