### Import statements and declare parameters and constants

In [8]:
import boto3 
import pandas as pd 
import sagemaker
from sagemaker.workflow.pipeline_context import PipelineSession 

# S3 handle. Note: despite its name this is a boto3 *resource* object
# (boto3.resource), not a low-level client.
s3_client = boto3.resource('s3')

# Name under which the inference pipeline is created.
pipeline_name = "sagemaker-mlops-fraud-inference-pipeline"

# Regular SageMaker session: used here to look up the region and default bucket.
sagemaker_session = sagemaker.Session()
region = sagemaker_session.boto_region_name
role = sagemaker.get_execution_role()

# Separate session object of type PipelineSession, passed to the Model objects
# that back the pipeline steps.
pipeline_session = PipelineSession()
default_bucket = sagemaker_session.default_bucket()

# Model registry group that the approved model package is looked up in.
model_package_group_name = "FraudModelPackageGroup"

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [9]:
from sagemaker.workflow.parameters import ( 
 ParameterInteger, 
 ParameterString, 
 ParameterFloat) 

# --- S3 locations -----------------------------------------------------------
bucket_name = 'eliezerraj-908671954593-dataset'
prefix_name = 'payment'
file_name = 'payment.csv'

# All URIs are derived from the bucket/prefix above.
input_dataset = f"s3://{bucket_name}/{prefix_name}/{file_name}"
model_path = f"s3://{bucket_name}/{prefix_name}/output"
batch_data_path = f"s3://{bucket_name}/{prefix_name}/data/batch/batch.csv"

print("input_dataset : ", input_dataset)
print("batch_data_path : ", batch_data_path)
print("model_path : ", model_path)

base_job_prefix = "fraud-model"

# --- Pipeline parameters (defaults can be overridden per execution) ---------
processing_instance_type = ParameterString(
    name="ProcessingInstanceType",
    default_value="ml.m4.xlarge",
)
processing_instance_count = ParameterInteger(
    name="ProcessingInstanceCount",
    default_value=1,
)

transform_instance_type = ParameterString(
    name="TransformInstanceType",
    default_value="ml.m4.xlarge",
)
transform_instance_count = ParameterInteger(
    name="TransformInstanceCount",
    default_value=1,
)

model_approval_status = ParameterString(
    name="ModelApprovalStatus",
    default_value="PendingManualApproval",
)

input_dataset :  s3://eliezerraj-908671954593-dataset/payment/payment.csv
batch_data_path :  s3://eliezerraj-908671954593-dataset/payment/data/batch/batch.csv
model_path :  s3://eliezerraj-908671954593-dataset/payment/output


### Get info about approved models

In [11]:
sm_client = boto3.client("sagemaker")

# List the approved model packages of the group, newest first.
approved_model_packages = sm_client.list_model_packages(
    ModelApprovalStatus='Approved',
    ModelPackageGroupName=model_package_group_name,
    SortBy='CreationTime',
    SortOrder='Descending',
)

# Pick the latest approved package. Fail here with an explicit message when
# there is none, instead of printing a warning and then hitting a NameError
# on the first use of the (never assigned) ARN variable.
approved_summaries = approved_model_packages.get('ModelPackageSummaryList', [])
if not approved_summaries:
    raise RuntimeError(
        f"No approved model package found in group '{model_package_group_name}'; "
        "approve a model version before building the inference pipeline."
    )
latest_approved_model_package_arn = approved_summaries[0]['ModelPackageArn']

print("latest_approved_model_package_arn: ", latest_approved_model_package_arn)

# Retrieve the full description of the selected package.
latest_approved_model_package_descr = sm_client.describe_model_package(
    ModelPackageName=latest_approved_model_package_arn
)

# Only the first container of the inference specification is used.
first_container = latest_approved_model_package_descr['InferenceSpecification']['Containers'][0]

# model artifact uri (tar.gz file)
model_artifact_uri = first_container['ModelDataUrl']
# sagemaker image in ecr
image_uri = first_container['Image']

print("model_artifact_uri: ", model_artifact_uri)
print("image_uri: ", image_uri)

latest_approved_model_package_arn:  arn:aws:sagemaker:us-east-2:908671954593:model-package/FraudModelPackageGroup/4
model_artifact_uri:  s3://eliezerraj-908671954593-dataset/payment/output/y17j1cxmga28-HyperPar-6guVCDEOrA-002-0521b70b/output/model.tar.gz
image_uri:  257758044811.dkr.ecr.us-east-2.amazonaws.com/sagemaker-xgboost:1.0-1-cpu-py3


In [5]:
### Step 5: Define create model step

In [6]:
from sagemaker import Model
from sagemaker.inputs import CreateModelInput
from sagemaker.workflow.model_step import ModelStep
from time import gmtime, strftime

# Model name carries a UTC timestamp suffix (second resolution).
model_name = f"xgboost-ec2-fraud-v3-{strftime('%Y-%m-%d-%H-%M-%S', gmtime())}"

# Environment variables handed to the inference container.
env_vars = dict(
    SAGEMAKER_CONTAINER_LOG_LEVEL="20",
    SAGEMAKER_ENABLE_CLOUDWATCH_METRICS="false",
)

# Model built from the approved package's image and artifact; it is bound to
# the pipeline session so that create() yields step arguments for ModelStep.
model = Model(
    image_uri=image_uri,
    model_data=model_artifact_uri,
    role=role,
    env=env_vars,
    name=model_name,
    sagemaker_session=pipeline_session,
)

# NOTE(review): "ml.eia1.medium" is an Elastic Inference accelerator type --
# confirm it is still required/supported for this XGBoost image.
step_create_model = ModelStep(
    name="FraudPublishModelEC2",
    step_args=model.create(
        instance_type="ml.m4.large",
        accelerator_type="ml.eia1.medium",
    ),
)

print("step_create_model: ", step_create_model)

step_create_model:  ModelStep(name='FraudPublishModelEC2', steps=[CreateModelStep(name='FraudPublishModelEC2-CreateModel', display_name=None, description=None, step_type=<StepTypeEnum.CREATE_MODEL: 'Model'>, depends_on=None)])




In [7]:
from sagemaker import Model
from sagemaker.inputs import CreateModelInput
from sagemaker.workflow.model_step import ModelStep
from time import gmtime, strftime

# Model name carries a UTC timestamp suffix (second resolution).
model_name = f"xgboost-serverless-fraud-v3-{strftime('%Y-%m-%d-%H-%M-%S', gmtime())}"

# Environment variables handed to the inference container.
env_vars = dict(
    SAGEMAKER_CONTAINER_LOG_LEVEL="20",
    SAGEMAKER_ENABLE_CLOUDWATCH_METRICS="false",
)

# Same image and artifact as the approved package; note that this rebinds the
# notebook-level `model`, `model_name` and `env_vars` names.
model = Model(
    image_uri=image_uri,
    model_data=model_artifact_uri,
    role=role,
    env=env_vars,
    name=model_name,
    sagemaker_session=pipeline_session,
)

# NOTE(review): "SingleModel" is not an ML instance type -- confirm whether a
# serverless_inference_config was intended here instead of instance_type.
step_create_model_serveless = ModelStep(
    name="FraudPublishModelServeless",
    step_args=model.create(instance_type="SingleModel"),
)

print("step_create_model_serveless: ", step_create_model_serveless)

step_create_model_serveless:  ModelStep(name='FraudPublishModelServeless', steps=[CreateModelStep(name='FraudPublishModelServeless-CreateModel', display_name=None, description=None, step_type=<StepTypeEnum.CREATE_MODEL: 'Model'>, depends_on=None)])


In [54]:
from sagemaker.workflow.pipeline import Pipeline

# Assemble the pipeline: two model-creation steps plus the instance
# type/count parameters registered on the pipeline definition.
pipeline = Pipeline(
    name=pipeline_name,
    steps=[step_create_model, step_create_model_serveless],
    parameters=[
        processing_instance_type,
        processing_instance_count,
        transform_instance_type,
        transform_instance_count,
    ],
)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [53]:
# Create a new or update existing Pipeline
pipeline.upsert(role_arn=role)

# Start a Pipeline execution and keep the returned handle so later cells can
# refer to this run; the bare name on the last line displays it as before.
execution = pipeline.start()
execution

Popping out 'ModelName' from the pipeline definition by default since it will be overridden at pipeline execution time. Please utilize the PipelineDefinitionConfig to persist this field in the pipeline definition if desired.
Popping out 'ModelName' from the pipeline definition by default since it will be overridden at pipeline execution time. Please utilize the PipelineDefinitionConfig to persist this field in the pipeline definition if desired.
Popping out 'ModelName' from the pipeline definition by default since it will be overridden at pipeline execution time. Please utilize the PipelineDefinitionConfig to persist this field in the pipeline definition if desired.
Popping out 'ModelName' from the pipeline definition by default since it will be overridden at pipeline execution time. Please utilize the PipelineDefinitionConfig to persist this field in the pipeline definition if desired.


_PipelineExecution(arn='arn:aws:sagemaker:us-east-2:908671954593:pipeline/sagemaker-mlops-fraud-inference-pipeline/execution/628edctlpdhr', sagemaker_session=<sagemaker.session.Session object at 0x7fc53e6e92d0>)