In [1]:
# install SageMaker python SDK
!pip install -U sagemaker

DEPRECATION: celery 4.4.0 has a non-standard dependency specifier pytz>dev. pip 24.1 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of celery or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063

In [2]:
# Enable the autoreload extension in mode 2 so that edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Load the dotenv extension and run it to read variables from a .env file.
# NOTE(review): presumably this is what supplies the ROLE and BUCKET values
# read through os.environ in the session-setup cell - confirm a .env file
# with both keys exists next to this notebook.
%load_ext dotenv
%dotenv

In [3]:
import sys
import os
import boto3
import sagemaker
from sagemaker.workflow.pipeline_context import  PipelineSession 

# Fail fast and name every missing variable, instead of surfacing a bare
# KeyError for only the first one after the sessions have been created.
_missing_env = [name for name in ("ROLE", "BUCKET") if name not in os.environ]
if _missing_env:
    raise KeyError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_env)
        + ". Define them in the .env file loaded by %dotenv."
    )

# Regular SageMaker session; also the source of the AWS region used to build
# the names of the sample-data buckets in later cells.
sagemaker_session = sagemaker.session.Session()
region = sagemaker_session.boto_region_name

# IAM role ARN is taken from the environment.
# Alternative previously used here: sagemaker.get_execution_role()
role = os.environ["ROLE"]

# Dedicated session object from sagemaker.workflow.pipeline_context.
pipeline_session = PipelineSession()

# Target S3 bucket is taken from the environment.
# Alternative previously used here: sagemaker_session.default_bucket()
default_bucket = os.environ["BUCKET"]

# Plain string literal: there is nothing to interpolate, so no f-prefix.
model_package_group_name = "AbaloneModelPackageGroupName"

print(role)
print(default_bucket)


sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /Users/kiptoo/Library/Application Support/sagemaker/config.yaml
arn:aws:iam::956763695025:role/service-role/AmazonSageMaker-ExecutionRole-20231107T213330
hillaryabalone


In [4]:
# Create the local download directory if it does not exist yet.
# Pure-Python equivalent of `mkdir -p data`, so it also works on kernels
# without a POSIX shell; exist_ok=True keeps the cell safe to re-run.
os.makedirs("data", exist_ok=True)

In [5]:
# Copy the Abalone CSV from the regional example-files bucket to a local file,
# then upload that file under the "abalone" prefix of our own bucket.
local_path = "data/abalone-dataset.csv"
base_uri = f"s3://{default_bucket}/abalone"

s3 = boto3.resource("s3")
s3.Bucket(f"sagemaker-example-files-prod-{region}").download_file(
    Key="datasets/tabular/uci_abalone/abalone.csv",
    Filename=local_path,
)

# The returned S3 URI is used as the default of the InputData pipeline parameter.
input_data_uri = sagemaker.s3.S3Uploader.upload(
    local_path=local_path,
    desired_s3_uri=base_uri,
)
print(input_data_uri)

s3://hillaryabalone/abalone/abalone-dataset.csv


In [6]:
# Second dataset: fetched from the regional seed-code bucket and uploaded next
# to the training data; it feeds the batch transformation that runs after
# model creation.
local_path = "data/abalone-dataset-batch"
base_uri = f"s3://{default_bucket}/abalone"

s3 = boto3.resource("s3")
s3.Bucket(f"sagemaker-servicecatalog-seedcode-{region}").download_file(
    Key="dataset/abalone-dataset-batch",
    Filename=local_path,
)

# The returned S3 URI is used as the default of the BatchData pipeline parameter.
batch_data_uri = sagemaker.s3.S3Uploader.upload(
    local_path=local_path,
    desired_s3_uri=base_uri,
)
print(batch_data_uri)

s3://hillaryabalone/abalone/abalone-dataset-batch


In [None]:
# Define parameters to parametrize pipeline execution.
# Parameter types used: ParameterString, ParameterInteger, ParameterFloat.
#
# processing_instance_count - The instance count of the processing job.
# instance_type - The ml.* instance type of the training job.
# model_approval_status - The approval status to register with the trained model
#     for CI/CD purposes ("PendingManualApproval" is the default).
# input_data - The S3 bucket URI location of the input data.
# batch_data - The S3 bucket URI location of the batch data.
# mse_threshold - The Mean Squared Error (MSE) threshold used to verify the
#     accuracy of a model.

from sagemaker.workflow.parameters import ParameterFloat, ParameterInteger, ParameterString

processing_instance_count = ParameterInteger(name="ProcessingInstanceCount", default_value=1)
instance_type = ParameterString(name="TrainingInstanceType", default_value="ml.m5.xlarge")
model_approval_status = ParameterString(
    name="ModelApprovalStatus", default_value="PendingManualApproval"
)
input_data = ParameterString(name="InputData", default_value=input_data_uri)
batch_data = ParameterString(name="BatchData", default_value=batch_data_uri)
# The threshold is numeric (default 6.0), so it is declared as a ParameterFloat.
# A ParameterString cannot carry a float default: the SDK checks the default
# value against the parameter's Python type.
mse_threshold = ParameterFloat(name="MseThreshold", default_value=6.0)
