# Install dependencies

In [1]:
%pip install -U -r requirements.txt

Collecting scikit-learn==1.3.2 (from -r requirements.txt (line 2))
  Downloading scikit_learn-1.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting s3fs (from -r requirements.txt (line 3))
  Downloading s3fs-2024.6.0-py3-none-any.whl.metadata (1.6 kB)
Collecting fsspec==2024.6.0.* (from s3fs->-r requirements.txt (line 3))
  Downloading fsspec-2024.6.0-py3-none-any.whl.metadata (11 kB)
Downloading scikit_learn-1.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.8/10.8 MB[0m [31m50.9 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0mm
[?25hDownloading s3fs-2024.6.0-py3-none-any.whl (29 kB)
Downloading fsspec-2024.6.0-py3-none-any.whl (176 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m176.9/176.9 kB[0m [31m19.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fsspec, scikit-learn, s3fs
  Attempting uninstall: fsspec
    Found

# Import SageMaker Defaults Configurations

The Amazon SageMaker Python SDK supports setting of default values for AWS infrastructure primitive types, such as instance types, Amazon S3 folder locations, and IAM roles. You can override the default locations of these files by setting the `SAGEMAKER_USER_CONFIG_OVERRIDE` environment variables for the user-defined configuration file paths.

In [2]:
import os

# Use the current working directory as the location for SageMaker Python SDK config file
os.environ["SAGEMAKER_USER_CONFIG_OVERRIDE"] = os.getcwd()

# Download dataset

Download the dataset from the UCI website.

In [3]:
import urllib
import os

input_data_dir = 'data/'
if not os.path.exists(input_data_dir):
    os.makedirs(input_data_dir)
input_data_path = os.path.join(input_data_dir, 'predictive_maintenance_raw_data_header.csv')
dataset_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00601/ai4i2020.csv"
urllib.request.urlretrieve(dataset_url, input_data_path)

('data/predictive_maintenance_raw_data_header.csv',
 <http.client.HTTPMessage at 0x7fa05de746a0>)

In [4]:
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

df = pd.read_csv(input_data_path)

print('The shape of the dataset is:', df.shape)

The shape of the dataset is: (10000, 14)


# Test case 1: Deploy SageMaker Endpoint

## Expected result: The user should not be able to deploy an endpoint

### Important! Make sure to execute [01_SageMaker_Jobs.ipynb](./01_SageMaker_Jobs.ipynb)

In [10]:
import time
import os
import sagemaker
from sagemaker import get_execution_role, session
import boto3

In [11]:
sagemaker_session = sagemaker.Session()
region = sagemaker_session.boto_region_name

role = get_execution_role()
s3_bucket_name = sagemaker_session.default_bucket()

sm_client = boto3.client('sagemaker', region_name=region)

Utility function for getting the last succeed job name

In [12]:
job_prefix = "amzn-sm-btd-train"

In [13]:
def get_last_job_name(job_name_prefix):
    import boto3
    sagemaker_client = boto3.client('sagemaker')

    search_response = sagemaker_client.search(
        Resource='TrainingJob',
        SearchExpression={
            'Filters': [
                {
                    'Name': 'TrainingJobName',
                    'Operator': 'Contains',
                    'Value': job_name_prefix
                },
                {
                    'Name': 'TrainingJobStatus',
                    'Operator': 'Equals',
                    'Value': "Completed"
                }
            ]
        },
        SortBy='CreationTime',
        SortOrder='Descending',
        MaxResults=1)

    return search_response['Results'][0]['TrainingJob']['TrainingJobName']

In [14]:
job_name = get_last_job_name(job_prefix)

job_name

'amzn-sm-btd-train-2024-06-19-10-33-37-466'

Create model package specifications

In [15]:
from sagemaker.image_uris import retrieve

In [16]:
image_uri = retrieve("xgboost", region=region, version="latest", image_scope="inference")

image_uri

'811284229777.dkr.ecr.us-east-1.amazonaws.com/xgboost:latest'

In [17]:
from sagemaker.model import Model

In [19]:
# Specify the model source
model_url = f"s3://{s3_bucket_name}/{job_name}/{job_name}/output/model.tar.gz"

model = Model(image_uri=image_uri, model_data=model_url, role=role)

model.deploy(
    initial_instance_count=1,
    instance_type="ml.m4.xlarge",
    endpoint_name=f"{job_prefix}-endpoint"
)

ClientError: An error occurred (AccessDeniedException) when calling the CreateEndpointConfig operation: User: arn:aws:sts::430368689283:assumed-role/AmazonSageMakerExecutionRole-data-science-us-east-1/SageMaker is not authorized to perform: sagemaker:CreateEndpointConfig on resource: arn:aws:sagemaker:us-east-1:430368689283:endpoint-config/amzn-sm-btd-train-endpoint because no identity-based policy allows the sagemaker:CreateEndpointConfig action

In [None]:
response = sm_client.delete_model_package(
    ModelPackageName=model_package_arn
)

response