In [2]:
# Import libraries
import os
import boto3
import sagemaker

from sagemaker import get_execution_role

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


In [3]:
# Preconfigurations
region = boto3.Session().region_name
role = get_execution_role()

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


In [4]:
print(region)

us-west-2


In [5]:
print(role)

arn:aws:iam::031114635023:role/service-role/AmazonSageMaker-ExecutionRole-20231112T223041


In [6]:
# Create S3 bucket to store model artifact
bucket = 'sagemaker-tcga-catboost'
bucket_path = "https://s3-{}.amazonaws.com/{}".format(region, bucket)

print(bucket)
print(bucket_path)

sagemaker-tcga-catboost
https://s3-us-west-2.amazonaws.com/sagemaker-tcga-catboost


In [7]:
# Install same version of CatBoost as the model using conda-forge
!pip install catboost

Collecting catboost
  Obtaining dependency information for catboost from https://files.pythonhosted.org/packages/15/95/951b51229a9c8af767e48b1f9ef7baa87279b1f5847d2f85de0855578e5d/catboost-1.2.2-cp310-cp310-manylinux2014_x86_64.whl.metadata
  Downloading catboost-1.2.2-cp310-cp310-manylinux2014_x86_64.whl.metadata (1.2 kB)
Collecting graphviz (from catboost)
  Downloading graphviz-0.20.1-py3-none-any.whl (47 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.0/47.0 kB[0m [31m965.6 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Downloading catboost-1.2.2-cp310-cp310-manylinux2014_x86_64.whl (98.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 MB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hInstalling collected packages: graphviz, catboost
Successfully installed catboost-1.2.2 graphviz-0.20.1
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -

In [8]:
model_file_name = 'tcga-catboost-classifier'

In [9]:
import joblib
from catboost import CatBoostClassifier

mymodel = joblib.load(model_file_name)

In [10]:
import pandas as pd

test_data = pd.read_csv('tcga-catboost-test.csv', sep=',')

mymodel.predict(test_data)

array([[0],
       [2],
       [2],
       [1],
       [3]])

In [12]:
# Create a model artifact
mymodel.save_model(model_file_name)

In [13]:
!tar czvf model.tar.gz $model_file_name

tcga-catboost-classifier


In [16]:
# Upload created model artifact to the S3 bucket

prefix = "sagemaker"

fObj = open("model.tar.gz", "rb")
key = os.path.join(prefix, model_file_name, "model.tar.gz")
print(key)
boto3.Session().resource("s3").Bucket(bucket).Object(key).upload_fileobj(fObj)

sagemaker/tcga-catboost-classifier/model.tar.gz


In [31]:
# Create a SageMaker model and using the prebuilt image to host our own model
from sagemaker import image_uris

train_model_id, train_model_version, train_scope = "catboost-classification-model", "1.2", "training"
training_instance_type = "ml.t3.medium"

container = image_uris.retrieve(
    region=region,
    framework=None,
    model_id=train_model_id,
    model_version=train_model_version,
    image_scope=train_scope,
    instance_type=training_instance_type
)

In [33]:
# Replacing built-in model with saved model artifact from S3 bucket
model_url = 'https://sagemaker-tcga-catboost.s3.us-west-2.amazonaws.com/sagemaker/tcga-catboost-classifier/model.tar.gz'

sm_client = boto3.client("sagemaker")

primary_container = {"Image": container, "ModelDataUrl": model_url}

create_model_response = sm_client.create_model(ModelName="tcga-catboost", ExecutionRoleArn=role, PrimaryContainer=primary_container)

print(create_model_response["ModelArn"])

arn:aws:sagemaker:us-west-2:031114635023:model/tcga-catboost


In [34]:
# Create endpoint configuration
endpoint_config_name = "tcga-catboost-classifier-endpointconfig"

print(endpoint_config_name)

create_endpoint_config_response = sm_client.create_endpoint_config(EndpointConfigName=endpoint_config_name, 
                                                                  ProductionVariants=[
                                                                      {
                                                                          "InstanceType":"ml.m4.xlarge",
                                                                          "InitialInstanceCount":1,
                                                                          "InitialVariantWeight":1,
                                                                          "ModelName":"tcga-catboost",
                                                                          "VariantName":"AllTraffic",
                                                                      }
                                                                  ],
                                                                )
print("Endpoint Config Arn: " + create_endpoint_config_response["EndpointConfigArn"])

tcga-catboost-classifier-endpointconfig
Endpoint Config Arn: arn:aws:sagemaker:us-west-2:031114635023:endpoint-config/tcga-catboost-classifier-endpointconfig


In [35]:
# Create endpoint to serve up the model
import time
endpoint_name = "tcga-catboost-classifier-endpoint"
print(endpoint_name)

create_endpoint_response = sm_client.create_endpoint(EndpointName=endpoint_name, EndpointConfigName=endpoint_config_name)
print(create_endpoint_response["EndpointArn"])

resp = sm_client.describe_endpoint(EndpointName=endpoint_name)
status = resp["EndpointStatus"]

while status == "Creating":
    time.sleep(60)
    resp = sm_client.describe_endpoint(EndpointName=endpoint_name)
    status = resp["EndpointStatus"]
    print("Status: " + status)
    
print("Arn: " + resp["EndpointArn"])
print("Status: " + status)

tcga-catboost-classifier-endpoint
arn:aws:sagemaker:us-west-2:031114635023:endpoint/tcga-catboost-classifier-endpoint
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Failed
Arn: arn:aws:sagemaker:us-west-2:031114635023:endpoint/tcga-catboost-classifier-endpoint
Status: Failed


In [28]:
# Validate model for usage
runtime_client = boto3.client("runtime.sagemaker")

In [None]:
import json

test_file_name = ("tcga-catboost-test.csv")

with open(test_file_name, "r") as f:
    payload = f.read().strip()
    
print ("Payload: \n")
print(payload)

response = runtime_client.invoke_endpoint(EndpointName=endpoint_name, ContentType="text/csv", Body=payload)

print("Results: \n")

result = response["Body"].read().decode("utf-8")

print("\nPredicted Class Probabilities: {}.".format(result))