Install sagemaker correct versions

In [80]:
!pip install sagemaker==2.140.1
!pip install sagemaker-experiments
from IPython.display import clear_output
clear_output()

Setup Sagemaker session

In [81]:
import boto3
import sagemaker
from sagemaker import get_execution_role
print(sagemaker.__version__)
import sys
import IPython

role = get_execution_role()
sess = sagemaker.Session()
region = boto3.session.Session().region_name
print("Region = {}".format(region))
sm = boto3.Session().client('sagemaker')

2.140.1
Region = us-east-1


get the Amazon S3 buckets and folders for the project

In [82]:
rawbucket= sess.default_bucket() # Alternatively you can use our custom bucket here. 

prefix = 'sagemaker-modelmonitor' # use this prefix to store all files pertaining to this workshop.

dataprefix = prefix + '/data'

Test the model on Endpoint for realtime inference

In [83]:
from sagemaker.model_monitor import DataCaptureConfig
from sagemaker.predictor import csv_serializer

sm_client = boto3.client('sagemaker')

latest_training_job = sm_client.list_training_jobs(MaxResults=1,
                                                SortBy='CreationTime',
                                                SortOrder='Descending')

training_job_name=TrainingJobName=latest_training_job['TrainingJobSummaries'][0]['TrainingJobName']

training_job_description = sm_client.describe_training_job(TrainingJobName=training_job_name)

model_data = training_job_description['ModelArtifacts']['S3ModelArtifacts']
container_uri = training_job_description['AlgorithmSpecification']['TrainingImage']

# create a model.
def create_model(role, model_name, container_uri, model_data):
    return sm_client.create_model(
        ModelName=model_name,
        PrimaryContainer={
        'Image': container_uri,
        'ModelDataUrl': model_data,
        },
        ExecutionRoleArn=role)
    

try:
    model = create_model(role, training_job_name, container_uri, model_data)
except Exception as e:
        sm_client.delete_model(ModelName=training_job_name)
        model = create_model(role, training_job_name, container_uri, model_data)
        

print('Model created: '+model['ModelArn'])

Model created: arn:aws:sagemaker:us-east-1:686810899741:model/sagemaker-xgboost-2023-04-25-18-33-28-097


In [84]:
# Endpoint Config name
endpoint_config_name = f"{training_job_name}-endpoint-config"

# Endpoint config parameters
production_variant_dict = {
                           "VariantName": "Alltraffic",
                           "ModelName": training_job_name,
                           "InitialInstanceCount": 1,
                           "InstanceType": "ml.m5.xlarge",
                           "InitialVariantWeight": 1
                          }

# Define bucket path
write_bucket = sess.default_bucket()
write_prefix = "breast-cancer-demo"
data_capture_key = f"{write_prefix}/data-capture"
data_capture_uri = f"s3://{write_bucket}/{data_capture_key}"

# Data capture config parameters
data_capture_config_dict = {
                            "EnableCapture": True,
                            "InitialSamplingPercentage": 100,
                            "DestinationS3Uri": data_capture_uri,
                            "CaptureOptions": [{"CaptureMode" : "Input"}, {"CaptureMode" : "Output"}]
                           }


# Create endpoint config if one with the same name does not exist
endpoint_config_matches = sm_client.list_endpoint_configs(NameContains=endpoint_config_name)["EndpointConfigs"]
if not endpoint_config_matches:
    endpoint_config_response = sm_client.create_endpoint_config(
                                                                EndpointConfigName=endpoint_config_name,
                                                                ProductionVariants=[production_variant_dict],
                                                                DataCaptureConfig=data_capture_config_dict
                                                               )
else:
    print(f"Endpoint config with name {endpoint_config_name} already exists! Change endpoint config name to create new")

Endpoint config with name sagemaker-xgboost-2023-04-25-18-33-28-097-endpoint-config already exists! Change endpoint config name to create new


In [85]:
endpoint_name = f"{training_job_name}-endpoint"

endpoint_matches = sm_client.list_endpoints(NameContains=endpoint_name)["Endpoints"]
if not endpoint_matches:
    endpoint_response = sm_client.create_endpoint(
                                                  EndpointName=endpoint_name,
                                                  EndpointConfigName=endpoint_config_name
                                                 )
else:
    print(f"Endpoint with name {endpoint_name} already exists! Change endpoint name to create new")

resp = sm_client.describe_endpoint(EndpointName=endpoint_name)
status = resp["EndpointStatus"]
while status == "Creating":
    print(f"Endpoint Status: {status}...")
    time.sleep(60)
    resp = sm_client.describe_endpoint(EndpointName=endpoint_name)
    status = resp["EndpointStatus"]
print(f"Endpoint Status: {status}")

Endpoint with name sagemaker-xgboost-2023-04-25-18-33-28-097-endpoint already exists! Change endpoint name to create new
Endpoint Status: InService


In [86]:
sm_runtime_client = boto3.client("sagemaker-runtime")

# Fetch test data to run predictions with the endpoint
test_data_location = sess.upload_data('data/test_data.csv', bucket=rawbucket, key_prefix=dataprefix)
test_df = pd.read_csv(test_data_location, names = [str(x) for x in range(14)])
test_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,2,12.86,1.35,2.32,18.0,122.0,1.51,1.25,0.21,0.94,4.1,0.76,1.29,630.0
1,2,13.32,3.24,2.38,21.5,92.0,1.93,0.76,0.45,1.25,8.42,0.55,1.62,650.0
2,0,12.93,3.8,2.65,18.6,102.0,2.41,2.41,0.25,1.98,4.5,1.03,3.52,770.0
3,2,13.62,4.95,2.35,20.0,92.0,2.0,0.8,0.47,1.02,4.4,0.91,2.05,550.0
4,1,11.96,1.09,2.3,21.0,101.0,3.38,2.14,0.13,1.65,3.21,0.99,3.13,886.0


In [87]:
# For content type text/csv, payload should be a string with commas separating the values for each feature
# This is the inference request serialization step
# CSV serialization
csv_file = io.StringIO()
test_sample = test_df.drop(["0"], axis=1)
test_sample.to_csv(csv_file, sep=",", header=False, index=False)
payload = csv_file.getvalue()
response = sm_runtime_client.invoke_endpoint(
                                             EndpointName=endpoint_name,
                                             Body=payload,
                                             ContentType="text/csv",
                                             Accept="text/csv"
                                            )

# This is the inference response deserialization step
# This is a bytes object
result = response["Body"].read()
# Decoding bytes to a string
result = result.decode("utf-8")
# Converting to list of predictions
result = re.split(",|\n",result)
result.pop()
print("result: " + json.dumps(result))

result: ["2.0", "2.0", "0.0", "2.0", "1.0", "0.0", "0.0", "1.0", "1.0", "1.0", "2.0", "1.0", "0.0", "2.0", "1.0", "1.0", "0.0", "1.0", "0.0", "1.0", "0.0", "2.0", "0.0", "0.0", "0.0", "2.0", "2.0", "0.0", "1.0", "0.0", "2.0", "2.0", "1.0", "2.0", "2.0", "2.0"]


In [89]:
# result = [(int) (x) for x in result]
output_df = pd.DataFrame({"Predicted":result})
output_df['Label'] = test_df["0"].values
output_df

Unnamed: 0,Predicted,Label
0,2.0,2
1,2.0,2
2,0.0,0
3,2.0,2
4,1.0,1
5,0.0,0
6,0.0,0
7,1.0,1
8,1.0,1
9,1.0,1


In [90]:
from sklearn.metrics import confusion_matrix, accuracy_score
confusion_matrix = pd.crosstab(output_df['Predicted'], output_df['Label'], rownames=['Actual'], colnames=['Predicted'], margins = True)
confusion_matrix

Predicted,0,1,2,All
Actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,11,1,0,12
1.0,1,9,1,11
2.0,0,1,12,13
All,12,11,13,36


In [29]:
# Delete model
sm_client.delete_model(ModelName=training_job_name)

# Delete endpoint configuration
sm_client.delete_endpoint_config(EndpointConfigName=endpoint_config_name)

# Delete endpoint
sm_client.delete_endpoint(EndpointName=endpoint_name)

{'ResponseMetadata': {'RequestId': '43597a6c-9de6-4969-b712-a5ee26bce39b',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '43597a6c-9de6-4969-b712-a5ee26bce39b',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Tue, 25 Apr 2023 19:54:30 GMT'},
  'RetryAttempts': 0}}

In [78]:
json_fields = {
    "alcohol":[14.23],
    "malic_acid":[1.71],
    "ash":[2.43],
    "alcalinity_of_ash":[15.6],
    "magnesium":[127.0],
    "total_phenols":[2.80],
    "flavanoids":[3.06],
    "nonflavanoid_phenols":[0.28],
    "proanthocyanins":[2.29],
    "color_intensity":[5.64],
    "hue":[1.04],
    "od280/od315_of_diluted_wines":[3.92],
    "proline":[1065.0]}

In [79]:
pd.DataFrame.from_dict(json_fields)

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0
