<div style="background-color: darkgreen; font-size: 20px; color: white;">
Setup

In [17]:
!pip install --upgrade sagemaker

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m23.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [18]:
!pip install xgboost

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m23.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [19]:
import os
import boto3
import re
import sagemaker

role = sagemaker.get_execution_role()
region = sagemaker.Session().boto_region_name

bucket = 'sagemaker-us-east-1-717145514721'
prefix = 'nyc-taxi'

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


<div style="background-color: teal; font-size: 15px; color: white;">
Load Model from Stored Weights

In [35]:
xgboost_image = sagemaker.image_uris.retrieve("xgboost", region, "1.7-1")
weights_s3_path = 's3://sagemaker-us-east-1-717145514721/nyc-taxi/xgboost-runs/output/sagemaker-xgboost-2023-12-06-17-25-59-061/output/model.tar.gz'

xgb_model = sagemaker.model.Model(
    image_uri=xgboost_image,
    model_data=weights_s3_path,
    role=role)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


<div style="background-color: teal; font-size: 15px; color: white;">
Load Data Sample

In [22]:
import sagemaker

data_bucket_s3_uri = 's3://sagemaker-us-east-1-717145514721/nyc-taxi/data/processed/'

# Filter directory for csv files
csv_files = [
    x for x in sagemaker.s3.S3Downloader.list(data_bucket_s3_uri) if x[-4:] == ".csv"
]

# Download one csv file
sagemaker.s3.S3Downloader.download(csv_files[0], "demo_data")

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


['demo_data/part-00000-eec8fbb0-c78f-49ce-bd23-9d83b6323664-c000.csv']

In [23]:
import glob
import pandas as pd

# Find the file that matches the directory
csv_file = glob.glob("demo_data/*.csv")[0]

column_headers = [
    "day_of_week",
    "month",
    "hour",
    "pickup_location_id",
    "dropoff_location_id",
    "trip_distance",
    "fare_amount",
]

raw_dataset = pd.read_csv(csv_file, names=column_headers)
raw_dataset.head()

Unnamed: 0,day_of_week,month,hour,pickup_location_id,dropoff_location_id,trip_distance,fare_amount
0,0.01,2,4,17,74,194,7.97
1,0.01,3,4,13,201,93,20.89
2,0.01,4,4,5,93,78,9.44
3,0.01,6,1,8,198,37,2.5
4,0.1,3,1,21,242,242,0.05


<div style="background-color: darkgreen; font-size: 20px; color: white;">
Inference Script

In [24]:
%%writefile scripts/xgb_inference.py

import json
import os
import pickle as pkl

import numpy as np

import sagemaker_xgboost_container.encoder as xgb_encoders


def model_fn(model_dir):
    """
    Deserialize and return fitted model.
    """
    model_file = "xgboost-model"
    booster = pkl.load(open(os.path.join(model_dir, model_file), "rb"))
    return booster


def input_fn(request_body, request_content_type):
    """
    The SageMaker XGBoost model server receives the request data body and the content type,
    and invokes the `input_fn`.

    Return a DMatrix (an object that can be passed to predict_fn).
    """
    if request_content_type == "text/csv":
        return xgb_encoders.libsvm_to_dmatrix(request_body)
    else:
        raise ValueError(
            "Content type {} is not supported.".format(request_content_type)
        )


def predict_fn(input_data, model):
    """
    SageMaker XGBoost model server invokes `predict_fn` on the return value of `input_fn`.

    Return a two-dimensional NumPy array where the first columns are predictions
    and the remaining columns are the feature contributions (SHAP values) for that prediction.
    """
    prediction = model.predict(input_data)
    feature_contribs = model.predict(input_data, pred_contribs=True, validate_features=False)
    output = np.hstack((prediction[:, np.newaxis], feature_contribs))
    return output


def output_fn(predictions, content_type):
    """
    After invoking predict_fn, the model server invokes `output_fn`.
    """
    if content_type == "text/csv":
        return ','.join(str(x) for x in predictions[0])
    else:
        raise ValueError("Content type {} is not supported.".format(content_type))


Overwriting scripts/xgb_inference.py


<div style="background-color: teal; font-size: 15px; color: white;">
Deploy

In [42]:
from sagemaker.xgboost.model import XGBoostModel

model_data = xgb_model.model_data
script_path = "scripts/xgb_inference.py"
print(model_data)

xgb_inference_model = XGBoostModel(
    model_data=model_data,
    role=role,
    entry_point=script_path,
    framework_version="1.7-1",
)

s3://sagemaker-us-east-1-717145514721/nyc-taxi/xgboost-runs/output/sagemaker-xgboost-2023-12-06-17-25-59-061/output/model.tar.gz
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


In [43]:
predictor = xgb_inference_model.deploy(
    initial_instance_count=1,
    instance_type="ml.c5.xlarge",
)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
----!

<div style="background-color: teal; font-size: 15px; color: white;">
Delete Endpoint

In [44]:
predictor.delete_endpoint()