### Notebook Env Details
`kernel`: Python 3
`image`: Data Science 3.0
`instance`: ml.t3.medium

<div style="background-color: darkgreen; font-size: 20px; color: white;">
Setup

In [5]:
# Ensure updated SageMaker SDK version
%pip install -U -q sagemaker

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
awscli 1.29.63 requires botocore==1.31.63, but you have botocore 1.33.8 which is incompatible.
awscli 1.29.63 requires s3transfer<0.8.0,>=0.7.0, but you have s3transfer 0.8.2 which is incompatible.
distributed 2022.7.0 requires tornado<6.2,>=6.0.3, but you have tornado 6.3.3 which is incompatible.[0m[31m
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m23.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [6]:
import os
import boto3
import re
import sagemaker
import time

# Execution role of this notebook; passed to the training job definition as its RoleArn.
role = sagemaker.get_execution_role()
# Region of the default boto3 session; used for the XGBoost image lookup and the SageMaker client.
region = boto3.Session().region_name
s3_client = boto3.client("s3")

# Data S3 paths (for Data Subset)
# NOTE(review): the bucket name hard-codes an account ID and "us-east-1" while `region`
# is read from the session — confirm they match where this notebook is run.
data_bucket = "sagemaker-us-east-1-717145514721"
train_data_prefix = "nyc-taxi/data/processed/troubleshoot-data-subset"
val_data_prefix = "nyc-taxi/data/processed/validation"
data_bucket_path = f"s3://{data_bucket}"

# S3 bucket for saving code and model artifacts.
output_bucket = sagemaker.Session().default_bucket()
output_prefix = "nyc-taxi/prototype-runs"
output_bucket_path = f"s3://{output_bucket}"

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


<div style="background-color: darkgreen; font-size: 20px; color: white;">
Model HP-Tuning

<div style="background-color: teal; font-size: 15px; color: white;">
Setting Variables

In [7]:
from time import gmtime, strftime, sleep

In [8]:
# Job name = fixed prefix + UTC day-hour-minute-second stamp, so each run gets its own name.
tuning_job_name = f"xgb-HP-tune-run{strftime('%d-%H-%M-%S', gmtime())}"

# Built-in XGBoost 1.7-1 training image for the session's region.
container = sagemaker.image_uris.retrieve(framework="xgboost", region=region, version="1.7-1")
# Low-level SageMaker API client used to create and poll the tuning job.
client = boto3.client(service_name="sagemaker", region_name=region)

<div style="background-color: teal; font-size: 15px; color: white;">
Tuning Configs

In [31]:
# Hyperparameter search space and tuning strategy for the tuning job.
# Each range is listed as (hyperparameter name, min, max); bounds are kept as strings
# exactly as they are sent in the request.
tuning_job_config = {
    "ParameterRanges": {
        "CategoricalParameterRanges": [],
        "ContinuousParameterRanges": [
            {"MaxValue": upper, "MinValue": lower, "Name": hp_name}
            for hp_name, lower, upper in (
                ("eta", "0.1", "0.5"),
                ("gamma", "0", "5"),
                ("min_child_weight", "0", "120"),
                ("subsample", "0.5", "1"),
                ("alpha", "0", "2"),
            )
        ],
        "IntegerParameterRanges": [
            {"MaxValue": upper, "MinValue": lower, "Name": hp_name}
            for hp_name, lower, upper in (
                ("max_depth", "0", "10"),
                ("num_round", "1", "4000"),
            )
        ],
    },
    # Default resource limits for automatic model tuning (AMT) are documented at:
    # https://docs.aws.amazon.com/sagemaker/latest/dg/automatic-model-tuning-limits.html
    "ResourceLimits": {
        # More training jobs -> better accuracy, at the cost of longer tuning time.
        "MaxNumberOfTrainingJobs": 3,
        # More parallel jobs -> shorter total time, bounded by account limits.
        # If max_jobs == max_parallel_jobs, Bayesian search degrades to Random.
        "MaxParallelTrainingJobs": 1,
    },
    "Strategy": "Bayesian",
    # Objective: minimize RMSE on the validation channel.
    "HyperParameterTuningJobObjective": {
        "MetricName": "validation:rmse",
        "Type": "Minimize",
    },
}

<div style="background-color: teal; font-size: 15px; color: white;">
Train Job Configs

In [32]:
# Template for every training job the tuner launches: image, input channels, output
# location, compute, role, fixed hyperparameters and a runtime cap.
training_job_definition = {
    "AlgorithmSpecification": {"TrainingImage": container, "TrainingInputMode": "File"},
    "InputDataConfig": [
        {
            # Training data: the data-subset prefix in the data bucket.
            "ChannelName": "train",
            "DataSource": {
                "S3DataSource": {
                    "S3DataType": "S3Prefix",
                    "S3Uri": f"{data_bucket_path}/{train_data_prefix}",
                    "S3DataDistributionType": "FullyReplicated",
                }
            },
            "ContentType": "csv",
            "CompressionType": "None",
        },
        {
            # Validation data: source of the "validation:rmse" tuning objective.
            "ChannelName": "validation",
            "DataSource": {
                "S3DataSource": {
                    "S3DataType": "S3Prefix",
                    "S3Uri": f"{data_bucket_path}/{val_data_prefix}",
                    "S3DataDistributionType": "FullyReplicated",
                }
            },
            "ContentType": "csv",
            "CompressionType": "None",
        },
    ],
    "OutputDataConfig": {"S3OutputPath": f"{output_bucket_path}/{output_prefix}/single-xgboost"},
    "ResourceConfig": {"InstanceCount": 1, "InstanceType": "ml.m5.2xlarge", "VolumeSizeInGB": 5},
    "RoleArn": role,
    # Hyperparameters held constant across all tuning trials.
    "StaticHyperParameters": {
        # "reg:linear" is the deprecated alias of this objective; use the current
        # name for the same squared-error regression loss.
        "objective": "reg:squarederror",
        "verbosity": "2",
    },
    # Per-training-job runtime cap: 43200 s = 12 hours.
    "StoppingCondition": {"MaxRuntimeInSeconds": 43200},
}

<div style="background-color: teal; font-size: 15px; color: white;">
Run HP-Tuning

In [33]:
# Launch the hyperparameter tuning job, then poll once a minute until it reaches a
# terminal status, printing the status on every poll.
print(
    f"Creating a tuning job with name: {tuning_job_name}. It will take between 12 and 17 minutes to complete."
)
client.create_hyper_parameter_tuning_job(
    HyperParameterTuningJobName=tuning_job_name,
    HyperParameterTuningJobConfig=tuning_job_config,
    TrainingJobDefinition=training_job_definition,
)

# Statuses after which polling should stop. "Stopped" must be included: a job that is
# stopped never reaches "Completed" or "Failed", so waiting only for those two would
# loop forever. "DeleteFailed" is treated as terminal for the same reason.
terminal_statuses = {"Completed", "Failed", "Stopped", "DeleteFailed"}
poll_interval_seconds = 60

while True:
    tuning_job_description = client.describe_hyper_parameter_tuning_job(
        HyperParameterTuningJobName=tuning_job_name
    )
    status = tuning_job_description["HyperParameterTuningJobStatus"]
    print(status)
    if status in terminal_statuses:
        break
    time.sleep(poll_interval_seconds)

# Surface why the job did not complete instead of ending silently on a bad status.
if status != "Completed":
    failure_reason = tuning_job_description.get("FailureReason", "no failure reason reported")
    print(f"Tuning job {tuning_job_name} ended with status {status}: {failure_reason}")

Creating a tuning job with name: xgb-HP-tune-run05-23-08-02. It will take between 12 and 17 minutes to complete.
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
Completed


<div style="background-color: teal; font-size: 15px; color: white;">
Inspecting Model

In the SageMaker console, go to Training -> Hyperparameter Tuning Jobs and open the completed job to see the hyperparameter set from its best-performing training job. We will reuse these values when we scale training to the entire dataset.