In [13]:
import os
import boto3
import sagemaker
import pandas as pd
from sagemaker import get_execution_role
from sagemaker.inputs import TrainingInput
from sagemaker.image_uris import retrieve

# One SageMaker session is shared by every later cell (uploads, estimator).
sess = sagemaker.Session()
# Role passed to the Estimator further down.
role = get_execution_role()
bucket = sess.default_bucket()

# Resolve the Linear Learner image in the region the SageMaker session is
# bound to, rather than from a second, independent boto3 session, so the
# image region always agrees with the session's region.
training_image = retrieve("linear-learner", sess.boto_region_name, "1")
print(training_image)

382416733822.dkr.ecr.us-east-1.amazonaws.com/linear-learner:1


In [14]:
%store -r X_train
%store -r X_test
%store -r X_val
%store -r Y_train
%store -r Y_test
%store -r Y_val

The first thing we need to do is get a reference to the training inputs. If the data had already been uploaded to our S3 bucket, we would only need that reference.

In [15]:
data_dir = "data/"
# exist_ok=True makes the cell safely re-runnable without a separate
# check-then-create step.
os.makedirs(data_dir, exist_ok=True)

prefix = "boston-dataset"


def _write_headerless_csv(target, features, directory, filename):
    """Write ``target`` and ``features`` side by side to ``directory/filename``.

    The target is placed in the first column and the file is written without
    a header row or index column. Returns the local path of the written file.
    """
    local_path = os.path.join(directory, filename)
    pd.concat([target, features], axis=1).to_csv(local_path, header=False, index=False)
    return local_path


train_path = _write_headerless_csv(Y_train, X_train, data_dir, "train.csv")
validation_path = _write_headerless_csv(Y_val, X_val, data_dir, "validation.csv")

# Upload both files under the same key prefix and keep the returned locations.
val_location = sess.upload_data(validation_path, key_prefix=prefix)
train_location = sess.upload_data(train_path, key_prefix=prefix)

# Channel descriptions passed to the tuner's fit() call later on.
s3_input_train = TrainingInput(s3_data=train_location, content_type="text/csv")
s3_input_validation = TrainingInput(s3_data=val_location, content_type="text/csv")


In [16]:
# Echo the training channel object; the cell output is simply its repr.
s3_input_train

<sagemaker.inputs.TrainingInput at 0x7fddf37deeb8>

Now we need to do the same as before and configure the training job!

In [None]:



# Training artifacts go under the same key prefix as the uploaded CSVs.
output_location = "s3://{}/{}/output".format(sess.default_bucket(), prefix)
n_rows, n_features = X_train.shape

model = sagemaker.estimator.Estimator(
    image_uri=training_image,
    role=role,
    instance_count=1,
    instance_type="ml.m5.large",
    output_path=output_location,
    sagemaker_session=sess,
)

# Fixed (non-tuned) hyperparameters. The mini-batch size equals the number of
# training rows, so each batch is the whole training set.
model.set_hyperparameters(
    feature_dim=n_features,
    predictor_type="regressor",
    mini_batch_size=n_rows,
)


And now the beautiful part. In SageMaker you only need to configure a `HyperparameterTuner` object and pass it the ranges dict. This dict specifies, for each tunable parameter, the range to search; note that not every algorithm lets you tune every parameter.
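Finally we define an objective metric and the direction in which we want to optimize it. Because Linear Learner is a built-in algorithm, its metrics are already defined for us; if this were a custom algorithm, we would need to define the metric ourselves with a regex applied to the training job's stdout.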

In [18]:
from time import gmtime, strftime
from sagemaker.tuner import (
    IntegerParameter,
    CategoricalParameter,
    ContinuousParameter,
    HyperparameterTuner,
)

# Job name is suffixed with the current UTC day-hour-minute-second.
job_timestamp = strftime("%d-%H-%M-%S", gmtime())
tuning_job_name = "linear-boston-{}".format(job_timestamp)

# Search space: both parameters are explored over the same continuous interval.
hyperparameter_ranges = dict(
    learning_rate=ContinuousParameter(0.00001, 1.0),
    l1=ContinuousParameter(0.00001, 1.0),
)

# Metric the tuner optimizes; lower is better (see objective_type below).
objective_metric_name = "validation:objective_loss:final"

tuner = HyperparameterTuner(
    estimator=model,
    objective_metric_name=objective_metric_name,
    hyperparameter_ranges=hyperparameter_ranges,
    objective_type="Minimize",
    max_jobs=4,
    max_parallel_jobs=2,
)


Now we are ready. We just fit the tuner and wait for it to run; the result will be many trained models, one per hyperparameter combination tried (up to `max_jobs`).


In [19]:
# Map each channel name to the input object built from the uploaded CSVs.
channels = {
    "train": s3_input_train,
    "validation": s3_input_validation,
}

tuner.fit(inputs=channels, job_name=tuning_job_name, include_cls_metadata=False)

# Block until the tuning job has finished.
tuner.wait()


.................................................!
!


SageMaker exposes a `HyperparameterTuningJobAnalytics` class to get back a dataframe with information on the job! Let's analyze it.

In [20]:
# Analytics handle for the tuning job launched above.
tuner_metrics = sagemaker.HyperparameterTuningJobAnalytics(tuning_job_name)

jobs_df = tuner_metrics.dataframe()
# NOTE(review): the tuner was created with objective_type="Minimize", so this
# descending sort lists the *worst* jobs first -- confirm that is intended.
jobs_df.sort_values(by=["FinalObjectiveValue"], ascending=False).head(5)

Unnamed: 0,l1,learning_rate,TrainingJobName,TrainingJobStatus,FinalObjectiveValue,TrainingStartTime,TrainingEndTime,TrainingElapsedTimeSeconds
0,0.065174,4.5e-05,linear-boston-07-11-56-22-002-69ce0863,Completed,1.062063,2021-09-07 11:59:06+00:00,2021-09-07 12:00:03+00:00,57.0
1,0.074492,0.018764,linear-boston-07-11-56-22-001-1796d7e1,Completed,0.424222,2021-09-07 11:59:16+00:00,2021-09-07 12:00:18+00:00,62.0


In [21]:
# Fetch the per-job table once and reuse it for both summaries.
jobs_summary = tuner_metrics.dataframe()

# Sum of per-job elapsed seconds, converted to hours.
elapsed_seconds = jobs_summary["TrainingElapsedTimeSeconds"].sum()
total_time = elapsed_seconds / 3600
print("The total training time is {:.2f} hours".format(total_time))

# How many training jobs ended in each status.
jobs_summary["TrainingJobStatus"].value_counts()


The total training time is 0.03 hours


Completed    2
Name: TrainingJobStatus, dtype: int64