In [1]:
import datetime
import os
import sagemaker
from sagemaker import get_execution_role
from sagemaker.tensorflow import TensorFlow
from sagemaker.tuner import HyperparameterTuner, IntegerParameter, CategoricalParameter, ContinuousParameter

In [2]:
# Build the tuning job name from the current local time (second resolution)
# and echo it so the name is visible in the notebook output.
dt_now = datetime.datetime.now()
TUNING_JOB_NAME = f"tuning-job-{dt_now:%Y-%m-%d-%H-%M-%S}"
print(TUNING_JOB_NAME)

tuning-job-2022-02-23-08-52-26


In [3]:
# SageMaker SDK session built with default settings.
sagemaker_session = sagemaker.Session()
# Execution role resolved by the SDK; passed to the estimator below as `role`.
role = get_execution_role()
# Region name taken from the session's underlying boto3 session.
# NOTE(review): `region` is reassigned later in the analysis section from a fresh boto3 session.
region = sagemaker_session.boto_session.region_name

In [4]:
# S3 location of the training data; passed to tuner.fit() as the training input.
training_data_uri = 's3://sensor-data-keisuke-nakata/sample=64/'

In [5]:
# TensorFlow estimator that the hyperparameter tuner uses as its training-job template.
# The metric regexes pull `test_loss` / `test_acc` values out of the training logs.
# NOTE(review): instance_count=2 is set without a `distribution` argument --
# confirm that jobs/train.py really makes use of both instances.
estimator = TensorFlow(
    entry_point="jobs/train.py",
    role=role,
    instance_count=2,
    instance_type="ml.m5.large",
    framework_version="2.2",
    py_version="py37",
    metric_definitions=[
        {"Name": "test_loss", "Regex": "test_loss=(.*?);"},
        {"Name": "test_acc", "Regex": "test_acc=(.*?);"},
    ],
)

In [6]:
# Disabled on purpose: this would launch one training job straight from the
# estimator, without going through the tuner. Uncomment to run a single job.
# estimator.fit(training_data_uri)

In [7]:
# Search space handed to the tuner: one entry per hyperparameter to tune.
hyperparameter_ranges = dict(
    batch_size=IntegerParameter(4, 64),
    learning_rate=ContinuousParameter(0.00001, 0.001),
    num_epoch=IntegerParameter(20, 100),
)

# The tuner minimizes the `test_loss` metric extracted from the training logs.
objective_type = "Minimize"
objective_metric_name = "test_loss"
# NOTE(review): this regex (space after "=", numeric capture) is not the same as
# the `test_loss` regex given to the estimator -- confirm which one matches the
# format jobs/train.py actually logs.
metric_definitions = [
    dict(Name="test_loss", Regex="test_loss= ([0-9\\.]+)"),
]

In [8]:
# Tuner built on the shared estimator: at most 100 training jobs in total,
# no more than 10 at the same time, with early stopping set to 'Auto'.
tuner = HyperparameterTuner(
    estimator=estimator,
    objective_metric_name=objective_metric_name,
    objective_type=objective_type,
    hyperparameter_ranges=hyperparameter_ranges,
    metric_definitions=metric_definitions,
    max_jobs=100,
    max_parallel_jobs=10,
    early_stopping_type="Auto",
)

In [9]:
# Start the hyperparameter tuning job on the S3 training data. The job is
# submitted under TUNING_JOB_NAME, the same name the analysis section later
# uses to look the job up.
tuner.fit(training_data_uri, job_name=TUNING_JOB_NAME)

.........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................!


# Analyzing the hyperparameter tuning results

In [10]:
import boto3
import sagemaker
import os

# A single boto3 session provides both the region name and the low-level
# SageMaker client used by the analysis cells.
boto_session = boto3.Session()
region = boto_session.region_name
sage_client = boto_session.client("sagemaker")

## A hyperparameter tuning job must already have been run before it can be analyzed here.
## Tuning jobs you have run are listed in the Training section of the SageMaker dashboard;
## to analyze a different job, copy the name of a completed job from that list,
## for example: tuning_job_name = 'mxnet-training-201007-0054'.
## By default the job started earlier in this notebook is analyzed.
tuning_job_name = TUNING_JOB_NAME

In [11]:
# Re-run this cell at any time to refresh the status of the tuning job.
tuning_job_result = sage_client.describe_hyper_parameter_tuning_job(
    HyperParameterTuningJobName=tuning_job_name
)

status = tuning_job_result["HyperParameterTuningJobStatus"]
if not status == "Completed":
    print("Reminder: the tuning job has not been completed.")

# Only training jobs counted under "Completed" are reported here.
status_counters = tuning_job_result["TrainingJobStatusCounters"]
job_count = status_counters["Completed"]
print("%d training jobs have completed" % job_count)

# Objective settings reused by the sorting and plotting cells below.
job_config = tuning_job_result["HyperParameterTuningJobConfig"]
objective = job_config["HyperParameterTuningJobObjective"]
objective_name = objective["MetricName"]
# Anything other than "Maximize" is treated as a minimization objective.
is_minimize = not objective["Type"] == "Maximize"

96 training jobs have completed


In [12]:
from pprint import pprint

# Show the best training job so far, if the description carries one.
best_training_job = tuning_job_result.get("BestTrainingJob")
if not best_training_job:
    print("No training jobs have reported results yet.")
else:
    print("Best model found so far:")
    pprint(best_training_job)

Best model found so far:
{'CreationTime': datetime.datetime(2022, 2, 23, 9, 16, 41, tzinfo=tzlocal()),
 'FinalHyperParameterTuningJobObjectiveMetric': {'MetricName': 'test_loss',
                                                 'Value': 0.3200856149196625},
 'ObjectiveStatus': 'Succeeded',
 'TrainingEndTime': datetime.datetime(2022, 2, 23, 9, 21, 11, tzinfo=tzlocal()),
 'TrainingJobArn': 'arn:aws:sagemaker:ap-northeast-1:410464928824:training-job/tuning-job-2022-02-23-08-52-26-057-f9175735',
 'TrainingJobName': 'tuning-job-2022-02-23-08-52-26-057-f9175735',
 'TrainingJobStatus': 'Completed',
 'TrainingStartTime': datetime.datetime(2022, 2, 23, 9, 19, 26, tzinfo=tzlocal()),
 'TunedHyperParameters': {'batch_size': '17',
                          'learning_rate': '0.0004857053242728049',
                          'num_epoch': '99'}}


In [13]:
import pandas as pd

# Load the per-training-job results of the tuning job into a DataFrame.
# Note: this rebinds `tuner` (until now the HyperparameterTuner) to the analytics
# object; the plotting cells below read `tuner.tuning_ranges` from it.
tuner = sagemaker.HyperparameterTuningJobAnalytics(tuning_job_name)

full_df = tuner.dataframe()

# Bind `df` up front so that it exists even when no training job has been
# recorded yet; otherwise the trailing `df` expression and the plotting cells
# below would raise NameError on an empty result set.
df = full_df

if len(full_df) > 0:
    # Keep only jobs that reported a usable objective value (this comparison
    # also drops NaN entries, since NaN > x is always False).
    df = full_df[full_df["FinalObjectiveValue"] > -float("inf")]
    if len(df) > 0:
        # Best jobs first: ascending for a minimization objective, descending otherwise.
        df = df.sort_values("FinalObjectiveValue", ascending=is_minimize)
        print("Number of training jobs with valid objective: %d" % len(df))
        print({"lowest": min(df["FinalObjectiveValue"]), "highest": max(df["FinalObjectiveValue"])})
        pd.set_option("display.max_colwidth", None)  # Don't truncate TrainingJobName
    else:
        print("No training jobs have reported valid results yet.")

df

Number of training jobs with valid objective: 100
{'lowest': 0.3200856149196625, 'highest': 1.086159348487854}


Unnamed: 0,batch_size,learning_rate,num_epoch,TrainingJobName,TrainingJobStatus,FinalObjectiveValue,TrainingStartTime,TrainingEndTime,TrainingElapsedTimeSeconds
43,17.0,0.000486,99.0,tuning-job-2022-02-23-08-52-26-057-f9175735,Completed,0.320086,2022-02-23 09:19:26+00:00,2022-02-23 09:21:11+00:00,105.0
97,16.0,0.000783,31.0,tuning-job-2022-02-23-08-52-26-003-dadefd95,Completed,0.322759,2022-02-23 08:55:11+00:00,2022-02-23 08:57:26+00:00,135.0
27,16.0,0.000344,88.0,tuning-job-2022-02-23-08-52-26-073-d5b137b4,Completed,0.327311,2022-02-23 09:28:13+00:00,2022-02-23 09:29:51+00:00,98.0
47,14.0,0.000497,100.0,tuning-job-2022-02-23-08-52-26-053-fbc9a4c1,Completed,0.330481,2022-02-23 09:18:23+00:00,2022-02-23 09:20:11+00:00,108.0
54,17.0,0.000382,99.0,tuning-job-2022-02-23-08-52-26-046-3ac3d664,Completed,0.332115,2022-02-23 09:14:55+00:00,2022-02-23 09:16:31+00:00,96.0
...,...,...,...,...,...,...,...,...,...
77,4.0,0.000760,65.0,tuning-job-2022-02-23-08-52-26-023-2a81827c,Completed,0.751077,2022-02-23 09:04:26+00:00,2022-02-23 09:06:52+00:00,146.0
73,4.0,0.000646,58.0,tuning-job-2022-02-23-08-52-26-027-529490fd,Completed,0.851517,2022-02-23 09:05:11+00:00,2022-02-23 09:07:44+00:00,153.0
91,28.0,0.000018,83.0,tuning-job-2022-02-23-08-52-26-009-507807cd,Completed,0.981569,2022-02-23 08:55:28+00:00,2022-02-23 08:56:55+00:00,87.0
98,63.0,0.000031,29.0,tuning-job-2022-02-23-08-52-26-002-0fc73699,Completed,0.983553,2022-02-23 08:55:24+00:00,2022-02-23 08:56:35+00:00,71.0


In [14]:
import bokeh
import bokeh.io

bokeh.io.output_notebook()
from bokeh.plotting import figure, show
from bokeh.models import HoverTool


class HoverHelper:
    """Builds Bokeh tool lists whose hover tooltips describe a training job."""

    def __init__(self, tuning_analytics):
        """Store the analytics object; its ``tuning_ranges`` keys name the tuned hyperparameters."""
        self.tuner = tuning_analytics

    def hovertool(self):
        """Return a HoverTool showing the objective value, the job name and each tuned hyperparameter."""
        fixed_tooltips = [
            ("FinalObjectiveValue", "@FinalObjectiveValue"),
            ("TrainingJobName", "@TrainingJobName"),
        ]
        # One tooltip per tuned hyperparameter, referencing the column of the same name.
        hyperparameter_tooltips = [
            (name, "@{%s}" % name) for name in self.tuner.tuning_ranges.keys()
        ]
        return HoverTool(tooltips=fixed_tooltips + hyperparameter_tooltips)

    def tools(self, standard_tools="pan,crosshair,wheel_zoom,zoom_in,zoom_out,undo,reset"):
        """Return a fresh hover tool followed by the standard tool string, for ``figure(tools=...)``."""
        hover_tool = self.hovertool()
        return [hover_tool, standard_tools]


# Tool-list factory (hover tooltips plus standard tools) reused by every figure.
hover = HoverHelper(tuner)

# Objective value of each training job plotted against the time it started.
# `width`/`height` are used instead of `plot_width`/`plot_height`: the latter
# were deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
p = figure(width=1400, height=600, tools=hover.tools(), x_axis_type="datetime")
p.circle(source=df, x="TrainingStartTime", y="FinalObjectiveValue")
show(p)

In [15]:
# Imported here because this cell is the only user and `bokeh.layouts` is not
# imported explicitly anywhere else in the notebook.
from bokeh.layouts import column


def is_num(x):
    """Return True if ``x`` can be converted to a float, False otherwise."""
    try:
        float(x)
    except (TypeError, ValueError):
        # Only conversion failures mean "not a number"; anything else should surface.
        return False
    return True


# One "objective vs hyperparameter" scatter plot per tuned hyperparameter.
ranges = tuner.tuning_ranges
figures = []
for hp_name, hp_range in ranges.items():
    categorical_args = {}
    vals = hp_range.get("Values")
    if vals:
        # This is marked as categorical.  Check if all options are actually numbers.
        if all(is_num(x) for x in vals):
            # Bokeh has issues plotting a "categorical" range that's actually numeric, so plot as numeric
            print("Hyperparameter %s is tuned as categorical, but all values are numeric" % hp_name)
        else:
            # Genuinely categorical: give Bokeh the category list as the x range.
            categorical_args["x_range"] = vals

    # Now plot it.  `width`/`height` replace `plot_width`/`plot_height`, which
    # were removed in Bokeh 3.0.
    p = figure(
        width=500,
        height=500,
        title="Objective vs %s" % hp_name,
        tools=hover.tools(),
        x_axis_label=hp_name,
        y_axis_label=objective_name,
        **categorical_args,
    )
    p.circle(source=df, x=hp_name, y="FinalObjectiveValue")
    figures.append(p)

# Stack the figures vertically using the documented layout helper.
show(column(figures))