# NOTE:  THIS NOTEBOOK WILL TAKE ABOUT 30 MINUTES TO COMPLETE.

# PLEASE BE PATIENT.

# Optimize Models using Automatic Model Tuning

<img src="img/hpt.png" width="90%" align="left">

In [None]:
import boto3
import sagemaker
import pandas as pd

# SageMaker session, its default bucket, and the execution role used by the cells below.
sess = sagemaker.Session()
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()

# A default boto3 session supplies the region and the low-level SageMaker API client.
boto_session = boto3.Session()
region = boto_session.region_name

sm = boto_session.client("sagemaker", region_name=region)

# Copy training and validation data to S3

In [None]:
# Recursively copy the local data-tfrecord directory to the data-tfrecord/ prefix of the bucket.
!aws s3 cp --recursive data-tfrecord s3://$bucket/data-tfrecord/

# Set Up S3 URIs for the Uploaded Training, Validation, and Test Data

In [None]:
# Every dataset split lives under its own sub-prefix of s3://<bucket>/data-tfrecord.
tfrecord_s3_prefix = f"s3://{bucket}/data-tfrecord"

processed_train_data_s3_uri = f"{tfrecord_s3_prefix}/bert-train"
processed_validation_data_s3_uri = f"{tfrecord_s3_prefix}/bert-validation"
processed_test_data_s3_uri = f"{tfrecord_s3_prefix}/bert-test"

In [None]:
# Show the training-data URI, then recursively list the objects stored beneath it.
print(processed_train_data_s3_uri)
!aws s3 ls --recursive $processed_train_data_s3_uri/

In [None]:
# Show the validation-data URI, then recursively list the objects stored beneath it.
print(processed_validation_data_s3_uri)
!aws s3 ls --recursive $processed_validation_data_s3_uri/

In [None]:
# Show the test-data URI, then recursively list the objects stored beneath it.
print(processed_test_data_s3_uri)
!aws s3 ls --recursive $processed_test_data_s3_uri/

In [None]:
from sagemaker.inputs import TrainingInput

# All three channels use the same S3 distribution setting.
s3_distribution = "ShardedByS3Key"

# One TrainingInput per dataset split, each pointing at the S3 prefix defined earlier.
s3_input_train_data = TrainingInput(
    s3_data=processed_train_data_s3_uri,
    distribution=s3_distribution,
)
s3_input_validation_data = TrainingInput(
    s3_data=processed_validation_data_s3_uri,
    distribution=s3_distribution,
)
s3_input_test_data = TrainingInput(
    s3_data=processed_test_data_s3_uri,
    distribution=s3_distribution,
)

# Print the generated channel configuration of each input, in train/validation/test order.
for channel_input in (s3_input_train_data, s3_input_validation_data, s3_input_test_data):
    print(channel_input.config)

In [None]:
# Print the training script that the estimator further down uses as its entry point.
!cat src/tf_bert_reviews.py

# Set Up Static Hyper-Parameters for Classification Layer

In [None]:
# --- Values forwarded to the training script as fixed hyper-parameters ---
max_seq_length = 64
epochs = 1
epsilon = 1e-8

# Batch sizes and step counts for the training / validation / test phases.
validation_batch_size = 128
test_batch_size = 128
train_steps_per_epoch = 50
validation_steps = 50
test_steps = 50

# --- Training infrastructure passed to the estimator ---
train_instance_count = 1
train_instance_type = "ml.c5.4xlarge"
train_volume_size = 256
input_mode = "File"

# --- Boolean switches forwarded to the training script ---
use_xla = True
use_amp = True
enable_sagemaker_debugger = False
enable_checkpointing = False
enable_tensorboard = False
run_validation = True
run_test = False
run_sample_predictions = False

# Set Up Dynamic Hyper-Parameter Ranges to Explore


In [None]:
from sagemaker.tuner import IntegerParameter
from sagemaker.tuner import ContinuousParameter
from sagemaker.tuner import CategoricalParameter
from sagemaker.tuner import HyperparameterTuner

# Search space explored by the tuner: one continuous range and two categorical choices.
learning_rate_range = ContinuousParameter(min_value=1e-5, max_value=5e-5, scaling_type="Linear")
train_batch_size_choices = CategoricalParameter([128, 256])
freeze_bert_layer_choices = CategoricalParameter([True, False])

# Keys are the hyper-parameter names that are varied between training jobs.
hyperparameter_ranges = {
    "learning_rate": learning_rate_range,
    "train_batch_size": train_batch_size_choices,
    "freeze_bert_layer": freeze_bert_layer_choices,
}

# Set Up Metrics

In [None]:
# Capture group shared by every metric: a run of digits and dots that follows the log label.
_NUMBER_CAPTURE = "([0-9\\.]+)"

# Each entry pairs a metric name with the regex used to extract its value from the training logs.
# NOTE(review): "loss: " and "accuracy: " are also substrings of "val_loss: " and
# "val_accuracy: " - confirm the train metrics are not picking up validation values.
metrics_definitions = [
    {"Name": metric_name, "Regex": log_label + ": " + _NUMBER_CAPTURE}
    for metric_name, log_label in (
        ("train:loss", "loss"),
        ("train:accuracy", "accuracy"),
        ("validation:loss", "val_loss"),
        ("validation:accuracy", "val_accuracy"),
    )
]

In [None]:
from sagemaker.tensorflow import TensorFlow

# Hyper-parameters that are identical for every training job; the tuned ones
# (see hyperparameter_ranges) are supplied separately through the tuner.
static_hyperparameters = dict(
    epochs=epochs,
    epsilon=epsilon,
    validation_batch_size=validation_batch_size,
    test_batch_size=test_batch_size,
    train_steps_per_epoch=train_steps_per_epoch,
    validation_steps=validation_steps,
    test_steps=test_steps,
    use_xla=use_xla,
    use_amp=use_amp,
    max_seq_length=max_seq_length,
    enable_sagemaker_debugger=enable_sagemaker_debugger,
    enable_checkpointing=enable_checkpointing,
    enable_tensorboard=enable_tensorboard,
    run_validation=run_validation,
    run_test=run_test,
    run_sample_predictions=run_sample_predictions,
)

# TensorFlow estimator that runs src/tf_bert_reviews.py with the static settings above.
estimator = TensorFlow(
    # Training code
    source_dir="src",
    entry_point="tf_bert_reviews.py",
    # Framework / runtime versions
    framework_version="2.3.1",
    py_version="py37",
    # Permissions and infrastructure
    role=role,
    instance_type=train_instance_type,
    instance_count=train_instance_count,
    volume_size=train_volume_size,
    input_mode=input_mode,
    # Job configuration
    hyperparameters=static_hyperparameters,
    metric_definitions=metrics_definitions,
)

# Set Up HyperparameterTuner with Estimator and Hyper-Parameter Ranges

In [None]:
# Metric the tuner maximises; corresponds to the "train:accuracy" entry in metrics_definitions.
# NOTE(review): this optimises training accuracy rather than "validation:accuracy" - confirm
# that is intended.
objective_metric_name = "train:accuracy"

tuner = HyperparameterTuner(
    # What to train and which hyper-parameters to vary
    estimator=estimator,
    hyperparameter_ranges=hyperparameter_ranges,
    # What to optimise
    objective_metric_name=objective_metric_name,
    objective_type="Maximize",
    metric_definitions=metrics_definitions,
    # Search strategy and job budget: two training jobs in total, run one at a time
    strategy="Bayesian",
    early_stopping_type="Auto",
    max_jobs=2,
    max_parallel_jobs=1,
)

# Start Tuning Job

In [None]:
# Channel name -> S3 input for each of the three dataset splits.
tuning_inputs = {
    "train": s3_input_train_data,
    "validation": s3_input_validation_data,
    "test": s3_input_test_data,
}

# wait=False: this call is not asked to wait for the job; a later cell calls tuner.wait().
tuner.fit(inputs=tuning_inputs, include_cls_metadata=False, wait=False)

# Check Tuning Job Status
Re-run this cell to track the status.

In [None]:
from pprint import pprint

# Name of the most recent tuning job started through this tuner.
tuning_job_name = tuner.latest_tuning_job.job_name

# Ask the SageMaker API for the job's current state, so that re-running this cell
# actually reports progress (uses the `sm` client created in the first cell).
job_description = sm.describe_hyper_parameter_tuning_job(HyperParameterTuningJobName=tuning_job_name)

print("Tuning job: {}".format(tuning_job_name))
print("Status:     {}".format(job_description["HyperParameterTuningJobStatus"]))

# Per-state counts of the training jobs launched by the tuning job.
pprint(job_description["TrainingJobStatusCounters"])

In [None]:
# IPython.display is the public import location; importing `display` from
# IPython.core.display is deprecated.
from IPython.display import display, HTML

# Console page of the tuning job in the notebook's region.
console_url = "https://console.aws.amazon.com/sagemaker/home?region={}#/hyper-tuning-jobs/{}".format(
    region, tuning_job_name
)

# target="_blank" (with the underscore) opens the link in a new tab; the bare value "blank"
# would instead name a browsing context called "blank".
display(
    HTML(
        '<b>Review <a target="_blank" href="{}">Hyper-Parameter Tuning Job</a></b>'.format(console_url)
    )
)

# _Please Wait for the ^^ Tuning Job ^^ to Complete Above_

In [None]:
%%time

# Wait on the tuning job started above; the %%time cell magic reports how long this cell ran.
tuner.wait()

# [INFO] _Feel free to continue to the next workshop section while this notebook is running._

# Show the Tuning Job
### _Note:  This will fail at first.  Please wait about 15-30 seconds and re-run._

In [None]:
from sagemaker.analytics import HyperparameterTuningJobAnalytics

# Analytics object for the tuning job named above, bound to the notebook's SageMaker session.
hp_results = HyperparameterTuningJobAnalytics(
    hyperparameter_tuning_job_name=tuning_job_name,
    sagemaker_session=sess,
)

# Materialise the results as a DataFrame; the trailing expression displays its shape.
df_results = hp_results.dataframe()
df_results.shape

In [None]:
# Display all results ordered by final objective value, highest first.
df_results.sort_values(by="FinalObjectiveValue", ascending=False)

# Show the Best Candidate

In [None]:
# Display only the top row after ordering by final objective value, highest first.
df_results.sort_values(by="FinalObjectiveValue", ascending=False).iloc[:1]

# Log the Best Hyper-Parameter and Objective Metric in the Experiment

Logging `learning_rate` parameter and `accuracy` metric

In [None]:
# Top row after ordering by final objective value (highest first), kept as a one-row frame.
best_candidate = df_results.sort_values(by="FinalObjectiveValue", ascending=False).iloc[:1]

# One-element Series holding the learning rate of that row.
best_learning_rate = best_candidate["learning_rate"]
print(best_learning_rate)

In [None]:
# Top row after ordering by final objective value (highest first), kept as a one-row frame.
best_candidate = df_results.sort_values(by="FinalObjectiveValue", ascending=False).iloc[:1]

# One-element Series holding the objective value of that row.
best_accuracy = best_candidate["FinalObjectiveValue"]
print(best_accuracy)

# Release Resources

In [None]:
%%html

<!-- Notice for the user, followed by a hidden button bound to the "kernelmenu:shutdown" command. -->
<p><b>Shutting down your kernel for this notebook to release resources.</b></p>
<button class="sm-command-button" data-commandlinker-command="kernelmenu:shutdown" style="display:none;">Shutdown Kernel</button>
        
<script>
// Click the first element with class "sm-command-button" (the hidden button above).
// Presumably this makes the notebook front end run the kernel shutdown command - TODO confirm.
try {
    els = document.getElementsByClassName("sm-command-button");
    els[0].click();
}
catch(err) {
    // NoOp: any error (for example, no matching element) is deliberately ignored.
}    
</script>