In [None]:
import boto3
import sagemaker
import pandas as pd

# A single boto3 session provides both the region name and the low-level
# SageMaker API client used later for describe_* calls.
_boto_session = boto3.Session()
region = _boto_session.region_name
sm = _boto_session.client('sagemaker', region_name=region)

# High-level SageMaker SDK session, its default S3 bucket, and the IAM
# execution role passed to the estimator.
sess = sagemaker.Session()
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()

# Specify the S3 Location of the Features

In [None]:
# Restore the S3 URI of the processed training data from IPython's %store.
# NOTE(review): presumably stored by the earlier data-preparation notebook -- confirm.
%store -r processed_train_data_s3_uri

In [None]:
# Echo the restored training-data URI so a missing or stale value is visible early.
print(processed_train_data_s3_uri)

In [None]:
# Restore the S3 URI of the processed validation data from IPython's %store.
# NOTE(review): presumably stored by the earlier data-preparation notebook -- confirm.
%store -r processed_validation_data_s3_uri

In [None]:
# Echo the restored validation-data URI so a missing or stale value is visible early.
print(processed_validation_data_s3_uri)

In [None]:
# Restore the S3 URI of the processed test data from IPython's %store.
# NOTE(review): presumably stored by the earlier data-preparation notebook -- confirm.
%store -r processed_test_data_s3_uri

In [None]:
# Echo the restored test-data URI so a missing or stale value is visible early.
print(processed_test_data_s3_uri)

In [None]:
# Show the training-data prefix, then list the objects stored under it.
# The shell line relies on IPython expanding $processed_train_data_s3_uri.
print(processed_train_data_s3_uri)
!aws s3 ls $processed_train_data_s3_uri/

In [None]:
# Show the validation-data prefix, then list the objects stored under it.
# The shell line relies on IPython expanding $processed_validation_data_s3_uri.
print(processed_validation_data_s3_uri)
!aws s3 ls $processed_validation_data_s3_uri/

In [None]:
# Show the test-data prefix, then list the objects stored under it.
# The shell line relies on IPython expanding $processed_test_data_s3_uri.
print(processed_test_data_s3_uri)
!aws s3 ls $processed_test_data_s3_uri/

In [None]:
# Every channel uses the same S3 distribution strategy. With 'ShardedByS3Key'
# the job needs at least as many input files as training instances.
_S3_DISTRIBUTION = 'ShardedByS3Key'

# One input channel per processed data split.
s3_input_train_data = sagemaker.s3_input(
    s3_data=processed_train_data_s3_uri,
    distribution=_S3_DISTRIBUTION,
)
s3_input_validation_data = sagemaker.s3_input(
    s3_data=processed_validation_data_s3_uri,
    distribution=_S3_DISTRIBUTION,
)
s3_input_test_data = sagemaker.s3_input(
    s3_data=processed_test_data_s3_uri,
    distribution=_S3_DISTRIBUTION,
)

# Print the resolved channel configuration for train, validation, and test (in that order).
for _channel in (s3_input_train_data, s3_input_validation_data, s3_input_test_data):
    print(_channel.config)

In [None]:
# Print the training script (src/tf_bert_reviews.py) that the estimator below uses as its entry point.
!cat src/tf_bert_reviews.py

# Setup Hyper-Parameters for Classification Layer
First, retrieve `max_seq_length` from the prepare phase.

In [None]:
# Restore max_seq_length from IPython's %store (saved during the prepare phase).
%store -r max_seq_length

In [None]:
# Echo the restored value; it is forwarded to the training script as a hyperparameter.
print(max_seq_length)

In [None]:
# --- Values forwarded to the training script as hyperparameters ---
epochs = 1
learning_rate = 1e-5
epsilon = 1e-8

train_batch_size = 128
validation_batch_size = 128
test_batch_size = 128

train_steps_per_epoch = 1000
validation_steps = 1000
test_steps = 1000

use_xla = True
use_amp = True
freeze_bert_layer = True

run_validation = True
run_test = True
run_sample_predictions = True

# --- Training infrastructure settings passed to the estimator itself ---
train_instance_count = 1
train_instance_type = 'ml.p3.8xlarge'
train_volume_size = 1024
input_mode = 'Pipe'

# Setup Metrics

In [None]:
# Metric name -> regex pairs; the single capture group in each regex is the
# numeric value taken from the training log output.
# NOTE(review): 'loss: ' and 'accuracy: ' are also substrings of 'val_loss: '
# and 'val_accuracy: ' -- confirm the train:* metrics pick up the intended values.
metrics_definitions = [
    dict(Name='train:loss', Regex=r'loss: ([0-9\.]+)'),
    dict(Name='train:accuracy', Regex=r'accuracy: ([0-9\.]+)'),
    dict(Name='validation:loss', Regex=r'val_loss: ([0-9\.]+)'),
    dict(Name='validation:accuracy', Regex=r'val_accuracy: ([0-9\.]+)'),
]

In [None]:
from sagemaker.tensorflow import TensorFlow

# TensorFlow estimator that runs src/tf_bert_reviews.py as the training job.
estimator = TensorFlow(
    entry_point='tf_bert_reviews.py',
    source_dir='src',
    role=role,
    # Make sure there are at least this many input files, otherwise the
    # ShardedByS3Key distribution strategy fails the job with no data available.
    train_instance_count=train_instance_count,
    train_instance_type=train_instance_type,
    train_volume_size=train_volume_size,
    py_version='py3',
    framework_version='2.1.0',
    # Handed to the entry-point script as its hyperparameters.
    hyperparameters=dict(
        epochs=epochs,
        learning_rate=learning_rate,
        epsilon=epsilon,
        train_batch_size=train_batch_size,
        validation_batch_size=validation_batch_size,
        test_batch_size=test_batch_size,
        train_steps_per_epoch=train_steps_per_epoch,
        validation_steps=validation_steps,
        test_steps=test_steps,
        use_xla=use_xla,
        use_amp=use_amp,
        max_seq_length=max_seq_length,
        freeze_bert_layer=freeze_bert_layer,
        run_validation=run_validation,
        run_test=run_test,
        run_sample_predictions=run_sample_predictions,
    ),
    input_mode=input_mode,
    metric_definitions=metrics_definitions,
    # Maximum run time: 2 hours * 60 minutes per hour * 60 seconds per minute = 7200 seconds.
    train_max_run=2 * 60 * 60,
)

# Setup Hyper-Parameter Ranges to Explore


In [None]:
from sagemaker.tuner import (
    CategoricalParameter,
    ContinuousParameter,
    HyperparameterTuner,
    IntegerParameter,
)

# Search space: each entry overrides the same-named static hyperparameter
# configured on the estimator for the jobs launched by the tuner.
hyperparameter_ranges = dict(
    epochs=IntegerParameter(2, 16, scaling_type='Logarithmic'),
    learning_rate=ContinuousParameter(1e-5, 5e-5, scaling_type='Linear'),
    train_batch_size=CategoricalParameter([128, 256, 512]),
    freeze_bert_layer=CategoricalParameter([True, False]),
)

# Must match one of the 'Name' entries in metrics_definitions.
objective_metric_name = 'validation:accuracy'

# Bayesian search that maximizes validation accuracy: two training jobs in
# total, run one at a time, with automatic early stopping.
tuner = HyperparameterTuner(
    estimator=estimator,
    objective_metric_name=objective_metric_name,
    objective_type='Maximize',
    hyperparameter_ranges=hyperparameter_ranges,
    metric_definitions=metrics_definitions,
    strategy='Bayesian',
    max_jobs=2,
    max_parallel_jobs=1,
    early_stopping_type='Auto',
)

In [None]:
# Launch the tuning job with one input channel per data split.
tuner.fit(
    inputs=dict(
        train=s3_input_train_data,
        validation=s3_input_validation_data,
        test=s3_input_test_data,
    ),
    include_cls_metadata=False,
)

# Check Tuning Job Status
Re-run this cell to track the status.

In [None]:
from pprint import pprint

# Name of the most recent tuning job started through `tuner`.
tuning_job_name = tuner.latest_tuning_job.job_name

# Fetch the current description of the tuning job from the SageMaker API.
job_description = sm.describe_hyper_parameter_tuning_job(
    HyperParameterTuningJobName=tuning_job_name
)

status = job_description['HyperParameterTuningJobStatus']

print('\n')
print(status)
print('\n')
pprint(job_description)

if status != 'Completed':
    job_count = job_description['TrainingJobStatusCounters']['Completed']
    # Fix: substitute the count into the message. Without .format() the
    # literal '{}' was printed and job_count was never used.
    print('Not yet complete, but {} jobs have completed.'.format(job_count))

    # 'BestTrainingJob' is read with .get() because it may be absent or empty.
    best_training_job = job_description.get('BestTrainingJob', None)
    if best_training_job:
        print("Best candidate:")
        pprint(best_training_job['TrainingJobName'])
        pprint(best_training_job['FinalHyperParameterTuningJobObjectiveMetric'])
    else:
        print("No training jobs have reported results yet.")

In [None]:
# Import from the public IPython.display module; importing `display` from
# IPython.core.display is deprecated.
from IPython.display import display, HTML

# Render a clickable link to this tuning job in the SageMaker console.
display(HTML('<b>Review <a href="https://console.aws.amazon.com/sagemaker/home?region={}#/hyper-tuning-jobs/{}">Hyper-Parameter Tuning Job</a></b>'.format(region, tuning_job_name)))

# _Please Wait for the ^^ Tuning Job ^^ to Complete Above_

In [None]:
# Wait here for the tuning job so the result cells below run against a finished job.
tuner.wait()

# Show the Tuning Job
### _Note:  This will fail at first.  Please wait about 15-30 seconds and re-run._

In [None]:
# Note: this cell fails at first. Wait about 15-30 seconds and re-run it.

from sagemaker.analytics import HyperparameterTuningJobAnalytics

# Load the tuning job's results into a pandas DataFrame.
hp_results = HyperparameterTuningJobAnalytics(
    hyperparameter_tuning_job_name=tuning_job_name,
    sagemaker_session=sess,
)
df_results = hp_results.dataframe()

# (rows, columns) of the results table.
df_results.shape

In [None]:
# All tuning results, best objective value first.
df_results.sort_values(by='FinalObjectiveValue', ascending=False)

# Show the Best Candidate

In [None]:
# The single row with the highest final objective value.
df_results.sort_values(by='FinalObjectiveValue', ascending=False).head(1)

# Pass `tuning_job_name` to the Next Notebook

In [None]:
# Persist tuning_job_name with IPython's %store so the next notebook can restore it with `%store -r`.
%store tuning_job_name

In [None]:
# With no arguments, %store lists every variable currently held in the store.
%store