In [1]:
import sagemaker
from sagemaker import get_execution_role
import json
import boto3

from sagemaker.tuner import (
    IntegerParameter, CategoricalParameter, 
    ContinuousParameter, HyperparameterTuner)

sess = sagemaker.Session()

role = get_execution_role()
print(
    role
)  # This is the role that SageMaker would use to leverage AWS resources (S3, CloudWatch) on your behalf

bucket = "erasolon-ml-output" 
prefix_input = "emr/goemotions"  
prefix_output = "blazingtext/goemotions"  

arn:aws:iam::861737859161:role/service-role/AmazonSageMaker-ExecutionRole-20210405T205521


In [10]:
%%time

train_channel = prefix_input + "/train_output/part-00000-e9a1aa42-f613-4d21-9676-43cc59034fb4-c000.txt"
validation_channel = prefix_input + "/validation_output/part-00000-25d0f1c2-f51a-40f2-9012-764ead050f7d-c000.txt"
test_channel = prefix_input + "/test_output/part-00000-f9913b12-88b1-4946-8f89-a2ce470b212b-c000.txt"


s3_train_data = "s3://{}/{}".format(bucket, train_channel)
s3_validation_data = "s3://{}/{}".format(bucket, validation_channel)
s3_test_data = "s3://{}/{}".format(bucket, test_channel)

s3_output_location = "s3://{}/{}/output".format(bucket, prefix_output)

CPU times: user 8 µs, sys: 2 µs, total: 10 µs
Wall time: 13.4 µs


In [11]:
region_name = boto3.Session().region_name
container = sagemaker.amazon.amazon_estimator.get_image_uri(region_name, "blazingtext", "latest")
print("Using SageMaker BlazingText container: {} ({})".format(container, region_name))

The method get_image_uri has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
Defaulting to the only supported framework/algorithm version: 1. Ignoring framework/algorithm version: latest.


Using SageMaker BlazingText container: 811284229777.dkr.ecr.us-east-1.amazonaws.com/blazingtext:1 (us-east-1)


In [12]:
bt_model = sagemaker.estimator.Estimator(
    container,
    role,
    instance_count=1,
    instance_type="ml.c4.4xlarge",
    volume_size=30,
    max_run=360000,
    input_mode="File",
    output_path=s3_output_location,base_job_name="GoEmotions-BlazingText",
    sagemaker_session=sess,
    hyperparameters={
        "mode": "supervised",
        "epochs": 5,
        "min_count": 3,
        "learning_rate": 0.05,
        "vector_dim": 10,
        "early_stopping": False,
        "patience": 4,
        "min_epochs": 2,
        "word_ngrams": 2,
        
    },
)

In [13]:
train_data = sagemaker.inputs.TrainingInput(
    s3_train_data,
    distribution="FullyReplicated",
    s3_data_type="S3Prefix",
    input_mode='File',
    content_type='text/plain',
    compression='None',
)
validation_data = sagemaker.inputs.TrainingInput(
    s3_validation_data,
    distribution="FullyReplicated",
    s3_data_type="S3Prefix",
    input_mode='File',
    content_type='text/plain',
    compression='None',
)
test_data = sagemaker.inputs.TrainingInput(
    s3_test_data,
    distribution="FullyReplicated",
    s3_data_type="S3Prefix",
    input_mode='File',
    content_type='text/plain',
    compression='None',
)
data_channels = {"train": train_data, "validation": validation_data, "test": test_data}

In [14]:
%%time

bt_model.fit(inputs=data_channels, logs=True)

2022-01-15 16:36:37 Starting - Starting the training job...
2022-01-15 16:36:39 Starting - Launching requested ML instancesProfilerReport-1642264597: InProgress
......
2022-01-15 16:37:57 Starting - Preparing the instances for training.........
2022-01-15 16:39:38 Downloading - Downloading input data
2022-01-15 16:39:38 Training - Downloading the training image.[34mArguments: train[0m
[34m[01/15/2022 16:39:36 INFO 140392150271360] nvidia-smi took: 0.025350570678710938 secs to identify 0 gpus[0m
[34m[01/15/2022 16:39:36 INFO 140392150271360] Running single machine CPU BlazingText training using supervised mode.[0m
[34mNumber of CPU sockets found in instance is  1[0m
[34m[01/15/2022 16:39:36 INFO 140392150271360] Processing /opt/ml/input/data/train/part-00000-e9a1aa42-f613-4d21-9676-43cc59034fb4-c000.txt . File size: 2.665308952331543 MB[0m
[34m[01/15/2022 16:39:36 INFO 140392150271360] Processing /opt/ml/input/data/validation/part-00000-25d0f1c2-f51a-40f2-9012-764ead050f7d-c0

In [15]:
objective_metric_name = 'validation:accuracy'
objective_type = 'Maximize'

In [16]:
%%time

hyperparameter_ranges = {
    "epochs": IntegerParameter(5, 10, scaling_type="Linear"),
    "learning_rate": ContinuousParameter(0.01, 0.05, scaling_type="Auto"),
    "min_count": IntegerParameter(2, 5, scaling_type="Linear"),
    "vector_dim": IntegerParameter(32, 44, scaling_type="Linear"),
    "word_ngrams": IntegerParameter(1, 3, scaling_type="Linear")
}

tuner = HyperparameterTuner(bt_model,
                            objective_metric_name,
                            hyperparameter_ranges,
                            base_tuning_job_name ="GoEmotions-BlazingText",
                            max_jobs=4,
                            max_parallel_jobs=4,
                            strategy="Random",
                            early_stopping_type='Auto',
                            objective_type=objective_type)


data_channels_tune = {"train": train_data, "test": test_data, "validation": validation_data}

tuner.fit(inputs=data_channels_tune, logs=True)

.........................................................!
CPU times: user 289 ms, sys: 21.2 ms, total: 311 ms
Wall time: 4min 48s


In [17]:
boto3.client("sagemaker").describe_hyper_parameter_tuning_job(
    HyperParameterTuningJobName=tuner.latest_tuning_job.job_name
)["HyperParameterTuningJobStatus"]

'Completed'

In [18]:
# run this cell to check current status of hyperparameter tuning job
tuning_job_result = boto3.client("sagemaker").describe_hyper_parameter_tuning_job(
    HyperParameterTuningJobName=tuner.latest_tuning_job.job_name
)

status = tuning_job_result["HyperParameterTuningJobStatus"]
if status != "Completed":
    print("Reminder: the tuning job has not been completed.")

job_count = tuning_job_result["TrainingJobStatusCounters"]["Completed"]
print("%d training jobs have completed" % job_count)

objective = tuning_job_result["HyperParameterTuningJobConfig"]["HyperParameterTuningJobObjective"]
is_minimize = objective["Type"] != "Maximize"
objective_name = objective["MetricName"]

4 training jobs have completed


In [19]:
from pprint import pprint

if tuning_job_result.get("BestTrainingJob", None):
    print("Best model found so far:")
    pprint(tuning_job_result["BestTrainingJob"])
else:
    print("No training jobs have reported results yet.")

Best model found so far:
{'CreationTime': datetime.datetime(2022, 1, 15, 16, 41, 3, tzinfo=tzlocal()),
 'FinalHyperParameterTuningJobObjectiveMetric': {'MetricName': 'validation:accuracy',
                                                 'Value': 0.6118000149726868},
 'ObjectiveStatus': 'Succeeded',
 'TrainingEndTime': datetime.datetime(2022, 1, 15, 16, 44, 35, tzinfo=tzlocal()),
 'TrainingJobArn': 'arn:aws:sagemaker:us-east-1:861737859161:training-job/goemotions-blazingte-220115-1640-001-7c12e047',
 'TrainingJobName': 'GoEmotions-BlazingTe-220115-1640-001-7c12e047',
 'TrainingJobStatus': 'Completed',
 'TrainingStartTime': datetime.datetime(2022, 1, 15, 16, 43, 39, tzinfo=tzlocal()),
 'TunedHyperParameters': {'epochs': '6',
                          'learning_rate': '0.04354004683092508',
                          'min_count': '3',
                          'vector_dim': '37',
                          'word_ngrams': '1'}}


tuner.deploy(
    initial_instance_count=1,
    instance_type='ml.p3.2xlarge'
)