# Set up and run a hyperparameter tuning job with the SageMaker SDK and HuggingFace container

In [3]:
#!pip install "sagemaker>=2.48.0" "transformers==4.12.3" "datasets[s3]==1.18.3" --upgrade

In [22]:
#!pip install sagemaker==2.128.0

In [4]:
import sagemaker
import boto3
import sagemaker.huggingface
from sagemaker.tuner import (
    IntegerParameter,
    CategoricalParameter,
    ContinuousParameter,
    HyperparameterTuner
)

from sagemaker.huggingface import HuggingFace

In [5]:
# Open a SageMaker session and look up the execution role for this notebook.
session = sagemaker.Session()
role = sagemaker.get_execution_role()

# Keep only the text after the last '/' of the role string.
role_name = role.rsplit('/', 1)[-1]

# The session's default S3 bucket; reused when the session is rebuilt below.
sagemaker_session_bucket = session.default_bucket()

In [6]:
# Rebuild the session with the bucket passed explicitly as its default.
session = sagemaker.Session(default_bucket=sagemaker_session_bucket)

# Report the role, bucket and region in use, one per line.
print(
    f"sagemaker role arn: {role}",
    f"sagemaker bucket: {session.default_bucket()}",
    f"sagemaker session region: {session.boto_region_name}",
    sep="\n",
)

sagemaker role arn: arn:aws:iam::264639154954:role/aaca-ani-cogsci-sagemaker-studio-role
sagemaker bucket: sagemaker-us-east-1-264639154954
sagemaker session region: us-east-1


In [7]:
# Key prefix, inside the session's default bucket, shared by both input locations.
s3_prefix = "samples/datasets/imdb"

# S3 URIs later passed to the tuner as the "train" and "test" inputs.
training_input_path = f"s3://{session.default_bucket()}/{s3_prefix}/train"
test_input_path = f"s3://{session.default_bucket()}/{s3_prefix}/test"

# Show both URIs, one per line.
print(training_input_path, test_input_path, sep="\n")

s3://sagemaker-us-east-1-264639154954/samples/datasets/imdb/train
s3://sagemaker-us-east-1-264639154954/samples/datasets/imdb/test


In [8]:
# Fixed settings handed to the estimator through its `hyperparameters` argument.
hyperparameters = {
    "epochs": 1,
    "train_batch_size": 16,
    "model_name": "distilbert-base-uncased",
}

## Set up the hyperparameter tuning job

In [7]:
# estimator = HuggingFace(
#     entry_point='bespoke_training.py',
#     source_dir='./scripts',
#     session=session,
#     role=role,
#     instance_type='ml.m4.xlarge',
#     instance_count=1,
#     transformers_version='4.12',
#     py_version='py38',
#     pytorch_version='1.9',
#     base_job_name='hpo-HF',
#     hyperparameters=hyperparameters,
# )

In [19]:
# Key/Value tag pairs; passed to the tuner through its `tags` argument.
TAGS = [
    {"Key": "Owner", "Value": "ccooney@aflac.com"},
    {"Key": "Environment", "Value": "Dev"},
]

In [14]:
# Estimator that the hyperparameter tuner is built around.
huggingface_estimator = HuggingFace(
    # Training code and its fixed hyperparameters.
    entry_point='bespoke_training.py',
    source_dir='./scripts',
    hyperparameters=hyperparameters,
    # Framework versions.
    transformers_version='4.12',
    pytorch_version='1.9',
    py_version='py38',
    # Compute, permissions and session.
    instance_type='ml.p3.2xlarge',
    instance_count=1,
    role=role,
    sagemaker_session=session,
)

In [8]:
#huggingface_estimator.fit({"train": training_input_path, "test": test_input_path})

In [15]:
# Search space for the tuner: one continuous and one integer range,
# keyed by hyperparameter name.
hyperparameter_ranges = {
    "learning_rate": ContinuousParameter(min_value=0.0001, max_value=0.1),
    "warmup_steps": IntegerParameter(min_value=100, max_value=500),
}

# Objective for the tuner: minimize the metric named "loss".
objective_metric = "loss"
objective_type = "Minimize"

# The regex captures the digits/dots that follow the text "loss = ".
# NOTE(review): this pattern would also match inside longer names ending in
# "loss" (e.g. "eval_loss = ...") — confirm against what the training script logs.
metric_definitions = [
    {
        "Name": objective_metric,
        "Regex": "loss = ([0-9\\.]+)",
    },
]

In [20]:
# Tuner around the estimator: at most 3 training jobs, run one at a time.
tuner = HyperparameterTuner(
    estimator=huggingface_estimator,
    objective_metric_name=objective_metric,
    objective_type=objective_type,
    hyperparameter_ranges=hyperparameter_ranges,
    metric_definitions=metric_definitions,
    max_jobs=3,
    max_parallel_jobs=1,
    tags=TAGS,
)

In [21]:
# Start the tuning job with the "train" and "test" input locations.
tuner.fit(
    inputs={
        "train": training_input_path,
        "test": test_input_path,
    }
)

No finished training job found associated with this estimator. Please make sure this estimator is only used for building workflow config
No finished training job found associated with this estimator. Please make sure this estimator is only used for building workflow config


..................................................................................................................................................................................................................................................................................................................................................................................................................................................................!


In [23]:
# Display the name of the tuning job's best training job.
tuner.best_training_job()

'huggingface-pytorch--230117-1147-003-93ce057a'

In [24]:
# Keep the estimator the tuner returns for its best training job.
# NOTE(review): the status log lines shown under this cell appear to be printed
# by this call — confirm if the output needs to be suppressed.
best_estimator = tuner.best_estimator()


2023-01-17 12:24:14 Starting - Found matching resource for reuse
2023-01-17 12:24:14 Downloading - Downloading input data
2023-01-17 12:24:14 Training - Training image download completed. Training in progress.
2023-01-17 12:24:14 Uploading - Uploading generated training model
2023-01-17 12:24:14 Completed - Resource retained for reuse


In [22]:
# Display the tuning job's description (name, ARN, config, parameter ranges,
# static hyperparameters, ...).
tuner.describe()

{'HyperParameterTuningJobName': 'huggingface-pytorch--230117-1147',
 'HyperParameterTuningJobArn': 'arn:aws:sagemaker:us-east-1:264639154954:hyper-parameter-tuning-job/huggingface-pytorch--230117-1147',
 'HyperParameterTuningJobConfig': {'Strategy': 'Bayesian',
  'HyperParameterTuningJobObjective': {'Type': 'Minimize',
   'MetricName': 'loss'},
  'ResourceLimits': {'MaxNumberOfTrainingJobs': 3,
   'MaxParallelTrainingJobs': 1},
  'ParameterRanges': {'IntegerParameterRanges': [{'Name': 'warmup_steps',
     'MinValue': '100',
     'MaxValue': '500',
     'ScalingType': 'Auto'}],
   'ContinuousParameterRanges': [{'Name': 'learning_rate',
     'MinValue': '0.0001',
     'MaxValue': '0.1',
     'ScalingType': 'Auto'}],
   'CategoricalParameterRanges': []},
  'TrainingJobEarlyStoppingType': 'Off'},
 'TrainingJobDefinition': {'StaticHyperParameters': {'_tuning_objective_metric': 'loss',
   'epochs': '1',
   'model_name': '"distilbert-base-uncased"',
   'sagemaker_container_log_level': '20',
 

In [12]:
#!pip install -U sagemaker

In [None]:
# Look up the current status of the latest tuning job through the SageMaker API.
sagemaker_client = boto3.client("sagemaker")
tuning_job_name = tuner.latest_tuning_job.job_name
tuning_job_description = sagemaker_client.describe_hyper_parameter_tuning_job(
    HyperParameterTuningJobName=tuning_job_name
)

# Last expression of the cell, so the status string is displayed.
tuning_job_description["HyperParameterTuningJobStatus"]