In [1]:
%load_ext tensorboard
import boto3
import sagemaker
from sagemaker.tensorflow import TensorFlow
from sagemaker.tuner import IntegerParameter, CategoricalParameter, ContinuousParameter, HyperparameterTuner
import json
# report the SDK versions in use so a run can be tied to a known environment
for _label, _version in (
    ("boto3 : ", boto3.__version__),
    ("sagemaker : ", sagemaker.__version__),
):
    print(_label, _version)

pandas failed to import. Analytics features will be impaired or broken.


boto3 :  1.14.44
sagemaker :  2.16.1


In [2]:
# utilities
def get_s3_loc(*args):
    """Build an S3 URI from path segments.

    The segments are joined with "/" and prefixed with "s3://", e.g.
    get_s3_loc("bucket", "train") -> "s3://bucket/train". Segments are used
    verbatim: no slashes are added, stripped, or de-duplicated.

    Args:
        *args: String path segments (this notebook passes the bucket name
            first, followed by the key prefix).

    Returns:
        str: The resulting "s3://..." URI; just "s3://" when called with no
        segments.

    Raises:
        TypeError: If any segment is not a str (raised by str.join).
    """
    return "s3://" + "/".join(args)

In [8]:
# load secret values from a local JSON file kept next to the notebook
with open("./secrets.json", mode="r", encoding="utf-8") as secrets_file:
    secrets = json.loads(secrets_file.read())

# the two settings the rest of the notebook relies on
role = secrets["RoleArn"]
s3_bucket = secrets["S3Bucket"]

In [9]:
# create session
# boto3 session pinned to a single region ...
boto_session = boto3.Session(region_name="ap-northeast-1")
# ... wrapped by a SageMaker session whose default bucket is the one read from secrets.json
sess = sagemaker.Session(default_bucket=s3_bucket, boto_session=boto_session)

In [10]:
# prepare hyperparameters

# instance type to use for each execution target
instance_types = {
    "CPU": "ml.m5.large",
    "GPU": "ml.g4dn.xlarge",
    "LOCAL": "local",
}

# "train" prefix inside the session's default bucket; used as the training
# input and also handed to the training script as TB_BUCKET
s3_train_loc = get_s3_loc(sess.default_bucket(), "train")

# fixed hyperparameters given to the estimator
hyperparameters = {
    "EPOCH": 50,
    "STEP": 10,
    "MAX_IMAGE_SIZE": 1024,
    "TB_BUCKET": s3_train_loc,
    "MAX_TRIAL": 2,
}

In [11]:
# create estimator
est = TensorFlow(
    # training script and runtime
    entry_point="train.py",
    framework_version="2.3.0",
    py_version="py37",
    hyperparameters=hyperparameters,
    # infrastructure: one GPU instance
    instance_type=instance_types["GPU"],
    instance_count=1,
    # spot capacity; run limit and wait limit are set to the same value
    use_spot_instances=True,
    max_run=3600,
    max_wait=3600,
    # permissions / session
    role=role,
    sagemaker_session=sess,
    # debugger hook is turned off for this job
    debugger_hook_config=False,
)

In [None]:
# run training: a single job that reads its input from the S3 "train" prefix
est.fit(inputs=s3_train_loc)

2020-11-14 01:50:22 Starting - Starting the training job...
2020-11-14 01:50:25 Starting - Launching requested ML instances......
2020-11-14 01:51:39 Starting - Preparing the instances for training...

In [8]:
# Search space for the tuning job. Only the categorical STYLE_RESIZE_METHOD is
# tuned here; the continuous ranges below are disabled but kept as candidates.
hyperparameter_ranges = {
#     "CONTENT_WEIGHTS" : ContinuousParameter(
#         min_value=5000, max_value=15000,
#     ),
#     "STYLE_WEIGHTS" : ContinuousParameter(
#         min_value=0.001, max_value=0.1,
#     ),
#     "TOTAL_VARIATION_WEIGHTS" : ContinuousParameter(
#         min_value=10, max_value=50,
#     ),
#     "LEARNING_RATE" : ContinuousParameter(
#         min_value=0.01, max_value=0.1,
#     ),
    "STYLE_RESIZE_METHOD": CategoricalParameter(
        ["original", "imagenet", "content", "medium"]
    ),
}

# Objective: minimize the metric named 'loss'.
objective_metric_name = 'loss'
objective_type = 'Minimize'

# The metric value is extracted from the job's log output with this regex
# (presumably train.py prints "FinalMeanLoss=<value>" - confirm against the
# script). The capture group accepts plain decimals and an optional exponent,
# so a value logged as e.g. "1.5e-05" is captured whole; a bare [0-9.]+ would
# stop at the "e" and report a truncated number.
metric_definitions = [
    {
        # reuse the objective name so the two can never drift apart
        'Name': objective_metric_name,
        'Regex': r'FinalMeanLoss=([0-9.]+(?:[eE][-+]?[0-9]+)?)',
    }
]

# At most 4 training jobs in total, 2 of them running at the same time.
tuner = HyperparameterTuner(
    estimator=est,
    objective_metric_name=objective_metric_name,
    hyperparameter_ranges=hyperparameter_ranges,
    metric_definitions=metric_definitions,
    max_jobs=4,
    max_parallel_jobs=2,
    objective_type=objective_type
)



In [13]:
# start the tuning job and return immediately instead of waiting for it (wait=False)
tuner.fit(inputs=s3_train_loc, wait=False)