## Import Packages

In [None]:
from sagemaker.image_uris import get_training_image_uri
from sagemaker.tuner import HyperparameterTuner, ContinuousParameter

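Look up the image URI of the AWS Deep Learning Container (DLC) that matches the framework, Python version, and instance type used for training.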

In [None]:
# Arguments describing the training container to look up; the framework version,
# Python version and instance type mirror the estimator configured further down.
dlc_lookup_args = dict(
    framework="tensorflow",
    region="us-east-1",
    py_version="py39",
    framework_version="2.8",
    instance_type="ml.m5.24xlarge",
)
image_uri = get_training_image_uri(**dlc_lookup_args)

print(f'image uri: {image_uri}')

In [None]:
import json
from pathlib import Path

## Train model with TensorFlow

Change the bucket name below if you do not want to use the SageMaker default bucket.

In [None]:
import boto3
import uuid
import shutil
import sagemaker
from sagemaker.tensorflow import TensorFlow

# Create one SageMaker session and reuse it for every lookup below instead of
# constructing a fresh Session for each call.
sess = sagemaker.session.Session()
role = sagemaker.get_execution_role(sagemaker_session=sess)
bucket = sess.default_bucket()  # change bucket name here if needed
# uuid.getnode() is this machine's hardware address as an integer, so the S3
# prefix stays the same across kernel restarts on the same host.
key_prefix = f"{uuid.getnode()}/distributed_rl"

In [None]:
# Capture group for the metric value: an optionally signed decimal number with
# an optional exponent (e.g. "23.5", "-1e-3").
_METRIC_VALUE_REGEX = '([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'

# Each metric is scraped from log lines of the form "<metric_name>: <number>".
# The regex is derived from the metric name so a name can never be paired with
# another metric's pattern (previously the mean/max patterns were swapped).
metric_definitions = [
    {'Name': metric_name, 'Regex': f'{metric_name}: {_METRIC_VALUE_REGEX}'}
    for metric_name in (
        'episode_reward_mean',
        'episode_reward_max',
        'episode_reward_min',
        'episodes_total',
        'training_iteration',
        'timesteps_total',
    )
]

In [14]:
# TensorFlow estimator for the CartPole training script
tb_logging_path = f"s3://{bucket}/{key_prefix}/tb_logs/tf"

# Hyperparameters for the training job; every value is given as a string.
tf_hyperparameters = {
    "num-workers": "191",
    "framework": "tf",
    "train-iterations": "20",
    "lr": ".001",
}

tf_estimator = TensorFlow(
    # Training code
    entry_point="train_cart_pole.py",
    source_dir="src",
    role=role,
    # Compute: two ml.m5.24xlarge instances
    instance_type="ml.m5.24xlarge",
    instance_count=2,
    # Container selection
    framework_version="2.8",
    py_version="py39",
    # Job inputs and metric scraping
    hyperparameters=tf_hyperparameters,
    metric_definitions=metric_definitions,
    # NOTE(review): the TensorBoard log prefix is also used as the checkpoint
    # location — confirm train_cart_pole.py writes its logs where checkpoints go.
    checkpoint_s3_uri=tb_logging_path,
    keep_alive_period_in_seconds=1800,  # 1800 s = 30 min
)

# Uncomment to run a standalone training job; use wait=True if you want to see the logs.
# tf_estimator.fit(wait=True)

## HPO Job

In [15]:
# Search space for the tuning job: "lr" is sampled from the continuous range [0.001, 0.01].
hp_ranges = dict(lr=ContinuousParameter(min_value=0.001, max_value=0.01))

In [16]:
# Tuning job built on tf_estimator: searches hp_ranges and maximizes the
# 'episode_reward_mean' metric scraped via metric_definitions.
tuner = HyperparameterTuner(
    base_tuning_job_name='byoc-cart-pole',
    estimator=tf_estimator,
    hyperparameter_ranges=hp_ranges,
    # Objective
    metric_definitions=metric_definitions,
    objective_metric_name='episode_reward_mean',
    objective_type='Maximize',
    # Budget: 8 jobs in total, at most 2 running at the same time
    max_jobs=8,
    max_parallel_jobs=2,
)

In [17]:
# Start the hyperparameter tuning job; wait=False is passed so the notebook
# is not asked to block until the job finishes.
tuner.fit(wait=False)