In [None]:
# Download the COCO dataset and weights, convert the data to TFRecords, and upload everything to S3.
# Guide for preparing the data and weights: https://github.com/HerringForks/DeepLearningExamples/tree/master/TensorFlow2/Segmentation/MaskRCNN#quick-start-guide

In [None]:
import os
import subprocess
from datetime import datetime

from sagemaker import get_execution_role
from sagemaker.tensorflow import TensorFlow
from sagemaker.inputs import FileSystemInput

In [None]:
# Job configuration: every value a user is expected to tune lives in this cell.

# Set the default AWS region for this process and look up the execution role.
region = "us-west-2"
os.environ["AWS_DEFAULT_REGION"] = region
role = get_execution_role()

user_id = "johndoe"
# Use a full date + time stamp. A time-of-day-only stamp ("%H-%M-%S") repeats
# every day, and SageMaker rejects a training job whose name was already used
# in the same account and region.
time_str = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")

instance_type = "ml.p4d.24xlarge"
instance_count = 1

# Single-node training with a total batch size of 64.
# These parameters are for illustration only; tune them to fit your workload.
config_file = "configs/mrcnn_bs64.yaml"
hyperparameters = {"config": config_file}

# Enable SMDDP (SageMaker distributed data parallel).
distribution = {"smdistributed": {"dataparallel": {"enabled": True}}}

# Training job names are limited to 63 characters, so keep `user_id` short.
job_name = f"{user_id}-maskrcnn-keras-p4d-{instance_count}-{time_str}"

source_dir = "."
entry_point = "train_keras.py"

# Shared SMDDP Keras docker image.
docker_image = "<image_uri>"

# All files under this directory will be mounted under /opt/ml/input/data/train/;
# make sure that path matches the one used in the config file.
s3_data_dir = "<s3-path-to-dataset-directory>"
channels = {"train": s3_data_dir}

In [None]:
# Build the TensorFlow estimator from the values set in the configuration cell.
estimator = TensorFlow(
    # Training code and its arguments.
    source_dir=source_dir,
    entry_point=entry_point,
    hyperparameters=hyperparameters,
    # Execution environment.
    image_uri=docker_image,
    role=role,
    instance_type=instance_type,
    instance_count=instance_count,
    distribution=distribution,
    # Profiler and debugger hook are switched off for this job.
    disable_profiler=True,
    debugger_hook_config=False,
)

In [None]:
# Start the training job: `channels` supplies the input data, `job_name` names the job.
estimator.fit(
    job_name=job_name,
    inputs=channels,
)