# Train an object detection model using TensorFlow on SageMaker

## Setup environment

In [None]:
import os
import sagemaker
from sagemaker.estimator import Framework, Estimator

# Execution role of the current SageMaker session; handed to the estimator later.
role = sagemaker.get_execution_role()

# Training data location(s), later passed to `estimator.fit`.
# Placeholder: replace "s3://" with the real S3 URI of the training data.
inputs = {"train": "s3://"}

# S3 destination for TensorBoard event files.
# Placeholder: replace "s3://" with a real S3 prefix.
tensorboard_s3_prefix = "s3://"

## Build and push container

In [None]:
%%bash
# Stop at the first failing command so a failed clone is not masked by the copies below.
set -e

# Clone the TensorFlow models repository only when it is not already present,
# so the cell can be re-run without `git clone` failing on an existing directory.
if [ ! -d docker/models/.git ]; then
    git clone https://github.com/tensorflow/models.git docker/models
fi

# get model_main and exporter_main files from TF2 Object Detection GitHub repository
# The trailing slash makes `cp` fail loudly if source_dir is not a directory,
# instead of silently creating a regular file named "source_dir".
cp docker/models/research/object_detection/exporter_main_v2.py source_dir/
cp docker/models/research/object_detection/model_main_tf2.py source_dir/

In [None]:
# Repository name under which the training image is built and pushed.
image_name = "sagemaker-studio-tf2-object-detection"

In [None]:
# Alternative build path (currently disabled): run the helper script directly.
# !sh ./docker/build_and_push.sh $image_name
# Build the image from the ./docker directory with sm-docker, using the
# repository name in `image_name` and the tag "latest".
!cd docker && sm-docker build .  --repository $image_name:latest

In [None]:
# Resolve the full ECR URI of the training image and bind it to `container`,
# which the estimator cell below passes as `image_uri`. Previously this whole
# cell was commented out, leaving `container` undefined.
#
# The URI is derived from the current SageMaker session's account and region
# plus the `image_name:latest` repository/tag used by the `sm-docker build` cell.
# NOTE(review): assumes the standard "amazonaws.com" ECR domain and that the
# image was pushed to the session's own account/region - confirm for other
# partitions or cross-account setups.
# NOTE(review): the earlier (disabled) variant read the URI from
# docker/ecr_image_fullname.txt, presumably written by docker/build_and_push.sh.
_sm_session = sagemaker.Session()
container = (
    f"{_sm_session.account_id()}.dkr.ecr."
    f"{_sm_session.boto_region_name}.amazonaws.com/{image_name}:latest"
)

print(container)

## Get pre-trained model from model zoo

Download the base model archive and extract its checkpoint locally.

In [None]:
%%bash
# Stop at the first failing command so a failed download is not followed by
# an attempt to extract a missing or partial archive.
set -e

# `-p` keeps the cell re-runnable: no error when the directories already exist.
mkdir -p /tmp/checkpoint
mkdir -p source_dir/checkpoint

# Download the EfficientDet D1 archive from the TF2 detection model zoo.
wget -O /tmp/efficientdet.tar.gz http://download.tensorflow.org/models/object_detection/tf2/20200711/efficientdet_d1_coco17_tpu-32.tar.gz

# Extract only the archive's checkpoint/ directory into source_dir/checkpoint.
# --strip-components 2 drops the two leading path components of each member.
# All options are placed before the member name for portability.
tar -zxvf /tmp/efficientdet.tar.gz --strip-components 2 --directory source_dir/checkpoint --no-same-owner efficientdet_d1_coco17_tpu-32/checkpoint

## Create SageMaker Custom Framework and Launch Training job

Here we define a custom framework estimator with the Amazon SageMaker Python SDK and use that class to run training; the SDK takes care of uploading the training code and launching the training job.

In [None]:
class CustomFramework(Framework):
    """Minimal `Framework` estimator for running a custom training container.

    The constructor forwards its arguments to `Framework.__init__`; the two
    overridden hooks below are deliberately inert.
    """

    def __init__(
        self,
        entry_point,
        source_dir=None,
        hyperparameters=None,
        distributions=None,
        **kwargs
    ):
        """Initialise the estimator.

        Args:
            entry_point: Forwarded to `Framework.__init__`.
            source_dir: Forwarded to `Framework.__init__`.
            hyperparameters: Forwarded to `Framework.__init__`.
            distributions: Accepted for signature compatibility; it is not
                forwarded to the base class and is otherwise unused here.
            **kwargs: Any further keyword arguments (e.g. `image_uri`, `role`),
                passed through unchanged to `Framework.__init__`.
        """
        super().__init__(
            entry_point=entry_point,
            source_dir=source_dir,
            hyperparameters=hyperparameters,
            **kwargs
        )

    def _configure_distribution(self, distributions):
        """Do nothing; `distributions` is ignored and None is returned."""
        return None

    def create_model(
        self,
        model_server_workers=None,
        role=None,
        vpc_config_override=None,
        entry_point=None,
        source_dir=None,
        dependencies=None,
        image_uri=None,
        **kwargs
    ):
        """Return None; no model object is built from this estimator.

        All parameters are accepted and ignored.
        NOTE(review): presumably this estimator is used for training only -
        confirm before relying on `deploy()`.
        """
        return None

In [None]:
from sagemaker.debugger import TensorBoardOutputConfig

# TensorBoard output: container-local path "/opt/training/" paired with the
# S3 prefix defined in the setup cell.
tensorboard_output_config = TensorBoardOutputConfig(
    container_local_output_path="/opt/training/",
    s3_output_path=tensorboard_s3_prefix,
)

# Hyperparameters handed to the estimator. `model_dir` points at the same
# container directory that the TensorBoard output config uses.
hyperparameters = dict(
    model_dir="/opt/training",
    pipeline_config_path="pipeline.config",
    num_train_steps=1000,
    sample_1_of_n_eval_examples=1,
)

# Training job definition: one ml.p3.2xlarge instance running
# source_dir/run_training.sh inside the custom image referenced by `container`.
estimator = CustomFramework(
    entry_point="run_training.sh",
    source_dir="source_dir/",
    image_uri=container,
    role=role,
    instance_type="ml.p3.2xlarge",
    instance_count=1,
    hyperparameters=hyperparameters,
    tensorboard_output_config=tensorboard_output_config,
    disable_profiler=True,
    base_job_name="tf2-object-detection",
)

In [None]:
# Launch the training job on the data locations defined in the setup cell.
estimator.fit(inputs=inputs)

## Visualize training metrics with TensorBoard

TODO: enable TensorBoard on SageMaker Studio.

In [None]:
# Ask the estimator where the latest job's TensorBoard artifacts are stored.
job_artifacts_path = estimator.latest_job_tensorboard_artifacts_path()
# Bare expression so the notebook displays the value as the cell output.
job_artifacts_path

In [None]:
tensorboard_s3_output_path = f'{job_artifacts_path}/train'
!F_CPP_MIN_LOG_LEVEL=3 AWS_REGION=ap-southeast-1 tensorboard --logdir=$tensorboard_s3_output_path