### Build the Docker image and push it to ECR
* Get aws account related variables such as account id and region and compose the full docker image name 
* Dockerfile takes the fullname as a parameter to build docker image 
* Push to ecr

In [None]:
%%bash
# Build the Docker image and push it to ECR using the helper script in this directory.
./build-and-push.sh

* Change the value below to your own image URI (your AWS account ID and region).

In [None]:
# Set the `fullname` environment variable to the full ECR image URI so shell commands can read it.
%env fullname=230755935769.dkr.ecr.us-east-1.amazonaws.com/yolo-tensorflow

In [None]:
# Sanity check: print the image URI stored in the `fullname` environment variable.
!echo $fullname 

In [None]:
# Print the build script with syntax highlighting for review.
!pygmentize build-and-push.sh

In [None]:
# Print the container's Dockerfile with syntax highlighting for review.
!pygmentize container/Dockerfile

In [None]:
# Print the container's main.py with syntax highlighting for review.
!pygmentize container/main.py

#### training data file structure 
* files under cfg - data descriptors and model config 
* files under dinfo - paths to training images 
* files under train2014 - image files and labels 

In [None]:
!wget -O demo.zip "https://tinyurl.com/y2lyes4z"
!unzip demo.zip 
!mv sagemaker_training_job /home/ec2-user/sagemaker_training_job

In [None]:
import os 
def list_files(startpath):
    """Print the directory tree rooted at ``startpath``.

    Each directory is printed as ``name/`` and each file by its name, indented
    four spaces per nesting level relative to ``startpath``.

    Args:
        startpath: Path of the directory to walk. A trailing path separator is
            accepted and ignored.
    """
    # Normalise first: with a trailing separator, basename() of the root would be
    # empty and the depth of every sub-directory would be under-counted by one.
    startpath = os.path.normpath(startpath)
    for root, dirs, files in os.walk(startpath):
        # Sorting in place makes os.walk visit sub-directories in a stable order.
        dirs.sort()
        # Depth is derived from the path relative to the root rather than from a
        # substring replace, which would also strip repeats deeper in the path.
        relative = os.path.relpath(root, startpath)
        level = 0 if relative == os.curdir else relative.count(os.sep) + 1
        indent = ' ' * 4 * level
        print('{}{}/'.format(indent, os.path.basename(root)))
        subindent = ' ' * 4 * (level + 1)
        for f in sorted(files):
            print('{}{}'.format(subindent, f))
            
# Show the layout of the training-data folder under /home/ec2-user.
list_files('/home/ec2-user/sagemaker_training_job/')            

* create your own s3 bucket! 

### Create or reuse your own S3 bucket
* Note: S3 bucket names must be globally unique, so replace the bucket name in the cells below with your own.

In [None]:
# Create the bucket in us-east-1; use the same name as the `bucket` variable assigned in a later cell.
!aws s3 mb s3://ws-yolov4-yianc/ --region us-east-1

In [None]:
# Upload the local training-data folder to the bucket under the sagemaker_training_job/ prefix.
!aws s3 cp --recursive /home/ec2-user/sagemaker_training_job s3://ws-yolov4-yianc/sagemaker_training_job

In [None]:
import boto3
# Region configured for the default boto3 session; used to build the ECR image URI.
region = boto3.Session().region_name
# Bucket holding the training inputs and model output; must match the bucket
# used by the `aws s3` commands in the cells above.
bucket = "ws-yolov4-yianc"


In [None]:
# Ask STS who we are so the ECR registry host can be derived from the account ID.
caller_identity = boto3.client('sts').get_caller_identity()
account = caller_identity.get('Account')
# Full URI of the training image in this account's ECR registry.
repositoryUri = f"{account}.dkr.ecr.{region}.amazonaws.com/yolo-tensorflow"
repositoryUri

In [None]:
from datetime import datetime

# Timestamped job name, e.g. yolov4-2021-01-31-12-00-00.
now = datetime.now()
job_name = f"yolov4-{now:%Y-%m-%d-%H-%M-%S}"
job_name

In [None]:
import sagemaker
from sagemaker import get_execution_role
# Execution role of this notebook; passed as RoleArn when the training jobs are created below.
role = get_execution_role()

In [None]:
# S3 prefixes for each training input channel, all under the uploaded
# sagemaker_training_job/ folder, plus the prefix for model output.
job_prefix = f's3://{bucket}/sagemaker_training_job'
cfg = f'{job_prefix}/cfg/'
dinfo = f'{job_prefix}/dinfo/'
yolo_model = f'{job_prefix}/yolo_model/'
# NOTE(review): the notes above describe a `train2014` folder, but this channel
# points at `train/` - confirm the prefix matches the uploaded layout.
train = f'{job_prefix}/train/'
config_path = f'{job_prefix}/config_path/'
outpath = f's3://{bucket}/model/'
(cfg, dinfo, train, config_path, yolo_model, outpath)


### Install sagemaker experiments sdk 

In [None]:
import sys
# Install the pinned SDK with the kernel's own interpreter so the package lands
# in the same environment this notebook imports from.
!{sys.executable} -m pip install sagemaker-experiments==0.1.24

### Create an Experiment 

In [None]:
from smexperiments.experiment import Experiment
from smexperiments.trial import Trial
from datetime import datetime

# from smexperiments.trial_component import TrialComponent
# from smexperiments.tracker import Tracker
# Shared boto3 session and low-level SageMaker client, reused by the cells below.
sess = boto3.Session()
sm = sess.client('sagemaker')

# The timestamp suffix gives every run of this cell a fresh experiment name.
now = datetime.now()
experiment_name = f"yolov4-parameter-tuning-{now:%Y-%m-%d-%H-%M-%S}"
yolov4_experiment = Experiment.create(
    experiment_name=experiment_name,
    description="try to tune yolov4 parameters",
    sagemaker_boto_client=sm,
)
print(yolov4_experiment)

### Add training jobs to experiment 

In [None]:

import time

# Seconds to wait between status polls of a running training job.
POLL_INTERVAL_SECONDS = 10
# Job statuses that mean the job has not reached a terminal state yet.
ACTIVE_JOB_STATUSES = ('InProgress', 'Stopping')


def _s3_channel(channel_name, s3_uri):
    """Build one File-mode, fully replicated S3-prefix input channel definition."""
    return {
        'ChannelName': channel_name,
        'DataSource': {
            'S3DataSource': {
                'S3DataType': 'S3Prefix',
                'S3Uri': s3_uri,
                'S3DataDistributionType': 'FullyReplicated',
            },
        },
        'InputMode': 'File',
    }


# Run one training job per learning rate, each attached to its own trial of the
# experiment. Jobs run one after another: the loop waits for a job to finish
# before starting the next one.
for learning_rate in [0.001, 0.002, 0.005, 0.1]:
    now = datetime.now()
    timestamp = now.strftime("%Y-%m-%d-%H-%M-%S")
    job_name = 'yolov4-' + timestamp
    # Dots are replaced with dashes in the learning rate before it goes into the name.
    trial_name = f"yolo-v4-training-job-{str(learning_rate).replace('.', '-')}-learning-rate-" + timestamp
    yolov4_trial = Trial.create(
        trial_name=trial_name,
        experiment_name=yolov4_experiment.experiment_name,
        sagemaker_boto_client=sm,
    )

    response = sm.create_training_job(
        TrainingJobName=job_name,
        HyperParameters={'max_batches': '50', 'learning_rate': str(learning_rate)},
        AlgorithmSpecification={
            'TrainingImage': repositoryUri,
            'TrainingInputMode': 'File',
            # Raw strings keep the regex backslashes literal instead of relying
            # on Python passing unknown escape sequences through unchanged.
            'MetricDefinitions': [
                {'Name': 'IOU', 'Regex': r"\(IOU: (.*?),"},
                {'Name': 'GIOU', 'Regex': r"GIOU: (.*?)\)"},
            ],
        },
        RoleArn=role,
        InputDataConfig=[
            _s3_channel('cfg', cfg),
            _s3_channel('train', train),
            _s3_channel('config_path', config_path),
            _s3_channel('dinfo', dinfo),
            _s3_channel('yolo_model', yolo_model),
        ],
        OutputDataConfig={
            'S3OutputPath': outpath,
        },
        ResourceConfig={
            'InstanceType': 'ml.p3.2xlarge',
            'InstanceCount': 1,
            'VolumeSizeInGB': 10,
        },
        StoppingCondition={
            'MaxRuntimeInSeconds': 60 * 60 * 5,  # 5 hours
        },
        ExperimentConfig={
            'TrialName': yolov4_trial.trial_name,
            'TrialComponentDisplayName': 'Training',
        },
    )
    print(response)

    # Poll until the job reaches a terminal state. 'Stopping' is still in flight,
    # so it is waited on as well as 'InProgress'.
    response = sm.describe_training_job(TrainingJobName=job_name)
    while response['TrainingJobStatus'] in ACTIVE_JOB_STATUSES:
        time.sleep(POLL_INTERVAL_SECONDS)
        response = sm.describe_training_job(TrainingJobName=job_name)

    # Report the outcome so a failed job is visible; the sweep still continues.
    status = response['TrainingJobStatus']
    print('{} finished with status {}'.format(job_name, status))
    if status == 'Failed':
        print('Failure reason: {}'.format(response.get('FailureReason')))

### Analyze the results 

In [None]:
# Keep only trial components whose display name is "Training", i.e. the
# components created by the training jobs launched above.
training_component_filter = {
    "Name": "DisplayName",
    "Operator": "Equals",
    "Value": "Training",
}
search_expression = {"Filters": [training_component_filter]}

In [None]:
from sagemaker.analytics import ExperimentAnalytics
from sagemaker.session import Session
# Collect the training trial components of the experiment, best IOU first.
# The sort key has the form "metrics.<metric name>.<statistic>"; the metric is
# registered as "IOU" in the training jobs' MetricDefinitions.
trial_component_analytics = ExperimentAnalytics(
    sagemaker_session=Session(sess, sm),
    experiment_name=yolov4_experiment.experiment_name,
    search_expression=search_expression,
    sort_by="metrics.IOU.max",
    sort_order="Descending",
    metric_names=['IOU', 'GIOU'],
    parameter_names=['learning_rate'],
)

In [None]:
# Display the analytics results as a table (IOU/GIOU metrics and the learning_rate parameter).
trial_component_analytics.dataframe()