<h1>Framework Container example</h1>

This example demonstrates how to build a framework container for training with Amazon SageMaker by leveraging the sagemaker-containers library.

In [23]:
import boto3
import sagemaker
from sagemaker import get_execution_role

# Shared by the S3 key prefixes, the ECR repository path and the training job name.
prefix = 'framework-container'
ecr_repository_name = 'gianpo-ecr/' + prefix

role = get_execution_role()

boto_session = boto3.Session()
region = boto_session.region_name
# Resolve the account id from the active credentials rather than hard-coding
# it, so the notebook is not tied to a single AWS account. The value is a
# string; every later use (print, str.format, shell interpolation) stringifies
# it anyway.
account_id = boto_session.client('sts').get_caller_identity()['Account']

sagemaker_session = sagemaker.session.Session()
bucket = sagemaker_session.default_bucket()

print(account_id)
print(region)
print(role)
print(bucket)

825935527263
eu-west-1
arn:aws:iam::825935527263:role/service-role/AmazonSageMaker-ExecutionRole-endtoendml
sagemaker-eu-west-1-825935527263


In [24]:
# Run the helper script that builds the container image (and, per its name,
# pushes it), passing account id, region and repository name positionally.
! ./build_and_push.sh {account_id} {region} {ecr_repository_name}

Sending build context to Docker daemon  10.24kB
Step 1/15 : FROM ubuntu:16.04
 ---> b9409899fe86
Step 2/15 : LABEL maintainer="Amazon AI"
 ---> Using cache
 ---> bab228941513
Step 3/15 : ARG PYTHON=python3
 ---> Using cache
 ---> 753bc9f6b601
Step 4/15 : ARG PYTHON_PIP=python3-pip
 ---> Using cache
 ---> 1d2afc099c45
Step 5/15 : ARG PIP=pip3
 ---> Using cache
 ---> 4637544f83e5
Step 6/15 : ARG PYTHON_VERSION=3.6.6
 ---> Using cache
 ---> f16297f44d34
Step 7/15 : RUN apt-get update && apt-get install -y --no-install-recommends software-properties-common &&     add-apt-repository ppa:deadsnakes/ppa -y &&     apt-get update && apt-get install -y --no-install-recommends         build-essential         ca-certificates         curl         wget         git         libopencv-dev         openssh-client         openssh-server         vim         zlib1g-dev &&     rm -rf /var/lib/apt/lists/*
 ---> Using cache
 ---> a2784e936cf9
Step 8/15 : RUN wget https://www.python.org/ftp/python/$PYTHON_VERSI

In [25]:
# Image reference layout: <account>.dkr.ecr.<region>.amazonaws.com/<repository>:latest
image_uri_template = '{0}.dkr.ecr.{1}.amazonaws.com/{2}:latest'
container_image_uri = image_uri_template.format(account_id, region, ecr_repository_name)
print(container_image_uri)

825935527263.dkr.ecr.eu-west-1.amazonaws.com/gianpo-ecr/framework-container:latest


In [26]:
# The same dummy.csv is uploaded under both the train and val key prefixes.
sagemaker_session.upload_data(path='dummy.csv', bucket=bucket, key_prefix=prefix + '/train')
# Left as the cell's last bare expression so the returned S3 URI is displayed.
sagemaker_session.upload_data(path='dummy.csv', bucket=bucket, key_prefix=prefix + '/val')

's3://sagemaker-eu-west-1-825935527263/framework-container/val/dummy.csv'

In [52]:
import tarfile
import os

def create_tar_file(source_files, target=None):
    """Create a gzip-compressed tar file containing all the source_files.

    Each file is stored at the root of the archive under its base name, so
    the directory part of every input path is dropped.

    Args:
        source_files: (List[str]): List of file paths that will be contained in the tar file
        target: (str, optional): Path of the tar file to write. When omitted
            (or empty), a new temporary file is created and used instead.
    Returns:
        (str): path to created tar file
    """
    if target:
        filename = target
    else:
        # Local import: this fallback branch is the only user of tempfile.
        import tempfile

        # mkstemp returns an already-open OS-level descriptor along with the
        # path; close it right away so it is not leaked (tarfile reopens the
        # path itself below).
        fd, filename = tempfile.mkstemp()
        os.close(fd)

    with tarfile.open(filename, mode="w:gz") as t:
        for sf in source_files:
            # Add all files from the directory into the root of the directory structure of the tar
            t.add(sf, arcname=os.path.basename(sf))
    return filename

# Bundle the training sources into sourcedir.tar.gz; the bare call is kept as
# the last expression so the returned archive path is displayed.
create_tar_file(
    source_files=["source_dir/train.py", "source_dir/utils.py"],
    target="sourcedir.tar.gz",
)


'sourcedir.tar.gz'

In [53]:
# Upload the source archive under the code key prefix and keep the returned S3 URI.
sources = sagemaker_session.upload_data(
    path='sourcedir.tar.gz',
    bucket=bucket,
    key_prefix=prefix + '/code',
)
print(sources)

s3://sagemaker-eu-west-1-825935527263/framework-container/code/sourcedir.tar.gz


In [54]:
import sagemaker

# Generic Estimator pointed at the custom container image.
estimator_settings = dict(
    train_instance_count=1,
    train_instance_type='ml.m5.xlarge',
    base_job_name=prefix,
)
est = sagemaker.estimator.Estimator(container_image_uri, role, **estimator_settings)

# sagemaker_program / sagemaker_submit_directory are passed like ordinary
# hyperparameters; presumably the container uses them to locate the entry
# point inside the uploaded source archive -- TODO confirm against the
# sagemaker-containers documentation.
hyperparameters = {
    "sagemaker_program": "train",
    "sagemaker_submit_directory": sources,
    "hp1": "value1",
    "hp2": 300,
    "hp3": 0.001,
}
est.set_hyperparameters(**hyperparameters)

# One CSV input channel per dataset uploaded earlier.
train_config = sagemaker.session.s3_input(
    s3_data='s3://{0}/{1}/train/'.format(bucket, prefix),
    content_type='text/csv',
)
val_config = sagemaker.session.s3_input(
    s3_data='s3://{0}/{1}/val/'.format(bucket, prefix),
    content_type='text/csv',
)

input_channels = {'train': train_config, 'validation': val_config}
est.fit(input_channels)

2019-10-23 17:07:22 Starting - Starting the training job...
2019-10-23 17:07:24 Starting - Launching requested ML instances...
2019-10-23 17:08:23 Starting - Preparing the instances for training......
2019-10-23 17:09:17 Downloading - Downloading input data
2019-10-23 17:09:17 Training - Downloading the training image.....[31m2019-10-23 17:09:58,986 sagemaker-containers INFO     Imported framework custom_framework_container.training[0m
[31m2019-10-23 17:09:58,987 sagemaker-containers INFO     Failed to parse hyperparameter hp1 value value1 to Json.[0m
[31mReturning the value itself[0m
[31m2019-10-23 17:09:58,987 sagemaker-containers INFO     Failed to parse hyperparameter sagemaker_program value train to Json.[0m
[31mReturning the value itself[0m
[31m2019-10-23 17:09:58,987 sagemaker-containers INFO     Failed to parse hyperparameter sagemaker_submit_directory value s3://sagemaker-eu-west-1-825935527263/framework-container/code/sourcedir.tar.gz to Json.[0m
[31mReturning th