In [1]:
data_bucket = 'ht6-1'
dataset_name = 'image_data'

In [2]:
import sagemaker
from sagemaker import get_execution_role
from sagemaker.amazon.amazon_estimator import get_image_uri

role = get_execution_role()
sess = sagemaker.Session()

training_image = get_image_uri(sess.boto_region_name, 'image-classification', repo_version='latest')
print(training_image)

825641698319.dkr.ecr.us-east-2.amazonaws.com/image-classification:latest


In [3]:
base_dir='/tmp'

%env BASE_DIR=$base_dir
%env S3_DATA_BUCKET_NAME = $data_bucket
%env DATASET_NAME = $dataset_name

import sys,os

suffix='/mxnet/tools/im2rec.py'
im2rec = list(filter( (lambda x: os.path.isfile(x + suffix )), sys.path))[0] + suffix
%env IM2REC=$im2rec

env: BASE_DIR=/tmp
env: S3_DATA_BUCKET_NAME=ht6-1
env: DATASET_NAME=image_data
env: IM2REC=/home/ec2-user/anaconda3/envs/mxnet_p36/lib/python3.6/site-packages/mxnet/tools/im2rec.py


In [4]:
!aws s3 sync s3://$S3_DATA_BUCKET_NAME/$DATASET_NAME $BASE_DIR/$DATASET_NAME --quiet

In [5]:
!ls /tmp/image_data

damaged  undamaged


In [6]:
%%bash

cd $BASE_DIR
rm *.rec
rm *.lst

echo "Creating LST files"
python $IM2REC --list --recursive --pass-through --test-ratio=0.3 --train-ratio=0.7 $DATASET_NAME $DATASET_NAME > ${DATASET_NAME}_classes

echo "Label classes:"
cat ${DATASET_NAME}_classes

echo "Creating RecordIO files"
python $IM2REC --num-thread=4 ${DATASET_NAME}_train.lst $DATASET_NAME
python $IM2REC --num-thread=4 ${DATASET_NAME}_test.lst $DATASET_NAME
ls -lh *.rec

Creating LST files
Label classes:
damaged 0
undamaged 1
Creating RecordIO files
Creating .rec file from /tmp/image_data_train.lst in /tmp
time: 0.256791353225708  count: 0
time: 1.7051382064819336  count: 1000
time: 1.6897778511047363  count: 2000
Creating .rec file from /tmp/image_data_test.lst in /tmp
time: 0.0076885223388671875  count: 0
time: 1.757917881011963  count: 1000
-rw-rw-r-- 1 ec2-user ec2-user 25M Aug 25 09:18 image_data_test.rec
-rw-rw-r-- 1 ec2-user ec2-user 58M Aug 25 09:18 image_data_train.rec


rm: cannot remove ‘*.rec’: No such file or directory
rm: cannot remove ‘*.lst’: No such file or directory


In [7]:
bucket = sess.default_bucket()

s3train_path = 's3://{}/{}/train/'.format(bucket, dataset_name)
s3validation_path = 's3://{}/{}/validation/'.format(bucket, dataset_name)

!aws s3 rm s3://{bucket}/{dataset_name}/train --recursive
!aws s3 rm s3://{bucket}/{dataset_name}/validation --recursive

!aws s3 cp /tmp/{dataset_name}_train.rec $s3train_path
!aws s3 cp /tmp/{dataset_name}_test.rec $s3validation_path

delete: s3://sagemaker-us-east-2-205005380838/image_data/train/image_data_train.rec
delete: s3://sagemaker-us-east-2-205005380838/image_data/validation/image_data_test.rec
upload: ../../../tmp/image_data_train.rec to s3://sagemaker-us-east-2-205005380838/image_data/train/image_data_train.rec
upload: ../../../tmp/image_data_test.rec to s3://sagemaker-us-east-2-205005380838/image_data/validation/image_data_test.rec


In [8]:
train_data = sagemaker.session.s3_input(
    s3train_path, 
    distribution='FullyReplicated', 
    content_type='application/x-recordio', 
    s3_data_type='S3Prefix'
)

validation_data = sagemaker.session.s3_input(
    s3validation_path, 
    distribution='FullyReplicated', 
    content_type='application/x-recordio', 
    s3_data_type='S3Prefix'
)

data_channels = {'train': train_data, 'validation': validation_data}

In [9]:
s3_output_location = 's3://{}/{}/output'.format(bucket, dataset_name)

image_classifier = sagemaker.estimator.Estimator(
    training_image,
    role, 
    train_instance_count=1, 
    train_instance_type='ml.p2.xlarge',
    output_path=s3_output_location,
    sagemaker_session=sess
)

In [10]:
num_classes = 2

num_training_samples = 3149

base_hyperparameters=dict(
    use_pretrained_model=1,
    image_shape='3,224,224',
    num_classes=num_classes,
    num_training_samples=num_training_samples,
    augmentation_type = 'crop_color_transform',
    optimizer='adam',
    resize=224,
    epochs=5
)

hyperparameters={
    **base_hyperparameters, 
    **dict(
        learning_rate=0.001,
        mini_batch_size=2,
    )
}


image_classifier.set_hyperparameters(**hyperparameters)

In [11]:
%%time

import time
now = str(int(time.time()))
training_job_name = 'IC-' + dataset_name.replace('_', '-') + '-' + now

image_classifier.fit(inputs=data_channels, job_name=training_job_name, logs=True)

job = image_classifier.latest_training_job
model_path = f"{base_dir}/{job.name}"

print(f"Location: {image_classifier.output_path}/{job.name}/output/model.tar.gz")

2019-08-25 09:19:04 Starting - Starting the training job...
2019-08-25 09:19:05 Starting - Launching requested ML instances...
2019-08-25 09:20:00 Starting - Preparing the instances for training......
2019-08-25 09:20:53 Downloading - Downloading input data...
2019-08-25 09:21:26 Training - Downloading the training image.........
2019-08-25 09:22:49 Training - Training image download completed. Training in progress.
[31mDocker entrypoint called with argument(s): train[0m
[31m[08/25/2019 09:22:51 INFO 140095236765504] Reading default configuration from /opt/amazon/lib/python2.7/site-packages/image_classification/default-input.json: {u'beta_1': 0.9, u'gamma': 0.9, u'beta_2': 0.999, u'optimizer': u'sgd', u'use_pretrained_model': 0, u'eps': 1e-08, u'epochs': 30, u'lr_scheduler_factor': 0.1, u'num_layers': 152, u'image_shape': u'3,224,224', u'precision_dtype': u'float32', u'mini_batch_size': 32, u'weight_decay': 0.0001, u'learning_rate': 0.1, u'momentum': 0}[0m
[31m[08/25/2019 09:22:51

In [12]:

%%time
# Deploying a model to an endpoint takes a few minutes to complete

deployed_endpoint = image_classifier.deploy(
    initial_instance_count = 1,
    instance_type = 'ml.t2.medium'
)

--------------------------------------------------------------------------------------------------------------------------!CPU times: user 659 ms, sys: 18.7 ms, total: 678 ms
Wall time: 10min 15s
