## Set up some variables for the local directories we'll work in for this task

In [1]:
base_dir='/tmp'
dataset_name = 'caltech_objects'
dataset_dir = base_dir + '/' + dataset_name

%env BASE_DIR=$base_dir
%env DATASET_DIR=$dataset_dir
%env DATASET_NAME=$dataset_name

env: BASE_DIR=/tmp
env: DATASET_DIR=/tmp/caltech_objects
env: DATASET_NAME=caltech_objects


## Grab a bunch of images grouped by folders, one per label class

In [2]:
%%bash
rm -rf $DATASET_DIR
mkdir -p $DATASET_DIR
cd $DATASET_DIR
wget http://www.vision.caltech.edu/Image_Datasets/Caltech101/101_ObjectCategories.tar.gz --quiet
tar -xzf 101_ObjectCategories.tar.gz
mv 101_ObjectCategories/* .
rm -rf 101_ObjectCategories
rm 101_ObjectCategories.tar.gz


## Set up some variables for Sagemaker

In [3]:
import sagemaker
from sagemaker import get_execution_role
from sagemaker.amazon.amazon_estimator import get_image_uri

role = get_execution_role()
sess = sagemaker.Session()

training_image = get_image_uri(sess.boto_region_name, 'image-classification', repo_version="latest")

## Preparing data for our model

In [4]:
# Find im2rec in our environment and set up some other vars in our environemnt
import sys,os

suffix='/mxnet/tools/im2rec.py'
im2rec = list(filter( (lambda x: os.path.isfile(x + suffix )), sys.path))[0] + suffix
%env IM2REC=$im2rec

env: IM2REC=/home/ec2-user/anaconda3/envs/mxnet_p36/lib/python3.6/site-packages/mxnet/tools/im2rec.py


In [5]:
# Resize images to 224x224
# Sometimes, our input images aren't already in the desired format for training. I like to format all my images to be the correct size before I train my model.

from PIL import Image
import os
import sys

def resize(filepath, size=224, fill_color=(0, 0, 0, 0)):
    try:
        image = Image.open(filepath)
    except:
        os.remove(filepath)
        return

    new_im = Image.new('RGB', (size, size), fill_color)
    image.thumbnail([size, size])
    (w, h) = image.size
    new_im.paste(image, (int((size - w) / 2), int((size - h) / 2 )))

    # Overwrite file with resized version
    new_im.save(filepath, "JPEG", quality = 95)

def recursively_process_files(dirname, processing_func):
    for dirname, dirnames, filenames in os.walk(dirname):
        for subdirname in dirnames:
            recursively_process_files(subdirname, processing_func)
 
        for filename in filenames:
            processing_func(os.path.join(dirname, filename))


recursively_process_files(f"{dataset_dir}", resize)

In [6]:
%%bash
# Use the IM2REC script to convert our images into RecordIO files

cd $BASE_DIR

rm ${DATASET_NAME}_classes

# First we need to create two LST files (training and test lists), noting the correct label class for each image
# We'll also save the output of the LST files command, since it includes a list of all of our label classes
echo "Creating LST files"
python $IM2REC --list --recursive --pass-through --test-ratio=0.3 --train-ratio=0.7 $DATASET_NAME $DATASET_NAME > ${DATASET_NAME}_classes

# Then we create RecordIO files from the LST files
echo "Creating RecordIO files"
rm *.rec
python $IM2REC --num-thread=4 ${DATASET_NAME}_train.lst $DATASET_NAME
python $IM2REC --num-thread=4 ${DATASET_NAME}_test.lst $DATASET_NAME
ls -lh *.rec

Creating LST files
Label classes:
BACKGROUND_Google 0
Faces 1
Faces_easy 2
Leopards 3
Motorbikes 4
accordion 5
airplanes 6
anchor 7
ant 8
barrel 9
bass 10
beaver 11
binocular 12
bonsai 13
brain 14
brontosaurus 15
buddha 16
butterfly 17
camera 18
cannon 19
car_side 20
ceiling_fan 21
cellphone 22
chair 23
chandelier 24
cougar_body 25
cougar_face 26
crab 27
crayfish 28
crocodile 29
crocodile_head 30
cup 31
dalmatian 32
dollar_bill 33
dolphin 34
dragonfly 35
electric_guitar 36
elephant 37
emu 38
euphonium 39
ewer 40
ferry 41
flamingo 42
flamingo_head 43
garfield 44
gerenuk 45
gramophone 46
grand_piano 47
hawksbill 48
headphone 49
hedgehog 50
helicopter 51
ibis 52
inline_skate 53
joshua_tree 54
kangaroo 55
ketch 56
lamp 57
laptop 58
llama 59
lobster 60
lotus 61
mandolin 62
mayfly 63
menorah 64
metronome 65
minaret 66
nautilus 67
octopus 68
okapi 69
pagoda 70
panda 71
pigeon 72
pizza 73
platypus 74
pyramid 75
revolver 76
rhino 77
rooster 78
saxophone 79
schooner 80
scissors 81
scorpion 82
se

rm: cannot remove ‘caltech_objects_classes’: No such file or directory
rm: cannot remove ‘*.rec’: No such file or directory


In [1]:
# Get the list of class labels into a python variable to use later
class_labels = ! cut -d " " -f1 {base_dir}/{classes_file_name} | paste -sd " "
class_labels = class_labels[0].split(' ')

# Show a few of them now just so we can see the kind of labels we're working with...
class_labels[0:10]

['cut:',
 '{base_dir}/{classes_file_name}:',
 'No',
 'such',
 'file',
 'or',
 'directory']

In [7]:
# Upload our train and test RecordIO files to S3 in the bucket that our sagemaker session is using
bucket = sess.default_bucket()

s3train_path = 's3://{}/{}/train/'.format(bucket, dataset_name)
s3validation_path = 's3://{}/{}/validation/'.format(bucket, dataset_name)

# Clean up any existing data in our training s3 bucket
!aws s3 rm s3://{bucket}/{dataset_name}/train --recursive
!aws s3 rm s3://{bucket}/{dataset_name}/validation --recursive

# Upload the rec files to the train and validation channels
!aws s3 cp /tmp/{dataset_name}_train.rec $s3train_path
!aws s3 cp /tmp/{dataset_name}_test.rec $s3validation_path

upload: ../../../tmp/caltech_objects_train.rec to s3://sagemaker-us-west-2-541003905521/caltech_objects/train/caltech_objects_train.rec
upload: ../../../tmp/caltech_objects_test.rec to s3://sagemaker-us-west-2-541003905521/caltech_objects/validation/caltech_objects_test.rec


### Configuring the data for our model training to use


In [8]:
train_data = sagemaker.session.s3_input(
    s3train_path, 
    distribution='FullyReplicated', 
    content_type='application/x-recordio', 
    s3_data_type='S3Prefix'
)

validation_data = sagemaker.session.s3_input(
    s3validation_path, 
    distribution='FullyReplicated', 
    content_type='application/x-recordio', 
    s3_data_type='S3Prefix'
)

data_channels = {'train': train_data, 'validation': validation_data}

## Training

### Training Setup

In [9]:
s3_output_location = 's3://{}/{}/output'.format(bucket, dataset_name)
image_classifier = sagemaker.estimator.Estimator(
    training_image,
    role, 
    train_instance_count=1, 
    train_instance_type='ml.p3.2xlarge',
    output_path=s3_output_location,
    sagemaker_session=sess,
    train_use_spot_instances=True,
    train_max_wait=60*60,
    train_max_run=60*60,
)

In [10]:
num_classes=! ls -l {base_dir}/{dataset_name} | wc -l
num_classes=int(num_classes[0]) - 1

num_training_samples=! cat {base_dir}/{dataset_name}_train.lst | wc -l
num_training_samples = int(num_training_samples[0])

# Learn more about the Sagemaker built-in Image Classifier hyperparameters here: https://docs.aws.amazon.com/sagemaker/latest/dg/IC-Hyperparameter.html

# These hyperparameters we won't want to change, as they define things like
# the size of the images we'll be sending for input, the number of training classes we have, etc.
base_hyperparameters=dict(
    use_pretrained_model=1,
    image_shape='3,224,224',
    num_classes=num_classes,
    num_training_samples=num_training_samples,
)

# These are hyperparameters we may want to tune, as they can affect the model training success:
hyperparameters={
    **base_hyperparameters, 
    **dict(
        epochs=60,
        learning_rate=0.001,
        mini_batch_size=64,
        early_stopping=True,
    )
}


image_classifier.set_hyperparameters(**hyperparameters)

hyperparameters

{'use_pretrained_model': 1,
 'image_shape': '3,224,224',
 'num_classes': 102,
 'num_training_samples': 6400,
 'epochs': 60,
 'learning_rate': 0.001,
 'mini_batch_size': 64,
 'early_stopping': True}

### Start the training
This will take some time because it's provisioning a new container runtime to train our model, then the actual training happens, then the trained model gets uploaded to S3 and the container is shut down.

In [11]:
%%time

image_classifier.fit(inputs=data_channels, logs=True)

job = image_classifier.latest_training_job
model_path = f"{base_dir}/{job.name}"

print(f"\n\n Finished training! The model is available for download at: {image_classifier.output_path}/{job.name}/output/model.tar.gz")

2019-11-21 04:08:48 Starting - Starting the training job...
2019-11-21 04:08:50 Starting - Launching requested ML instances......
2019-11-21 04:10:18 Starting - Preparing the instances for training.........
2019-11-21 04:11:34 Downloading - Downloading input data
2019-11-21 04:11:34 Training - Downloading the training image.....[31mDocker entrypoint called with argument(s): train[0m
[31m[11/21/2019 04:12:26 INFO 140138295818048] Reading default configuration from /opt/amazon/lib/python2.7/site-packages/image_classification/default-input.json: {u'beta_1': 0.9, u'gamma': 0.9, u'beta_2': 0.999, u'optimizer': u'sgd', u'use_pretrained_model': 0, u'eps': 1e-08, u'epochs': 30, u'lr_scheduler_factor': 0.1, u'num_layers': 152, u'image_shape': u'3,224,224', u'precision_dtype': u'float32', u'mini_batch_size': 32, u'weight_decay': 0.0001, u'learning_rate': 0.1, u'momentum': 0}[0m
[31m[11/21/2019 04:12:26 INFO 140138295818048] Reading provided configuration from /opt/ml/input/config/hyperparam

## Converting our SageMaker-trained model to the ONNX Format

SageMaker uses a framework called MXNet to train and produce our image classifier model.  But, we might want to use this model to perform inference somehwere that MXNet doesn't easily run, such as in a web browser.  

ONNX is an open format to represent deep learning models. With ONNX, AI developers can more easily move models between state-of-the-art tools and choose the combination that is best for them. More info at https://onnx.ai/

SageMaker provides helpful tooling in the Python SDK to convert trained models to the ONNX format, making it easy to take your trained model, convert it to ONNX, then use that model in whatever environment you want to (as long as that environment will accept models in the ONNX format).

In [12]:
# Get the path where our trained model was saved to S3
model_s3_path = image_classifier.model_data
model_s3_output_dir = "/".join(model_s3_path.split('/')[0:-1])
%env MODEL_S3_PATH = $model_s3_path

env: MODEL_S3_PATH=s3://sagemaker-us-west-2-541003905521/caltech_objects/output/image-classification-2019-11-21-04-08-48-253/output/model.tar.gz


In [16]:
%%bash

# Download our model and extract it into $BASE_DIR/downloaded_model
rm -rf $BASE_DIR/downloaded_model/
mkdir -p $BASE_DIR/downloaded_model/
aws s3 cp $MODEL_S3_PATH $BASE_DIR/downloaded_model/model.tar.gz
cd $BASE_DIR/downloaded_model
tar -xzvf model.tar.gz

download: s3://sagemaker-us-west-2-541003905521/caltech_objects/output/image-classification-2019-11-21-04-08-48-253/output/model.tar.gz to ../../../tmp/downloaded_model/model.tar.gz
model-shapes.json
image-classification-symbol.json
image-classification-0014.params


In [18]:
# Use MXNet's onnx_mxnet module to convert the MXNet model that SageMaker trained into ONNX format
from mxnet.contrib import onnx as onnx_mxnet
from glob import glob
import numpy as np

model_dir = base_dir + "/downloaded_model"

model_symbol_file = glob(model_dir + "/*symbol.json")[0]
model_params_file = glob(model_dir + "/*.params")[0]

onnx_mxnet.export_model(sym=model_symbol_file,
                            params=model_params_file,
                            input_shape=[(1, 3, 224, 224)],
                            input_type=np.float32,
                            onnx_file_path="{}/model.onnx".format(model_dir),
                            verbose=True)

# And upload the ONNX model back to the same place on S3 as where we Sagemaker put the MXNet version of the model, just for safe-keeping
! aws s3 cp {model_dir}/model.onnx {model_s3_output_dir}/model.onnx

upload: ../../../tmp/downloaded_model/model.onnx to s3://sagemaker-us-west-2-541003905521/caltech_objects/output/image-classification-2019-11-21-04-08-48-253/output/model.onnx


In [19]:
# Finally, display a link so we can download the ONNX version of the model easily from this notebook's local disk

from IPython.display import FileLink
# DownloadFileLink via https://github.com/jupyterlab/jupyterlab/issues/5443
class DownloadFileLink(FileLink):
    html_link_str = "<a href='{link}' download={file_name}>{link_text}</a>"

    def __init__(self, path, file_name=None, link_text=None, *args, **kwargs):
        super(DownloadFileLink, self).__init__(path, *args, **kwargs)

        self.file_name = file_name or os.path.split(path)[1]
        self.link_text = link_text or self.file_name

    def _format_path(self):
        from html import escape
        fp = ''.join([self.url_prefix, escape(self.path)])
        return ''.join([self.result_html_prefix,
                        self.html_link_str.format(link=fp, file_name=self.file_name, link_text=self.link_text),
                        self.result_html_suffix])
    
# We'll need to symlink the onnx model file from base_dir/downloaded_model/model.onnx to this notebook's home directory 
# so that Jupyter will serve it
! ln -fs {model_dir}/model.onnx ./model.onnx

# Output the download link for us to click on
DownloadFileLink("model.onnx", result_html_prefix="Click here to download the model in ONNX format: ")

In [140]:
# Finally, we'll also want the handy list of knowing all the classes that our model returns scores for. 
# Our model returns a score for each label, in this same order.

print(' '.join(class_labels))

BACKGROUND_Google Faces Faces_easy Leopards Motorbikes accordion airplanes anchor ant barrel bass beaver binocular bonsai brain brontosaurus buddha butterfly camera cannon car_side ceiling_fan cellphone chair chandelier cougar_body cougar_face crab crayfish crocodile crocodile_head cup dalmatian dollar_bill dolphin dragonfly electric_guitar elephant emu euphonium ewer ferry flamingo flamingo_head garfield gerenuk gramophone grand_piano hawksbill headphone hedgehog helicopter ibis inline_skate joshua_tree kangaroo ketch lamp laptop llama lobster lotus mandolin mayfly menorah metronome minaret nautilus octopus okapi pagoda panda pigeon pizza platypus pyramid revolver rhino rooster saxophone schooner scissors scorpion sea_horse snoopy soccer_ball stapler starfish stegosaurus stop_sign strawberry sunflower tick trilobite umbrella watch water_lilly wheelchair wild_cat windsor_chair wrench yin_yang


## Using the ONNX Model for inference

Now that you have an ONNX format of your image classifer model downloaded, 
you can use that model, along with the list of class labels (shown above),
to make inferences about images any place that can run ONNX models, including 
offline, in your web browser! 

Want to give that a try? Download your ONNX model, copy the class labels list above,
then visit this URL which will load your model into your web browser (it doesn't upload the model anywhere)
and make inferences on images that you drag and drop onto the page.

## Optional - Using SageMaker to host a deployed version of our model

In [48]:
%%time
# Deploying a model to an endpoint takes a few minutes to complete

assert False # Change this to True to deploy. This takes about 10 minutes to complete this step.

import time
now = str(int(time.time()))

deployed_endpoint = image_classifier.deploy(
    endpoint_name = dataset_name.replace('_', '-') + '-' + now,
    #endpoint_name = "stitches-1573714918",
    #update_endpoint = True,
    initial_instance_count = 1,
    instance_type = 'ml.t2.medium',
)

# image-classification-2019-02-01-06-47-08-571

Using already existing model: image-classification-2019-11-15-03-41-36-403


--------------------------------------------------------------------------------------------------------------------------------------!CPU times: user 691 ms, sys: 24.3 ms, total: 715 ms
Wall time: 11min 16s


### Calling a deployed endpoint from Python code

Once you've deployed to a SakeMaker hosted endpoint, you'll want to pass it some images to see it perform inferences.  Here's how to do this from some python code.

In [None]:
import json
import numpy as np
import os

def classify_deployed(file_name, classes):
    payload = None
    with open(file_name, 'rb') as f:
        payload = f.read()
        payload = bytearray(payload)

    deployed_endpoint.content_type = 'application/x-image'
    result = json.loads(deployed_endpoint.predict(payload))
    print(result)
    best_prob_index = np.argmax(result)
    return (classes[best_prob_index], result[best_prob_index])



In [None]:
from PIL import Image
import requests
from io import BytesIO

image_url="https://some/image/to/download"

# Download an image from a URL
response = requests.get(image_url)
img = Image.open(BytesIO(response.content))

# And resize it
size = 224
new_im = Image.new('RGB', (size, size), (0, 0, 0, 0))
img.thumbnail([size, size])
(w, h) = img.size
new_im.paste(img, (int((size - w) / 2), int((size - h) / 2 )))
new_im.save(tmp_local_path, "JPEG", quality = 95)

# Show it
from IPython.display import Image, display
display(Image(filename=tmp_local_path))

# Classify it
classify_deployed(tmp_local_path, class_labels)

### (Optional) Perform Hyperparameter Tuning

Often, you might not know which values for hyperparameters like `learning_rate` and `mini_batch_size` will yield acceptible results. Traditionally, this meant manually running many training jobs with different hyperparameter values, looking at each trained model's performance, and then picking a winner. 


This type of manual tuning is _very_ time consuming, so you can automate this process using automatic model tuning with SageMaker. Here's some example code to illustrate how to start one of these jobs using the SageMaker Python SDK.

In [93]:
# ---------------

assert False # Change to True to run this

# ---------------
from sagemaker.tuner import HyperparameterTuner, IntegerParameter, CategoricalParameter, ContinuousParameter
hyperparameter_ranges = {
    'optimizer': CategoricalParameter(['sgd', 'adam', 'rmsprop', 'nag']),
     'learning_rate': ContinuousParameter(0.0001, 0.1),
     'mini_batch_size': CategoricalParameter([4, 8, 16, 32]),
     'momentum': ContinuousParameter(0.0, 0.99),                                                                   
     'weight_decay': ContinuousParameter(0.0, 0.99),   
}

objective_metric_name = 'validation:accuracy'

tuner = HyperparameterTuner(
    image_classifier,
    objective_metric_name,
    hyperparameter_ranges,
    early_stopping_type='Off',
    max_jobs=500,
    max_parallel_jobs=2
)


tuner.fit(inputs=data_channels, logs=True, include_cls_metadata=False)



## Cleaning Up

When you're done learning here, you'll likley want to delete the endpoint you deployed (if you did so), since that endpoint will charge you money even if you're not actively making inferences (because you're charged for the total availability time of the endpoint). Here's how to clean up the endpoint.

In [None]:
deployed_endpoint.delete_endpoint()