In [1]:
import time
import boto3
import sagemaker
from sagemaker import get_execution_role
from sagemaker.amazon.amazon_estimator import get_image_uri
from sagemaker.tuner import IntegerParameter, CategoricalParameter, ContinuousParameter, HyperparameterTuner
from sagemaker.model import Model
from sagemaker.predictor import RealTimePredictor
import json
import numpy as np

Configure Built In Image Classification Algorithm


 Configure Hyperparameters

In [2]:
#Number of output classes
num_classes = 2

# Number of training samples in the training set # number obtained from train.lst
num_training_samples = 194266

# Number of layers for underlying neural network
num_layers = 18

# Batch size for training  # How many images are provided to the model for training
mini_batch_size = 128

#Input image shape for the training data
image_shape = '3,50,50'

# Augmentation type #i have an unbalanced dataset 
augmentation_type = 'crop_color_transform'

#Number of epoch
epochs = 5

# Learning Rate
learning_rate =0.01

# Enable Transfer Learning
use_pretrained_model = 1

Create a Unique Job Name

In [3]:
job_name_prefix = 'breast-cancer-detection'
timestamp = time.strftime('-%Y-%m-%d-%H-%M-%S', time.gmtime())
job_name = job_name_prefix + timestamp

Specify the input path for the job

In [4]:
bucket = 'sagemaker-gwu-capstone-2019'
input_prefix = 'breast-cancer-detection/input/recordio'
input_train = 's3://{}/{}/train/'.format(bucket, input_prefix)
input_test = 's3://{}/{}/test/'.format(bucket, input_prefix)

Specify the output path for the job

In [5]:
output_prefix = 'breast-cancer-detection/output'
output_path = 's3://{}/{}/'.format(bucket, output_prefix)

 Configure training instances

In [6]:
instance_count = 1
instance_type = 'ml.p2.xlarge'
volume_size_gb = 50


Execution role and training image URI for Image Classification

In [7]:
role = get_execution_role()
training_image = get_image_uri(boto3.Session().region_name, 'image-classification')

Configure train timeout

In [8]:
train_timeout = 360000

create a sagemaker estimator

In [9]:
sagemaker_session = sagemaker.Session()
estimator = sagemaker.estimator.Estimator(training_image, 
                                          role, 
                                          train_instance_count=instance_count,
                                          train_instance_type=instance_type,
                                          train_volume_size=volume_size_gb,
                                          train_max_run=train_timeout,
                                          output_path=output_path, 
                                          sagemaker_session=sagemaker_session,
                                          input_mode='Pipe')

In [10]:
estimator.set_hyperparameters(num_classes=num_classes,
                              num_training_samples=num_training_samples,
                              num_layers=num_layers,
                              mini_batch_size=mini_batch_size,
                              image_shape=image_shape,
                              augmentation_type=augmentation_type,
                              epochs=epochs,
                              learning_rate=learning_rate,
                              use_pretrained_model=use_pretrained_model)

Create a training job

In [11]:
s3_input_train = sagemaker.s3_input(s3_data=input_train, content_type='application/x-recordio')
s3_input_validation = sagemaker.s3_input(s3_data=input_test, content_type='application/x-recordio')

In [12]:
estimator.fit({
    'train': s3_input_train,
    'validation': s3_input_validation
}, job_name=job_name)

2019-11-23 20:30:29 Starting - Starting the training job...
2019-11-23 20:30:51 Starting - Launching requested ML instances......
2019-11-23 20:31:54 Starting - Preparing the instances for training............
2019-11-23 20:33:37 Downloading - Downloading input data...
2019-11-23 20:34:10 Training - Downloading the training image......
2019-11-23 20:35:15 Training - Training image download completed. Training in progress.[31mDocker entrypoint called with argument(s): train[0m
[31m[11/23/2019 20:35:18 INFO 139637762905920] Reading default configuration from /opt/amazon/lib/python2.7/site-packages/image_classification/default-input.json: {u'beta_1': 0.9, u'gamma': 0.9, u'beta_2': 0.999, u'optimizer': u'sgd', u'use_pretrained_model': 0, u'eps': 1e-08, u'epochs': 30, u'lr_scheduler_factor': 0.1, u'num_layers': 152, u'image_shape': u'3,224,224', u'precision_dtype': u'float32', u'mini_batch_size': 32, u'weight_decay': 0.0001, u'learning_rate': 0.1, u'momentum': 0}[0m
[31m[11/23/2019 20:

Creating a tuning job

Defining tuning configuration

In [14]:
hyperparameter_ranges = {
    'learning_rate': ContinuousParameter(0.001, 1.0),
    'mini_batch_size': IntegerParameter(64, 128),
    'optimizer': CategoricalParameter(['sgd', 'adam'])
}

objective_metric_name = 'validation:accuracy'
objective_type='Maximize'
max_jobs=2
max_parallel_jobs=2

Create a unique job name

In [15]:
job_name_prefix = 'bcd-tuning'
timestamp = time.strftime('-%Y-%m-%d-%H-%M-%S', time.gmtime())
job_name = job_name_prefix + timestamp

Creating a hyperparameter tuner

In [16]:
tuner = HyperparameterTuner(estimator=estimator, 
                            objective_metric_name=objective_metric_name, 
                            hyperparameter_ranges=hyperparameter_ranges,
                            objective_type=objective_type, 
                            max_jobs=max_jobs, 
                            max_parallel_jobs=max_parallel_jobs)

Launch the tuning job

In [17]:
tuner.fit({
    'train': s3_input_train,
    'validation': s3_input_validation
}, job_name=job_name)
tuner.wait()

......................................................................................................................................................................................................................................................................................................!


Deploying the best model found by the tuning job

 Get the execution role and the hosting image URI for Image Classification

In [18]:
role = get_execution_role()
hosting_image = get_image_uri(boto3.Session().region_name, 'image-classification')

Configure hosting instances

In [19]:
instance_count = 1
instance_type = 'ml.m4.xlarge'

Create a unique model name

In [20]:
model_name_prefix = 'bcd-image-sdk'
timestamp = time.strftime('-%Y-%m-%d-%H-%M-%S', time.gmtime())
model_name = model_name_prefix + timestamp

Create a Model object

In [21]:
model_artifacts_s3_path = 's3://sagemaker-gwu-capstone-2019/breast-cancer-detection/output/bcd-tuning-2019-11-12-13-27-23-002-c75e5348/output/model.tar.gz'
model = Model(
    name=model_name,
    model_data=model_artifacts_s3_path,
    image=hosting_image,
    role=role,
    predictor_cls=lambda endpoint_name, sagemaker_session: RealTimePredictor(endpoint_name, sagemaker_session)
)

Create a unique endpoint name

In [22]:
endpoint_name_prefix = 'breast-cancer-detection-sdk-ep'
timestamp = time.strftime('-%Y-%m-%d-%H-%M-%S', time.gmtime())
endpoint_name = endpoint_name_prefix + timestamp

Create a model, an endpoint configuration and an endpoint

In [23]:
predictor = model.deploy(
    endpoint_name=endpoint_name,
    initial_instance_count=instance_count,
    instance_type=instance_type
)

---------------------------------------------------------------------------------------------------------------------------!

Testing the deployed model

In [24]:
def predict_breast_cancer(image_path):
    with open(image_path, 'rb') as f:
        payload = f.read()
        payload = bytearray(payload)
    response = predictor.predict(payload)
    result = json.loads(response)
    print('Probabilities for all classes: ', result)
    predicted_class = np.argmax(result)
    if predicted_class == 0:
        print('Breast cancer not detected')
    else:
        print('Breast cancer detected')

In [30]:
predict_breast_cancer('images/0/10275_idx5_x351_y851_class0.png')

Probabilities for all classes:  [0.8033275604248047, 0.19667242467403412]
Breast cancer not detected


In [29]:
predict_breast_cancer('images/1/10275_idx5_x951_y751_class1.png')

Probabilities for all classes:  [0.01271785143762827, 0.9872822165489197]
Breast cancer detected


Deleting endpoint

In [79]:
sagemaker.Session().delete_endpoint(predictor.endpoint)