# Keypoint Assessment

## Simple Breakdown

-  Data exploration and processing
-  Build toy models and train on local env
-  Training and hyperparameter tuning on AWS Sagemaker
-  Save the model and write inference script 

## Data exploration and processing

For details on the data analysis, processing, and choice of architecture, please see the Selected_Architecture document. 

Data inspection: 

-  Image rotation is less than +/- 3 degrees
-  Image scaling is less than +/- 3% on both axes
-  Image capture offsets are less than +/- 60 px (off the image average) on both axes

The dataset is small, so the model is in danger of overfitting. Dropout layers and data augmentation should help with that. 

## Build toy models and train on local env

-  A very basic CNN should be good enough

In [4]:
# Layer sizes read by SimpleCNN. The *_1 layers are always built; a value of 0
# for a *_2 or *_3 entry leaves that optional layer out of the network.
conv2D_1, conv2D_2, conv2D_3 = 32, 32, 0
dense_1, dense_2, dense_3 = 10, 10, 0

# Default for SimpleCNN's dropout switch.
withDropout = False

def SimpleCNN(withDropout=withDropout):
    """
    Build and compile a small convolutional regression network.

    Layer sizes are taken from the module-level variables conv2D_1..3 and
    dense_1..3 at call time; conv2D_2, conv2D_3, dense_2 and dense_3 are
    only added when their value is greater than 0.

    withDropout: If True, a Dropout(0.1) layer follows every convolutional
    and hidden dense stage (dropout regularization).

    Returns a pair (parallel_model, model): the compiled model, which is a
    multi-GPU wrapper when gpu_count > 1, and the underlying Sequential
    model. When gpu_count <= 1 both entries are the same object.
    """
    net = Sequential()

    def finish_stage(pool):
        # Everything that follows a Conv2D/Dense layer within one stage.
        net.add(Activation("relu"))
        if pool:
            net.add(MaxPooling2D(pool_size=(2, 2)))
        if withDropout:
            net.add(Dropout(0.1))

    # Mandatory first convolution: the only one with a 3x3 kernel and the
    # one that fixes the input shape.
    net.add(Conv2D(conv2D_1, (3, 3), input_shape=(HEIGHT, WIDTH, 1)))
    finish_stage(pool=True)

    # Optional extra convolutions, 2x2 kernels.
    for n_filters in (conv2D_2, conv2D_3):
        if n_filters > 0:
            net.add(Conv2D(n_filters, (2, 2)))
            finish_stage(pool=True)

    net.add(Flatten())

    # Mandatory first hidden dense layer, then the optional ones.
    net.add(Dense(dense_1))
    finish_stage(pool=False)
    for n_units in (dense_2, dense_3):
        if n_units > 0:
            net.add(Dense(n_units))
            finish_stage(pool=False)

    # Linear output layer with 6 units.
    net.add(Dense(6))

    optimizer = SGD(lr=0.01, momentum=0.9, nesterov=True)
    trainable = multi_gpu_model(net, gpus=gpu_count) if gpu_count > 1 else net
    trainable.compile(loss="mean_squared_error", optimizer=optimizer)

    return trainable, net

-  The CNN overfits when trained on just 120 images (20% of which are held out for testing): the loss on the test set is a lot higher than the loss on the training set.

## Data augmentation

-  Randomly shifting the pictures at every batch creates more training data, which seems to be sufficient

In [5]:
# Image dimensions; picture arrays are laid out as (HEIGHT, WIDTH, 1).
WIDTH = 1032
HEIGHT = 688

class DataModifier(object):
    """Base class for transformations applied to a batch of (X, y) data."""

    def fit(self, X_, y_):
        """Transform the batch; subclasses must override this method.

        :param X_: batch of inputs
        :param y_: batch of targets
        :raises NotImplementedError: always, in this base class
        """
        # Raise rather than return the exception class, so a subclass that
        # forgets to override fails loudly instead of handing the class
        # object back to the caller as if it were data.
        raise NotImplementedError

class ShiftPic(DataModifier):
    """Augment a batch by randomly translating each picture and its keypoints.

    NOTE(review): the keypoint arithmetic assumes the targets are scaled so
    that one unit equals half of the picture extent along the matching axis
    (x by width, y by height) -- confirm against the preprocessing code.
    """

    def __init__(self, prop=0.1):
        # Maximum shift, as a proportion of the picture extent on each axis.
        self.prop = prop

    def fit(self, X, y):
        """Shift every picture in X and its targets in y.

        Both arrays are modified in place; the same objects are returned.
        """
        return self.shift_image(X, y, prop=self.prop)

    def random_shift(self, shift_range, n=None):
        """
        Draw a random shift and the index ranges that implement it.

        :param shift_range:
        The maximum number of columns/rows to shift (inclusive, either
        direction)
        :param n:
        Length of the axis being shifted; must be supplied
        :return:
        keep(0):   minimum row/column index to keep
        keep(1):   maximum row/column index to keep
        assign(0): minimum row/column index to assign
        assign(1): maximum row/column index to assign
        shift:     amount to shift the keypoint (signed; a positive value
                   moves the content towards index 0)

        assign(1) - assign(0) == keep(1) - keep(0)
        :raises TypeError: if n is not given
        """
        if n is None:
            raise TypeError("random_shift() requires the axis length n")

        # randint's upper bound is exclusive: the +1 makes the range
        # symmetric and keeps shift_range == 0 valid (the shift is then 0).
        shift = np.random.randint(-shift_range, shift_range + 1)

        size = abs(shift)
        leading = (0, n - size)   # window that touches index 0
        trailing = (size, n)      # window that touches index n

        if shift < 0:
            keep, assign = leading, trailing
        else:
            keep, assign = trailing, leading

        return keep, assign, shift

    def shift_single_image(self, x_, y_, prop=0.1):
        """
        :param x_: a single picture array (HEIGHT, WIDTH, 1)
        :param y_: keypoint locations flatten (1, 2 * number of keypoints)
                    [0::2] contains x axis values
                    [1::2] contains y axis values
        :param prop: proportion of random horizontal and vertical shift
                        relative to the number of columns/rows
                        e.g. prop = 0.1 on 1032 columns moves the picture
                        by at most int(0.1*1032) = 103 columns
        :return:
        x_, y_ (both modified in place)
        """
        # Use the picture's own shape so any picture size works.
        n_rows, n_cols = x_.shape[0], x_.shape[1]

        row_keep, row_assign, row_shift = self.random_shift(
            int(n_rows * prop), n_rows)
        col_keep, col_assign, col_shift = self.random_shift(
            int(n_cols * prop), n_cols)

        # Source and destination windows overlap, so copy the source first.
        # Pixels outside the destination window are left as they were.
        moved = x_[row_keep[0]:row_keep[1],
                   col_keep[0]:col_keep[1], :].copy()
        x_[row_assign[0]:row_assign[1],
           col_assign[0]:col_assign[1], :] = moved

        # x values follow the column shift and are scaled by the width;
        # y values follow the row shift and are scaled by the height.
        y_[:, 0::2] = y_[:, 0::2] - col_shift / (n_cols / 2.)
        y_[:, 1::2] = y_[:, 1::2] - row_shift / (n_rows / 2.)

        return x_, y_

    def shift_image(self, X, y, prop=0.1):
        """Apply an independent random shift to every picture of the batch.

        :param X: batch of pictures, indexed by picture along axis 0
        :param y: batch of keypoint rows, one per picture
        :param prop: forwarded to shift_single_image
        :return: X, y (the same arrays, modified in place)
        """
        for irow in range(X.shape[0]):
            # A (1, k) row so the per-axis slices in shift_single_image
            # apply; -1 avoids hard-coding the number of keypoints.
            targets = y[irow].reshape((1, -1))
            X[irow], y[irow] = self.shift_single_image(
                X[irow], targets, prop=prop)
        return X, y

## Training and hyperparameter tuning on Amazon SageMaker

-  Train the CNN with augmented data using Amazon Sagemaker
-  Trained in under 5 minutes 

In [None]:
from sagemaker.tuner import HyperparameterTuner, IntegerParameter

# Training job definition: custom container image on one ml.p3.8xlarge instance.
estimator = sagemaker.estimator.Estimator(
    image_name=image_name,
    base_job_name=base_job_name,
    role=role,
    train_instance_count=1,
    train_instance_type='ml.p3.8xlarge',
    output_path=output_path,
    sagemaker_session=sess,
)

# Hyperparameters held fixed for every tuning job; the third conv and dense
# layers are set to 0.
estimator.set_hyperparameters(
    epochs=290,
    gpu_count=4,
    batch_size=32,
    conv2D_3=0,
    dense_3=0,
)

# Search space: sizes of the first two conv layers and first two dense layers.
hyperparameter_ranges = {
    'conv2D_1': IntegerParameter(16, 64),
    'conv2D_2': IntegerParameter(16, 64),
    'dense_1': IntegerParameter(10, 30),
    'dense_2': IntegerParameter(10, 30),
}

# The objective value is parsed from the training log with this regex.
objective_metric_name = 'score'
metric_definitions = [
    {'Name': 'score', 'Regex': 'score: ([0-9\\.]+)'},
]

# NOTE(review): no objective_type is passed, so the SDK default applies --
# confirm it matches whether 'score' is meant to be maximized or minimized.
tuner = HyperparameterTuner(
    estimator=estimator,
    objective_metric_name=objective_metric_name,
    hyperparameter_ranges=hyperparameter_ranges,
    metric_definitions=metric_definitions,
    max_jobs=8,
    max_parallel_jobs=2,
)

tuner.fit({
    'training': train_input_path,
    'validation': validation_input_path,
})

In [8]:
"""
conv2D_1, conv2D_2, conv2D_3, dense_1, dense_2, dense_3 are parameters of the CNN
""" 
# Best hyperparameters found by the tuning run (values recorded as strings):

print({
    'conv2D_2': '32',
    'dense_2': '20',
    'batch_size': '32',
    'conv2D_1': '32',
    'conv2D_3': '0',
    'epochs': '300',
    'gpu_count': '4',
    'dense_3': '0',
    'dense_1': '20',
})

# Losses recorded for that configuration.
print({"training_loss": 0.00033, "eval_loss": 0.00046})

{'conv2D_2': '32', 'dense_2': '20', 'batch_size': '32', 'conv2D_1': '32', 'conv2D_3': '0', 'epochs': '300', 'gpu_count': '4', 'dense_3': '0', 'dense_1': '20'}
{'training_loss': 0.00033, 'eval_loss': 0.00046}
