# Training BYOC on SageMaker
>__Note:__ This assumes a container has already been built and deployed to ECR.

## Prepare the Data

In [1]:
# Load the required libraries
import warnings
import zipfile
import boto3
import os
import urllib.request
import sagemaker
import tempfile
import cv2
import numpy as np
import pandas as pd
import matplotlib.image as mpimg
from sagemaker.estimator import Estimator
from sklearn.model_selection import train_test_split

In [2]:
region = boto3.Session().region_name
sagemaker_session = sagemaker.Session()
sagemaker_client = boto3.client('sagemaker')
bucket = sagemaker.Session().default_bucket()
role = sagemaker.get_execution_role()

## Set Hyperparameters

In [3]:
channels = {'train': 's3://robostig-assets-us-west-2'}
training_instance_type = 'ml.p3.16xlarge'
#training_instance_type = 'ml.m5.24xlarge'
hyperparameters = dict(batch_size=64, learning_rate=.0001, epochs=12, gpu_count=8)
output_location = "s3://{}".format(bucket)

In [4]:
image_name = '500842391574.dkr.ecr.us-west-2.amazonaws.com/pystig:gpu'
BYOC_estimator = Estimator(
    image_name,
    role=role,
    output_path=output_location,
    train_instance_count=1,
    train_instance_type=training_instance_type,
    hyperparameters=hyperparameters,
    sagemaker_session=sagemaker_session
)

In [5]:
BYOC_estimator.fit(channels)

INFO:sagemaker:Creating training-job with name: pystig-2018-07-08-03-54-44-356


.....................................
[31mUsing MXNet backend[0m
[31mStarting model training ...[0m
[31mHyper parameters: {'learning_rate': '0.0001', 'epochs': '12', 'gpu_count': '8', 'batch_size': '64'}[0m
[31mInput parameters: {'train': {'TrainingInputMode': 'File', 'S3DistributionType': 'FullyReplicated', 'RecordWrapperType': 'None'}}[0m
[31m_________________________________________________________________[0m
[31mLayer (type)                 Output Shape              Param #   [0m
[31mlambda_1 (Lambda)            (0, 3, 66, 200)           0         [0m
[31m_________________________________________________________________[0m
[31mconv2d_1 (Conv2D)            (0, 16, 17, 50)           3088      [0m
[31m_________________________________________________________________[0m
[31mactivation_1 (Activation)    (0, 16, 17, 50)           0         [0m
[31m_________________________________________________________________[0m
[31mconv2d_2 (Conv2D)            (0, 32, 9, 25) 

[31mEpoch 2/12

  1/113 [..............................] - ETA: 1s - loss: 0.0378
  6/113 [>.............................] - ETA: 1s - loss: 0.0396[0m
[31m 11/113 [=>............................] - ETA: 3s - loss: 0.0394
 12/113 [==>...........................] - ETA: 6s - loss: 0.0385
 13/113 [==>...........................] - ETA: 8s - loss: 0.0386[0m
[31m 14/113 [==>...........................] - ETA: 10s - loss: 0.0380
 15/113 [==>...........................] - ETA: 12s - loss: 0.0373
 16/113 [===>..........................] - ETA: 13s - loss: 0.0377[0m
[31m 17/113 [===>..........................] - ETA: 14s - loss: 0.0380
 18/113 [===>..........................] - ETA: 15s - loss: 0.0376
 19/113 [====>.........................] - ETA: 16s - loss: 0.0381[0m
[31m 20/113 [====>.........................] - ETA: 16s - loss: 0.0379
 21/113 [====>.........................] - ETA: 17s - loss: 0.0383
 22/113 [====>.........................] - ETA: 17s - loss: 0.0380[0m
[31m 23/1

[31mEpoch 3/12

  1/113 [..............................] - ETA: 1s - loss: 0.0401
  5/113 [>.............................] - ETA: 1s - loss: 0.0346
 10/113 [=>............................] - ETA: 1s - loss: 0.0296[0m
[31m 11/113 [=>............................] - ETA: 3s - loss: 0.0303
 12/113 [==>...........................] - ETA: 6s - loss: 0.0302
 13/113 [==>...........................] - ETA: 8s - loss: 0.0304[0m
[31m 14/113 [==>...........................] - ETA: 10s - loss: 0.0306
 15/113 [==>...........................] - ETA: 12s - loss: 0.0304
 16/113 [===>..........................] - ETA: 13s - loss: 0.0306[0m
[31m 17/113 [===>..........................] - ETA: 14s - loss: 0.0305
 18/113 [===>..........................] - ETA: 15s - loss: 0.0306[0m
[31m 19/113 [====>.........................] - ETA: 16s - loss: 0.0306
 20/113 [====>.........................] - ETA: 16s - loss: 0.0311
 21/113 [====>.........................] - ETA: 17s - loss: 0.0319[0m
[31m 22/11

[31m 23/113 [=====>........................] - ETA: 18s - loss: 0.0294
 24/113 [=====>........................] - ETA: 18s - loss: 0.0295
 25/113 [=====>........................] - ETA: 18s - loss: 0.0300[0m
[31m 26/113 [=====>........................] - ETA: 19s - loss: 0.0300
[31mEpoch 5/12

  1/113 [..............................] - ETA: 1s - loss: 0.0264[0m
[31m  6/113 [>.............................] - ETA: 1s - loss: 0.0268
 10/113 [=>............................] - ETA: 1s - loss: 0.0252
 11/113 [=>............................] - ETA: 3s - loss: 0.0258[0m
[31m 12/113 [==>...........................] - ETA: 6s - loss: 0.0262
 13/113 [==>...........................] - ETA: 8s - loss: 0.0266
 14/113 [==>...........................] - ETA: 10s - loss: 0.0268[0m
[31m 15/113 [==>...........................] - ETA: 12s - loss: 0.0265
 16/113 [===>..........................] - ETA: 13s - loss: 0.0268
 17/113 [===>..........................] - ETA: 14s - loss: 0.0272[0m
[31m 

[31mEpoch 6/12

  1/113 [..............................] - ETA: 1s - loss: 0.0224
  5/113 [>.............................] - ETA: 1s - loss: 0.0230
 10/113 [=>............................] - ETA: 1s - loss: 0.0228
 11/113 [=>............................] - ETA: 3s - loss: 0.0228[0m
[31m 12/113 [==>...........................] - ETA: 6s - loss: 0.0228
 13/113 [==>...........................] - ETA: 8s - loss: 0.0229[0m
[31m 14/113 [==>...........................] - ETA: 10s - loss: 0.0225
 15/113 [==>...........................] - ETA: 12s - loss: 0.0232
 16/113 [===>..........................] - ETA: 13s - loss: 0.0231[0m
[31m 17/113 [===>..........................] - ETA: 14s - loss: 0.0232
 18/113 [===>..........................] - ETA: 15s - loss: 0.0233
 19/113 [====>.........................] - ETA: 16s - loss: 0.0233[0m
[31m 20/113 [====>.........................] - ETA: 16s - loss: 0.0234
 21/113 [====>.........................] - ETA: 17s - loss: 0.0237
 22/113 [====>.

[31mEpoch 7/12

  1/113 [..............................] - ETA: 1s - loss: 0.0273
  5/113 [>.............................] - ETA: 1s - loss: 0.0252
  9/113 [=>............................] - ETA: 1s - loss: 0.0229[0m
[31m 11/113 [=>............................] - ETA: 3s - loss: 0.0233
 12/113 [==>...........................] - ETA: 6s - loss: 0.0233
 13/113 [==>...........................] - ETA: 8s - loss: 0.0241[0m
[31m 14/113 [==>...........................] - ETA: 10s - loss: 0.0245
 15/113 [==>...........................] - ETA: 12s - loss: 0.0247
 16/113 [===>..........................] - ETA: 13s - loss: 0.0257[0m
[31m 17/113 [===>..........................] - ETA: 14s - loss: 0.0254
 18/113 [===>..........................] - ETA: 15s - loss: 0.0260
 19/113 [====>.........................] - ETA: 16s - loss: 0.0259[0m
[31m 20/113 [====>.........................] - ETA: 16s - loss: 0.0255
 21/113 [====>.........................] - ETA: 17s - loss: 0.0254[0m
[31m 22/11

[31mEpoch 8/12

  1/113 [..............................] - ETA: 1s - loss: 0.0213
  5/113 [>.............................] - ETA: 1s - loss: 0.0202
 10/113 [=>............................] - ETA: 1s - loss: 0.0193[0m
[31m 11/113 [=>............................] - ETA: 3s - loss: 0.0192
 12/113 [==>...........................] - ETA: 6s - loss: 0.0194[0m
[31m 13/113 [==>...........................] - ETA: 8s - loss: 0.0201
 14/113 [==>...........................] - ETA: 10s - loss: 0.0204
 15/113 [==>...........................] - ETA: 11s - loss: 0.0208[0m
[31m 16/113 [===>..........................] - ETA: 13s - loss: 0.0208
 17/113 [===>..........................] - ETA: 14s - loss: 0.0212
 18/113 [===>..........................] - ETA: 15s - loss: 0.0217[0m
[31m 19/113 [====>.........................] - ETA: 16s - loss: 0.0216
 20/113 [====>.........................] - ETA: 16s - loss: 0.0217
 21/113 [====>.........................] - ETA: 17s - loss: 0.0221[0m
[31m 22/11

[31mEpoch 10/12

  1/113 [..............................] - ETA: 1s - loss: 0.0186
  5/113 [>.............................] - ETA: 1s - loss: 0.0174
  9/113 [=>............................] - ETA: 1s - loss: 0.0164
 11/113 [=>............................] - ETA: 3s - loss: 0.0169[0m
[31m 12/113 [==>...........................] - ETA: 6s - loss: 0.0171
 13/113 [==>...........................] - ETA: 8s - loss: 0.0182[0m
[31m 14/113 [==>...........................] - ETA: 10s - loss: 0.0186
 15/113 [==>...........................] - ETA: 12s - loss: 0.0187
 16/113 [===>..........................] - ETA: 13s - loss: 0.0191[0m
[31m 17/113 [===>..........................] - ETA: 14s - loss: 0.0193
 18/113 [===>..........................] - ETA: 15s - loss: 0.0205
 19/113 [====>.........................] - ETA: 16s - loss: 0.0205[0m
[31m 20/113 [====>.........................] - ETA: 16s - loss: 0.0203
 21/113 [====>.........................] - ETA: 17s - loss: 0.0203
 22/113 [====>

[31mEpoch 11/12

  1/113 [..............................] - ETA: 1s - loss: 0.0302
  5/113 [>.............................] - ETA: 1s - loss: 0.0295[0m
[31m  9/113 [=>............................] - ETA: 1s - loss: 0.0286
 11/113 [=>............................] - ETA: 3s - loss: 0.0280
 12/113 [==>...........................] - ETA: 6s - loss: 0.0278[0m
[31m 13/113 [==>...........................] - ETA: 8s - loss: 0.0283
 14/113 [==>...........................] - ETA: 10s - loss: 0.0282
 15/113 [==>...........................] - ETA: 12s - loss: 0.0275[0m
[31m 16/113 [===>..........................] - ETA: 13s - loss: 0.0272
 17/113 [===>..........................] - ETA: 14s - loss: 0.0274[0m
[31m 18/113 [===>..........................] - ETA: 15s - loss: 0.0275
 19/113 [====>.........................] - ETA: 16s - loss: 0.0274
 20/113 [====>.........................] - ETA: 16s - loss: 0.0275[0m
[31m 21/113 [====>.........................] - ETA: 17s - loss: 0.0276
 22/1

[31mEpoch 12/12

  1/113 [..............................] - ETA: 1s - loss: 0.0258
  6/113 [>.............................] - ETA: 1s - loss: 0.0238
 11/113 [=>............................] - ETA: 3s - loss: 0.0226[0m
[31m 12/113 [==>...........................] - ETA: 6s - loss: 0.0229
 13/113 [==>...........................] - ETA: 8s - loss: 0.0230
 14/113 [==>...........................] - ETA: 10s - loss: 0.0233[0m
[31m 15/113 [==>...........................] - ETA: 11s - loss: 0.0236
 16/113 [===>..........................] - ETA: 13s - loss: 0.0239
 17/113 [===>..........................] - ETA: 14s - loss: 0.0240[0m
[31m 18/113 [===>..........................] - ETA: 15s - loss: 0.0239
 19/113 [====>.........................] - ETA: 16s - loss: 0.0241
 20/113 [====>.........................] - ETA: 16s - loss: 0.0243[0m
[31m 21/113 [====>.........................] - ETA: 17s - loss: 0.0238
 22/113 [====>.........................] - ETA: 17s - loss: 0.0237[0m
[31m 23/

[31mSaving the trained model ...[0m
===== Job Complete =====
Billable seconds: 2208


## Training Job Description

In [6]:
response = sagemaker_client.describe_training_job(
    TrainingJobName='pystig-2018-07-08-03-54-44-356'
)
response

{'TrainingJobName': 'pystig-2018-07-08-03-54-44-356',
 'TrainingJobArn': 'arn:aws:sagemaker:us-west-2:500842391574:training-job/pystig-2018-07-08-03-54-44-356',
 'ModelArtifacts': {'S3ModelArtifacts': 's3://sagemaker-us-west-2-500842391574/pystig-2018-07-08-03-54-44-356/output/model.tar.gz'},
 'TrainingJobStatus': 'Completed',
 'SecondaryStatus': 'Completed',
 'HyperParameters': {'batch_size': '64',
  'epochs': '12',
  'gpu_count': '8',
  'learning_rate': '0.0001'},
 'AlgorithmSpecification': {'TrainingImage': '500842391574.dkr.ecr.us-west-2.amazonaws.com/pystig:gpu',
  'TrainingInputMode': 'File'},
 'RoleArn': 'arn:aws:iam::500842391574:role/SageMaker',
 'InputDataConfig': [{'ChannelName': 'train',
   'DataSource': {'S3DataSource': {'S3DataType': 'S3Prefix',
     'S3Uri': 's3://robostig-assets-us-west-2',
     'S3DataDistributionType': 'FullyReplicated'}},
   'CompressionType': 'None',
   'RecordWrapperType': 'None'}],
 'OutputDataConfig': {'KmsKeyId': '',
  'S3OutputPath': 's3://sage

---
## Deploy model - Standard
__Use `estimator.deploy()` based on GPU Container training__
>__Note:__ This is not cost effective.

```
predictor = BYOC_estimator.deploy(initial_instance_count=1, instance_type='ml.c4.xlarge')
```

---
## Deploy Model - CPU Container
__Use separate CPU container and the `sagemaker.Session()` API to specify a different serving container__

### Step 1: Create a new model from the training job, specifying a different container for training, in this case a CPU-based container.

In [7]:
job_name = 'pystig-2018-07-08-03-54-44-356'
BYOC_model = sagemaker_session.create_model_from_job(
    name = job_name.split('-')[0]+'-model',
    training_job_name=job_name,
    role=role,
    primary_container_image='500842391574.dkr.ecr.us-west-2.amazonaws.com/pystig:cpu',
    model_data_url='s3:/{}/{}/output/model.tar.gz'.format(bucket, job_name)
)

INFO:sagemaker:Creating model with name: pystig-model


ClientError: An error occurred (ValidationException) when calling the CreateModel operation: 1 validation error detected: Value 's3:/sagemaker-us-west-2-500842391574/pystig-2018-07-08-03-54-44-356/output/model.tar.gz' at 'primaryContainer.modelDataUrl' failed to satisfy constraint: Member must satisfy regular expression pattern: ^(https|s3)://([^/]+)/?(.*)$

### Step 2: Create a SageMaker Endpoint Configuration

In [None]:
BYOC_endpoint_config_name = sagemaker_session.create_endpoint_config(
    name=job_name.split('-')[0]+'-endpoint-config',
    model_name=BYOC_model,
    initial_instance_count=1,
    instance_type='ml.c4.xlarge'
)

### Step 3: Deploy the SageMaker Endpoint

In [None]:
create_endpoint_response = sagemaker_session.create_endpoint(
    endpoint_name=job_name.split('-')[0]+'-endpoint',
    config_name=str(BYOC_endpoint_config_name)
)

In [None]:
sagemaker_client.describe_endpoint(EndpointName=create_endpoint_response)

---
## Test Endpoint (Simuilate pyStig)
### Get Sample Data for predictions

In [None]:
# Helper functions
def download(url):
    """
    Helper function to download individual file from given url.
    
    Arguments:
    url -- full URL of the file to download
    
    Returns:
    filename -- downloaded file name
    """
    filename = url.split("/")[-1]
    if not os.path.exists(filename):
        urllib.request.urlretrieve(url, filename)
    return filename

# To download and extract Sample Data
file = download('https://d17h27t6h515a5.cloudfront.net/topher/2016/December/584f6edd_data/data.zip')

# Extract the file
with zipfile.ZipFile(file) as zf:
    zf.extractall()
    
# Image Transofmrations
def crop(image):
    """
    Crop the image (removing the sky at the top and the car front at the bottom).
    
    Returns:
    Cropped image.
    """
    return image[60:-25, :, :]

def resize(image):
    """
    Resize the image to the input shape used by the network model.
    
    Returns:
    Resized image.
    """
    return cv2.resize(image, (IMAGE_WIDTH, IMAGE_HEIGHT), cv2.INTER_AREA)

def rgb2yuv(image):
    """
    Convert the image from RGB to YUV.
    
    Returns:
    YUV image.
    """
    return cv2.cvtColor(image, cv2.COLOR_RGB2YUV)

def load(data_dir, image_file):
    """
    Load RGB images from a file
    """
    return mpimg.imread(os.path.join(data_dir, image_file.strip()))

def transform(image):
    """
    Combine all preprocess functions into one
    """
    image = crop(image)
    image = resize(image)
    image = rgb2yuv(image)
    return image

### Random Sample Image

In [None]:
# Origional 'left' image
IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS = 66, 200, 3
INPUT_SHAPE = (IMAGE_CHANNELS, IMAGE_HEIGHT, IMAGE_WIDTH)
data_df = pd.read_csv('./data/driving_log.csv')
X = data_df[['center', 'left', 'right']].values
y = data_df['steering'].values
random_image = X[100][0]
img = load('data', random_image)

In [None]:
img.shape

In [None]:
img = img.reshape(img.shape[2], img.shape[0], img.shape[1])

In [None]:
img.shape

In [None]:
normalize = lambda x: x/127.55 -1
print(normalize(img).shape)

In [None]:
# Simulate pyStig call by first pre-preocessing image and converting to 4D array
endpoint_name = sagemaker_client.describe_endpoint(EndpointName=create_endpoint_response)['EndpointName']
payload = np.array([transform(img)])

In [None]:
runtime_client = boto3.client('sagemaker-runtime')
response = runtime_client.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType='application/json',
    Body=json.dumps(payload.tolist())
)
prediction = float(json.loads(response['Body'].read().decode('utf-8'))[0])

In [None]:
prediction

In [None]:
y[100]