# Training BYOC on SageMaker
>__Note:__ This assumes a container has already been built and deployed to ECR.

## Prepare the Data

In [1]:
# Load the required libraries
import warnings
import zipfile
import boto3
import os
import json
import urllib.request
import sagemaker
import tempfile
import cv2
import numpy as np
import pandas as pd
import matplotlib.image as mpimg
from sagemaker.estimator import Estimator
from sklearn.model_selection import train_test_split

In [2]:
# Configure SageMaker parameters
region = boto3.Session().region_name
sagemaker_session = sagemaker.Session()
sagemaker_client = boto3.client('sagemaker')
bucket = sagemaker.Session().default_bucket()
role = sagemaker.get_execution_role()

## Set Hyperparameters
>__Note:__ Hyperparameters to be supplied by instructor are:
1. `data_bucket`: s3://sagemaker-workshop-500842391574-us-west-2/data/
2. `training_image`: 500842391574.dkr.ecr.us-west-2.amazonaws.com/pystig:keras-gpu
3. `hosting_image`: 500842391574.dkr.ecr.us-west-2.amazonaws.com/pystig:keras-cpu

In [4]:
# Configure the hyperparameters from the instructor
data_bucket = 's3://sagemaker-workshop-test-us-west-2'
training_image = '500842391574.dkr.ecr.us-west-2.amazonaws.com/pystig:keras-gpu'
hosting_image = '500842391574.dkr.ecr.us-west-2.amazonaws.com/pystig:keras-cpu'

# Training data channel
channels = {'train': data_bucket}

# Optmized training parameters
hyperparameters = dict(batch_size=32, learning_rate=.0001, epochs=12)

# Output of trained model
output_location = "s3://{}".format(bucket)

# SageMaker estimator
BYOC_estimator = Estimator(
    training_image,
    role=role,
    output_path=output_location,
    train_instance_count=1,
    train_instance_type='ml.p3.2xlarge',
    hyperparameters=hyperparameters,
    sagemaker_session=sagemaker_session
)

# Start training
BYOC_estimator.fit(channels)

INFO:sagemaker:Creating training-job with name: pystig-2018-07-26-16-56-52-848


................................
[31mUsing TensorFlow backend.[0m
[31mcreating SageMaker trainer environment:[0m
[31mTrainerEnvironment(input_dir='/opt/ml/input', input_config_dir='/opt/ml/input/config', model_dir='/opt/ml/model', output_dir='/opt/ml/output', hyperparameters={'epochs': '12', 'learning_rate': '0.0001', 'batch_size': '32'}, resource_config={'current_host': 'algo-1', 'network_interface_name': 'ethwe', 'hosts': ['algo-1']}, input_data_config={'train': {'TrainingInputMode': 'File', 'RecordWrapperType': 'None', 'S3DistributionType': 'FullyReplicated'}}, output_data_dir='/opt/ml/output/data', hosts=['algo-1'], channel_dirs={'train': '/opt/ml/input/data/train'}, current_host='algo-1', available_gpus=1, available_cpus=8)[0m
[31mStarting model training ...
[0m
[31mcomma.ai Model Summary
[0m
[31m_________________________________________________________________[0m
[31mLayer (type)                 Output Shape              Param #   [0m
[31mlambda_1 (Lambda)         



[31mEpoch 2/12

  1/226 [..............................] - ETA: 2s - loss: 0.0523
  8/226 [>.............................] - ETA: 1s - loss: 0.0378
 11/226 [>.............................] - ETA: 4s - loss: 0.0341
 12/226 [>.............................] - ETA: 7s - loss: 0.0350
 13/226 [>.............................] - ETA: 9s - loss: 0.0353
 14/226 [>.............................] - ETA: 11s - loss: 0.0362[0m
[31m 15/226 [>.............................] - ETA: 13s - loss: 0.0359
 16/226 [=>............................] - ETA: 14s - loss: 0.0370
 17/226 [=>............................] - ETA: 15s - loss: 0.0373
 18/226 [=>............................] - ETA: 16s - loss: 0.0382
 19/226 [=>............................] - ETA: 17s - loss: 0.0374[0m
[31m 20/226 [=>............................] - ETA: 18s - loss: 0.0383
 21/226 [=>............................] - ETA: 19s - loss: 0.0376
 22/226 [=>............................] - ETA: 20s - loss: 0.0375
 23/226 [==>....................

[31mEpoch 3/12

  1/226 [..............................] - ETA: 2s - loss: 0.0417
  7/226 [..............................] - ETA: 1s - loss: 0.0325
 11/226 [>.............................] - ETA: 4s - loss: 0.0317
 12/226 [>.............................] - ETA: 7s - loss: 0.0322[0m
[31m 13/226 [>.............................] - ETA: 9s - loss: 0.0320
 14/226 [>.............................] - ETA: 11s - loss: 0.0340
 15/226 [>.............................] - ETA: 13s - loss: 0.0339
 16/226 [=>............................] - ETA: 14s - loss: 0.0337
 17/226 [=>............................] - ETA: 15s - loss: 0.0344[0m
[31m 18/226 [=>............................] - ETA: 16s - loss: 0.0359
 19/226 [=>............................] - ETA: 17s - loss: 0.0369
 20/226 [=>............................] - ETA: 18s - loss: 0.0366
 21/226 [=>............................] - ETA: 19s - loss: 0.0364
 22/226 [=>............................] - ETA: 20s - loss: 0.0376
 23/226 [==>....................



[31mEpoch 4/12

  1/226 [..............................] - ETA: 1s - loss: 0.0306
  8/226 [>.............................] - ETA: 1s - loss: 0.0261[0m
[31m 11/226 [>.............................] - ETA: 4s - loss: 0.0263
 12/226 [>.............................] - ETA: 7s - loss: 0.0283
 13/226 [>.............................] - ETA: 9s - loss: 0.0298
 14/226 [>.............................] - ETA: 11s - loss: 0.0306
 15/226 [>.............................] - ETA: 13s - loss: 0.0313
 16/226 [=>............................] - ETA: 14s - loss: 0.0326[0m
[31m 17/226 [=>............................] - ETA: 15s - loss: 0.0333
 18/226 [=>............................] - ETA: 16s - loss: 0.0339
 19/226 [=>............................] - ETA: 17s - loss: 0.0349
 20/226 [=>............................] - ETA: 18s - loss: 0.0347
 21/226 [=>............................] - ETA: 19s - loss: 0.0347[0m
[31m 22/226 [=>............................] - ETA: 20s - loss: 0.0342
 23/226 [==>...........

[31mEpoch 5/12

  1/226 [..............................] - ETA: 2s - loss: 0.0149
  8/226 [>.............................] - ETA: 1s - loss: 0.0224
 11/226 [>.............................] - ETA: 4s - loss: 0.0241
 12/226 [>.............................] - ETA: 7s - loss: 0.0245
 13/226 [>.............................] - ETA: 9s - loss: 0.0242
 14/226 [>.............................] - ETA: 11s - loss: 0.0252[0m
[31m 15/226 [>.............................] - ETA: 13s - loss: 0.0250
 16/226 [=>............................] - ETA: 14s - loss: 0.0259
 17/226 [=>............................] - ETA: 15s - loss: 0.0264
 18/226 [=>............................] - ETA: 16s - loss: 0.0265
 19/226 [=>............................] - ETA: 17s - loss: 0.0267[0m
[31m 20/226 [=>............................] - ETA: 18s - loss: 0.0276
 21/226 [=>............................] - ETA: 19s - loss: 0.0283
 22/226 [=>............................] - ETA: 20s - loss: 0.0289
 23/226 [==>....................



[31mEpoch 6/12

  1/226 [..............................] - ETA: 2s - loss: 0.0306
  8/226 [>.............................] - ETA: 1s - loss: 0.0225
 11/226 [>.............................] - ETA: 4s - loss: 0.0235[0m
[31m 12/226 [>.............................] - ETA: 7s - loss: 0.0242
 13/226 [>.............................] - ETA: 9s - loss: 0.0247
 14/226 [>.............................] - ETA: 11s - loss: 0.0260
 15/226 [>.............................] - ETA: 13s - loss: 0.0255
 16/226 [=>............................] - ETA: 14s - loss: 0.0262
 17/226 [=>............................] - ETA: 15s - loss: 0.0266[0m
[31m 18/226 [=>............................] - ETA: 17s - loss: 0.0271
 19/226 [=>............................] - ETA: 17s - loss: 0.0270
 20/226 [=>............................] - ETA: 18s - loss: 0.0285
 21/226 [=>............................] - ETA: 19s - loss: 0.0288
 22/226 [=>............................] - ETA: 20s - loss: 0.0286[0m
[31m 23/226 [==>...........

[31mEpoch 7/12

  1/226 [..............................] - ETA: 2s - loss: 0.0310
  7/226 [..............................] - ETA: 1s - loss: 0.0182[0m
[31m 11/226 [>.............................] - ETA: 4s - loss: 0.0178
 12/226 [>.............................] - ETA: 6s - loss: 0.0185
 13/226 [>.............................] - ETA: 9s - loss: 0.0218
 14/226 [>.............................] - ETA: 11s - loss: 0.0230
 15/226 [>.............................] - ETA: 12s - loss: 0.0243
 16/226 [=>............................] - ETA: 14s - loss: 0.0253[0m
[31m 17/226 [=>............................] - ETA: 15s - loss: 0.0261
 18/226 [=>............................] - ETA: 16s - loss: 0.0259
 19/226 [=>............................] - ETA: 17s - loss: 0.0275
 20/226 [=>............................] - ETA: 18s - loss: 0.0281
 21/226 [=>............................] - ETA: 19s - loss: 0.0292[0m
[31m 22/226 [=>............................] - ETA: 19s - loss: 0.0286
 23/226 [==>...........



[31mEpoch 8/12

  1/226 [..............................] - ETA: 2s - loss: 0.0318[0m
[31m  7/226 [..............................] - ETA: 1s - loss: 0.0323
 11/226 [>.............................] - ETA: 4s - loss: 0.0284
 12/226 [>.............................] - ETA: 6s - loss: 0.0290
 13/226 [>.............................] - ETA: 9s - loss: 0.0290
 14/226 [>.............................] - ETA: 11s - loss: 0.0287[0m
[31m 15/226 [>.............................] - ETA: 13s - loss: 0.0297
 16/226 [=>............................] - ETA: 14s - loss: 0.0306
 17/226 [=>............................] - ETA: 15s - loss: 0.0312
 18/226 [=>............................] - ETA: 16s - loss: 0.0309
 19/226 [=>............................] - ETA: 17s - loss: 0.0308
 20/226 [=>............................] - ETA: 18s - loss: 0.0311[0m
[31m 21/226 [=>............................] - ETA: 19s - loss: 0.0314
 22/226 [=>............................] - ETA: 20s - loss: 0.0321
 23/226 [==>...........

[31mEpoch 9/12

  1/226 [..............................] - ETA: 2s - loss: 0.0196
  8/226 [>.............................] - ETA: 1s - loss: 0.0212
 11/226 [>.............................] - ETA: 4s - loss: 0.0234
 12/226 [>.............................] - ETA: 6s - loss: 0.0239[0m
[31m 13/226 [>.............................] - ETA: 9s - loss: 0.0256
 14/226 [>.............................] - ETA: 11s - loss: 0.0251
 15/226 [>.............................] - ETA: 12s - loss: 0.0252
 16/226 [=>............................] - ETA: 14s - loss: 0.0253
 17/226 [=>............................] - ETA: 15s - loss: 0.0267
 18/226 [=>............................] - ETA: 16s - loss: 0.0277[0m
[31m 19/226 [=>............................] - ETA: 17s - loss: 0.0279
 20/226 [=>............................] - ETA: 18s - loss: 0.0277
 21/226 [=>............................] - ETA: 19s - loss: 0.0277
 22/226 [=>............................] - ETA: 20s - loss: 0.0277
 23/226 [==>....................



[31mEpoch 10/12

  1/226 [..............................] - ETA: 2s - loss: 0.0416
  7/226 [..............................] - ETA: 1s - loss: 0.0331[0m
[31m 11/226 [>.............................] - ETA: 4s - loss: 0.0279
 12/226 [>.............................] - ETA: 7s - loss: 0.0290
 13/226 [>.............................] - ETA: 9s - loss: 0.0310
 14/226 [>.............................] - ETA: 11s - loss: 0.0310
 15/226 [>.............................] - ETA: 12s - loss: 0.0309[0m
[31m 16/226 [=>............................] - ETA: 14s - loss: 0.0303
 17/226 [=>............................] - ETA: 15s - loss: 0.0303
 18/226 [=>............................] - ETA: 16s - loss: 0.0305
 19/226 [=>............................] - ETA: 17s - loss: 0.0303
 20/226 [=>............................] - ETA: 18s - loss: 0.0304
 21/226 [=>............................] - ETA: 19s - loss: 0.0315[0m
[31m 22/226 [=>............................] - ETA: 20s - loss: 0.0316
 23/226 [==>..........

[31mEpoch 11/12

  1/226 [..............................] - ETA: 2s - loss: 0.0251
  7/226 [..............................] - ETA: 1s - loss: 0.0207
 11/226 [>.............................] - ETA: 4s - loss: 0.0201
 12/226 [>.............................] - ETA: 7s - loss: 0.0217
 13/226 [>.............................] - ETA: 9s - loss: 0.0216[0m
[31m 14/226 [>.............................] - ETA: 11s - loss: 0.0232
 15/226 [>.............................] - ETA: 13s - loss: 0.0230
 16/226 [=>............................] - ETA: 14s - loss: 0.0244
 17/226 [=>............................] - ETA: 15s - loss: 0.0244
 18/226 [=>............................] - ETA: 16s - loss: 0.0244
 19/226 [=>............................] - ETA: 17s - loss: 0.0242[0m
[31m 20/226 [=>............................] - ETA: 18s - loss: 0.0247
 21/226 [=>............................] - ETA: 19s - loss: 0.0255
 22/226 [=>............................] - ETA: 20s - loss: 0.0264
 23/226 [==>...................



[31mEpoch 12/12

  1/226 [..............................] - ETA: 2s - loss: 0.0224
  7/226 [..............................] - ETA: 1s - loss: 0.0188
 11/226 [>.............................] - ETA: 4s - loss: 0.0193[0m
[31m 12/226 [>.............................] - ETA: 7s - loss: 0.0213
 13/226 [>.............................] - ETA: 9s - loss: 0.0213
 14/226 [>.............................] - ETA: 11s - loss: 0.0210
 15/226 [>.............................] - ETA: 13s - loss: 0.0217
 16/226 [=>............................] - ETA: 14s - loss: 0.0224
 17/226 [=>............................] - ETA: 16s - loss: 0.0223[0m
[31m 18/226 [=>............................] - ETA: 17s - loss: 0.0223
 19/226 [=>............................] - ETA: 17s - loss: 0.0231
 20/226 [=>............................] - ETA: 18s - loss: 0.0230
 21/226 [=>............................] - ETA: 19s - loss: 0.0235
 22/226 [=>............................] - ETA: 20s - loss: 0.0235
 23/226 [==>...................

[31mSaving the trained model ...[0m
===== Job Complete =====
Billable seconds: 738


## Training Job Description
>__Note:__ Make sure to remember the name of the training job above.

In [6]:
# Add name of the training job
#training_job_name = '<<TRIANING JOB NAME>>'
job_name = 'pystig-2018-07-26-16-56-52-848'
response = sagemaker_client.describe_training_job(
    TrainingJobName=job_name
)
response

{'TrainingJobName': 'pystig-2018-07-26-16-56-52-848',
 'TrainingJobArn': 'arn:aws:sagemaker:us-west-2:722812380636:training-job/pystig-2018-07-26-16-56-52-848',
 'ModelArtifacts': {'S3ModelArtifacts': 's3://sagemaker-us-west-2-722812380636/pystig-2018-07-26-16-56-52-848/output/model.tar.gz'},
 'TrainingJobStatus': 'Completed',
 'SecondaryStatus': 'Completed',
 'HyperParameters': {'batch_size': '32',
  'epochs': '12',
  'learning_rate': '0.0001'},
 'AlgorithmSpecification': {'TrainingImage': '500842391574.dkr.ecr.us-west-2.amazonaws.com/pystig:keras-gpu',
  'TrainingInputMode': 'File'},
 'RoleArn': 'arn:aws:iam::722812380636:role/SageMaker',
 'InputDataConfig': [{'ChannelName': 'train',
   'DataSource': {'S3DataSource': {'S3DataType': 'S3Prefix',
     'S3Uri': 's3://sagemaker-workshop-test-us-west-2',
     'S3DataDistributionType': 'FullyReplicated'}},
   'CompressionType': 'None',
   'RecordWrapperType': 'None'}],
 'OutputDataConfig': {'KmsKeyId': '',
  'S3OutputPath': 's3://sagemaker-

---
## Deploy model - Standard
__Use `estimator.deploy()` based on GPU Container training__
>__Note:__ This is not cost effective.

```
predictor = BYOC_estimator.deploy(initial_instance_count=1, instance_type='ml.c4.xlarge')
```

---
## Deploy Model - CPU Container
__Use separate CPU container and the `sagemaker.Session()` API to specify a different serving container__

### Step 1: Create a new model from the training job, specifying a different container for training, in this case a CPU-based container.

In [10]:
BYOC_model = sagemaker_session.create_model_from_job(
    name = job_name.split('-')[0]+'-model',
    training_job_name=job_name,
    role=role,
    primary_container_image=hosting_image,
    model_data_url='s3://{}/{}/output/model.tar.gz'.format(bucket, job_name)
)

INFO:sagemaker:Creating model with name: pystig-model


### Step 2: Create a SageMaker Endpoint Configuration

In [11]:
BYOC_endpoint_config_name = sagemaker_session.create_endpoint_config(
    name=job_name.split('-')[0]+'-endpoint-config',
    model_name=BYOC_model,
    initial_instance_count=1,
    instance_type='ml.c4.xlarge'
)

INFO:sagemaker:Creating endpoint-config with name pystig-endpoint-config


### Step 3: Deploy the SageMaker Endpoint

In [12]:
create_endpoint_response = sagemaker_session.create_endpoint(
    endpoint_name=job_name.split('-')[0]+'-endpoint',
    config_name=str(BYOC_endpoint_config_name)
)

INFO:sagemaker:Creating endpoint with name pystig-endpoint


---------------------------------------------------!

In [13]:
sagemaker_client.describe_endpoint(EndpointName=create_endpoint_response)

{'EndpointName': 'pystig-endpoint',
 'EndpointArn': 'arn:aws:sagemaker:us-west-2:722812380636:endpoint/pystig-endpoint',
 'EndpointConfigName': 'pystig-endpoint-config',
 'ProductionVariants': [{'VariantName': 'AllTraffic',
   'DeployedImages': [{'SpecifiedImage': '500842391574.dkr.ecr.us-west-2.amazonaws.com/pystig:keras-cpu',
     'ResolvedImage': '500842391574.dkr.ecr.us-west-2.amazonaws.com/pystig@sha256:bbfa72c73e5831e0d4d4f52594b57242963c63a884678435cbbad0e2a3fc4d66',
     'ResolutionTime': datetime.datetime(2018, 7, 26, 17, 13, 45, 989000, tzinfo=tzlocal())}],
   'CurrentWeight': 1.0,
   'DesiredWeight': 1.0,
   'CurrentInstanceCount': 1,
   'DesiredInstanceCount': 1}],
 'EndpointStatus': 'InService',
 'CreationTime': datetime.datetime(2018, 7, 26, 17, 13, 43, 344000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2018, 7, 26, 17, 17, 55, 914000, tzinfo=tzlocal()),
 'ResponseMetadata': {'RequestId': '8c570ee1-14fd-4480-b2e8-91651998c64f',
  'HTTPStatusCode': 200,
  'H

---
## Test Endpoint (Simuilate pyStig driver)
### Get Sample Data for predictions


<details><summary><strong>Note to self</strong></summary><p>
    DO NOT DOWNLOAD `data.zip`. Use the `numpy` arrays in `/tmp/`
    </p>
</details>

In [17]:
# Helper functions
def download(url):
    """
    Helper function to download individual file from given url.
    
    Arguments:
    url -- full URL of the file to download
    
    Returns:
    filename -- downloaded file name
    """
    filename = url.split("/")[-1]
    if not os.path.exists(filename):
        urllib.request.urlretrieve(url, filename)
    return filename

# To download and extract Sample Data
URL = 'https://s3.us-west-2.amazonaws.com/'+data_bucket.split('//')[1]+'/data/data.zip'
file = download(URL)

# Extract the file
with zipfile.ZipFile(file) as zf:
    zf.extractall()
    
# Image Transofmrations
def crop(image):
    """
    Crop the image (removing the sky at the top and the car front at the bottom).
    
    Returns:
    Cropped image.
    """
    return image[60:-25, :, :]

def resize(image):
    """
    Resize the image to the input shape used by the network model.
    
    Returns:
    Resized image.
    """
    return cv2.resize(image, (IMAGE_WIDTH, IMAGE_HEIGHT), cv2.INTER_AREA)

def rgb2yuv(image):
    """
    Convert the image from RGB to YUV.
    
    Returns:
    YUV image.
    """
    return cv2.cvtColor(image, cv2.COLOR_RGB2YUV)

def load(data_dir, image_file):
    """
    Load RGB images from a file
    """
    return mpimg.imread(os.path.join(data_dir, image_file.strip()))

def transform(image):
    """
    Combine all preprocess functions into one
    """
    image = crop(image)
    image = resize(image)
    image = rgb2yuv(image)
    return image

### Random Sample Image

In [45]:
# Origional 'left' image
IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS = 66, 200, 3
INPUT_SHAPE = (IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS)
data_df = pd.read_csv('./data/driving_log.csv')
X = data_df[['center', 'left', 'right']].values
y = data_df['steering'].values
# Use image 900
random_image = X[900][0]
img = load('data', random_image)

In [46]:
# Simulate pyStig call by first pre-preocessing image and converting to 4D array
endpoint_name = sagemaker_client.describe_endpoint(EndpointName=create_endpoint_response)['EndpointName']
payload = np.array([transform(img)])

In [47]:
# Invoke SageMaker endpoint with image data
runtime_client = boto3.client('sagemaker-runtime')
response = runtime_client.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType='application/json',
    Body=json.dumps(payload.tolist())
)
prediction = float(json.loads(response['Body'].read().decode('utf-8'))[0])

In [48]:
# Steering angle prediction on image 900
prediction

0.14810311794281006

In [49]:
# Origional image 900 steering angle
y[900]

0.1670138