# Training BYOC on SageMaker
>__Note:__ This assumes a container has already been built and deployed to ECR.

## Prepare the Data

In [None]:
# Load the required libraries
import warnings
import zipfile
import boto3
import os
import json
import urllib.request
import sagemaker
import tempfile
import cv2
import numpy as np
import pandas as pd
import matplotlib.image as mpimg
from sagemaker.estimator import Estimator
from sklearn.model_selection import train_test_split

In [None]:
# Configure SageMaker parameters
# Region configured for the default boto3 session
region = boto3.Session().region_name
# One SageMaker session shared by the SDK calls in this notebook
sagemaker_session = sagemaker.Session()
# Low-level boto3 client, used by the describe_* calls further down
sagemaker_client = boto3.client('sagemaker')
# Reuse the session created above instead of constructing a second one
bucket = sagemaker_session.default_bucket()
# Execution role passed to the training job and to the hosted model
role = sagemaker.get_execution_role()

## Set Hyperparameters
>__Note:__ Hyperparameters to be supplied by instructor are:
1. `data_bucket`: s3://sagemaker-workshop-500842391574-us-west-2/data/
2. `training_image`: 500842391574.dkr.ecr.us-west-2.amazonaws.com/pystig:keras-gpu
3. `hosting_image`: 500842391574.dkr.ecr.us-west-2.amazonaws.com/pystig:keras-cpu

In [None]:
# Configure the hyperparameters from the instructor
# (replace each placeholder with the value handed out before running)
data_bucket = '<<PROVIDED BY INSTRUCTOR>>'
training_image = '<<PROVIDED BY INSTRUCTOR>>'
hosting_image = '<<PROVIDED BY INSTRUCTOR>>'

# Training data channel: a single 'train' channel pointing at data_bucket
channels = {'train': data_bucket}

# Optimized training parameters
hyperparameters = {
    'batch_size': 32,
    'learning_rate': 0.0001,
    'epochs': 12,
}

# Output of trained model: the root of the session's default bucket
output_location = "s3://{}".format(bucket)

# SageMaker estimator running the training image on one ml.p3.2xlarge instance
BYOC_estimator = Estimator(
    training_image,
    sagemaker_session=sagemaker_session,
    role=role,
    hyperparameters=hyperparameters,
    train_instance_type='ml.p3.2xlarge',
    train_instance_count=1,
    output_path=output_location,
)

# Start training
BYOC_estimator.fit(channels)

## Training Job Description
>__Note:__ Make sure to remember the name of the training job above.

In [None]:
# Add name of the training job (replace the placeholder before running)
job_name = '<<TRAINING JOB NAME>>'
# Fetch the job description; the bare name on the last line displays it
response = sagemaker_client.describe_training_job(TrainingJobName=job_name)
response

---
## Deploy model - Standard
__Use `estimator.deploy()` based on GPU Container training__
>__Note:__ This is not cost effective.

```
predictor = BYOC_estimator.deploy(initial_instance_count=1, instance_type='ml.c4.xlarge')
```

---
## Deploy Model - CPU Container
__Use separate CPU container and the `sagemaker.Session()` API to specify a different serving container__

### Step 1: Create a new model from the training job, specifying a different container for hosting, in this case a CPU-based container.

In [None]:
# Create a model from the training job that is served by the hosting image.
# The model name is the text before the first '-' of the job name plus '-model'.
BYOC_model = sagemaker_session.create_model_from_job(
    training_job_name=job_name,
    name=job_name.partition('-')[0] + '-model',
    role=role,
    primary_container_image=hosting_image,
    model_data_url='s3://{}/{}/output/model.tar.gz'.format(bucket, job_name),
)

### Step 2: Create a SageMaker Endpoint Configuration

In [None]:
# Endpoint configuration: one ml.c4.xlarge instance serving BYOC_model
BYOC_endpoint_config_name = sagemaker_session.create_endpoint_config(
    name=job_name.partition('-')[0] + '-endpoint-config',
    model_name=BYOC_model,
    instance_type='ml.c4.xlarge',
    initial_instance_count=1,
)

### Step 3: Deploy the SageMaker Endpoint

In [None]:
# Create the endpoint from the configuration built in the previous step
create_endpoint_response = sagemaker_session.create_endpoint(
    config_name=str(BYOC_endpoint_config_name),
    endpoint_name=job_name.partition('-')[0] + '-endpoint',
)

In [None]:
# Display the endpoint description; the value returned by create_endpoint()
# is passed as the endpoint name.
sagemaker_client.describe_endpoint(EndpointName=create_endpoint_response)

---

---
## Manually Test Endpoint (Simulate pyStig driver)
### Get Sample Data for predictions

In [None]:
# Helper functions
def download(url):
    """
    Helper function to download individual file from given url.

    The file is saved in the current working directory under the last path
    segment of the URL. If a file with that name already exists, nothing is
    downloaded.

    Arguments:
    url -- full URL of the file to download

    Returns:
    filename -- downloaded file name

    Raises:
    ValueError -- if the URL ends with "/" and therefore names no file
    """
    filename = url.split("/")[-1]
    if not filename:
        raise ValueError("URL does not end with a file name: {!r}".format(url))
    if not os.path.exists(filename):
        # Download to a side file first so that an interrupted transfer never
        # leaves a truncated `filename` that later calls would take as complete.
        partial = filename + ".part"
        try:
            urllib.request.urlretrieve(url, partial)
            os.replace(partial, filename)
        finally:
            # Only present here when the download or the move failed
            if os.path.exists(partial):
                os.remove(partial)
    return filename

# To download and extract Sample Data
# NOTE(review): the region is hard-coded to us-west-2, and if data_bucket ends
# with 'data/' the resulting URL contains 'data//data/' -- confirm the object key.
URL = 'https://s3.us-west-2.amazonaws.com/{}/data/data.zip'.format(data_bucket.split('//')[1])
file = download(URL)

# Extract the file into the current working directory
with zipfile.ZipFile(file) as zf:
    zf.extractall()
    
# Image Transformations
def crop(image):
    """
    Trim rows from the top and bottom of the image (the sky at the top and
    the car front at the bottom).

    Arguments:
    image -- array indexed along three axes, rows first

    Returns:
    The image without its first 60 rows and its last 25 rows.
    """
    first_row, last_row = 60, -25
    return image[first_row:last_row, :, :]

def resize(image):
    """
    Resize the image to the input shape used by the network model.

    Reads the module-level IMAGE_WIDTH and IMAGE_HEIGHT at call time, so they
    must be defined before this function is called.

    Arguments:
    image -- image array to resize

    Returns:
    Resized image.
    """
    # The third positional parameter of cv2.resize is `dst`, not the
    # interpolation mode, so the flag has to be passed by keyword to take effect.
    # NOTE(review): confirm the training container preprocesses images with
    # the same interpolation.
    return cv2.resize(
        image,
        (IMAGE_WIDTH, IMAGE_HEIGHT),
        interpolation=cv2.INTER_AREA,
    )

def rgb2yuv(image):
    """
    Change the colour space of the image from RGB to YUV.

    Arguments:
    image -- RGB image array

    Returns:
    YUV image.
    """
    yuv_image = cv2.cvtColor(image, cv2.COLOR_RGB2YUV)
    return yuv_image

def load(data_dir, image_file):
    """
    Read an image file from disk.

    Arguments:
    data_dir -- directory the image path is relative to
    image_file -- image path; surrounding whitespace is stripped

    Returns:
    The image as returned by matplotlib's imread.
    """
    image_path = os.path.join(data_dir, image_file.strip())
    return mpimg.imread(image_path)

def transform(image):
    """
    Run the full preprocessing chain on one image.

    Arguments:
    image -- image array as returned by load()

    Returns:
    The image after crop(), then resize(), then rgb2yuv().
    """
    return rgb2yuv(resize(crop(image)))

### Random Sample Image

In [None]:
# Network input dimensions; resize() reads IMAGE_WIDTH and IMAGE_HEIGHT
IMAGE_HEIGHT = 66
IMAGE_WIDTH = 200
IMAGE_CHANNELS = 3
INPUT_SHAPE = (IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS)

# Driving log (presumably extracted from data.zip): image paths and steering values
data_df = pd.read_csv('./data/driving_log.csv')
X = data_df[['center', 'left', 'right']].values
y = data_df['steering'].values

# Use image 900: the original 'center' image (column 0 of X)
random_image = X[900, 0]
img = load('data', random_image)

In [None]:
# Simulate pyStig call by first pre-processing image and converting to 4D array
endpoint_name = sagemaker_client.describe_endpoint(
    EndpointName=create_endpoint_response
)['EndpointName']
# Add a leading axis of length 1 in front of the transformed image
payload = np.expand_dims(transform(img), axis=0)

In [None]:
# Invoke SageMaker endpoint with image data
runtime_client = boto3.client('sagemaker-runtime')
# The payload is sent as a JSON-encoded nested list
response = runtime_client.invoke_endpoint(
    Body=json.dumps(payload.tolist()),
    ContentType='application/json',
    EndpointName=endpoint_name,
)
# The response body is parsed as JSON; its first element is the prediction
prediction = float(json.loads(response['Body'].read().decode('utf-8'))[0])

In [None]:
# Steering angle prediction on image 900 (displayed as the cell output)
prediction

In [None]:
# Original steering angle for image 900, taken from the 'steering' column of
# the driving log, for comparison with the prediction
y[900]

---

---
## Test Endpoint using `numpy` Arrays (Simulate pyStig driver)

<details><summary><strong>Note to self</strong></summary><p>
    DO NOT DOWNLOAD `data.zip`. Use the `numpy` arrays in `/tmp/`
    </p>
</details>