# SageMaker Model Training and Prediction
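## Introduction
This notebook uses Amazon SageMaker to work through a manual data science process: preparing the input data, training a model, and analyzing the training results.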

---
## 1 - Model Training
### Permissions and Environmental Variables

In [None]:
# Import libraries
from __future__ import print_function
import os
import io
import boto3
import sagemaker
import h5py
import json
import numpy as np
import mxnet as mx
from sagemaker.mxnet import MXNet
from mxnet import gluon
from sagemaker import get_execution_role

# Configure SageMaker: create the session used below to upload the input data
# to S3, and look up the execution role that is passed to the estimator.
sagemaker_session = sagemaker.Session()
role = get_execution_role()

### Input Data Preparation

In [None]:
# Create a local staging directory for the NumPy arrays
if not os.path.exists('tmp'):
    os.mkdir('tmp')

# Load the training dataset. The context manager closes the HDF5 file as soon
# as the arrays have been written, instead of leaving the handle open for the
# rest of the kernel session.
with h5py.File('datasets/datasets.h5', 'r') as dataset:
    # Save the training features and labels as NumPy arrays
    np.save('tmp/train_X.npy', np.array(dataset['train_set_x'][:]))
    np.save('tmp/train_Y.npy', np.array(dataset['train_set_y'][:]))

# Upload everything under tmp/ to S3 beneath the 'training_input' key prefix
inputs = sagemaker_session.upload_data(path='tmp', key_prefix='training_input')

# `inputs` is treated as an S3 URI (s3://<bucket>/<prefix>), so the bucket name
# is the third '/'-separated field.
bucket = inputs.split('/')[2]

### Create the Estimator using the SageMaker Python Library

In [None]:
# Configure the MXNet training job: `model.py` is the training script, run on a
# single ml.c4.xlarge instance, with the job output written to the same bucket
# that holds the uploaded input data.
# NOTE(review): the 'optmizer' key looks like a misspelling of 'optimizer'. It
# is left unchanged because the name model.py reads is not visible here --
# confirm against model.py before renaming.
mxnet_estimator = MXNet(
    'model.py',
    hyperparameters=dict(
        epochs=2500,
        optmizer='sgd',
        learning_rate=0.0075,
        batch_size=64,
    ),
    output_path='s3://' + bucket,
    train_instance_type='ml.c4.xlarge',
    train_instance_count=1,
    role=role,
)

### Train the Model

In [None]:
# Start the training job, using the data uploaded to S3 (`inputs`) as its input
mxnet_estimator.fit(inputs)

---
## 2 - Model Analysis

The training job's output is stored in S3 under the following location:

`<<Bucket>>/<<Job Name>>/output/`
- Model
- Results

In [None]:
# Enter the S3 Bucket Name and Region used during training
#S3_bucket = <<Bucket Name>>
#rgn = <<AWS Region>>
#job_name = <<SageMaker Job Name>>
rgn = 'us-west-2'
job_name = 'sagemaker-mxnet-py2-cpu-2018-03-31-04-49-38-166'

# Download and uncompress output results from model training
import tarfile, datetime
import matplotlib.pyplot as plt
%matplotlib inline
s3 = boto3.resource('s3')
s3.Bucket(bucket).download_file(job_name+'/output/output.tar.gz', './output.tar.gz')
tarfile.open('./output.tar.gz').extractall()
with open('results.json') as j:
    data = json.load(j)#, object_pairs_hook=OrderedDict)

# Format data for plotting
costs = []
for key, value in sorted(data.iteritems(), key=lambda (k,v): (v, k)):
    if 'epoch' in key:
        costs.append(value)
    elif 'Start' in key:
        start = datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S.%f")
    else:
        end = datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S.%f")
delta = end - start
costs.reverse()
print("Total Processing time: {} Minute(s)".format(int(delta.total_seconds() / 60)))

# Plot the results
plt.rcParams['figure.figsize'] = (11.0, 10.0)
plt.plot(costs)
plt.ylabel('Cost')
plt.xlabel('Epochs')
plt.title("Learning Curve")
plt.show;

---
## Next: DevOps Process Integration