## Sagemaker Mxnet/gluon Example
Let's use the final project dataset

In [1]:
import os
import boto3
import sagemaker
from sagemaker.mxnet import MXNet
from mxnet import gluon
from sagemaker import get_execution_role

sagemaker_session = sagemaker.Session()

role = get_execution_role()

#### Reading the dataset from our s3 bucket

In [2]:
import pandas as pd
import pickle

sm = boto3.client('s3')
response = sm.get_object(Bucket='amazon-ml-accelerator', Key='mla-cv-sagemaker-demo/training_data.pkl')
model_str = response['Body'].read()
df = pickle.loads(model_str)

print(df.head())

                                                data  label
0  [[[255, 255, 255], [255, 255, 255], [255, 255,...      4
1  [[[255, 255, 255], [255, 255, 255], [255, 255,...      1
2  [[[255, 255, 255], [255, 255, 255], [255, 255,...      4
3  [[[255, 255, 255], [255, 255, 255], [255, 255,...      3
4  [[[255, 255, 255], [255, 255, 255], [255, 255,...      1


#### Preparing the data for xnet/gluon format

In [3]:
import numpy as np
from skimage.transform import resize

image_vals = df["data"].values
label_vals = df["label"].values

del df

# Create image and label arrays
images = np.zeros((image_vals.shape[0], 3, 224, 224))
labels = np.zeros((label_vals.shape[0]))

# Iterate through the dataframe row by row
for i, (im, label) in enumerate(zip(image_vals, label_vals)):
    # Get image from the data column of the current row
    
    # We need a fixed size input, our images have different sizes, let's pick 224x224.
    # Resize image below
    im = resize(im, output_shape=(224, 224))
    
    # Gluon/mxnet expects images in this format (channel, row, column)
    # This is the opposite of (row, column, channel), let's fix it
    im = np.moveaxis(im, -1, 0)
    
    # Assign the value in the image array
    images[i] = im
    
    # Assign the label in the label array
    labels[i] = label

  warn("The default mode, 'constant', will be changed to 'reflect' in "


#### Splitting the data into Training, Validation, Test and saving into s3 bucket

In [4]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(images, labels, stratify=labels, test_size=0.20)
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, stratify=y_test, test_size=0.5)

# Save training and validation data locally
np.save('train_data', X_train)
np.save('train_label', y_train)
np.save('validation_data', X_val)
np.save('validation_label', y_val)
np.save('test_data', X_test)
np.save('test_label', y_test)

prefix = 'mla-cv-sagemaker-demo'
bucket = 'amazon-ml-accelerator'

boto3.Session().resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'training/train_data.npy')).upload_file('train_data.npy')
boto3.Session().resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'training/train_label.npy')).upload_file('train_label.npy')
boto3.Session().resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'validation/validation_data.npy')).upload_file('validation_data.npy')
boto3.Session().resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'validation/validation_label.npy')).upload_file('validation_label.npy')
boto3.Session().resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'test/test_data.npy')).upload_file('test_data.npy')
boto3.Session().resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'test/test_label.npy')).upload_file('test_label.npy')

In [5]:
# Let's clear the files we saved locally (they are already uploaded to s3 above)
! rm train_data.npy train_label.npy validation_data.npy validation_label.npy test_data.npy test_label.npy

In [6]:
data_location = 's3://{}/{}'.format(bucket, prefix)

In [7]:
m = MXNet("final_project.py",
          role=role,
          train_instance_count=1,
          train_instance_type="ml.p2.xlarge",
          framework_version="1.2.1",
          py_version = 'py3',
          hyperparameters={'batch_size': 32,
                           'epochs': 20,
                           'learning_rate': 0.01,
                           'patience': 5
                          })

In [8]:
m.fit(data_location)

2019-10-18 20:25:04 Starting - Starting the training job...
2019-10-18 20:25:05 Starting - Launching requested ML instances......
2019-10-18 20:26:32 Starting - Preparing the instances for training.........
2019-10-18 20:27:41 Downloading - Downloading input data.........
2019-10-18 20:29:29 Training - Downloading the training image...
2019-10-18 20:29:49 Training - Training image download completed. Training in progress.[31m2019-10-18 20:29:49,337 INFO - root - running container entrypoint[0m
[31m2019-10-18 20:29:49,338 INFO - root - starting train task[0m
[31m2019-10-18 20:29:49,360 INFO - container_support.training - Training starting[0m
[31m2019-10-18 20:29:50,752 INFO - mxnet_container.train - MXNetTrainingEnvironment: {'model_dir': '/opt/ml/model', 'resource_config': {'network_interface_name': 'eth0', 'current_host': 'algo-1', 'hosts': ['algo-1']}, '_ps_verbose': 0, '_scheduler_host': 'algo-1', 'code_dir': '/opt/ml/code', 'user_script_name': 'final_project.py', 'hyperparam

In [10]:
predictor = m.deploy(instance_type='ml.m4.xlarge', initial_instance_count=1, endpoint_name='mla-cv-endpoint')

Using already existing model: sagemaker-mxnet-2019-10-18-20-25-03-160


--------------------------------------------------------------------------------------------------!

## Let's test this endpoint
We will read the test data from our S3 buckets and call the endpoint we just created.

In [11]:
import numpy as np
from io import BytesIO
import mxnet.ndarray as nd
import boto3

sm = boto3.client('s3')
obj = sm.get_object(Bucket='amazon-ml-accelerator', Key='mla-cv-sagemaker-demo/test/test_data.npy')
X_test = np.load(BytesIO(obj['Body'].read()))

obj = sm.get_object(Bucket='amazon-ml-accelerator', Key='mla-cv-sagemaker-demo/test/test_label.npy')
y_test = np.load(BytesIO(obj['Body'].read()))

payload = X_test[:4]
payload = bytearray(payload)

runtime = boto3.Session().client(service_name='runtime.sagemaker')
response = runtime.invoke_endpoint(EndpointName="mla-cv-endpoint", 
                                   ContentType='application/x-image', 
                                   Body=payload)

In [12]:
print(response['Body'].read())

b'[2.0, 1.0, 1.0, 4.0]'
