In [86]:
# Basic set up to define IAM Role
import boto3
import re

import os
import numpy as np
import pandas as pd

from sagemaker import get_execution_role

role = get_execution_role()

# Create a SageMaker Session that will be used
# to perform all SageMaker operations
import sagemaker as sage
from time import gmtime, strftime

sess = sage.Session()

In [87]:
account = sess.boto_session.client('sts').get_caller_identity()['Account']
region = sess.boto_session.region_name

#Note - make sure to include the Docker image tag (eg, :latest), since there seem to 
# be some issues with deploying a model if you don't include the tag. 
image = '{}.dkr.ecr.{}.amazonaws.com/npng-sagemaker-repo:latest'.format(account, region)

sagemaker_ml = sage.estimator.Estimator(image, 
                                 role, 1, 
                                 'ml.c4.2xlarge',
                                 output_path='s3://{}/output'.format(sess.default_bucket()),
                                 sagemaker_session=sess)

INFO:sagemaker:Created S3 bucket: sagemaker-us-east-1-010467601374


In [88]:
# The current Docker image needs both training and testing data, 
# so they're specified in two different channels. It's assumed 
# that the user has already uploaded the required data into a couple of
# different directories, and this dictionary just specifies where the 
# training and testing data are, respectively. 
data_location = {'training': 's3://h2o-sagemaker-npng/s3titanic_train.csv',
                 'testing': 's3://h2o-sagemaker-npng/s3titanictest.csv'}
sagemaker_ml.fit(data_location)

INFO:sagemaker:Creating training-job with name: npng-sagemaker-repo-2018-03-13-20-48-16-982


.........................................................
[31m/opt/ml/config/h2o_params.json[0m
[31mH2O Parameters:[0m
[31m{}[0m
[31mChecking whether there is an H2O instance running at http://localhost:54321..... not found.[0m
[31mAttempting to start a local H2O server...
  Java Version: openjdk version "1.8.0_151"; OpenJDK Runtime Environment (build 1.8.0_151-8u151-b12-0ubuntu0.16.04.2-b12); OpenJDK 64-Bit Server VM (build 25.151-b12, mixed mode)
  Starting server from /root/.local/lib/python2.7/site-packages/h2o/backend/bin/h2o.jar
  Ice root: /tmp/tmpUc16oA
  JVM stdout: /tmp/tmpUc16oA/h2o_unknownUser_started_from_python.out
  JVM stderr: /tmp/tmpUc16oA/h2o_unknownUser_started_from_python.err[0m
[31m  Server is running at http://127.0.0.1:54321[0m
[31mConnecting to H2O server at http://127.0.0.1:54321... successful.[0m
[31m--------------------------  ----------------------------------------[0m
[31mH2O cluster uptime:         02 secs[0m
[31mH2O cluster timezone:  

In [89]:
# Deploying an actual predictor, so that we can make predictions on test data here
from sagemaker.predictor import csv_serializer
predictor = sagemaker_ml.deploy(1, 'ml.m4.xlarge', serializer=csv_serializer)

INFO:sagemaker:Creating model with name: npng-sagemaker-repo-2018-03-13-21-02-53-107
INFO:sagemaker:Creating endpoint with name npng-sagemaker-repo-2018-03-13-20-48-16-982


----------------------------------------------------------------------------------------------------------------!

In [105]:
import io
s3 = boto3.client('s3')
obj = s3.get_object(Bucket='h2o-sagemaker-npng', Key='titanic_test.csv')
df = pd.read_csv(io.BytesIO(obj['Body'].read()))
df = df.drop('PassengerId', axis=1)
x = np.array(df.columns).reshape((1,10))
test_vals = df.values
valid = np.append(x, test_vals, axis=0)
preds = predictor.predict(valid).decode('utf-8')

In [135]:
preds_list = preds.split(',')
full_preds = []
one_row = []
for item in preds_list:
    if '\n' in item:
        rmloc = item.find('\n')
        item = item[:rmloc]
        one_row.append(item)
        full_preds.append(one_row)
        one_row = []
    else:
        one_row.append(item)

In [136]:
full_preds

[['', 'predict', 'p0', 'p1'],
 ['0', '0.979892214511368', '0.020107785488632096'],
 ['0', '0.9322915744178676', '0.06770842558213233'],
 ['0', '0.9987185937599596', '0.0012814062400405134'],
 ['0', '0.9712614849057376', '0.02873851509426242'],
 ['1', '0.1020739455378058', '0.8979260544621942'],
 ['0', '0.9222775133514172', '0.07772248664858288'],
 ['1', '0.5029539960977737', '0.4970460039022263'],
 ['0', '0.9977786429374036', '0.0022213570625963567'],
 ['1', '0.059749551430131276', '0.9402504485698688'],
 ['0', '0.9962351874955512', '0.0037648125044487963'],
 ['0', '0.9914924269910028', '0.008507573008997316'],
 ['1', '0.12916968794170613', '0.8708303120582939'],
 ['1', '0.013753002470466293', '0.9862469975295336'],
 ['1', '0.7213429131943063', '0.2786570868056937'],
 ['1', '0.4084783267255101', '0.5915216732744899'],
 ['1', '0.00952648101657505', '0.9904735189834251'],
 ['0', '0.9987718388007584', '0.001228161199241625'],
 ['1', '0.7090959243622414', '0.2909040756377585'],
 ['1', '0.1

In [137]:
#only run for cleanup, deletes the endpoint for the predictor
sess.delete_endpoint(predictor.endpoint)

INFO:sagemaker:Deleting endpoint with name: npng-sagemaker-repo-2018-03-13-20-48-16-982
