In [4]:
# Basic set up to define IAM Role
import boto3
import re

import os
import numpy as np
import pandas as pd

from sagemaker import get_execution_role

role = get_execution_role()

# Create a SageMaker Session that will be used
# to perform all SageMaker operations
import sagemaker as sage
from time import gmtime, strftime

sess = sage.Session()

In [5]:
account = sess.boto_session.client('sts').get_caller_identity()['Account']
region = sess.boto_session.region_name

#Note - make sure to include the Docker image tag (eg, :latest), since there seem to 
# be some issues with deploying a model if you don't include the tag. 
image = '{}.dkr.ecr.{}.amazonaws.com/npng-sagemaker-repo:latest'.format(account, region)

sagemaker_ml = sage.estimator.Estimator(image, 
                                 role, 1, 
                                 'ml.c4.2xlarge',
                                 output_path='s3://{}/output'.format(sess.default_bucket()),
                                 sagemaker_session=sess)

INFO:sagemaker:Created S3 bucket: sagemaker-us-east-1-010467601374


In [6]:
# AutoML only requires training data during the training process
# Additional data can be ingested later for prediction
# Specify location of the data with a dictionary where the value
# is the path to the s3 bucket containing the training data
data_location = {'training': 's3://h2o-sagemaker-npng/higgs_train_10k.csv'}
sagemaker_ml.fit(data_location)

INFO:sagemaker:Creating training-job with name: npng-sagemaker-repo-2018-03-16-00-42-26-556


..............................................................................
[31m/opt/ml/config/hyperparameters.json[0m
[31mAll Parameters:[0m
[31m{u'training': {u'target': u'response', u'classification': u'true'}, u'h2o': {}, u'aml': {u'max_models': 10}}[0m
[31mChecking whether there is an H2O instance running at http://localhost:54321..... not found.[0m
[31mAttempting to start a local H2O server...
  Java Version: openjdk version "1.8.0_151"; OpenJDK Runtime Environment (build 1.8.0_151-8u151-b12-0ubuntu0.16.04.2-b12); OpenJDK 64-Bit Server VM (build 25.151-b12, mixed mode)
  Starting server from /root/.local/lib/python2.7/site-packages/h2o/backend/bin/h2o.jar
  Ice root: /tmp/tmppOcoBF
  JVM stdout: /tmp/tmppOcoBF/h2o_unknownUser_started_from_python.out
  JVM stderr: /tmp/tmppOcoBF/h2o_unknownUser_started_from_python.err[0m
[31m  Server is running at http://127.0.0.1:54321[0m
[31mConnecting to H2O server at http://127.0.0.1:54321... successful.[0m
[31m--------------

In [7]:
# Deploying an actual predictor, so that we can make predictions on test data here
from sagemaker.predictor import csv_serializer
predictor = sagemaker_ml.deploy(1, 'ml.m4.xlarge', serializer=csv_serializer)

INFO:sagemaker:Creating model with name: npng-sagemaker-repo-2018-03-16-00-52-10-660
INFO:sagemaker:Creating endpoint with name npng-sagemaker-repo-2018-03-16-00-42-26-556


--------------------------------------------------------------------------------------------------!

In [7]:
import io
s3 = boto3.client('s3')
obj = s3.get_object(Bucket='h2o-sagemaker-npng', Key='higgs_test_5k.csv')
df = pd.read_csv(io.BytesIO(obj['Body'].read()))
x = np.array(df.columns).reshape((1,29))
test_vals = df.values
valid = np.append(x, test_vals, axis=0)
preds = predictor.predict(valid).decode('utf-8')

In [8]:
preds_list = preds.split(',')
full_preds = []
one_row = []
for item in preds_list:
    if '\n' in item:
        rmloc = item.find('\n')
        item = item[:rmloc]
        one_row.append(item)
        full_preds.append(one_row)
        one_row = []
    else:
        one_row.append(item)

In [9]:
full_preds

[['', 'predict', 'p0', 'p1'],
 ['0', '0.7169843180776059', '0.2830156819223941'],
 ['0', '0.6906529103093888', '0.30934708969061114'],
 ['1', '0.6077459796067575', '0.3922540203932425'],
 ['1', '0.3066629622627297', '0.6933370377372703'],
 ['1', '0.5347121308197321', '0.4652878691802679'],
 ['1', '0.2672755608707995', '0.7327244391292005'],
 ['1', '0.26313638220945623', '0.7368636177905438'],
 ['1', '0.6035179987388393', '0.3964820012611608'],
 ['1', '0.5363514627357449', '0.4636485372642552'],
 ['0', '0.8207930793457859', '0.17920692065421406'],
 ['1', '0.3976772803979276', '0.6023227196020724'],
 ['1', '0.4227416362101037', '0.5772583637898963'],
 ['1', '0.4812742610157208', '0.5187257389842792'],
 ['0', '0.7007538885440376', '0.2992461114559624'],
 ['1', '0.25171978955679863', '0.7482802104432014'],
 ['0', '0.6858086393937961', '0.3141913606062039'],
 ['1', '0.6039269942729166', '0.3960730057270833'],
 ['1', '0.4655046346118985', '0.5344953653881015'],
 ['1', '0.14812272417898042', 

In [137]:
#only run for cleanup, deletes the endpoint for the predictor
sess.delete_endpoint(predictor.endpoint)

INFO:sagemaker:Deleting endpoint with name: npng-sagemaker-repo-2018-03-13-20-48-16-982
