In [None]:
%%sh
# Build the Docker image found in ./container and push it to this account's
# ECR registry as "<algorithm_name>:latest".

# Stop at the first failing command so that a failed login or build never
# ends with pushing a stale image.
set -e

# The name of our algorithm. It doubles as the ECR repository name, so it has
# to match the repository used in the image URI when the estimator is created
# later in this notebook ("lstm-gender-classifier", with hyphens).
algorithm_name=lstm-gender-classifier

cd container

# NOTE(review): the "decision_trees" directory name looks inherited from the
# example this notebook was adapted from - confirm it matches the layout
# under container/.
chmod +x decision_trees/train
chmod +x decision_trees/serve

account=$(aws sts get-caller-identity --query Account --output text)

# Get the region defined in the current configuration (default to us-west-2 if none defined).
# "aws configure get" exits non-zero when the value is unset; "|| true" keeps
# that from aborting the script under "set -e".
region=$(aws configure get region || true)
region=${region:-us-west-2}

registry="${account}.dkr.ecr.${region}.amazonaws.com"
fullname="${registry}/${algorithm_name}:latest"

# If the repository doesn't exist in ECR, create it. The region is passed
# explicitly so the fallback region above is honoured here as well.
if ! aws ecr describe-repositories --region "${region}" --repository-names "${algorithm_name}" > /dev/null 2>&1
then
    aws ecr create-repository --region "${region}" --repository-name "${algorithm_name}" > /dev/null
fi

# Log Docker in to ECR. Current AWS CLI releases only provide
# "get-login-password"; older ones only provide "get-login", so try the
# current command first and fall back to the legacy one.
if password=$(aws ecr get-login-password --region "${region}" 2> /dev/null)
then
    printf '%s' "${password}" | docker login --username AWS --password-stdin "${registry}"
else
    # Get the login command from ECR and execute it directly
    $(aws ecr get-login --region "${region}" --no-include-email)
fi

# Build the docker image locally with the image name and then push it to ECR
# with the full name.
docker build -t "${algorithm_name}" .
docker tag "${algorithm_name}" "${fullname}"

docker push "${fullname}"

In [1]:
# Imports used by the notebook (standard library first, then third-party)
import os
import re

import boto3
import numpy as np
import pandas as pd
from sagemaker import get_execution_role

# S3 key prefix under which the training data is uploaded
prefix = 'lstm-gender-classifier'

# IAM role handed to the estimator when it is created
role = get_execution_role()

## Create the session

The session remembers our connection parameters to SageMaker. We'll use it to perform all of our SageMaker operations.

In [2]:
from time import gmtime, strftime

import sagemaker as sage

# A single shared session object; the cells below use it for the upload,
# the estimator and the endpoint cleanup.
sess = sage.Session()

## Upload the data for training

In [3]:
# Local directory holding the training files
WORK_DIRECTORY = 'data'

# Upload the directory under the notebook's S3 prefix; the returned location
# is what gets passed to fit() as the training input.
data_location = sess.upload_data(path=WORK_DIRECTORY, key_prefix=prefix)

INFO:sagemaker:Created S3 bucket: sagemaker-us-east-1-741855114961


## Create an estimator and fit the model

In [4]:
# Work out the ECR image URI from the caller's account id and the session's region
account = sess.boto_session.client('sts').get_caller_identity()['Account']
region = sess.boto_session.region_name
image = '{}.dkr.ecr.{}.amazonaws.com/lstm-gender-classifier:latest'.format(account, region)

# Estimator for the custom image; positional arguments after the image are
# the IAM role, the instance count and the instance type.
tree = sage.estimator.Estimator(
    image,
    role,
    1,
    'ml.c4.8xlarge',
    output_path="s3://{}/output".format(sess.default_bucket()),
    sagemaker_session=sess,
)

# Start the training job on the uploaded data
tree.fit(data_location)

INFO:sagemaker:Creating training-job with name: lstm-gender-classifier-2018-03-12-04-16-47-095


.............................................................
  from ._conv import register_converters as _register_converters[0m
[31mUsing TensorFlow backend.[0m
[31m+-ml/
  |
  +-input/
  | |
  | +-config/
  | | |
  | | +-metric-definition-regex.json
  | | |
  | | +-hyperparameters.json
  | | |
  | | +-resourceconfig.json
  | | |
  | | +-init-config.json
  | | |
  | | +-inputdataconfig.json
  | |
  | +-data/
  |   |
  |   +-training-manifest
  |   |
  |   +-training-manifest-1
  |   |
  |   +-training/
  |     |
  |     +-allnames.txt
  |     |
  |     +-names.txt
  |
  +-output/
  | |
  | +-data/
  |
  +-model/[0m
[31mStarting the training.[0m
[31mHeperparameters file : {}[0m
[31mHyperparameters initialized[0m
[31mLoading data from : /opt/ml/input/data/training/allnames.txt[0m
[31mTraining data loaded[0m
[31mTraining data prepared[0m
[31mModel compiled[0m
[31mTrain on 85356 samples, validate on 21339 samples[0m
[31mEpoch 1/5[0m
[31m2018-03-12 04:21:50.521817:

[31mEpoch 2/5[0m
[31m 1000/85356 [..............................] - ETA: 1:02 - loss: 0.5283 - acc: 0.7400[0m
[31m 2000/85356 [..............................] - ETA: 1:01 - loss: 0.5357 - acc: 0.7270
 3000/85356 [>.............................] - ETA: 1:00 - loss: 0.5431 - acc: 0.7287[0m
[31m 4000/85356 [>.............................] - ETA: 1:00 - loss: 0.5426 - acc: 0.7252[0m
[31m 5000/85356 [>.............................] - ETA: 59s - loss: 0.5562 - acc: 0.7198 
 6000/85356 [=>............................] - ETA: 58s - loss: 0.5634 - acc: 0.7173[0m
[31m 7000/85356 [=>............................] - ETA: 58s - loss: 0.5632 - acc: 0.7170[0m
[31m 8000/85356 [=>............................] - ETA: 57s - loss: 0.5595 - acc: 0.7152[0m
[31m 9000/85356 [==>...........................] - ETA: 56s - loss: 0.5564 - acc: 0.7171[0m
[31m10000/85356 [==>...........................] - ETA: 55s - loss: 0.5581 - acc: 0.7164[0m
[31m11000/85356 [==>...........................] - ET

[31mEpoch 3/5[0m
[31m 1000/85356 [..............................] - ETA: 1:03 - loss: 0.5417 - acc: 0.7180
 2000/85356 [..............................] - ETA: 1:01 - loss: 0.5390 - acc: 0.7175[0m
[31m 3000/85356 [>.............................] - ETA: 1:00 - loss: 0.5349 - acc: 0.7213[0m
[31m 4000/85356 [>.............................] - ETA: 1:00 - loss: 0.5321 - acc: 0.7310[0m
[31m 5000/85356 [>.............................] - ETA: 59s - loss: 0.5233 - acc: 0.7370 
 6000/85356 [=>............................] - ETA: 58s - loss: 0.5247 - acc: 0.7365[0m
[31m 7000/85356 [=>............................] - ETA: 57s - loss: 0.5287 - acc: 0.7314[0m
[31m 8000/85356 [=>............................] - ETA: 57s - loss: 0.5278 - acc: 0.7322[0m
[31m 9000/85356 [==>...........................] - ETA: 56s - loss: 0.5287 - acc: 0.7326[0m
[31m10000/85356 [==>...........................] - ETA: 55s - loss: 0.5279 - acc: 0.7346[0m
[31m11000/85356 [==>...........................] - ET

[31m 5000/85356 [>.............................] - ETA: 59s - loss: 0.4986 - acc: 0.7522 
 6000/85356 [=>............................] - ETA: 58s - loss: 0.5055 - acc: 0.7483[0m
[31m 7000/85356 [=>............................] - ETA: 57s - loss: 0.5010 - acc: 0.7511[0m
[31m 8000/85356 [=>............................] - ETA: 57s - loss: 0.4996 - acc: 0.7534
 9000/85356 [==>...........................] - ETA: 56s - loss: 0.4983 - acc: 0.7532[0m
[31m10000/85356 [==>...........................] - ETA: 55s - loss: 0.4998 - acc: 0.7529[0m
[31m11000/85356 [==>...........................] - ETA: 54s - loss: 0.4976 - acc: 0.7548[0m
[31m12000/85356 [===>..........................] - ETA: 54s - loss: 0.4965 - acc: 0.7557[0m
[31m13000/85356 [===>..........................] - ETA: 53s - loss: 0.4977 - acc: 0.7556[0m
[31m14000/85356 [===>..........................] - ETA: 52s - loss: 0.4973 - acc: 0.7564[0m
[31m15000/85356 [====>.........................] - ETA: 51s - loss: 0.4958 -

[31m 6000/85356 [=>............................] - ETA: 58s - loss: 0.4870 - acc: 0.7635[0m
[31m 7000/85356 [=>............................] - ETA: 58s - loss: 0.4859 - acc: 0.7647[0m
[31m 8000/85356 [=>............................] - ETA: 57s - loss: 0.4851 - acc: 0.7670
 9000/85356 [==>...........................] - ETA: 56s - loss: 0.4811 - acc: 0.7682[0m
[31m10000/85356 [==>...........................] - ETA: 55s - loss: 0.4789 - acc: 0.7695[0m
[31m11000/85356 [==>...........................] - ETA: 54s - loss: 0.4787 - acc: 0.7702[0m
[31m12000/85356 [===>..........................] - ETA: 54s - loss: 0.4767 - acc: 0.7723[0m
[31m13000/85356 [===>..........................] - ETA: 53s - loss: 0.4771 - acc: 0.7731[0m
[31m14000/85356 [===>..........................] - ETA: 52s - loss: 0.4781 - acc: 0.7721[0m
[31m15000/85356 [====>.........................] - ETA: 51s - loss: 0.4768 - acc: 0.7732[0m
[31m16000/85356 [====>.........................] - ETA: 51s - loss: 

## Deploy the model

Deploying the model to SageMaker hosting just requires a `deploy` call on the fitted model. This call takes an instance count, instance type, and optionally serializer and deserializer functions. These are used when the resulting predictor is created on the endpoint.

In [5]:
from sagemaker.predictor import csv_serializer

# Host the fitted model on a single ml.m4.xlarge instance; requests are
# serialized with csv_serializer.
predictor = tree.deploy(
    initial_instance_count=1,
    instance_type='ml.m4.xlarge',
    serializer=csv_serializer,
)

INFO:sagemaker:Creating model with name: lstm-gender-classifier-2018-03-12-04-29-58-528
INFO:sagemaker:Creating endpoint with name lstm-gender-classifier-2018-03-12-04-16-47-095


----------------------------------------------------------------------------------------!

## Choose some data and use it for a prediction

Prediction is as easy as calling `predict` on the predictor we got back from `deploy`, passing the data we want predictions for. The serializer takes care of the data conversion for us.

In [7]:
# Read the sample names; the context manager closes the file as soon as the
# lines have been loaded instead of leaving the handle open.
with open('container/local_test/names.txt', 'r') as names_file:
    lines = [line.rstrip('\n') for line in names_file]

# Send each line to the endpoint and print the raw response
for line in lines:
    print(predictor.predict(line))

b'M,M,F,F,'


## Optional cleanup

When you're done with the endpoint, delete it so that it does not keep running (and accruing charges) in your account.

In [8]:
# Look up the endpoint behind the predictor and delete it
endpoint_name = predictor.endpoint
sess.delete_endpoint(endpoint_name)

INFO:sagemaker:Deleting endpoint with name: lstm-gender-classifier-2018-03-12-04-16-47-095
