In [59]:
#### Import statements
import sagemaker  # SageMaker SDK: Session, Estimator, s3_input, image lookup
from sagemaker import get_execution_role
import boto3  # used below to look up the current AWS region
import multiprocessing  # NOTE(review): not referenced in the cells visible here
import nltk  # NOTE(review): only used for the download below in the cells visible here
# Fetch the 'punkt' package into the local nltk_data directory.
nltk.download('punkt')

[nltk_data] Downloading package punkt to /home/ec2-user/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [60]:
##### Configuration: S3 destination, SageMaker session and execution role
# Bucket and key prefix under which the data and model output are stored.
# Replace both with your own values if needed.
bucket = "crazycurlygirlbucket311"
prefix = 'BookProphet/blazingtext/supervised'

sess = sagemaker.Session()

# Role that SageMaker uses to leverage AWS resources (S3, CloudWatch) on your behalf.
role = get_execution_role()

print(role)
print(bucket)



arn:aws:iam::023375022819:role/service-role/AmazonSageMaker-ExecutionRole-20181029T121824
crazycurlygirlbucket311


In [61]:
####Upload train and validation data into s3 bucket in defined file path
%time

train_channel = prefix + '/train'
validation_channel = prefix + '/validation'

sess.upload_data(path='bookprophet.train', bucket=bucket, key_prefix=train_channel)
sess.upload_data(path='bookprophet.validation', bucket=bucket, key_prefix=validation_channel)

s3_train_data = 's3://{}/{}'.format(bucket, train_channel)
s3_validation_data = 's3://{}/{}'.format(bucket, validation_channel)

####Output location defined
s3_output_location = 's3://{}/{}/output'.format(bucket, prefix)

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 5.48 µs


In [62]:
#### Look up the BlazingText training image for the current region
# The region comes from the default boto3 session.
region_name = boto3.Session().region_name
container = sagemaker.amazon.amazon_estimator.get_image_uri(
    region_name,
    "blazingtext",
    "latest",
)
print(
    'Using SageMaker BlazingText container: {} ({})'.format(container, region_name)
)

Using SageMaker BlazingText container: 811284229777.dkr.ecr.us-east-1.amazonaws.com/blazingtext:latest (us-east-1)


In [63]:
#### Estimator describing the BlazingText training job
bt_model = sagemaker.estimator.Estimator(
    container,
    role,
    base_job_name="BookProphetjob",
    sagemaker_session=sess,
    # Where the job reads its input from and writes its output to.
    input_mode='File',
    output_path=s3_output_location,
    # Training resources. NOTE(review): per the SDK docs the volume size is
    # in GB and the max run time in seconds -- confirm for the installed version.
    train_instance_count=1,
    train_instance_type='ml.m5.xlarge',
    train_volume_size=5,
    train_max_run=360000,
)

In [67]:
#### Hyperparameters for the supervised BlazingText run
bt_model.set_hyperparameters(
    mode="supervised",
    # Model settings
    vector_dim=10,
    word_ngrams=2,
    min_count=2,  # change in case of poor performance
    # Optimisation schedule
    epochs=50,
    learning_rate=0.05,
    # Early stopping
    early_stopping=True,
    patience=4,
    min_epochs=5,
)

In [68]:
#### Wrap the S3 locations as input channels for the training job
def _plain_text_channel(s3_uri):
    """Build an ``s3_input`` for ``s3_uri`` with the settings shared by both channels.

    The input is declared as 'text/plain' content, addressed as an 'S3Prefix',
    with distribution 'FullyReplicated'.
    """
    return sagemaker.session.s3_input(
        s3_uri,
        distribution='FullyReplicated',
        content_type='text/plain',
        s3_data_type='S3Prefix',
    )


train_data = _plain_text_channel(s3_train_data)
validation_data = _plain_text_channel(s3_validation_data)

# Channel name -> input object, as passed to `fit`.
data_channels = {'train': train_data, 'validation': validation_data}

In [None]:
#### Train the model: start the training job on the two channels with logs enabled
bt_model.fit(
    inputs=data_channels,
    logs=True,
)

INFO:sagemaker:Creating training-job with name: BookProphetjob-2019-03-14-19-17-10-549


2019-03-14 19:17:10 Starting - Starting the training job...
2019-03-14 19:17:12 Starting - Launching requested ML instances.