In [18]:
####Import statements
import sagemaker
from sagemaker import get_execution_role
import boto3
import multiprocessing

In [19]:
##### Start the SageMaker session, resolve the execution role, and choose the S3 location
sess = sagemaker.Session()

# This is the role that SageMaker would use to leverage AWS resources
# (S3, CloudWatch) on your behalf.
role = get_execution_role()
print(role)

# Replace with your own bucket name if needed.
bucket = "crazycurlygirlbucket311"
print(bucket)

# Replace with the prefix under which you want to store the data if needed.
prefix = 'BookProphet/blazingtext/supervised'



arn:aws:iam::023375022819:role/service-role/AmazonSageMaker-ExecutionRole-20181029T121824
crazycurlygirlbucket311


In [20]:
####Upload train and validation data into s3 bucket in defined file path
%time

train_channel = prefix + '/train'
validation_channel = prefix + '/validation'

sess.upload_data(path='bookprophet.train', bucket=bucket, key_prefix=train_channel)
sess.upload_data(path='bookprophet.validation', bucket=bucket, key_prefix=validation_channel)

s3_train_data = 's3://{}/{}'.format(bucket, train_channel)
s3_validation_data = 's3://{}/{}'.format(bucket, validation_channel)

####Output location defined
s3_output_location = 's3://{}/{}/output'.format(bucket, prefix)

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.96 µs


In [22]:
#### Look up the BlazingText training image for the region of the current boto3 session
region_name = boto3.Session().region_name
container = sagemaker.amazon.amazon_estimator.get_image_uri(
    region_name,
    "blazingtext",
    "latest",
)
print('Using SageMaker BlazingText container: {} ({})'.format(container, region_name))

Using SageMaker BlazingText container: 811284229777.dkr.ecr.us-east-1.amazonaws.com/blazingtext:latest (us-east-1)


In [34]:
#### Build the Estimator that describes the training job
# Keyword settings are collected first so each one can carry its own note.
estimator_settings = dict(
    base_job_name="BookProphetjob",       # prefix of the generated training-job name
    train_instance_count=1,
    train_instance_type='ml.m5.xlarge',
    train_volume_size=5,                  # presumably GB of attached storage - confirm in SDK docs
    train_max_run=360000,                 # presumably a timeout in seconds - confirm in SDK docs
    input_mode='File',
    output_path=s3_output_location,
    sagemaker_session=sess,
)

bt_model = sagemaker.estimator.Estimator(container, role, **estimator_settings)

In [35]:
#### Hyperparameters for the BlazingText training job
bt_hyperparameters = {
    "mode": "supervised",
    "epochs": 10,
    "min_count": 1,  # Change in case of poor performance
    "learning_rate": 0.05,
    "vector_dim": 10,
    "early_stopping": True,
    "patience": 4,
    "min_epochs": 5,
    "word_ngrams": 2,
}

bt_model.set_hyperparameters(**bt_hyperparameters)

In [36]:
#### Wrap the S3 locations as training inputs and group them by channel name
# Both channels share the same input options; only the S3 URI differs.
s3_input_options = dict(
    distribution='FullyReplicated',
    content_type='text/plain',
    s3_data_type='S3Prefix',
)

train_data = sagemaker.session.s3_input(s3_train_data, **s3_input_options)
validation_data = sagemaker.session.s3_input(s3_validation_data, **s3_input_options)

# Channel name -> input object, as passed to `fit`.
data_channels = dict(train=train_data, validation=validation_data)

In [37]:
#### Launch the training job on both channels and stream its logs into the notebook
# NOTE(review): the recorded run below ends with a ClientError right after the
# "Loading validation data" log line; the log is truncated, so the cause is not
# visible here. Presumably the validation file's contents/format are at fault -
# TODO confirm against the full CloudWatch logs.
bt_model.fit(data_channels, logs=True)

INFO:sagemaker:Creating training-job with name: BookProphetjob-2019-03-14-16-12-57-895


2019-03-14 16:12:58 Starting - Starting the training job...
2019-03-14 16:12:59 Starting - Launching requested ML instances......
2019-03-14 16:14:11 Starting - Preparing the instances for training......
2019-03-14 16:15:18 Downloading - Downloading input data
2019-03-14 16:15:18 Training - Training image download completed. Training in progress.
[31mArguments: train[0m
[31m[03/14/2019 16:15:19 INFO 140305627490112] nvidia-smi took: 0.0250999927521 secs to identify 0 gpus[0m
[31m[03/14/2019 16:15:19 INFO 140305627490112] Running single machine CPU BlazingText training using supervised mode.[0m
[31m[03/14/2019 16:15:19 INFO 140305627490112] Processing /opt/ml/input/data/train/bookprophet.train . File size: 0 MB[0m
[31m[03/14/2019 16:15:19 INFO 140305627490112] Processing /opt/ml/input/data/validation/bookprophet.validation . File size: 0 MB[0m
[31mRead 0M words[0m
[31mNumber of words:  3708[0m
[31mLoading validation data from /opt/ml/input/data/validation/bookprophet.vali

ValueError: Error for Training job BookProphetjob-2019-03-14-16-12-57-895: Failed Reason: ClientError: Training did not complete successfully! Please check the logs for errors.