From ac7b85406b5fe92a5d94696c228a5920f18d44b4 Mon Sep 17 00:00:00 2001 From: Ragav Venkatesan Date: Tue, 16 Jan 2018 19:20:24 -0800 Subject: [PATCH 01/19] image classification algorithm api --- src/sagemaker/amazon/image_classification.py | 205 +++++++++++++++++++ tests/integ/test_image_classification.py | 72 +++++++ 2 files changed, 277 insertions(+) create mode 100644 src/sagemaker/amazon/image_classification.py create mode 100644 tests/integ/test_image_classification.py diff --git a/src/sagemaker/amazon/image_classification.py b/src/sagemaker/amazon/image_classification.py new file mode 100644 index 0000000000..70b9ed1fea --- /dev/null +++ b/src/sagemaker/amazon/image_classification.py @@ -0,0 +1,205 @@ +# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
+from sagemaker.amazon.amazon_estimator import AmazonS3AlgorithmEstimatorBase, registry +from sagemaker.amazon.hyperparameter import Hyperparameter as hp # noqa +from sagemaker.amazon.validation import gt, isin, isint, ge, isstr, lt, le +from sagemaker.predictor import RealTimePredictor +from sagemaker.model import Model +from sagemaker.session import Session + + +class ImageClassification(AmazonS3AlgorithmEstimatorBase): + + repo = 'image-classification:latest' + + num_classes = hp('num_classes', (gt(1), isint), 'num_classes should be an integer greater-than 1') + num_training_samples = hp('num_training_samples', (gt(1), isint), 'num_training_samples should be an integer greater-than 1') + use_pretrained_model = hp('use_pretrained_model', (isin(0, 1), isint), 'use_pretrained_model should be in the set, [0,1]') + checkpoint_frequency = hp('checkpoint_frequency', (ge(1), isint), 'checkpoint_frequency should be an integer greater-than 1') + num_layers = hp('num_layers', (isin(18, 34, 50, 101, 152, 200, 20, 32, 44, 56, 110), isint), \ + 'num_layers should be in the set [18, 34, 50, 101, 152, 200, 20, 32, 44, 56, 110]' ) + resize = hp('resize', (gt(1), isint), 'resize should be an integer greater-than 1') + epochs = hp('epochs', (ge(1), isint), 'epochs should be an integer greater-than 1') + learning_rate = hp('learning_rate', (gt(0)), 'learning_rate shoudl be a floating point greater than 0' ) + lr_schedule_factor = hp ('lr_schedule_factor', (gt(0)), 'lr_schedule_factor should be a floating point greater than 0') + lr_scheduler_step = hp ('lr_scheduler_step' ,(isstr), 'lr_scheduler_step should be a string input.') + optimizer = hp ('optimizer', (isin('sgd', 'adam', 'rmsprop', 'nag')), \ + 'Should be one optimizer among the list sgd, adam, rmsprop, or nag.') + momentum = hp ('momentum', (ge(0), le(1)), 'momentum is expected in the range 0, 1') + weight_decay = hp ('weight_decay', (ge(0), le(1)), 'weight_decay in range 0 , 1 ') + beta_1 = hp ('beta_1', (ge(0), le(1)), 
'beta_1 shoud be in range 0, 1') + beta_2 = hp ('beta_2', (ge(0), le(1)), 'beta_2 should be in the range 0, 1') + eps = hp ('eps', (gt(0), le(1)), 'eps should be in the range 0, 1') + gamma = hp ('gamma', (ge(0), le(1)), 'gamma should be in the range 0, 1') + mini_batch_size = hp ('mini_batch_size', (gt(0)), 'mini_batch_size should be an integer greater than 0') + image_shape = hp ('image_shape', (isstr), 'image_shape is expected to be a string') + augmentation_type = hp ('beta_1', (isin ('crop', 'crop_color', 'crop_color_transform')), \ + 'beta_1 must be from one option offered') + top_k = hp ('top_k', (ge(1), isint), 'top_k should be greater than or equal to 1') + kv_store = hp ('kv_store', (isin ('dist_sync', 'dist_async' )), 'Can be dist_sync or dist_async') + + + + def __init__(self, role, train_instance_count, train_instance_type, num_classes, num_training_samples, resize = None, + lr_scheduler_step = None, use_pretrained_model = 0, checkpoint_frequency = 1 , num_layers = 152, + epochs = 30, learning_rate = 0.1, + lr_schedule_factor = 0.1, optimizer = 'sgd', momentum = 0., weight_decay = 0.0001, beta_1 = 0.9, + beta_2 = 0.999, eps = 1e-8, gamma = 0.9 , mini_batch_size = 32 , image_shape = '3,224,224', + augmentation_type = None, top_k = None, kv_store = None, **kwargs): + """ + An Image classification algorithm :class:`~sagemaker.amazon.AmazonAlgorithmEstimatorBase`. Learns a classifier model that + + This Estimator may be fit via calls to + :meth:`~sagemaker.amazon.amazon_estimator.AmazonS3AlgorithmEstimatorBase.fit` + + After this Estimator is fit, model data is stored in S3. The model may be deployed to an Amazon SageMaker + Endpoint by invoking :meth:`~sagemaker.amazon.estimator.EstimatorBase.deploy`. As well as deploying an Endpoint, + ``deploy`` returns a :class:`~sagemaker.amazon.kmeans.ImageClassificationPredictor` object that can be used to label + assignment, using the trained model hosted in the SageMaker Endpoint. 
+ + ImageClassification Estimators can be configured by setting hyperparameters. The available hyperparameters for + ImageClassification are documented below. For further information on the AWS ImageClassification algorithm, please consult AWS technical + documentation: https://docs.aws.amazon.com/sagemaker/latest/dg/IC-Hyperparameter.html + + Args: + role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and + APIs that create Amazon SageMaker endpoints use this role to access + training data and model artifacts. After the endpoint is created, + the inference code might use the IAM role, if accessing AWS resource. + For more information, see ???. + train_instance_count (int): Number of Amazon EC2 instances to use for training. + train_instance_type (str): Type of EC2 instance to use for training, for example, 'ml.c4.xlarge'. + num_classes (int): Number of output classes. This parameter defines the dimensions of the network output + and is typically set to the number of classes in the dataset. + num_training_samples (int): Number of training examples in the input dataset. If there is a + mismatch between this value and the number of samples in the training + set, then the behavior of the lr_scheduler_step parameter is undefined + and distributed training accuracy might be affected. + use_pretrained_model (int): Flag to indicate whether to use pre-trained model for training. + If set to `1`, then the pretrained model with the corresponding number + of layers is loaded and used for training. Only the top FC layer are + reinitialized with random weights. Otherwise, the network is trained from scratch. Default value: 0 + checkpoint_frequency (int): Period to store model parameters (in number of epochs). Default value: 1 + num_layers (int): Number of layers for the network. For data with large image size (for example, 224x224 - like ImageNet), + we suggest selecting the number of layers from the set [18, 34, 50, 101, 152, 200]. 
For data with small + image size (for example, 28x28 - like CFAR), we suggest selecting the number of layers from the + set [20, 32, 44, 56, 110]. The number of layers in each set is based on the ResNet paper. + For transfer learning, the number of layers defines the architecture of base network and hence + can only be selected from the set [18, 34, 50, 101, 152, 200]. Default value: 152 + resize (int): Resize the image before using it for training. The images are resized so that the shortest side is of this + parameter. If the parameter is not set, then the training data is used as such without resizing. + Note: This option is available only for inputs specified as application/x-image content-type in + training and validation channels. + epochs (int): Number of training epochs. Default value: 30 + learning_rate (float): Initial learning rate. Float. Range in [0, 1]. Default value: 0.1 + lr_scheduler_factor (flaot): The ratio to reduce learning rate used in conjunction with the `lr_scheduler_step` parameter, + defined as `lr_new = lr_old * lr_scheduler_factor`. Valid values: Float. Range in [0, 1]. Default value: 0.1 + lr_scheduler_step (str): The epochs at which to reduce the learning rate. As explained in the ``lr_scheduler_factor`` parameter, the + learning rate is reduced by ``lr_scheduler_factor`` at these epochs. For example, if the value is set + to "10, 20", then the learning rate is reduced by ``lr_scheduler_factor`` after 10th epoch and again by + ``lr_scheduler_factor`` after 20th epoch. The epochs are delimited by ",". + optimizer (str): The optimizer types. For more details of the parameters for the optimizers, please refer to MXNet's API. + Valid values: One of sgd, adam, rmsprop, or nag. Default value: `sgd`. + momentum (float): The momentum for sgd and nag, ignored for other optimizers. Valid values: Float. Range in [0, 1]. Default value: 0 + weight_decay (float): The coefficient weight decay for sgd and nag, ignored for other optimizers. 
Range in [0, 1]. Default value: 0.0001 + beta_1 (float): The beta1 for adam, in other words, exponential decay rate for the first moment estimates. Range in [0, 1]. Default value: 0.9 + beta_2 (float): The beta2 for adam, in other words, exponential decay rate for the second moment estimates. Range in [0, 1]. Default value: 0.999 + eps (float): The epsilon for adam and rmsprop. It is usually set to a small value to avoid division by 0. Range in [0, 1]. Default value: 1e-8 + gamma (float): The gamma for rmsprop. A decay factor of moving average of the squared gradient. Range in [0, 1]. Default value: 0.9 + mini_batch_size (int): The batch size for training. In a single-machine multi-GPU setting, each GPU handles mini_batch_size/num_gpu + training samples. For the multi-machine training in dist_sync mode, the actual batch size is mini_batch_size*number + of machines. See MXNet docs for more details. Default value: 32 + image_shape (str): The input image dimensions, which is the same size as the input layer of the network. + The format is defined as 'num_channels, height, width'. The image dimension can take on any value as the + network can handle varied dimensions of the input. However, there may be memory constraints if a larger image + dimension is used. Typical image dimensions for image classification are '3, 224, 224'. This is similar to the ImageNet dataset. + Default value: ‘3, 224, 224’ + augmentation_type: (str): Data augmentation type. The input images can be augmented in multiple ways as specified below. + 'crop' - Randomly crop the image and flip the image horizontally + 'crop_color' - In addition to ‘crop’, three random values in the range [-36, 36], [-50, 50], and [-50, 50] + are added to the corresponding Hue-Saturation-Lightness channels respectively + 'crop_color_transform': In addition to crop_color, random transformations, including rotation, + shear, and aspect ratio variations are applied to the image. 
The maximum angle of rotation + is 10 degrees, the maximum shear ratio is 0.1, and the maximum aspect changing ratio is 0.25. + top_k (int): Report the top-k accuracy during training. This parameter has to be greater than 1, + since the top-1 training accuracy is the same as the regular training accuracy that has already been reported. + kv_store (str): Weight update synchronization mode during distributed training. The weight updates can be updated either synchronously + or asynchronously across machines. Synchronous updates typically provide better accuracy than asynchronous + updates but can be slower. See distributed training in MXNet for more details. This parameter is not applicable + to single machine training. + 'dist_sync' - The gradients are synchronized after every batch with all the workers. With dist_sync, + batch-size now means the batch size used on each machine. So if there are n machines and we use + batch size b, then dist_sync behaves like local with batch size n*b + 'dist_async'- Performs asynchronous updates. The weights are updated whenever gradients are received from any + machine and the weight updates are atomic. However, the order is not guaranteed. + **kwargs: base class keyword argument values. 
+ """ + super(ImageClassification, self).__init__(role, train_instance_count, train_instance_type, **kwargs) + self.num_classes = num_classes + self.num_training_samples = num_training_samples + self.resize = resize + self.lr_scheduler_step = lr_scheduler_step + self.use_pretrained_model = use_pretrained_model + self.checkpoint_frequency = checkpoint_frequency + self.num_layers = num_layers + self.epochs = epochs + self.learning_rate = learning_rate + self.lr_schedule_factor = lr_schedule_factor + self.optimizetr = optimizer + self.momentum = momentum + self.weight_decay = weight_decay + self.beta_1 = beta_1 + self.beta_2 = beta_2 + self.eps = eps + self.gamma = gamma + self.mini_batch_size = mini_batch_size + self.image_shape = image_shape + self.augmentation_type = augmentation_type + self.top_k = top_k + self.kv_store = kv_store + + def create_model(self): + """Return a :class:`~sagemaker.amazon.image_classification.ImageClassification` referencing the latest + s3 model data produced by this Estimator.""" + return ImageClassificationModel(self.model_data, self.role, self.sagemaker_session) + + def hyperparameters(self): + """Return the SageMaker hyperparameters for training this ImageClassification Estimator""" + hp = dict(force_dense='True') # Not sure what this is. + hp.update(super(ImageClassification, self).hyperparameters()) + return hp + + +class ImageClassificationPredictor(RealTimePredictor): + """Assigns input vectors to their closest cluster in a ImageClassification model. + + The implementation of :meth:`~sagemaker.predictor.RealTimePredictor.predict` in this + `RealTimePredictor` requires a `x-image` as input. 
+ + ``predict()`` returns """ + + def __init__(self, endpoint, sagemaker_session=None): + super(ImageClassifcationPredictor, self).__init__(endpoint, sagemaker_session, serializer=numpy_to_record_serializer(), + deserializer=record_deserializer(), content_type = 'application/x-image') + + +class ImageClassificationModel(Model): + """Reference KMeans s3 model data. Calling :meth:`~sagemaker.model.Model.deploy` creates an Endpoint and return + a Predictor to performs k-means cluster assignment.""" + + def __init__(self, model_data, role, sagemaker_session=None): + sagemaker_session = sagemaker_session or Session() + image = registry(sagemaker_session.boto_session.region_name, algorithm = 'image_classification') + \ + "/" + ImageClassification.repo + super(ImageClassificationModel, self).__init__(model_data, image, role, predictor_cls=ImageClassificationPredictor, + sagemaker_session=sagemaker_session) diff --git a/tests/integ/test_image_classification.py b/tests/integ/test_image_classification.py new file mode 100644 index 0000000000..b2be924685 --- /dev/null +++ b/tests/integ/test_image_classification.py @@ -0,0 +1,72 @@ +# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
+import gzip +import pickle +import sys + +import boto3 +import os + +import sagemaker +from sagemaker import ImageClassification, ImageClassificationModel +from sagemaker.utils import name_from_base +from tests.integ import DATA_DIR, REGION +from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name +import urllib + +def download(url): + filename = url.split("/")[-1] + if not os.path.exists(filename): + urllib.request.urlretrieve(url, filename) + + +def upload_to_s3(channel, file, bucket): + s3 = boto3.resource('s3') + data = open(file, "rb") + key = channel + '/' + file + s3.Bucket(bucket).put_object(Key=key, Body=data) + +def test_image_classification(): + + with timeout(minutes=15): + sagemaker_session = sagemaker.Session(boto_session=boto3.Session(region_name=REGION)) + + # caltech-256 + download('http://data.mxnet.io/data/caltech-256/caltech-256-60-train.rec') + upload_to_s3('train', 'caltech-256-60-train.rec', sagemaker_session.default_bucket()) + download('http://data.mxnet.io/data/caltech-256/caltech-256-60-val.rec') + upload_to_s3('validation', 'caltech-256-60-val.rec', sagemaker_session.default_bucket()) + ic = ImageClassification(role='SageMakerRole', train_instance_count=1, + train_instance_type='ml.c4.xlarge', data_location = 's3://' + sagemaker_session.default_bucket(), + num_classes=257, num_training_samples=15420, epochs = 1, image_shape= '3,32,32', + sagemaker_session=sagemaker_session, base_job_name='test-ic') + + ic.epochs = 1 + records = [] + records.append(ic.s3_record_set( 'train', channel = 'train')) + records.append(ic.s3_record_set( 'validation', channel = 'validation')) + import pdb + pdb.set_trace() + ic.fit(records) + """ + endpoint_name = name_from_base('ic') + with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20): + model = ImageClassificationModel(ic.model_data, role='SageMakerRole', sagemaker_session=sagemaker_session) + predictor = model.deploy(1, 'ml.c4.xlarge', 
endpoint_name=endpoint_name) + result = predictor.predict(train_set[0][:10]) + + assert len(result) == 10 + for record in result: + assert record.label["closest_cluster"] is not None + assert record.label["distance_to_cluster"] is not None + """ From 8b96f693f6f6dd1aeacd62f47548f9ef5aa56891 Mon Sep 17 00:00:00 2001 From: Ragav Venkatesan Date: Tue, 16 Jan 2018 19:20:32 -0800 Subject: [PATCH 02/19] image classification api --- .gitignore | 1 + src/sagemaker/__init__.py | 4 +- src/sagemaker/amazon/amazon_estimator.py | 78 ++++++++++++++++++++---- src/sagemaker/amazon/validation.py | 5 ++ tests/unit/test_amazon_estimator.py | 29 +++++++++ 5 files changed, 103 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index 3ee5780429..cb6d0d2664 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,4 @@ examples/tensorflow/distributed_mnist/data doc/_build **/.DS_Store venv/ +*.rec \ No newline at end of file diff --git a/src/sagemaker/__init__.py b/src/sagemaker/__init__.py index d5901c086d..15e8600742 100644 --- a/src/sagemaker/__init__.py +++ b/src/sagemaker/__init__.py @@ -16,6 +16,7 @@ from sagemaker.amazon.kmeans import KMeans, KMeansModel, KMeansPredictor from sagemaker.amazon.pca import PCA, PCAModel, PCAPredictor from sagemaker.amazon.linear_learner import LinearLearner, LinearLearnerModel, LinearLearnerPredictor +from sagemaker.amazon.image_classification import ImageClassification, ImageClassificationModel, ImageClassificationPredictor from sagemaker.model import Model from sagemaker.predictor import RealTimePredictor @@ -27,5 +28,6 @@ __all__ = [estimator, KMeans, KMeansModel, KMeansPredictor, PCA, PCAModel, PCAPredictor, LinearLearner, - LinearLearnerModel, LinearLearnerPredictor, Model, RealTimePredictor, Session, + LinearLearnerModel, LinearLearnerPredictor, Model, RealTimePredictor, Session, + ImageClassification, ImageClassificationModel, ImageClassificationPredictor, container_def, s3_input, production_variant, get_execution_role] diff --git 
a/src/sagemaker/amazon/amazon_estimator.py b/src/sagemaker/amazon/amazon_estimator.py index 6da58aa165..67ee7fd0b6 100644 --- a/src/sagemaker/amazon/amazon_estimator.py +++ b/src/sagemaker/amazon/amazon_estimator.py @@ -23,7 +23,6 @@ logger = logging.getLogger(__name__) - class AmazonAlgorithmEstimatorBase(EstimatorBase): """Base class for Amazon first-party Estimator implementations. This class isn't intended to be instantiated directly.""" @@ -128,10 +127,53 @@ def record_set(self, train, labels=None, channel="train"): logger.debug("Created manifest file {}".format(manifest_s3_file)) return RecordSet(manifest_s3_file, num_records=train.shape[0], feature_dim=train.shape[1], channel=channel) +class AmazonS3AlgorithmEstimatorBase(AmazonAlgorithmEstimatorBase): + """Base class for Amazon first-party Estimator implementations. This class ins't + intended to be instantiated directly. This is difference from the base class + because this class handles S3 data""" + + def fit(self, records, mini_batch_size=None, distribution = 'ShardedByS3Key', **kwargs): + """Fit this Estimator on serialized Record objects, stored in S3. + + ``records`` should be a list of instances of :class:`~RecordSet`. This defines a collection of + s3 data files to train this ``Estimator`` on. + + More information on the Amazon Record format is available at: + https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html + + See :meth:`~AmazonS3AlgorithmEstimatorBase.s3_record_set` to construct a ``RecordSet`` object + from :class:`~numpy.ndarray` arrays. + + Args: + records (list): This is a list of :class:`~RecordSet` items The list of records to train + this ``Estimator`` will depend on each algorithm and type of input data. + mini_batch_size (int or None): The size of each mini-batch to use when training. If None, a + default value will be used. 
+ """ + default_mini_batch_size = self.MAX_DEFAULT_BATCH_SIZE + self.mini_batch_size = mini_batch_size or default_mini_batch_size + #self.feature_dim = records.feature_dim + data = {} + for record in records: + data = {record.channel: s3_input(record.s3_data, distribution=distribution, + s3_data_type=record.s3_data_type)} + super(AmazonAlgorithmEstimatorBase, self).fit(data, **kwargs) + + def s3_record_set(self, s3_loc, channel="train" ): + """Build a :class:`~RecordSet` from a S3 location with data in it. + + Args: + s3_loc (str): A s3 bucket where data is located + channel (str): The SageMaker TrainingJob channel this RecordSet should be assigned to. + + Returns: + RecordSet: A RecordSet referencing the encoded, uploading training and label data. + """ + return RecordSet(s3_loc, channel=channel) class RecordSet(object): - def __init__(self, s3_data, num_records, feature_dim, s3_data_type='ManifestFile', channel='train'): + def __init__(self, s3_data, num_records = None, feature_dim = None, s3_data_type='ManifestFile', channel='train'): """A collection of Amazon :class:~`Record` objects serialized and stored in S3. 
Args: @@ -166,7 +208,6 @@ def _build_shards(num_shards, array): shards.append(array[(num_shards - 1) * shard_size:]) return shards - def upload_numpy_to_s3_shards(num_shards, s3, bucket, key_prefix, array, labels=None): """Upload the training ``array`` and ``labels`` arrays to ``num_shards`` s3 objects, stored in "s3://``bucket``/``key_prefix``/".""" @@ -202,13 +243,24 @@ def upload_numpy_to_s3_shards(num_shards, s3, bucket, key_prefix, array, labels= finally: raise ex - -def registry(region_name): - """Return docker registry for the given AWS region""" - account_id = { - "us-east-1": "382416733822", - "us-east-2": "404615174143", - "us-west-2": "174872318107", - "eu-west-1": "438346466558" - }[region_name] - return "{}.dkr.ecr.{}.amazonaws.com".format(account_id, region_name) +def registry(region_name, algorithm = None): + """Return docker registry for the given AWS region + + Args: + algorithm (str): Provide the algorithm to get the docker back""" + if algorithm is None: + account_id = { + "us-east-1": "382416733822", + "us-east-2": "404615174143", + "us-west-2": "174872318107", + "eu-west-1": "438346466558" + }[region_name] + return "{}.dkr.ecr.{}.amazonaws.com".format(account_id, region_name) + elif algorithm in ['image_classification']: + account_id = { + "us-east-1": "811284229777", + "us-east-2": "825641698319", + "us-west-2": "433757028032", + "eu-west-1": "685385470294" + }[region_name] + return "{}.dkr.ecr.{}.amazonaws.com".format(account_id, region_name) \ No newline at end of file diff --git a/src/sagemaker/amazon/validation.py b/src/sagemaker/amazon/validation.py index ff3259be8f..93fa960d30 100644 --- a/src/sagemaker/amazon/validation.py +++ b/src/sagemaker/amazon/validation.py @@ -30,6 +30,10 @@ def validate(value): return value < maximum return validate +def le(maximum): + def validate(value): + return value <= maximum + return validate def isin(*expected): def validate(value): @@ -45,4 +49,5 @@ def validate(value): isint = istype(int) isbool = 
istype(bool) +isstr = istype(str) isnumber = istype(numbers.Number) # noqa diff --git a/tests/unit/test_amazon_estimator.py b/tests/unit/test_amazon_estimator.py index a9eb15886e..e562be86d5 100644 --- a/tests/unit/test_amazon_estimator.py +++ b/tests/unit/test_amazon_estimator.py @@ -16,6 +16,7 @@ # Use PCA as a test implementation of AmazonAlgorithmEstimator from sagemaker.amazon.pca import PCA +from sagemaker.amazon.image_classification import ImageClassification from sagemaker.amazon.amazon_estimator import upload_numpy_to_s3_shards, _build_shards, registry @@ -63,6 +64,10 @@ def test_init(sagemaker_session): pca = PCA(num_components=55, sagemaker_session=sagemaker_session, **COMMON_ARGS) assert pca.num_components == 55 +def test_s3_init(sagemaker_session): + ic = ImageClassification(epochs = 12, num_classes = 2, num_training_samples = 2, + sagemaker_session=sagemaker_session, **COMMON_ARGS) + assert ic.epochs == 12 def test_init_all_pca_hyperparameters(sagemaker_session): pca = PCA(num_components=55, algorithm_mode='randomized', @@ -72,6 +77,14 @@ def test_init_all_pca_hyperparameters(sagemaker_session): assert pca.algorithm_mode == 'randomized' assert pca.extra_components == 33 +def test_init_all_ic_hyperparameters(sagemaker_session): + ic = ImageClassification(data_location='s3://some-bucket/some-key/', + num_classes=257, num_training_samples=15420, epochs = 1, + image_shape= '3,32,32', sagemaker_session=sagemaker_session, + **COMMON_ARGS) + assert ic.num_classes == 257 + assert ic.num_training_samples == 15420 + assert ic.image_shape == '3,32,32' def test_init_estimator_args(sagemaker_session): pca = PCA(num_components=1, train_max_run=1234, sagemaker_session=sagemaker_session, @@ -82,6 +95,16 @@ def test_init_estimator_args(sagemaker_session): assert pca.train_max_run == 1234 assert pca.data_location == 's3://some-bucket/some-key/' +def test_init_s3estimator_args(sagemaker_session): + ic = ImageClassification(data_location='s3://some-bucket/some-key/', + 
num_classes=257, num_training_samples=15420, epochs = 1, + image_shape= '3,32,32', sagemaker_session=sagemaker_session, + **COMMON_ARGS) + assert ic.train_instance_type == COMMON_ARGS['train_instance_type'] + assert ic.train_instance_count == COMMON_ARGS['train_instance_count'] + assert ic.role == COMMON_ARGS['role'] + assert ic.data_location == 's3://some-bucket/some-key/' + def test_data_location_validation(sagemaker_session): pca = PCA(num_components=2, sagemaker_session=sagemaker_session, **COMMON_ARGS) @@ -99,6 +122,12 @@ def test_pca_hyperparameters(sagemaker_session): subtract_mean='True', algorithm_mode='randomized') +def test_ic_hyperparameters(sagemaker_session): + ic = ImageClassification(data_location = 's3://some-bucket/some-key/', + num_classes=257, num_training_samples=15420, epochs = 1, + image_shape= '3,32,32', sagemaker_session=sagemaker_session, + **COMMON_ARGS) + assert isinstance(ic.hyperparameters(),dict) def test_image(sagemaker_session): pca = PCA(num_components=55, sagemaker_session=sagemaker_session, **COMMON_ARGS) From 7167aecd10dd6b85d7290f312fd24f6fc306383a Mon Sep 17 00:00:00 2001 From: Ragav Venkatesan Date: Tue, 16 Jan 2018 19:25:24 -0800 Subject: [PATCH 03/19] sync --- src/sagemaker/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/sagemaker/__init__.py b/src/sagemaker/__init__.py index 9ab20f045a..2dc35de963 100644 --- a/src/sagemaker/__init__.py +++ b/src/sagemaker/__init__.py @@ -30,6 +30,8 @@ __all__ = [estimator, KMeans, KMeansModel, KMeansPredictor, PCA, PCAModel, PCAPredictor, LinearLearner, - LinearLearnerModel, LinearLearnerPredictor, Model, RealTimePredictor, Session, + LinearLearnerModel, LinearLearnerPredictor, + FactorizationMachines, FactorizationMachinesModel, FactorizationMachinesPredictor, + Model, RealTimePredictor, Session, ImageClassification, ImageClassificationModel, ImageClassificationPredictor, container_def, s3_input, production_variant, get_execution_role ] From 
3d985e75476bdfd7257a59bbdabde542e597f528 Mon Sep 17 00:00:00 2001 From: Ragav Venkatesan Date: Tue, 16 Jan 2018 20:35:25 -0800 Subject: [PATCH 04/19] estimator is done. waiting on tests. --- src/sagemaker/amazon/amazon_estimator.py | 10 +++++----- tests/integ/test_image_classification.py | 10 ++++------ 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/sagemaker/amazon/amazon_estimator.py b/src/sagemaker/amazon/amazon_estimator.py index 252a754bf9..12e7d99a8d 100644 --- a/src/sagemaker/amazon/amazon_estimator.py +++ b/src/sagemaker/amazon/amazon_estimator.py @@ -147,13 +147,13 @@ def fit(self, records, mini_batch_size=None, distribution = 'ShardedByS3Key', ** mini_batch_size (int or None): The size of each mini-batch to use when training. If None, a default value will be used. """ - default_mini_batch_size = self.MAX_DEFAULT_BATCH_SIZE + default_mini_batch_size = 32 self.mini_batch_size = mini_batch_size or default_mini_batch_size - #self.feature_dim = records.feature_dim + #self.feature_dim = records.feature_dim data = {} for record in records: - data = {record.channel: s3_input(record.s3_data, distribution=distribution, - s3_data_type=record.s3_data_type)} + data[record.channel] = s3_input(record.s3_data, distribution=distribution, + s3_data_type=record.s3_data_type) super(AmazonAlgorithmEstimatorBase, self).fit(data, **kwargs) def s3_record_set(self, s3_loc, channel="train" ): @@ -166,7 +166,7 @@ def s3_record_set(self, s3_loc, channel="train" ): Returns: RecordSet: A RecordSet referencing the encoded, uploading training and label data. 
""" - return RecordSet(s3_loc, channel=channel) + return RecordSet(self.data_location + '/' + s3_loc, channel=channel) class RecordSet(object): diff --git a/tests/integ/test_image_classification.py b/tests/integ/test_image_classification.py index b2be924685..750638268f 100644 --- a/tests/integ/test_image_classification.py +++ b/tests/integ/test_image_classification.py @@ -45,18 +45,16 @@ def test_image_classification(): download('http://data.mxnet.io/data/caltech-256/caltech-256-60-train.rec') upload_to_s3('train', 'caltech-256-60-train.rec', sagemaker_session.default_bucket()) download('http://data.mxnet.io/data/caltech-256/caltech-256-60-val.rec') - upload_to_s3('validation', 'caltech-256-60-val.rec', sagemaker_session.default_bucket()) + upload_to_s3('validation', 'caltech-256-60-val.rec', sagemaker_session.default_bucket()) ic = ImageClassification(role='SageMakerRole', train_instance_count=1, - train_instance_type='ml.c4.xlarge', data_location = 's3://' + sagemaker_session.default_bucket(), + train_instance_type='ml.p3.2xlarge', data_location = 's3://' + sagemaker_session.default_bucket(), num_classes=257, num_training_samples=15420, epochs = 1, image_shape= '3,32,32', sagemaker_session=sagemaker_session, base_job_name='test-ic') ic.epochs = 1 records = [] - records.append(ic.s3_record_set( 'train', channel = 'train')) - records.append(ic.s3_record_set( 'validation', channel = 'validation')) - import pdb - pdb.set_trace() + records.append(ic.s3_record_set( 'training', channel = 'train')) + records.append(ic.s3_record_set( 'validation', channel = 'validation')) ic.fit(records) """ endpoint_name = name_from_base('ic') From 24353e2835739bcc28c87e8715887e6d23ff38cc Mon Sep 17 00:00:00 2001 From: Ragav Venkatesan Date: Wed, 24 Jan 2018 12:40:23 -0800 Subject: [PATCH 05/19] formatting for flake --- src/sagemaker/__init__.py | 5 ++-- src/sagemaker/amazon/amazon_estimator.py | 17 +++++++------ tests/unit/test_amazon_estimator.py | 32 ++++++++++++++---------- 3 files 
changed, 31 insertions(+), 23 deletions(-) diff --git a/src/sagemaker/__init__.py b/src/sagemaker/__init__.py index 2dc35de963..d60e37a061 100644 --- a/src/sagemaker/__init__.py +++ b/src/sagemaker/__init__.py @@ -16,7 +16,8 @@ from sagemaker.amazon.kmeans import KMeans, KMeansModel, KMeansPredictor from sagemaker.amazon.pca import PCA, PCAModel, PCAPredictor from sagemaker.amazon.linear_learner import LinearLearner, LinearLearnerModel, LinearLearnerPredictor -from sagemaker.amazon.image_classification import ImageClassification, ImageClassificationModel, ImageClassificationPredictor +from sagemaker.amazon.image_classification import ImageClassification, ImageClassificationModel +from sagemaker.amazon.image_classification import ImageClassificationPredictor from sagemaker.amazon.factorization_machines import FactorizationMachines, FactorizationMachinesModel from sagemaker.amazon.factorization_machines import FactorizationMachinesPredictor @@ -34,4 +35,4 @@ FactorizationMachines, FactorizationMachinesModel, FactorizationMachinesPredictor, Model, RealTimePredictor, Session, ImageClassification, ImageClassificationModel, ImageClassificationPredictor, - container_def, s3_input, production_variant, get_execution_role ] + container_def, s3_input, production_variant, get_execution_role] \ No newline at end of file diff --git a/src/sagemaker/amazon/amazon_estimator.py b/src/sagemaker/amazon/amazon_estimator.py index 12e7d99a8d..ef9b2c03c3 100644 --- a/src/sagemaker/amazon/amazon_estimator.py +++ b/src/sagemaker/amazon/amazon_estimator.py @@ -124,12 +124,13 @@ def record_set(self, train, labels=None, channel="train"): logger.debug("Created manifest file {}".format(manifest_s3_file)) return RecordSet(manifest_s3_file, num_records=train.shape[0], feature_dim=train.shape[1], channel=channel) + class AmazonS3AlgorithmEstimatorBase(AmazonAlgorithmEstimatorBase): - """Base class for Amazon first-party Estimator implementations. 
This class ins't - intended to be instantiated directly. This is difference from the base class + """Base class for Amazon first-party Estimator implementations. This class isn't + intended to be instantiated directly. This is difference from the base class because this class handles S3 data""" - def fit(self, records, mini_batch_size=None, distribution = 'ShardedByS3Key', **kwargs): + def fit(self, records, mini_batch_size=None, distribution='ShardedByS3Key', **kwargs): """Fit this Estimator on serialized Record objects, stored in S3. ``records`` should be a list of instances of :class:`~RecordSet`. This defines a collection of @@ -142,21 +143,20 @@ def fit(self, records, mini_batch_size=None, distribution = 'ShardedByS3Key', ** from :class:`~numpy.ndarray` arrays. Args: - records (list): This is a list of :class:`~RecordSet` items The list of records to train + records (list): This is a list of :class:`~RecordSet` items The list of records to train this ``Estimator`` will depend on each algorithm and type of input data. mini_batch_size (int or None): The size of each mini-batch to use when training. If None, a - default value will be used. + default value will be used. """ default_mini_batch_size = 32 self.mini_batch_size = mini_batch_size or default_mini_batch_size - #self.feature_dim = records.feature_dim data = {} for record in records: data[record.channel] = s3_input(record.s3_data, distribution=distribution, - s3_data_type=record.s3_data_type) + s3_data_type=record.s3_data_type) super(AmazonAlgorithmEstimatorBase, self).fit(data, **kwargs) - def s3_record_set(self, s3_loc, channel="train" ): + def s3_record_set(self, s3_loc, channel="train"): """Build a :class:`~RecordSet` from a S3 location with data in it. Args: @@ -168,6 +168,7 @@ def s3_record_set(self, s3_loc, channel="train" ): """ return RecordSet(self.data_location + '/' + s3_loc, channel=channel) +# Re-write a new recordset class for s3 objects. 
class RecordSet(object): def __init__(self, s3_data, num_records = None, feature_dim = None, s3_data_type='ManifestFile', channel='train'): diff --git a/tests/unit/test_amazon_estimator.py b/tests/unit/test_amazon_estimator.py index e562be86d5..0dac3ce678 100644 --- a/tests/unit/test_amazon_estimator.py +++ b/tests/unit/test_amazon_estimator.py @@ -19,7 +19,6 @@ from sagemaker.amazon.image_classification import ImageClassification from sagemaker.amazon.amazon_estimator import upload_numpy_to_s3_shards, _build_shards, registry - COMMON_ARGS = {'role': 'myrole', 'train_instance_count': 1, 'train_instance_type': 'ml.c4.xlarge'} REGION = "us-west-2" @@ -64,11 +63,13 @@ def test_init(sagemaker_session): pca = PCA(num_components=55, sagemaker_session=sagemaker_session, **COMMON_ARGS) assert pca.num_components == 55 + def test_s3_init(sagemaker_session): - ic = ImageClassification(epochs = 12, num_classes = 2, num_training_samples = 2, - sagemaker_session=sagemaker_session, **COMMON_ARGS) + ic = ImageClassification(epochs=12, num_classes=2, num_training_samples=2, + sagemaker_session=sagemaker_session, **COMMON_ARGS) assert ic.epochs == 12 + def test_init_all_pca_hyperparameters(sagemaker_session): pca = PCA(num_components=55, algorithm_mode='randomized', subtract_mean=True, extra_components=33, sagemaker_session=sagemaker_session, @@ -77,15 +78,17 @@ def test_init_all_pca_hyperparameters(sagemaker_session): assert pca.algorithm_mode == 'randomized' assert pca.extra_components == 33 + def test_init_all_ic_hyperparameters(sagemaker_session): ic = ImageClassification(data_location='s3://some-bucket/some-key/', - num_classes=257, num_training_samples=15420, epochs = 1, - image_shape= '3,32,32', sagemaker_session=sagemaker_session, + num_classes=257, num_training_samples=15420, epochs=1, + image_shape='3,32,32', sagemaker_session=sagemaker_session, **COMMON_ARGS) assert ic.num_classes == 257 assert ic.num_training_samples == 15420 assert ic.image_shape == '3,32,32' + def 
test_init_estimator_args(sagemaker_session): pca = PCA(num_components=1, train_max_run=1234, sagemaker_session=sagemaker_session, data_location='s3://some-bucket/some-key/', **COMMON_ARGS) @@ -95,11 +98,12 @@ def test_init_estimator_args(sagemaker_session): assert pca.train_max_run == 1234 assert pca.data_location == 's3://some-bucket/some-key/' + def test_init_s3estimator_args(sagemaker_session): ic = ImageClassification(data_location='s3://some-bucket/some-key/', - num_classes=257, num_training_samples=15420, epochs = 1, - image_shape= '3,32,32', sagemaker_session=sagemaker_session, - **COMMON_ARGS) + num_classes=257, num_training_samples=15420, epochs=1, + image_shape='3,32,32', sagemaker_session=sagemaker_session, + **COMMON_ARGS) assert ic.train_instance_type == COMMON_ARGS['train_instance_type'] assert ic.train_instance_count == COMMON_ARGS['train_instance_count'] assert ic.role == COMMON_ARGS['role'] @@ -122,12 +126,14 @@ def test_pca_hyperparameters(sagemaker_session): subtract_mean='True', algorithm_mode='randomized') + def test_ic_hyperparameters(sagemaker_session): - ic = ImageClassification(data_location = 's3://some-bucket/some-key/', - num_classes=257, num_training_samples=15420, epochs = 1, - image_shape= '3,32,32', sagemaker_session=sagemaker_session, - **COMMON_ARGS) - assert isinstance(ic.hyperparameters(),dict) + ic = ImageClassification(data_location='s3://some-bucket/some-key/', + num_classes=257, num_training_samples=15420, epochs=1, + image_shape='3,32,32', sagemaker_session=sagemaker_session, + **COMMON_ARGS) + assert isinstance(ic.hyperparameters(), dict) + def test_image(sagemaker_session): pca = PCA(num_components=55, sagemaker_session=sagemaker_session, **COMMON_ARGS) From 3d91eb7df68ee26eef5f1bcff56ae875ce20163c Mon Sep 17 00:00:00 2001 From: Ragav Venkatesan Date: Wed, 31 Jan 2018 15:32:31 -0800 Subject: [PATCH 06/19] merge --- .gitignore | 3 ++- src/sagemaker/amazon/amazon_estimator.py | 25 ++++++++++++++++-------- 2 files changed, 19 
insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index cb6d0d2664..29c833a1ec 100644 --- a/.gitignore +++ b/.gitignore @@ -20,4 +20,5 @@ examples/tensorflow/distributed_mnist/data doc/_build **/.DS_Store venv/ -*.rec \ No newline at end of file +*.rec +*.~ \ No newline at end of file diff --git a/src/sagemaker/amazon/amazon_estimator.py b/src/sagemaker/amazon/amazon_estimator.py index ef9b2c03c3..89e7c00cbc 100644 --- a/src/sagemaker/amazon/amazon_estimator.py +++ b/src/sagemaker/amazon/amazon_estimator.py @@ -23,6 +23,7 @@ logger = logging.getLogger(__name__) + class AmazonAlgorithmEstimatorBase(EstimatorBase): """Base class for Amazon first-party Estimator implementations. This class isn't intended to be instantiated directly.""" @@ -83,6 +84,7 @@ def fit(self, records, mini_batch_size=None, **kwargs): records (:class:`~RecordSet`): The records to train this ``Estimator`` on mini_batch_size (int or None): The size of each mini-batch to use when training. If None, a default value will be used. + distribution (s3 distribution type): S3 Distribution. """ self.feature_dim = records.feature_dim self.mini_batch_size = mini_batch_size @@ -168,6 +170,7 @@ def s3_record_set(self, s3_loc, channel="train"): """ return RecordSet(self.data_location + '/' + s3_loc, channel=channel) + # Re-write a new recordset class for s3 objects. 
class RecordSet(object): @@ -241,19 +244,16 @@ def upload_numpy_to_s3_shards(num_shards, s3, bucket, key_prefix, array, labels= finally: raise ex -def registry(region_name, algorithm = None): - """Return docker registry for the given AWS region - - Args: - algorithm (str): Provide the algorithm to get the docker back""" - if algorithm is None: + +def registry(region_name, algorithm=None): + """Return docker registry for the given AWS region""" + if algorithm in [None, "pca", "kmeans", "linear-learner", "factorization-machines"]: account_id = { "us-east-1": "382416733822", "us-east-2": "404615174143", "us-west-2": "174872318107", "eu-west-1": "438346466558" }[region_name] - return "{}.dkr.ecr.{}.amazonaws.com".format(account_id, region_name) elif algorithm in ['image_classification']: account_id = { "us-east-1": "811284229777", @@ -261,4 +261,13 @@ def registry(region_name, algorithm = None): "us-west-2": "433757028032", "eu-west-1": "685385470294" }[region_name] - return "{}.dkr.ecr.{}.amazonaws.com".format(account_id, region_name) \ No newline at end of file + elif algorithm in ["lda"]: + account_id = { + "us-east-1": "766337827248", + "us-east-2": "999911452149", + "us-west-2": "266724342769", + "eu-west-1": "999678624901" + }[region_name] + else: + raise ValueError("Algorithm class:{} doesn't have mapping to account_id with images".format(algorithm)) + return "{}.dkr.ecr.{}.amazonaws.com".format(account_id, region_name) \ No newline at end of file From 2de775a0e4a74238adee832998be95f669736321 Mon Sep 17 00:00:00 2001 From: Ragav Venkatesan Date: Wed, 31 Jan 2018 15:34:07 -0800 Subject: [PATCH 07/19] conflicts --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 29c833a1ec..9aff673bdc 100644 --- a/.gitignore +++ b/.gitignore @@ -21,4 +21,4 @@ doc/_build **/.DS_Store venv/ *.rec -*.~ \ No newline at end of file +*~ \ No newline at end of file From a919bce401a431193a66d1c92cf4f13b2d4847b5 Mon Sep 17 00:00:00 
2001 From: Ragav Venkatesan Date: Tue, 6 Feb 2018 10:33:40 -0800 Subject: [PATCH 08/19] ic-sdk push --- src/sagemaker/__init__.py | 2 +- src/sagemaker/amazon/amazon_estimator.py | 70 +++++++++-- src/sagemaker/amazon/common.py | 22 +++- src/sagemaker/amazon/image_classification.py | 126 ++++++++++--------- src/sagemaker/content_types.py | 2 + src/sagemaker/estimator.py | 1 - tests/integ/test_image_classification.py | 31 +++-- tests/unit/test_amazon_estimator.py | 5 + 8 files changed, 171 insertions(+), 88 deletions(-) diff --git a/src/sagemaker/__init__.py b/src/sagemaker/__init__.py index d60e37a061..2c103768e4 100644 --- a/src/sagemaker/__init__.py +++ b/src/sagemaker/__init__.py @@ -35,4 +35,4 @@ FactorizationMachines, FactorizationMachinesModel, FactorizationMachinesPredictor, Model, RealTimePredictor, Session, ImageClassification, ImageClassificationModel, ImageClassificationPredictor, - container_def, s3_input, production_variant, get_execution_role] \ No newline at end of file + container_def, s3_input, production_variant, get_execution_role] diff --git a/src/sagemaker/amazon/amazon_estimator.py b/src/sagemaker/amazon/amazon_estimator.py index 89e7c00cbc..07aa6a47af 100644 --- a/src/sagemaker/amazon/amazon_estimator.py +++ b/src/sagemaker/amazon/amazon_estimator.py @@ -127,12 +127,33 @@ def record_set(self, train, labels=None, channel="train"): return RecordSet(manifest_s3_file, num_records=train.shape[0], feature_dim=train.shape[1], channel=channel) -class AmazonS3AlgorithmEstimatorBase(AmazonAlgorithmEstimatorBase): +class AmazonS3AlgorithmEstimatorBase(EstimatorBase): """Base class for Amazon first-party Estimator implementations. This class isn't intended to be instantiated directly. This is difference from the base class because this class handles S3 data""" - def fit(self, records, mini_batch_size=None, distribution='ShardedByS3Key', **kwargs): + """Base class for Amazon first-party Estimator implementations. 
This class isn't intended + to be instantiated directly.""" + + mini_batch_size = hp('mini_batch_size', (validation.isint, validation.gt(0))) + + def __init__(self, role, train_instance_count, train_instance_type, algorithm, **kwargs): + """Initialize an AmazonAlgorithmEstimatorBase. + + Args: + algortihm (str): Use one of the supported algorithms + """ + super(AmazonS3AlgorithmEstimatorBase, self).__init__(role, train_instance_count, train_instance_type, + **kwargs) + self.algorithm = algorithm + + def train_image(self): + return registry(self.sagemaker_session.boto_region_name, algorithm=self.algorithm) + "/" + type(self).repo + + def hyperparameters(self): + return hp.serialize_all(self) + + def fit(self, s3set, mini_batch_size=None, distribution='ShardedByS3Key', **kwargs): """Fit this Estimator on serialized Record objects, stored in S3. ``records`` should be a list of instances of :class:`~RecordSet`. This defines a collection of @@ -145,33 +166,59 @@ def fit(self, records, mini_batch_size=None, distribution='ShardedByS3Key', **kw from :class:`~numpy.ndarray` arrays. Args: - records (list): This is a list of :class:`~RecordSet` items The list of records to train + s3set (list): This is a list of :class:`~S3Set` items The list of records to train this ``Estimator`` will depend on each algorithm and type of input data. + distribution (str): The s3 distribution of data. mini_batch_size (int or None): The size of each mini-batch to use when training. If None, a default value will be used. 
""" default_mini_batch_size = 32 self.mini_batch_size = mini_batch_size or default_mini_batch_size data = {} - for record in records: - data[record.channel] = s3_input(record.s3_data, distribution=distribution, - s3_data_type=record.s3_data_type) - super(AmazonAlgorithmEstimatorBase, self).fit(data, **kwargs) + for item in s3set: + data[item.channel] = s3_input(item.s3_location, distribution=item.distribution, + content_type=item.content_type, + s3_data_type=item.s3_data_type) + super(AmazonS3AlgorithmEstimatorBase, self).fit(data, **kwargs) - def s3_record_set(self, s3_loc, channel="train"): + def s3_record_set(self, s3_loc, content_type, channel="train"): """Build a :class:`~RecordSet` from a S3 location with data in it. Args: s3_loc (str): A s3 bucket where data is located channel (str): The SageMaker TrainingJob channel this RecordSet should be assigned to. - + content_type (str): Content type of the data. Returns: RecordSet: A RecordSet referencing the encoded, uploading training and label data. """ - return RecordSet(self.data_location + '/' + s3_loc, channel=channel) + return S3Set(s3_loc, content_type=content_type, channel=channel) + + +class S3Set (object): + def __init__(self, s3_location, content_type = None, s3_data_type='S3Prefix', distribution = 'FullyReplicated', channel='train'): + """A collection of Amazon :class:~`Record` objects serialized and stored in S3. + + Args: + s3_location (str): The S3 location of the training data + distribution (str): The s3 distribution of data. + content_type (str): Mandatory content type of the data. + s3_data_type (str): Valid values: 'S3Prefix', 'ManifestFile'. If 'S3Prefix', ``s3_data`` defines + a prefix of s3 objects to train on. All objects with s3 keys beginning with ``s3_data`` will + be used to train. If 'ManifestFile', then ``s3_data`` defines a single s3 manifest file, listing + each s3 object to train on. 
+ channel (str): The SageMaker Training Job channel this RecordSet should be bound to + """ + self.s3_location = s3_location + self.distribution = distribution + self.s3_data_type = s3_data_type + self.channel = channel + self.content_type = content_type + + def __repr__(self): + """Return an unambiguous representation of this S3Set""" + return str((S3Set, self.__dict__)) -# Re-write a new recordset class for s3 objects. class RecordSet(object): def __init__(self, s3_data, num_records = None, feature_dim = None, s3_data_type='ManifestFile', channel='train'): @@ -209,6 +256,7 @@ def _build_shards(num_shards, array): shards.append(array[(num_shards - 1) * shard_size:]) return shards + def upload_numpy_to_s3_shards(num_shards, s3, bucket, key_prefix, array, labels=None): """Upload the training ``array`` and ``labels`` arrays to ``num_shards`` s3 objects, stored in "s3://``bucket``/``key_prefix``/".""" diff --git a/src/sagemaker/amazon/common.py b/src/sagemaker/amazon/common.py index 6b5dc0c68a..4b3f511cfb 100644 --- a/src/sagemaker/amazon/common.py +++ b/src/sagemaker/amazon/common.py @@ -16,7 +16,7 @@ import numpy as np from scipy.sparse import issparse - +import json from sagemaker.amazon.record_pb2 import Record @@ -35,6 +35,17 @@ def __call__(self, array): return buf +class file_to_image_serializer(object): + + def __init__(self, content_type='application/x-image'): + self.content_type = content_type + + def __call__(self, file): + with open(file, 'rb') as f: + payload = f.read() + payload = bytearray(payload) + return payload + class record_deserializer(object): def __init__(self, accept='application/x-recordio-protobuf'): @@ -47,6 +58,15 @@ def __call__(self, stream, content_type): stream.close() +class response_deserializer(object): + + def __init__(self, accept='application/json'): + self.accept = accept + + def __call__(self, stream, content_type=None): + return json.loads(stream) + + def _write_feature_tensor(resolved_type, record, vector): if resolved_type 
== "Int32": record.features["values"].int32_tensor.values.extend(vector) diff --git a/src/sagemaker/amazon/image_classification.py b/src/sagemaker/amazon/image_classification.py index 70b9ed1fea..8e44b24964 100644 --- a/src/sagemaker/amazon/image_classification.py +++ b/src/sagemaker/amazon/image_classification.py @@ -11,8 +11,9 @@ # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. from sagemaker.amazon.amazon_estimator import AmazonS3AlgorithmEstimatorBase, registry +from sagemaker.amazon.common import file_to_image_serializer, response_deserializer from sagemaker.amazon.hyperparameter import Hyperparameter as hp # noqa -from sagemaker.amazon.validation import gt, isin, isint, ge, isstr, lt, le +from sagemaker.amazon.validation import gt, isin, isint, ge, isstr, le from sagemaker.predictor import RealTimePredictor from sagemaker.model import Model from sagemaker.session import Session @@ -20,42 +21,44 @@ class ImageClassification(AmazonS3AlgorithmEstimatorBase): - repo = 'image-classification:latest' + repo='image-classification:latest' num_classes = hp('num_classes', (gt(1), isint), 'num_classes should be an integer greater-than 1') - num_training_samples = hp('num_training_samples', (gt(1), isint), 'num_training_samples should be an integer greater-than 1') - use_pretrained_model = hp('use_pretrained_model', (isin(0, 1), isint), 'use_pretrained_model should be in the set, [0,1]') - checkpoint_frequency = hp('checkpoint_frequency', (ge(1), isint), 'checkpoint_frequency should be an integer greater-than 1') - num_layers = hp('num_layers', (isin(18, 34, 50, 101, 152, 200, 20, 32, 44, 56, 110), isint), \ + num_training_samples = hp('num_training_samples', (gt(1), isint), + 'num_training_samples should be an integer greater-than 1') + use_pretrained_model = hp('use_pretrained_model', (isin(0, 1), isint), + 'use_pretrained_model should be in the set, [0,1]') + checkpoint_frequency 
= hp('checkpoint_frequency', (ge(1), isint), + 'checkpoint_frequency should be an integer greater-than 1') + num_layers = hp('num_layers', (isin(18, 34, 50, 101, 152, 200, 20, 32, 44, 56, 110), isint), 'num_layers should be in the set [18, 34, 50, 101, 152, 200, 20, 32, 44, 56, 110]' ) resize = hp('resize', (gt(1), isint), 'resize should be an integer greater-than 1') epochs = hp('epochs', (ge(1), isint), 'epochs should be an integer greater-than 1') learning_rate = hp('learning_rate', (gt(0)), 'learning_rate shoudl be a floating point greater than 0' ) - lr_schedule_factor = hp ('lr_schedule_factor', (gt(0)), 'lr_schedule_factor should be a floating point greater than 0') - lr_scheduler_step = hp ('lr_scheduler_step' ,(isstr), 'lr_scheduler_step should be a string input.') - optimizer = hp ('optimizer', (isin('sgd', 'adam', 'rmsprop', 'nag')), \ - 'Should be one optimizer among the list sgd, adam, rmsprop, or nag.') - momentum = hp ('momentum', (ge(0), le(1)), 'momentum is expected in the range 0, 1') - weight_decay = hp ('weight_decay', (ge(0), le(1)), 'weight_decay in range 0 , 1 ') - beta_1 = hp ('beta_1', (ge(0), le(1)), 'beta_1 shoud be in range 0, 1') - beta_2 = hp ('beta_2', (ge(0), le(1)), 'beta_2 should be in the range 0, 1') - eps = hp ('eps', (gt(0), le(1)), 'eps should be in the range 0, 1') - gamma = hp ('gamma', (ge(0), le(1)), 'gamma should be in the range 0, 1') - mini_batch_size = hp ('mini_batch_size', (gt(0)), 'mini_batch_size should be an integer greater than 0') - image_shape = hp ('image_shape', (isstr), 'image_shape is expected to be a string') - augmentation_type = hp ('beta_1', (isin ('crop', 'crop_color', 'crop_color_transform')), \ - 'beta_1 must be from one option offered') - top_k = hp ('top_k', (ge(1), isint), 'top_k should be greater than or equal to 1') - kv_store = hp ('kv_store', (isin ('dist_sync', 'dist_async' )), 'Can be dist_sync or dist_async') - - - - def __init__(self, role, train_instance_count, train_instance_type, 
num_classes, num_training_samples, resize = None, - lr_scheduler_step = None, use_pretrained_model = 0, checkpoint_frequency = 1 , num_layers = 152, - epochs = 30, learning_rate = 0.1, - lr_schedule_factor = 0.1, optimizer = 'sgd', momentum = 0., weight_decay = 0.0001, beta_1 = 0.9, - beta_2 = 0.999, eps = 1e-8, gamma = 0.9 , mini_batch_size = 32 , image_shape = '3,224,224', - augmentation_type = None, top_k = None, kv_store = None, **kwargs): + lr_scheduler_factor = hp('lr_scheduler_factor', (gt(0)), + 'lr_schedule_factor should be a floating point greater than 0') + lr_scheduler_step = hp('lr_scheduler_step',(isstr), 'lr_scheduler_step should be a string input.') + optimizer = hp('optimizer', (isin('sgd', 'adam', 'rmsprop', 'nag')), + 'Should be one optimizer among the list sgd, adam, rmsprop, or nag.') + momentum = hp('momentum', (ge(0), le(1)), 'momentum is expected in the range 0, 1') + weight_decay = hp('weight_decay', (ge(0), le(1)), 'weight_decay in range 0 , 1 ') + beta_1 = hp('beta_1', (ge(0), le(1)), 'beta_1 should be in range 0, 1') + beta_2 = hp('beta_2', (ge(0), le(1)), 'beta_2 should be in the range 0, 1') + eps = hp('eps', (gt(0), le(1)), 'eps should be in the range 0, 1') + gamma = hp('gamma', (ge(0), le(1)), 'gamma should be in the range 0, 1') + mini_batch_size = hp('mini_batch_size', (gt(0)), 'mini_batch_size should be an integer greater than 0') + image_shape = hp('image_shape', (isstr), 'image_shape is expected to be a string') + augmentation_type = hp('beta_1', (isin ('crop', 'crop_color', 'crop_color_transform')), + 'beta_1 must be from one option offered') + top_k = hp('top_k', (ge(1), isint), 'top_k should be greater than or equal to 1') + kv_store=hp ('kv_store', (isin ('dist_sync', 'dist_async' )), 'Can be dist_sync or dist_async') + + def __init__(self, role, train_instance_count, train_instance_type, num_classes, num_training_samples, resize=None, + lr_scheduler_step=None, use_pretrained_model=0, checkpoint_frequency=1 , num_layers=18, 
+ epochs=30, learning_rate=0.1, + lr_schedule_factor=0.1, optimizer='sgd', momentum=0., weight_decay=0.0001, beta_1=0.9, + beta_2=0.999, eps=1e-8, gamma=0.9 , mini_batch_size=32 , image_shape='3,224,224', + augmentation_type=None, top_k=None, kv_store=None, **kwargs): """ An Image classification algorithm :class:`~sagemaker.amazon.AmazonAlgorithmEstimatorBase`. Learns a classifier model that @@ -88,22 +91,25 @@ def __init__(self, role, train_instance_count, train_instance_type, num_classes, use_pretrained_model (int): Flag to indicate whether to use pre-trained model for training. If set to `1`, then the pretrained model with the corresponding number of layers is loaded and used for training. Only the top FC layer are - reinitialized with random weights. Otherwise, the network is trained from scratch. Default value: 0 + reinitialized with random weights. Otherwise, the network is trained from scratch. + Default value: 0 checkpoint_frequency (int): Period to store model parameters (in number of epochs). Default value: 1 - num_layers (int): Number of layers for the network. For data with large image size (for example, 224x224 - like ImageNet), - we suggest selecting the number of layers from the set [18, 34, 50, 101, 152, 200]. For data with small - image size (for example, 28x28 - like CFAR), we suggest selecting the number of layers from the - set [20, 32, 44, 56, 110]. The number of layers in each set is based on the ResNet paper. - For transfer learning, the number of layers defines the architecture of base network and hence - can only be selected from the set [18, 34, 50, 101, 152, 200]. Default value: 152 - resize (int): Resize the image before using it for training. The images are resized so that the shortest side is of this - parameter. If the parameter is not set, then the training data is used as such without resizing. - Note: This option is available only for inputs specified as application/x-image content-type in - training and validation channels. 
+ num_layers (int): Number of layers for the network. For data with large image size (for example, 224x224 - + like ImageNet), we suggest selecting the number of layers from the set [18, 34, 50, 101, + 152, 200]. For data with small image size (for example, 28x28 - like CFAR), we suggest + selecting the number of layers from the set [20, 32, 44, 56, 110]. The number of layers + in each set is based on the ResNet paper. For transfer learning, the number of layers + defines the architecture of base network and hence can only be selected from the set + [18, 34, 50, 101, 152, 200]. Default value: 152 + resize (int): Resize the image before using it for training. The images are resized so that the shortest + side is of this parameter. If the parameter is not set, then the training data is used as such + without resizing. + Note: This option is available only for inputs specified as application/x-image content-type in + training and validation channels. epochs (int): Number of training epochs. Default value: 30 learning_rate (float): Initial learning rate. Float. Range in [0, 1]. Default value: 0.1 lr_scheduler_factor (flaot): The ratio to reduce learning rate used in conjunction with the `lr_scheduler_step` parameter, - defined as `lr_new = lr_old * lr_scheduler_factor`. Valid values: Float. Range in [0, 1]. Default value: 0.1 + defined as `lr_new=lr_old * lr_scheduler_factor`. Valid values: Float. Range in [0, 1]. Default value: 0.1 lr_scheduler_step (str): The epochs at which to reduce the learning rate. As explained in the ``lr_scheduler_factor`` parameter, the learning rate is reduced by ``lr_scheduler_factor`` at these epochs. For example, if the value is set to "10, 20", then the learning rate is reduced by ``lr_scheduler_factor`` after 10th epoch and again by @@ -144,28 +150,29 @@ def __init__(self, role, train_instance_count, train_instance_type, num_classes, machine and the weight updates are atomic. However, the order is not guaranteed. 
**kwargs: base class keyword argument values. """ - super(ImageClassification, self).__init__(role, train_instance_count, train_instance_type, **kwargs) + super(ImageClassification, self).__init__(role, train_instance_count, train_instance_type, + algorithm='image_classification', **kwargs) self.num_classes = num_classes self.num_training_samples = num_training_samples self.resize = resize self.lr_scheduler_step = lr_scheduler_step self.use_pretrained_model = use_pretrained_model self.checkpoint_frequency = checkpoint_frequency - self.num_layers = num_layers + self.num_layers = num_layers self.epochs = epochs self.learning_rate = learning_rate self.lr_schedule_factor = lr_schedule_factor - self.optimizetr = optimizer + self.optimizer = optimizer self.momentum = momentum self.weight_decay = weight_decay self.beta_1 = beta_1 self.beta_2 = beta_2 self.eps = eps self.gamma = gamma - self.mini_batch_size = mini_batch_size - self.image_shape = image_shape - self.augmentation_type = augmentation_type - self.top_k = top_k + self.mini_batch_size = mini_batch_size + self.image_shape = image_shape + self.augmentation_type = augmentation_type + self.top_k = top_k self.kv_store = kv_store def create_model(self): @@ -175,7 +182,7 @@ def create_model(self): def hyperparameters(self): """Return the SageMaker hyperparameters for training this ImageClassification Estimator""" - hp = dict(force_dense='True') # Not sure what this is. 
+ hp = dict() hp.update(super(ImageClassification, self).hyperparameters()) return hp @@ -189,17 +196,20 @@ class ImageClassificationPredictor(RealTimePredictor): ``predict()`` returns """ def __init__(self, endpoint, sagemaker_session=None): - super(ImageClassifcationPredictor, self).__init__(endpoint, sagemaker_session, serializer=numpy_to_record_serializer(), - deserializer=record_deserializer(), content_type = 'application/x-image') + super(ImageClassificationPredictor, self).__init__(endpoint, sagemaker_session, + serializer=file_to_image_serializer(), + deserializer=response_deserializer(), + content_type='application/x-image') class ImageClassificationModel(Model): """Reference KMeans s3 model data. Calling :meth:`~sagemaker.model.Model.deploy` creates an Endpoint and return - a Predictor to performs k-means cluster assignment.""" + a Predictor to performs classification assignment.""" def __init__(self, model_data, role, sagemaker_session=None): sagemaker_session = sagemaker_session or Session() - image = registry(sagemaker_session.boto_session.region_name, algorithm = 'image_classification') + \ - "/" + ImageClassification.repo - super(ImageClassificationModel, self).__init__(model_data, image, role, predictor_cls=ImageClassificationPredictor, - sagemaker_session=sagemaker_session) + image = registry(sagemaker_session.boto_session.region_name, algorithm='image_classification') + \ + "/" + ImageClassification.repo + super(ImageClassificationModel, self).__init__(model_data, image, role, + predictor_cls=ImageClassificationPredictor, + sagemaker_session=sagemaker_session) diff --git a/src/sagemaker/content_types.py b/src/sagemaker/content_types.py index 2ec9669c20..ff78a9fa62 100644 --- a/src/sagemaker/content_types.py +++ b/src/sagemaker/content_types.py @@ -13,3 +13,5 @@ CONTENT_TYPE_JSON = 'application/json' CONTENT_TYPE_CSV = 'text/csv' CONTENT_TYPE_OCTET_STREAM = 'application/octet-stream' +CONTENT_TYPE_IMAGES = 'application/x-image' 
+CONTENT_TYPE_RECORDIO = 'application/x-recordio' diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py index 2bfce13f59..ffbfd95339 100644 --- a/src/sagemaker/estimator.py +++ b/src/sagemaker/estimator.py @@ -245,7 +245,6 @@ def start_new(cls, estimator, inputs): Returns: sagemaker.estimator.Framework: Constructed object that captures all information about the started job. """ - input_config = _TrainingJob._format_inputs_to_input_config(inputs) role = estimator.sagemaker_session.expand_role(estimator.role) output_config = _TrainingJob._prepare_output_config(estimator.output_path, estimator.output_kms_key) diff --git a/tests/integ/test_image_classification.py b/tests/integ/test_image_classification.py index 750638268f..b31f62300c 100644 --- a/tests/integ/test_image_classification.py +++ b/tests/integ/test_image_classification.py @@ -36,35 +36,34 @@ def upload_to_s3(channel, file, bucket): key = channel + '/' + file s3.Bucket(bucket).put_object(Key=key, Body=data) + def test_image_classification(): - with timeout(minutes=15): + with timeout(minutes=45): sagemaker_session = sagemaker.Session(boto_session=boto3.Session(region_name=REGION)) # caltech-256 download('http://data.mxnet.io/data/caltech-256/caltech-256-60-train.rec') upload_to_s3('train', 'caltech-256-60-train.rec', sagemaker_session.default_bucket()) download('http://data.mxnet.io/data/caltech-256/caltech-256-60-val.rec') - upload_to_s3('validation', 'caltech-256-60-val.rec', sagemaker_session.default_bucket()) + upload_to_s3('validation', 'caltech-256-60-val.rec', sagemaker_session.default_bucket()) ic = ImageClassification(role='SageMakerRole', train_instance_count=1, - train_instance_type='ml.p3.2xlarge', data_location = 's3://' + sagemaker_session.default_bucket(), - num_classes=257, num_training_samples=15420, epochs = 1, image_shape= '3,32,32', - sagemaker_session=sagemaker_session, base_job_name='test-ic') + train_instance_type='ml.p3.2xlarge', num_layers = 18, + num_classes=257, 
num_training_samples=15420, epochs=1, image_shape='3,32,32', + sagemaker_session=sagemaker_session, base_job_name='test-ic') ic.epochs = 1 - records = [] - records.append(ic.s3_record_set( 'training', channel = 'train')) - records.append(ic.s3_record_set( 'validation', channel = 'validation')) - ic.fit(records) - """ + data_location = 's3://' + sagemaker_session.default_bucket() + s3set = list() + s3set.append(ic.s3_record_set(data_location + '/validation/', channel='validation', + content_type='application/x-recordio')) + s3set.append(ic.s3_record_set(data_location + '/train/', channel='train', + content_type='application/x-recordio')) + ic.fit(s3set) + endpoint_name = name_from_base('ic') with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20): model = ImageClassificationModel(ic.model_data, role='SageMakerRole', sagemaker_session=sagemaker_session) predictor = model.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name) - result = predictor.predict(train_set[0][:10]) + assert predictor is not None - assert len(result) == 10 - for record in result: - assert record.label["closest_cluster"] is not None - assert record.label["distance_to_cluster"] is not None - """ diff --git a/tests/unit/test_amazon_estimator.py b/tests/unit/test_amazon_estimator.py index 0dac3ce678..9915df6e81 100644 --- a/tests/unit/test_amazon_estimator.py +++ b/tests/unit/test_amazon_estimator.py @@ -68,6 +68,7 @@ def test_s3_init(sagemaker_session): ic = ImageClassification(epochs=12, num_classes=2, num_training_samples=2, sagemaker_session=sagemaker_session, **COMMON_ARGS) assert ic.epochs == 12 + assert ic.num_classes == 2 def test_init_all_pca_hyperparameters(sagemaker_session): @@ -138,6 +139,10 @@ def test_ic_hyperparameters(sagemaker_session): def test_image(sagemaker_session): pca = PCA(num_components=55, sagemaker_session=sagemaker_session, **COMMON_ARGS) assert pca.train_image() == registry('us-west-2') + '/pca:1' + ic = 
ImageClassification(data_location='s3://some-bucket/some-key/', + num_classes=257, num_training_samples=15420, epochs=1, + image_shape='3,32,32', sagemaker_session=sagemaker_session, + **COMMON_ARGS) @patch('time.strftime', return_value=TIMESTAMP) From 5b9eec0a3f98d84d71cfd86cfa324266660ab348 Mon Sep 17 00:00:00 2001 From: Ragav Venkatesan Date: Tue, 6 Feb 2018 11:46:33 -0800 Subject: [PATCH 09/19] removed duplicate doc --- src/sagemaker/amazon/amazon_estimator.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/sagemaker/amazon/amazon_estimator.py b/src/sagemaker/amazon/amazon_estimator.py index 07aa6a47af..113f53ea31 100644 --- a/src/sagemaker/amazon/amazon_estimator.py +++ b/src/sagemaker/amazon/amazon_estimator.py @@ -132,9 +132,6 @@ class AmazonS3AlgorithmEstimatorBase(EstimatorBase): intended to be instantiated directly. This is difference from the base class because this class handles S3 data""" - """Base class for Amazon first-party Estimator implementations. 
This class isn't intended - to be instantiated directly.""" - mini_batch_size = hp('mini_batch_size', (validation.isint, validation.gt(0))) def __init__(self, role, train_instance_count, train_instance_type, algorithm, **kwargs): @@ -318,4 +315,4 @@ def registry(region_name, algorithm=None): }[region_name] else: raise ValueError("Algorithm class:{} doesn't have mapping to account_id with images".format(algorithm)) - return "{}.dkr.ecr.{}.amazonaws.com".format(account_id, region_name) \ No newline at end of file + return "{}.dkr.ecr.{}.amazonaws.com".format(account_id, region_name) From ddd0e68e04701d656b906c1cdcf8e52ea0dee64e Mon Sep 17 00:00:00 2001 From: Ragav Venkatesan Date: Thu, 15 Feb 2018 11:45:48 -0800 Subject: [PATCH 10/19] moving forward with the recent updates --- src/sagemaker/amazon/image_classification.py | 47 ++++++++++---------- tests/integ/test_image_classification.py | 10 ++--- 2 files changed, 26 insertions(+), 31 deletions(-) diff --git a/src/sagemaker/amazon/image_classification.py b/src/sagemaker/amazon/image_classification.py index 8e44b24964..3791d7b4ef 100644 --- a/src/sagemaker/amazon/image_classification.py +++ b/src/sagemaker/amazon/image_classification.py @@ -12,7 +12,7 @@ # language governing permissions and limitations under the License. 
from sagemaker.amazon.amazon_estimator import AmazonS3AlgorithmEstimatorBase, registry from sagemaker.amazon.common import file_to_image_serializer, response_deserializer -from sagemaker.amazon.hyperparameter import Hyperparameter as hp # noqa +from sagemaker.amazon.hyperparameter import Hyperparameter as hp from sagemaker.amazon.validation import gt, isin, isint, ge, isstr, le from sagemaker.predictor import RealTimePredictor from sagemaker.model import Model @@ -20,44 +20,43 @@ class ImageClassification(AmazonS3AlgorithmEstimatorBase): - - repo='image-classification:latest' + repo = 'image-classification:latest' num_classes = hp('num_classes', (gt(1), isint), 'num_classes should be an integer greater-than 1') - num_training_samples = hp('num_training_samples', (gt(1), isint), - 'num_training_samples should be an integer greater-than 1') - use_pretrained_model = hp('use_pretrained_model', (isin(0, 1), isint), - 'use_pretrained_model should be in the set, [0,1]') - checkpoint_frequency = hp('checkpoint_frequency', (ge(1), isint), - 'checkpoint_frequency should be an integer greater-than 1') - num_layers = hp('num_layers', (isin(18, 34, 50, 101, 152, 200, 20, 32, 44, 56, 110), isint), - 'num_layers should be in the set [18, 34, 50, 101, 152, 200, 20, 32, 44, 56, 110]' ) + num_training_samples = hp('num_training_samples', (gt(1)), + 'num_training_samples should be an integer greater-than 1', int) + use_pretrained_model = hp('use_pretrained_model', (isin(0, 1), ), + 'use_pretrained_model should be in the set, [0,1]', int) + checkpoint_frequency = hp('checkpoint_frequency', (ge(1), ), + 'checkpoint_frequency should be an integer greater-than 1', int) + num_layers = hp('num_layers', (isin(18, 34, 50, 101, 152, 200, 20, 32, 44, 56, 110), ), + 'num_layers should be in the set [18, 34, 50, 101, 152, 200, 20, 32, 44, 56, 110]', int) resize = hp('resize', (gt(1), isint), 'resize should be an integer greater-than 1') epochs = hp('epochs', (ge(1), isint), 'epochs should be an 
integer greater-than 1') - learning_rate = hp('learning_rate', (gt(0)), 'learning_rate shoudl be a floating point greater than 0' ) + learning_rate = hp('learning_rate', (gt(0)), 'learning_rate shoudl be a floating point greater than 0') lr_scheduler_factor = hp('lr_scheduler_factor', (gt(0)), - 'lr_schedule_factor should be a floating point greater than 0') - lr_scheduler_step = hp('lr_scheduler_step',(isstr), 'lr_scheduler_step should be a string input.') + 'lr_schedule_factor should be a floating point greater than 0') + lr_scheduler_step = hp('lr_scheduler_step', (isstr), 'lr_scheduler_step should be a string input.') optimizer = hp('optimizer', (isin('sgd', 'adam', 'rmsprop', 'nag')), 'Should be one optimizer among the list sgd, adam, rmsprop, or nag.') momentum = hp('momentum', (ge(0), le(1)), 'momentum is expected in the range 0, 1') weight_decay = hp('weight_decay', (ge(0), le(1)), 'weight_decay in range 0 , 1 ') - beta_1 = hp('beta_1', (ge(0), le(1)), 'beta_1 should be in range 0, 1') - beta_2 = hp('beta_2', (ge(0), le(1)), 'beta_2 should be in the range 0, 1') - eps = hp('eps', (gt(0), le(1)), 'eps should be in the range 0, 1') - gamma = hp('gamma', (ge(0), le(1)), 'gamma should be in the range 0, 1') - mini_batch_size = hp('mini_batch_size', (gt(0)), 'mini_batch_size should be an integer greater than 0') - image_shape = hp('image_shape', (isstr), 'image_shape is expected to be a string') - augmentation_type = hp('beta_1', (isin ('crop', 'crop_color', 'crop_color_transform')), + beta_1 = hp('beta_1', (ge(0), le(1)), 'beta_1 should be in range 0, 1') + beta_2 = hp('beta_2', (ge(0), le(1)), 'beta_2 should be in the range 0, 1') + eps = hp('eps', (gt(0), le(1)), 'eps should be in the range 0, 1') + gamma = hp('gamma', (ge(0), le(1)), 'gamma should be in the range 0, 1') + mini_batch_size = hp('mini_batch_size', (gt(0)), 'mini_batch_size should be an integer greater than 0') + image_shape = hp('image_shape', (isstr), 'image_shape is expected to be a string') 
+ augmentation_type = hp('beta_1', (isin('crop', 'crop_color', 'crop_color_transform')), 'beta_1 must be from one option offered') top_k = hp('top_k', (ge(1), isint), 'top_k should be greater than or equal to 1') - kv_store=hp ('kv_store', (isin ('dist_sync', 'dist_async' )), 'Can be dist_sync or dist_async') + kv_store = hp('kv_store', (isin('dist_sync', 'dist_async')), 'Can be dist_sync or dist_async') def __init__(self, role, train_instance_count, train_instance_type, num_classes, num_training_samples, resize=None, - lr_scheduler_step=None, use_pretrained_model=0, checkpoint_frequency=1 , num_layers=18, + lr_scheduler_step=None, use_pretrained_model=0, checkpoint_frequency=1, num_layers=18, epochs=30, learning_rate=0.1, lr_schedule_factor=0.1, optimizer='sgd', momentum=0., weight_decay=0.0001, beta_1=0.9, - beta_2=0.999, eps=1e-8, gamma=0.9 , mini_batch_size=32 , image_shape='3,224,224', + beta_2=0.999, eps=1e-8, gamma=0.9, mini_batch_size=32, image_shape='3,224,224', augmentation_type=None, top_k=None, kv_store=None, **kwargs): """ An Image classification algorithm :class:`~sagemaker.amazon.AmazonAlgorithmEstimatorBase`. Learns a classifier model that diff --git a/tests/integ/test_image_classification.py b/tests/integ/test_image_classification.py index b31f62300c..d5d0ed1003 100644 --- a/tests/integ/test_image_classification.py +++ b/tests/integ/test_image_classification.py @@ -10,9 +10,6 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. 
-import gzip -import pickle -import sys import boto3 import os @@ -24,12 +21,13 @@ from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name import urllib + def download(url): filename = url.split("/")[-1] if not os.path.exists(filename): urllib.request.urlretrieve(url, filename) - + def upload_to_s3(channel, file, bucket): s3 = boto3.resource('s3') data = open(file, "rb") @@ -38,7 +36,6 @@ def upload_to_s3(channel, file, bucket): def test_image_classification(): - with timeout(minutes=45): sagemaker_session = sagemaker.Session(boto_session=boto3.Session(region_name=REGION)) @@ -48,7 +45,7 @@ def test_image_classification(): download('http://data.mxnet.io/data/caltech-256/caltech-256-60-val.rec') upload_to_s3('validation', 'caltech-256-60-val.rec', sagemaker_session.default_bucket()) ic = ImageClassification(role='SageMakerRole', train_instance_count=1, - train_instance_type='ml.p3.2xlarge', num_layers = 18, + train_instance_type='ml.p3.2xlarge', num_layers=18, num_classes=257, num_training_samples=15420, epochs=1, image_shape='3,32,32', sagemaker_session=sagemaker_session, base_job_name='test-ic') @@ -66,4 +63,3 @@ def test_image_classification(): model = ImageClassificationModel(ic.model_data, role='SageMakerRole', sagemaker_session=sagemaker_session) predictor = model.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name) assert predictor is not None - From c61c7ef5fbf69d87c013710b49919497908d7744 Mon Sep 17 00:00:00 2001 From: Ragav Venkatesan Date: Thu, 15 Feb 2018 12:01:45 -0800 Subject: [PATCH 11/19] updating for sync --- src/sagemaker/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/sagemaker/__init__.py b/src/sagemaker/__init__.py index d3c4e0aeba..b94151943e 100644 --- a/src/sagemaker/__init__.py +++ b/src/sagemaker/__init__.py @@ -35,6 +35,5 @@ LinearLearnerModel, LinearLearnerPredictor, LDA, LDAModel, LDAPredictor, FactorizationMachines, FactorizationMachinesModel, FactorizationMachinesPredictor, - Model, 
RealTimePredictor, Session, ImageClassification, ImageClassificationModel, ImageClassificationPredictor, container_def, s3_input, production_variant, get_execution_role] From 2825073f157f468e9c6863430ac1d2fb1ce63ebe Mon Sep 17 00:00:00 2001 From: Ragav Venkatesan Date: Thu, 15 Feb 2018 12:02:30 -0800 Subject: [PATCH 12/19] Update __init__.py --- src/sagemaker/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/sagemaker/__init__.py b/src/sagemaker/__init__.py index b94151943e..6b25b5307e 100644 --- a/src/sagemaker/__init__.py +++ b/src/sagemaker/__init__.py @@ -35,5 +35,4 @@ LinearLearnerModel, LinearLearnerPredictor, LDA, LDAModel, LDAPredictor, FactorizationMachines, FactorizationMachinesModel, FactorizationMachinesPredictor, - ImageClassification, ImageClassificationModel, ImageClassificationPredictor, container_def, s3_input, production_variant, get_execution_role] From 85564ef622d706fcd86b1220e76b798909abab6d Mon Sep 17 00:00:00 2001 From: Ragav Venkatesan Date: Thu, 15 Feb 2018 12:22:43 -0800 Subject: [PATCH 13/19] style changes to code --- src/sagemaker/amazon/amazon_estimator.py | 10 +- src/sagemaker/amazon/image_classification.py | 115 +++++++++++-------- 2 files changed, 73 insertions(+), 52 deletions(-) diff --git a/src/sagemaker/amazon/amazon_estimator.py b/src/sagemaker/amazon/amazon_estimator.py index 153aca3596..845c330ce3 100644 --- a/src/sagemaker/amazon/amazon_estimator.py +++ b/src/sagemaker/amazon/amazon_estimator.py @@ -217,8 +217,9 @@ def s3_record_set(self, s3_loc, content_type, channel="train"): return S3Set(s3_loc, content_type=content_type, channel=channel) -class S3Set (object): - def __init__(self, s3_location, content_type = None, s3_data_type='S3Prefix', distribution = 'FullyReplicated', channel='train'): +class S3Set(object): + def __init__(self, s3_location, content_type=None, s3_data_type='S3Prefix', distribution='FullyReplicated', + channel='train'): """A collection of Amazon :class:~`Record` objects serialized and 
stored in S3. Args: @@ -243,8 +244,7 @@ def __repr__(self): class RecordSet(object): - - def __init__(self, s3_data, num_records = None, feature_dim = None, s3_data_type='ManifestFile', channel='train'): + def __init__(self, s3_data, num_records=None, feature_dim=None, s3_data_type='ManifestFile', channel='train'): """A collection of Amazon :class:~`Record` objects serialized and stored in S3. Args: @@ -341,4 +341,4 @@ def registry(region_name, algorithm=None): }[region_name] else: raise ValueError("Algorithm class:{} doesn't have mapping to account_id with images".format(algorithm)) - return "{}.dkr.ecr.{}.amazonaws.com".format(account_id, region_name) \ No newline at end of file + return "{}.dkr.ecr.{}.amazonaws.com".format(account_id, region_name) diff --git a/src/sagemaker/amazon/image_classification.py b/src/sagemaker/amazon/image_classification.py index 3791d7b4ef..8c085d33e7 100644 --- a/src/sagemaker/amazon/image_classification.py +++ b/src/sagemaker/amazon/image_classification.py @@ -22,7 +22,7 @@ class ImageClassification(AmazonS3AlgorithmEstimatorBase): repo = 'image-classification:latest' - num_classes = hp('num_classes', (gt(1), isint), 'num_classes should be an integer greater-than 1') + num_classes = hp('num_classes', (gt(1)), 'num_classes should be an integer greater-than 1', int) num_training_samples = hp('num_training_samples', (gt(1)), 'num_training_samples should be an integer greater-than 1', int) use_pretrained_model = hp('use_pretrained_model', (isin(0, 1), ), @@ -31,12 +31,12 @@ class ImageClassification(AmazonS3AlgorithmEstimatorBase): 'checkpoint_frequency should be an integer greater-than 1', int) num_layers = hp('num_layers', (isin(18, 34, 50, 101, 152, 200, 20, 32, 44, 56, 110), ), 'num_layers should be in the set [18, 34, 50, 101, 152, 200, 20, 32, 44, 56, 110]', int) - resize = hp('resize', (gt(1), isint), 'resize should be an integer greater-than 1') - epochs = hp('epochs', (ge(1), isint), 'epochs should be an integer greater-than 
1') + resize = hp('resize', (gt(1)), 'resize should be an integer greater-than 1', int) + epochs = hp('epochs', (ge(1)), 'epochs should be an integer greater-than 1', int) learning_rate = hp('learning_rate', (gt(0)), 'learning_rate shoudl be a floating point greater than 0') lr_scheduler_factor = hp('lr_scheduler_factor', (gt(0)), 'lr_schedule_factor should be a floating point greater than 0') - lr_scheduler_step = hp('lr_scheduler_step', (isstr), 'lr_scheduler_step should be a string input.') + lr_scheduler_step = hp('lr_scheduler_step', (), 'lr_scheduler_step should be a string input.', str) optimizer = hp('optimizer', (isin('sgd', 'adam', 'rmsprop', 'nag')), 'Should be one optimizer among the list sgd, adam, rmsprop, or nag.') momentum = hp('momentum', (ge(0), le(1)), 'momentum is expected in the range 0, 1') @@ -46,10 +46,10 @@ class ImageClassification(AmazonS3AlgorithmEstimatorBase): eps = hp('eps', (gt(0), le(1)), 'eps should be in the range 0, 1') gamma = hp('gamma', (ge(0), le(1)), 'gamma should be in the range 0, 1') mini_batch_size = hp('mini_batch_size', (gt(0)), 'mini_batch_size should be an integer greater than 0') - image_shape = hp('image_shape', (isstr), 'image_shape is expected to be a string') + image_shape = hp('image_shape', (), 'image_shape is expected to be a string', str) augmentation_type = hp('beta_1', (isin('crop', 'crop_color', 'crop_color_transform')), 'beta_1 must be from one option offered') - top_k = hp('top_k', (ge(1), isint), 'top_k should be greater than or equal to 1') + top_k = hp('top_k', (ge(1)), 'top_k should be greater than or equal to 1', int) kv_store = hp('kv_store', (isin('dist_sync', 'dist_async')), 'Can be dist_sync or dist_async') def __init__(self, role, train_instance_count, train_instance_type, num_classes, num_training_samples, resize=None, @@ -59,19 +59,21 @@ def __init__(self, role, train_instance_count, train_instance_type, num_classes, beta_2=0.999, eps=1e-8, gamma=0.9, mini_batch_size=32, 
image_shape='3,224,224', augmentation_type=None, top_k=None, kv_store=None, **kwargs): """ - An Image classification algorithm :class:`~sagemaker.amazon.AmazonAlgorithmEstimatorBase`. Learns a classifier model that + An Image classification algorithm :class:`~sagemaker.amazon.AmazonAlgorithmEstimatorBase`. Learns a classifier + model that This Estimator may be fit via calls to :meth:`~sagemaker.amazon.amazon_estimator.AmazonS3AlgorithmEstimatorBase.fit` After this Estimator is fit, model data is stored in S3. The model may be deployed to an Amazon SageMaker Endpoint by invoking :meth:`~sagemaker.amazon.estimator.EstimatorBase.deploy`. As well as deploying an Endpoint, - ``deploy`` returns a :class:`~sagemaker.amazon.kmeans.ImageClassificationPredictor` object that can be used to label - assignment, using the trained model hosted in the SageMaker Endpoint. + ``deploy`` returns a :class:`~sagemaker.amazon.kmeans.ImageClassificationPredictor` object that can be used to + label assignment, using the trained model hosted in the SageMaker Endpoint. ImageClassification Estimators can be configured by setting hyperparameters. The available hyperparameters for - ImageClassification are documented below. For further information on the AWS ImageClassification algorithm, please consult AWS technical - documentation: https://docs.aws.amazon.com/sagemaker/latest/dg/IC-Hyperparameter.html + ImageClassification are documented below. For further information on the AWS ImageClassification algorithm, + please consult AWS technical documentation: + https://docs.aws.amazon.com/sagemaker/latest/dg/IC-Hyperparameter.html Args: role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and @@ -103,50 +105,69 @@ def __init__(self, role, train_instance_count, train_instance_type, num_classes, resize (int): Resize the image before using it for training. The images are resized so that the shortest side is of this parameter. 
If the parameter is not set, then the training data is used as such without resizing. - Note: This option is available only for inputs specified as application/x-image content-type in - training and validation channels. + Note: This option is available only for inputs specified as application/x-image content-type + in training and validation channels. epochs (int): Number of training epochs. Default value: 30 learning_rate (float): Initial learning rate. Float. Range in [0, 1]. Default value: 0.1 - lr_scheduler_factor (flaot): The ratio to reduce learning rate used in conjunction with the `lr_scheduler_step` parameter, - defined as `lr_new=lr_old * lr_scheduler_factor`. Valid values: Float. Range in [0, 1]. Default value: 0.1 - lr_scheduler_step (str): The epochs at which to reduce the learning rate. As explained in the ``lr_scheduler_factor`` parameter, the - learning rate is reduced by ``lr_scheduler_factor`` at these epochs. For example, if the value is set - to "10, 20", then the learning rate is reduced by ``lr_scheduler_factor`` after 10th epoch and again by - ``lr_scheduler_factor`` after 20th epoch. The epochs are delimited by ",". - optimizer (str): The optimizer types. For more details of the parameters for the optimizers, please refer to MXNet's API. - Valid values: One of sgd, adam, rmsprop, or nag. Default value: `sgd`. - momentum (float): The momentum for sgd and nag, ignored for other optimizers. Valid values: Float. Range in [0, 1]. Default value: 0 - weight_decay (float): The coefficient weight decay for sgd and nag, ignored for other optimizers. Range in [0, 1]. Default value: 0.0001 - beta_1 (float): The beta1 for adam, in other words, exponential decay rate for the first moment estimates. Range in [0, 1]. Default value: 0.9 - beta_2 (float): The beta2 for adam, in other words, exponential decay rate for the second moment estimates. Range in [0, 1]. Default value: 0.999 - eps (float): The epsilon for adam and rmsprop. 
It is usually set to a small value to avoid division by 0. Range in [0, 1]. Default value: 1e-8 - gamma (float): The gamma for rmsprop. A decay factor of moving average of the squared gradient. Range in [0, 1]. Default value: 0.9 - mini_batch_size (int): The batch size for training. In a single-machine multi-GPU setting, each GPU handles mini_batch_size/num_gpu - training samples. For the multi-machine training in dist_sync mode, the actual batch size is mini_batch_size*number - of machines. See MXNet docs for more details. Default value: 32 + lr_scheduler_factor (flaot): The ratio to reduce learning rate used in conjunction with the + `lr_scheduler_step` parameter, defined as `lr_new=lr_old * lr_scheduler_factor`. + Valid values: Float. Range in [0, 1]. Default value: 0.1 + lr_scheduler_step (str): The epochs at which to reduce the learning rate. As explained in the + ``lr_scheduler_factor`` parameter, the learning rate is reduced by + ``lr_scheduler_factor`` at these epochs. For example, if the value is set + to "10, 20", then the learning rate is reduced by ``lr_scheduler_factor`` after 10th + epoch and again by ``lr_scheduler_factor`` after 20th epoch. The epochs are delimited + by ",". + optimizer (str): The optimizer types. For more details of the parameters for the optimizers, please refer to + MXNet's API. Valid values: One of sgd, adam, rmsprop, or nag. Default value: `sgd`. + momentum (float): The momentum for sgd and nag, ignored for other optimizers. Valid values: Float. Range in + [0, 1]. Default value: 0 + weight_decay (float): The coefficient weight decay for sgd and nag, ignored for other optimizers. + Range in [0, 1]. Default value: 0.0001 + beta_1 (float): The beta1 for adam, in other words, exponential decay rate for the first moment estimates. + Range in [0, 1]. Default value: 0.9 + beta_2 (float): The beta2 for adam, in other words, exponential decay rate for the second moment estimates. + Range in [0, 1]. 
Default value: 0.999 + eps (float): The epsilon for adam and rmsprop. It is usually set to a small value to avoid division by 0. + Range in [0, 1]. Default value: 1e-8 + gamma (float): The gamma for rmsprop. A decay factor of moving average of the squared gradient. + Range in [0, 1]. Default value: 0.9 + mini_batch_size (int): The batch size for training. In a single-machine multi-GPU setting, each GPU handles + mini_batch_size/num_gpu training samples. For the multi-machine training in + dist_sync mode, the actual batch size is mini_batch_size*number of machines. + See MXNet docs for more details. Default value: 32 image_shape (str): The input image dimensions, which is the same size as the input layer of the network. - The format is defined as 'num_channels, height, width'. The image dimension can take on any value as the - network can handle varied dimensions of the input. However, there may be memory constraints if a larger image - dimension is used. Typical image dimensions for image classification are '3, 224, 224'. This is similar to the ImageNet dataset. + The format is defined as 'num_channels, height, width'. The image dimension can take on + any value as the network can handle varied dimensions of the input. However, there may + be memory constraints if a larger image dimension is used. Typical image dimensions for + image classification are '3, 224, 224'. This is similar to the ImageNet dataset. Default value: ‘3, 224, 224’ - augmentation_type: (str): Data augmentation type. The input images can be augmented in multiple ways as specified below. + augmentation_type: (str): Data augmentation type. The input images can be augmented in multiple ways as + specified below. 
'crop' - Randomly crop the image and flip the image horizontally - 'crop_color' - In addition to ‘crop’, three random values in the range [-36, 36], [-50, 50], and [-50, 50] - are added to the corresponding Hue-Saturation-Lightness channels respectively - 'crop_color_transform': In addition to crop_color, random transformations, including rotation, - shear, and aspect ratio variations are applied to the image. The maximum angle of rotation - is 10 degrees, the maximum shear ratio is 0.1, and the maximum aspect changing ratio is 0.25. + 'crop_color' - In addition to ‘crop’, three random values in the range [-36, 36], + [-50, 50], and [-50, 50] + are added to the corresponding Hue-Saturation-Lightness channels resptly. + 'crop_color_transform': In addition to crop_color, random transformations, including + rotation, shear, and aspect ratio variations are applied to the image. + The maximum angle of rotation is 10 degrees, the maximum shear ratio is 0.1, + and the maximum aspect changing ratio is 0.25. top_k (int): Report the top-k accuracy during training. This parameter has to be greater than 1, - since the top-1 training accuracy is the same as the regular training accuracy that has already been reported. - kv_store (str): Weight update synchronization mode during distributed training. The weight updates can be updated either synchronously - or asynchronously across machines. Synchronous updates typically provide better accuracy than asynchronous - updates but can be slower. See distributed training in MXNet for more details. This parameter is not applicable + since the top-1 training accuracy is the same as the regular training accuracy that has + already been reported. + kv_store (str): Weight update synchronization mode during distributed training. The weight updates can be + updated either synchronously or asynchronously across machines. Synchronous updates + typically provide better accuracy than asynchronous updates but can be slower. 
+ See distributed training in MXNet for more details. This parameter is not applicable to single machine training. - 'dist_sync' - The gradients are synchronized after every batch with all the workers. With dist_sync, - batch-size now means the batch size used on each machine. So if there are n machines and we use + 'dist_sync' - The gradients are synchronized after every batch with all the workers. + With dist_sync, + batch-size now means the batch size used on each machine. So if there are n + machines and we use batch size b, then dist_sync behaves like local with batch size n*b - 'dist_async'- Performs asynchronous updates. The weights are updated whenever gradients are received from any - machine and the weight updates are atomic. However, the order is not guaranteed. + 'dist_async'- Performs asynchronous updates. The weights are updated whenever gradients + are received from any machine and the weight updates are atomic. However, the + order is not guaranteed. **kwargs: base class keyword argument values. 
""" super(ImageClassification, self).__init__(role, train_instance_count, train_instance_type, From c5ead9a315f3a8bf227024cc77f404bff68f4b35 Mon Sep 17 00:00:00 2001 From: Ragav Venkatesan Date: Thu, 15 Feb 2018 12:26:51 -0800 Subject: [PATCH 14/19] merge conflicts --- src/sagemaker/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/sagemaker/__init__.py b/src/sagemaker/__init__.py index 6b25b5307e..9e34472188 100644 --- a/src/sagemaker/__init__.py +++ b/src/sagemaker/__init__.py @@ -35,4 +35,5 @@ LinearLearnerModel, LinearLearnerPredictor, LDA, LDAModel, LDAPredictor, FactorizationMachines, FactorizationMachinesModel, FactorizationMachinesPredictor, + ImageClassification, ImageClassificationModel, ImageClassificationPredictor container_def, s3_input, production_variant, get_execution_role] From 8e305fa6bdc59ceca3209376ff674d70dd4c6ddc Mon Sep 17 00:00:00 2001 From: Ragav Venkatesan Date: Thu, 15 Feb 2018 13:29:19 -0800 Subject: [PATCH 15/19] unit tests fixed --- src/sagemaker/__init__.py | 2 +- src/sagemaker/amazon/amazon_estimator.py | 2 +- src/sagemaker/amazon/image_classification.py | 28 ++++++++++---------- src/sagemaker/amazon/validation.py | 7 +---- tests/unit/test_amazon_estimator.py | 10 +++---- 5 files changed, 21 insertions(+), 28 deletions(-) diff --git a/src/sagemaker/__init__.py b/src/sagemaker/__init__.py index 9e34472188..b94151943e 100644 --- a/src/sagemaker/__init__.py +++ b/src/sagemaker/__init__.py @@ -35,5 +35,5 @@ LinearLearnerModel, LinearLearnerPredictor, LDA, LDAModel, LDAPredictor, FactorizationMachines, FactorizationMachinesModel, FactorizationMachinesPredictor, - ImageClassification, ImageClassificationModel, ImageClassificationPredictor + ImageClassification, ImageClassificationModel, ImageClassificationPredictor, container_def, s3_input, production_variant, get_execution_role] diff --git a/src/sagemaker/amazon/amazon_estimator.py b/src/sagemaker/amazon/amazon_estimator.py index 845c330ce3..71cd7cf2f1 100644 --- 
a/src/sagemaker/amazon/amazon_estimator.py +++ b/src/sagemaker/amazon/amazon_estimator.py @@ -158,7 +158,7 @@ class AmazonS3AlgorithmEstimatorBase(EstimatorBase): intended to be instantiated directly. This is difference from the base class because this class handles S3 data""" - mini_batch_size = hp('mini_batch_size', (validation.isint, validation.gt(0))) + mini_batch_size = hp('mini_batch_size', (validation, validation.gt(0))) def __init__(self, role, train_instance_count, train_instance_type, algorithm, **kwargs): """Initialize an AmazonAlgorithmEstimatorBase. diff --git a/src/sagemaker/amazon/image_classification.py b/src/sagemaker/amazon/image_classification.py index 8c085d33e7..5a2e08710b 100644 --- a/src/sagemaker/amazon/image_classification.py +++ b/src/sagemaker/amazon/image_classification.py @@ -13,7 +13,7 @@ from sagemaker.amazon.amazon_estimator import AmazonS3AlgorithmEstimatorBase, registry from sagemaker.amazon.common import file_to_image_serializer, response_deserializer from sagemaker.amazon.hyperparameter import Hyperparameter as hp -from sagemaker.amazon.validation import gt, isin, isint, ge, isstr, le +from sagemaker.amazon.validation import gt, isin, ge, le from sagemaker.predictor import RealTimePredictor from sagemaker.model import Model from sagemaker.session import Session @@ -33,24 +33,24 @@ class ImageClassification(AmazonS3AlgorithmEstimatorBase): 'num_layers should be in the set [18, 34, 50, 101, 152, 200, 20, 32, 44, 56, 110]', int) resize = hp('resize', (gt(1)), 'resize should be an integer greater-than 1', int) epochs = hp('epochs', (ge(1)), 'epochs should be an integer greater-than 1', int) - learning_rate = hp('learning_rate', (gt(0)), 'learning_rate shoudl be a floating point greater than 0') + learning_rate = hp('learning_rate', (gt(0)), 'learning_rate should be a floating point greater than 0', float) lr_scheduler_factor = hp('lr_scheduler_factor', (gt(0)), - 'lr_schedule_factor should be a floating point greater than 0') + 
'lr_schedule_factor should be a floating point greater than 0', float) lr_scheduler_step = hp('lr_scheduler_step', (), 'lr_scheduler_step should be a string input.', str) optimizer = hp('optimizer', (isin('sgd', 'adam', 'rmsprop', 'nag')), - 'Should be one optimizer among the list sgd, adam, rmsprop, or nag.') - momentum = hp('momentum', (ge(0), le(1)), 'momentum is expected in the range 0, 1') - weight_decay = hp('weight_decay', (ge(0), le(1)), 'weight_decay in range 0 , 1 ') - beta_1 = hp('beta_1', (ge(0), le(1)), 'beta_1 should be in range 0, 1') - beta_2 = hp('beta_2', (ge(0), le(1)), 'beta_2 should be in the range 0, 1') - eps = hp('eps', (gt(0), le(1)), 'eps should be in the range 0, 1') - gamma = hp('gamma', (ge(0), le(1)), 'gamma should be in the range 0, 1') - mini_batch_size = hp('mini_batch_size', (gt(0)), 'mini_batch_size should be an integer greater than 0') + 'Should be one optimizer among the list sgd, adam, rmsprop, or nag.', str) + momentum = hp('momentum', (ge(0), le(1)), 'momentum is expected in the range 0, 1', float) + weight_decay = hp('weight_decay', (ge(0), le(1)), 'weight_decay in range 0 , 1 ', float) + beta_1 = hp('beta_1', (ge(0), le(1)), 'beta_1 should be in range 0, 1', float) + beta_2 = hp('beta_2', (ge(0), le(1)), 'beta_2 should be in the range 0, 1', float) + eps = hp('eps', (gt(0), le(1)), 'eps should be in the range 0, 1', float) + gamma = hp('gamma', (ge(0), le(1)), 'gamma should be in the range 0, 1', float ) + mini_batch_size = hp('mini_batch_size', (gt(0)), 'mini_batch_size should be an integer greater than 0', int) image_shape = hp('image_shape', (), 'image_shape is expected to be a string', str) - augmentation_type = hp('beta_1', (isin('crop', 'crop_color', 'crop_color_transform')), - 'beta_1 must be from one option offered') + augmentation_type = hp('augmentation_type', (isin('crop', 'crop_color', 'crop_color_transform')), + 'augmentation type must be from one option offered', str) top_k = hp('top_k', (ge(1)), 'top_k should 
be greater than or equal to 1', int) - kv_store = hp('kv_store', (isin('dist_sync', 'dist_async')), 'Can be dist_sync or dist_async') + kv_store = hp('kv_store', (isin('dist_sync', 'dist_async')), 'Can be dist_sync or dist_async', str) def __init__(self, role, train_instance_count, train_instance_type, num_classes, num_training_samples, resize=None, lr_scheduler_step=None, use_pretrained_model=0, checkpoint_frequency=1, num_layers=18, diff --git a/src/sagemaker/amazon/validation.py b/src/sagemaker/amazon/validation.py index 4bb5ee6bba..3c4c484fa6 100644 --- a/src/sagemaker/amazon/validation.py +++ b/src/sagemaker/amazon/validation.py @@ -43,9 +43,4 @@ def validate(value): def istype(expected): def validate(value): return isinstance(value, expected) - return validate - -isint = istype(int) -isbool = istype(bool) -isstr = istype(str) -isnumber = istype(numbers.Number) # noqa \ No newline at end of file + return validate \ No newline at end of file diff --git a/tests/unit/test_amazon_estimator.py b/tests/unit/test_amazon_estimator.py index 9915df6e81..11ad3e9996 100644 --- a/tests/unit/test_amazon_estimator.py +++ b/tests/unit/test_amazon_estimator.py @@ -81,7 +81,7 @@ def test_init_all_pca_hyperparameters(sagemaker_session): def test_init_all_ic_hyperparameters(sagemaker_session): - ic = ImageClassification(data_location='s3://some-bucket/some-key/', + ic = ImageClassification( num_classes=257, num_training_samples=15420, epochs=1, image_shape='3,32,32', sagemaker_session=sagemaker_session, **COMMON_ARGS) @@ -101,14 +101,13 @@ def test_init_estimator_args(sagemaker_session): def test_init_s3estimator_args(sagemaker_session): - ic = ImageClassification(data_location='s3://some-bucket/some-key/', + ic = ImageClassification( num_classes=257, num_training_samples=15420, epochs=1, image_shape='3,32,32', sagemaker_session=sagemaker_session, **COMMON_ARGS) assert ic.train_instance_type == COMMON_ARGS['train_instance_type'] assert ic.train_instance_count == 
COMMON_ARGS['train_instance_count'] assert ic.role == COMMON_ARGS['role'] - assert ic.data_location == 's3://some-bucket/some-key/' def test_data_location_validation(sagemaker_session): @@ -127,9 +126,8 @@ def test_pca_hyperparameters(sagemaker_session): subtract_mean='True', algorithm_mode='randomized') - def test_ic_hyperparameters(sagemaker_session): - ic = ImageClassification(data_location='s3://some-bucket/some-key/', + ic = ImageClassification( num_classes=257, num_training_samples=15420, epochs=1, image_shape='3,32,32', sagemaker_session=sagemaker_session, **COMMON_ARGS) @@ -139,7 +137,7 @@ def test_ic_hyperparameters(sagemaker_session): def test_image(sagemaker_session): pca = PCA(num_components=55, sagemaker_session=sagemaker_session, **COMMON_ARGS) assert pca.train_image() == registry('us-west-2') + '/pca:1' - ic = ImageClassification(data_location='s3://some-bucket/some-key/', + ic = ImageClassification( num_classes=257, num_training_samples=15420, epochs=1, image_shape='3,32,32', sagemaker_session=sagemaker_session, **COMMON_ARGS) From 9c9469f5e2d0c96043c50f304221b0ed0b58b113 Mon Sep 17 00:00:00 2001 From: Ragav Venkatesan Date: Thu, 15 Feb 2018 14:30:15 -0800 Subject: [PATCH 16/19] flake errors fixed --- src/sagemaker/amazon/common.py | 11 +++----- src/sagemaker/amazon/image_classification.py | 28 ++++++++++---------- src/sagemaker/amazon/validation.py | 12 +++++---- tests/integ/test_image_classification.py | 2 +- tests/unit/test_amazon_estimator.py | 26 +++++++++--------- 5 files changed, 40 insertions(+), 39 deletions(-) diff --git a/src/sagemaker/amazon/common.py b/src/sagemaker/amazon/common.py index 4b3f511cfb..3f1d1b3f49 100644 --- a/src/sagemaker/amazon/common.py +++ b/src/sagemaker/amazon/common.py @@ -21,7 +21,6 @@ class numpy_to_record_serializer(object): - def __init__(self, content_type='application/x-recordio-protobuf'): self.content_type = content_type @@ -36,7 +35,6 @@ def __call__(self, array): class file_to_image_serializer(object): - 
def __init__(self, content_type='application/x-image'): self.content_type = content_type @@ -46,8 +44,8 @@ def __call__(self, file): payload = bytearray(payload) return payload -class record_deserializer(object): +class record_deserializer(object): def __init__(self, accept='application/x-recordio-protobuf'): self.accept = accept @@ -59,7 +57,6 @@ def __call__(self, stream, content_type): class response_deserializer(object): - def __init__(self, accept='application/json'): self.accept = accept @@ -114,7 +111,7 @@ def write_numpy_to_dense_tensor(file, array, labels=None): raise ValueError("Labels must be a Vector") if labels.shape[0] not in array.shape: raise ValueError("Label shape {} not compatible with array shape {}".format( - labels.shape, array.shape)) + labels.shape, array.shape)) resolved_label_type = _resolve_type(labels.dtype) resolved_type = _resolve_type(array.dtype) @@ -142,7 +139,7 @@ def write_spmatrix_to_sparse_tensor(file, array, labels=None): raise ValueError("Labels must be a Vector") if labels.shape[0] not in array.shape: raise ValueError("Label shape {} not compatible with array shape {}".format( - labels.shape, array.shape)) + labels.shape, array.shape)) resolved_label_type = _resolve_type(labels.dtype) resolved_type = _resolve_type(array.dtype) @@ -202,7 +199,7 @@ def _write_recordio(f, data): def _read_recordio(f): - while(True): + while (True): try: read_kmagic, = struct.unpack('I', f.read(4)) except struct.error: diff --git a/src/sagemaker/amazon/image_classification.py b/src/sagemaker/amazon/image_classification.py index 5a2e08710b..311edb96f1 100644 --- a/src/sagemaker/amazon/image_classification.py +++ b/src/sagemaker/amazon/image_classification.py @@ -25,11 +25,11 @@ class ImageClassification(AmazonS3AlgorithmEstimatorBase): num_classes = hp('num_classes', (gt(1)), 'num_classes should be an integer greater-than 1', int) num_training_samples = hp('num_training_samples', (gt(1)), 'num_training_samples should be an integer greater-than 1', 
int) - use_pretrained_model = hp('use_pretrained_model', (isin(0, 1), ), + use_pretrained_model = hp('use_pretrained_model', (isin(0, 1),), 'use_pretrained_model should be in the set, [0,1]', int) - checkpoint_frequency = hp('checkpoint_frequency', (ge(1), ), + checkpoint_frequency = hp('checkpoint_frequency', (ge(1),), 'checkpoint_frequency should be an integer greater-than 1', int) - num_layers = hp('num_layers', (isin(18, 34, 50, 101, 152, 200, 20, 32, 44, 56, 110), ), + num_layers = hp('num_layers', (isin(18, 34, 50, 101, 152, 200, 20, 32, 44, 56, 110),), 'num_layers should be in the set [18, 34, 50, 101, 152, 200, 20, 32, 44, 56, 110]', int) resize = hp('resize', (gt(1)), 'resize should be an integer greater-than 1', int) epochs = hp('epochs', (ge(1)), 'epochs should be an integer greater-than 1', int) @@ -44,7 +44,7 @@ class ImageClassification(AmazonS3AlgorithmEstimatorBase): beta_1 = hp('beta_1', (ge(0), le(1)), 'beta_1 should be in range 0, 1', float) beta_2 = hp('beta_2', (ge(0), le(1)), 'beta_2 should be in the range 0, 1', float) eps = hp('eps', (gt(0), le(1)), 'eps should be in the range 0, 1', float) - gamma = hp('gamma', (ge(0), le(1)), 'gamma should be in the range 0, 1', float ) + gamma = hp('gamma', (ge(0), le(1)), 'gamma should be in the range 0, 1', float) mini_batch_size = hp('mini_batch_size', (gt(0)), 'mini_batch_size should be an integer greater than 0', int) image_shape = hp('image_shape', (), 'image_shape is expected to be a string', str) augmentation_type = hp('augmentation_type', (isin('crop', 'crop_color', 'crop_color_transform')), @@ -85,13 +85,13 @@ def __init__(self, role, train_instance_count, train_instance_type, num_classes, train_instance_type (str): Type of EC2 instance to use for training, for example, 'ml.c4.xlarge'. num_classes (int): Number of output classes. This parameter defines the dimensions of the network output and is typically set to the number of classes in the dataset. 
- num_training_samples (int): Number of training examples in the input dataset. If there is a - mismatch between this value and the number of samples in the training - set, then the behavior of the lr_scheduler_step parameter is undefined + num_training_samples (int): Number of training examples in the input dataset. If there is a + mismatch between this value and the number of samples in the training + set, then the behavior of the lr_scheduler_step parameter is undefined and distributed training accuracy might be affected. - use_pretrained_model (int): Flag to indicate whether to use pre-trained model for training. - If set to `1`, then the pretrained model with the corresponding number - of layers is loaded and used for training. Only the top FC layer are + use_pretrained_model (int): Flag to indicate whether to use pre-trained model for training. + If set to `1`, then the pretrained model with the corresponding number + of layers is loaded and used for training. Only the top FC layer are reinitialized with random weights. Otherwise, the network is trained from scratch. Default value: 0 checkpoint_frequency (int): Period to store model parameters (in number of epochs). Default value: 1 @@ -136,7 +136,7 @@ def __init__(self, role, train_instance_count, train_instance_type, num_classes, mini_batch_size/num_gpu training samples. For the multi-machine training in dist_sync mode, the actual batch size is mini_batch_size*number of machines. See MXNet docs for more details. Default value: 32 - image_shape (str): The input image dimensions, which is the same size as the input layer of the network. + image_shape (str): The input image dimensions, which is the same size as the input layer of the network. \ The format is defined as 'num_channels, height, width'. The image dimension can take on any value as the network can handle varied dimensions of the input. However, there may be memory constraints if a larger image dimension is used. 
Typical image dimensions for @@ -152,7 +152,7 @@ def __init__(self, role, train_instance_count, train_instance_type, num_classes, rotation, shear, and aspect ratio variations are applied to the image. The maximum angle of rotation is 10 degrees, the maximum shear ratio is 0.1, and the maximum aspect changing ratio is 0.25. - top_k (int): Report the top-k accuracy during training. This parameter has to be greater than 1, + top_k (int): Report the top-k accuracy during training. This parameter has to be greater than 1, since the top-1 training accuracy is the same as the regular training accuracy that has already been reported. kv_store (str): Weight update synchronization mode during distributed training. The weight updates can be @@ -228,8 +228,8 @@ class ImageClassificationModel(Model): def __init__(self, model_data, role, sagemaker_session=None): sagemaker_session = sagemaker_session or Session() - image = registry(sagemaker_session.boto_session.region_name, algorithm='image_classification') + \ - "/" + ImageClassification.repo + image = registry(sagemaker_session.boto_session.region_name, + 'image_classification') + "/" + ImageClassification.repo super(ImageClassificationModel, self).__init__(model_data, image, role, predictor_cls=ImageClassificationPredictor, sagemaker_session=sagemaker_session) diff --git a/src/sagemaker/amazon/validation.py b/src/sagemaker/amazon/validation.py index def9efc65f..ed9c722291 100644 --- a/src/sagemaker/amazon/validation.py +++ b/src/sagemaker/amazon/validation.py @@ -15,38 +15,40 @@ def gt(minimum): def validate(value): return value > minimum + return validate def ge(minimum): def validate(value): return value >= minimum + return validate def lt(maximum): def validate(value): return value < maximum - return validate -def le(maximum): - def validate(value): - return value <= maximum return validate + def le(maximum): def validate(value): return value <= maximum + return validate def isin(*expected): def validate(value): return 
value in expected + return validate def istype(expected): def validate(value): return isinstance(value, expected) - return validate \ No newline at end of file + + return validate diff --git a/tests/integ/test_image_classification.py b/tests/integ/test_image_classification.py index d5d0ed1003..8eb2c2f72b 100644 --- a/tests/integ/test_image_classification.py +++ b/tests/integ/test_image_classification.py @@ -17,7 +17,7 @@ import sagemaker from sagemaker import ImageClassification, ImageClassificationModel from sagemaker.utils import name_from_base -from tests.integ import DATA_DIR, REGION +from tests.integ import REGION from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name import urllib diff --git a/tests/unit/test_amazon_estimator.py b/tests/unit/test_amazon_estimator.py index 11ad3e9996..c38e01ac74 100644 --- a/tests/unit/test_amazon_estimator.py +++ b/tests/unit/test_amazon_estimator.py @@ -82,9 +82,9 @@ def test_init_all_pca_hyperparameters(sagemaker_session): def test_init_all_ic_hyperparameters(sagemaker_session): ic = ImageClassification( - num_classes=257, num_training_samples=15420, epochs=1, - image_shape='3,32,32', sagemaker_session=sagemaker_session, - **COMMON_ARGS) + num_classes=257, num_training_samples=15420, epochs=1, + image_shape='3,32,32', sagemaker_session=sagemaker_session, + **COMMON_ARGS) assert ic.num_classes == 257 assert ic.num_training_samples == 15420 assert ic.image_shape == '3,32,32' @@ -102,9 +102,9 @@ def test_init_estimator_args(sagemaker_session): def test_init_s3estimator_args(sagemaker_session): ic = ImageClassification( - num_classes=257, num_training_samples=15420, epochs=1, - image_shape='3,32,32', sagemaker_session=sagemaker_session, - **COMMON_ARGS) + num_classes=257, num_training_samples=15420, epochs=1, + image_shape='3,32,32', sagemaker_session=sagemaker_session, + **COMMON_ARGS) assert ic.train_instance_type == COMMON_ARGS['train_instance_type'] assert ic.train_instance_count == 
COMMON_ARGS['train_instance_count'] assert ic.role == COMMON_ARGS['role'] @@ -126,11 +126,12 @@ def test_pca_hyperparameters(sagemaker_session): subtract_mean='True', algorithm_mode='randomized') + def test_ic_hyperparameters(sagemaker_session): ic = ImageClassification( - num_classes=257, num_training_samples=15420, epochs=1, - image_shape='3,32,32', sagemaker_session=sagemaker_session, - **COMMON_ARGS) + num_classes=257, num_training_samples=15420, epochs=1, + image_shape='3,32,32', sagemaker_session=sagemaker_session, + **COMMON_ARGS) assert isinstance(ic.hyperparameters(), dict) @@ -138,9 +139,10 @@ def test_image(sagemaker_session): pca = PCA(num_components=55, sagemaker_session=sagemaker_session, **COMMON_ARGS) assert pca.train_image() == registry('us-west-2') + '/pca:1' ic = ImageClassification( - num_classes=257, num_training_samples=15420, epochs=1, - image_shape='3,32,32', sagemaker_session=sagemaker_session, - **COMMON_ARGS) + num_classes=257, num_training_samples=15420, epochs=1, + image_shape='3,32,32', sagemaker_session=sagemaker_session, + **COMMON_ARGS) + assert ic.train_image() == registry('us-west-2', 'image_classification') + '/image-classification:latest' @patch('time.strftime', return_value=TIMESTAMP) From 8a4f3eaf88b604a73bc5c69da7b577bf0ca44092 Mon Sep 17 00:00:00 2001 From: Ragav Venkatesan Date: Fri, 16 Feb 2018 12:38:08 -0800 Subject: [PATCH 17/19] integ tests environment fix --- tests/integ/test_image_classification.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/integ/test_image_classification.py b/tests/integ/test_image_classification.py index 8eb2c2f72b..8550bccfd3 100644 --- a/tests/integ/test_image_classification.py +++ b/tests/integ/test_image_classification.py @@ -19,13 +19,12 @@ from sagemaker.utils import name_from_base from tests.integ import REGION from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name -import urllib - +from six.moves.urllib.request import urlretrieve def 
download(url): filename = url.split("/")[-1] if not os.path.exists(filename): - urllib.request.urlretrieve(url, filename) + urlretrieve(url, filename) def upload_to_s3(channel, file, bucket): From 8557394cba7b0551bfb166b65d492f80201f8cab Mon Sep 17 00:00:00 2001 From: Ragav Venkatesan Date: Fri, 16 Feb 2018 12:43:16 -0800 Subject: [PATCH 18/19] flake fails fixed --- tests/integ/test_image_classification.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integ/test_image_classification.py b/tests/integ/test_image_classification.py index 8550bccfd3..289eef0bda 100644 --- a/tests/integ/test_image_classification.py +++ b/tests/integ/test_image_classification.py @@ -21,6 +21,7 @@ from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name from six.moves.urllib.request import urlretrieve + def download(url): filename = url.split("/")[-1] if not os.path.exists(filename): From 5754cba47b293f7c153813241289ecc4bacf43f4 Mon Sep 17 00:00:00 2001 From: Ragav Venkatesan Date: Thu, 1 Mar 2018 13:47:00 -0800 Subject: [PATCH 19/19] answered all the review suggestions --- src/sagemaker/amazon/amazon_estimator.py | 2 +- src/sagemaker/amazon/common.py | 4 +- src/sagemaker/amazon/image_classification.py | 48 +++++++++----------- tests/integ/test_image_classification.py | 2 +- 4 files changed, 26 insertions(+), 30 deletions(-) diff --git a/src/sagemaker/amazon/amazon_estimator.py b/src/sagemaker/amazon/amazon_estimator.py index 43b2fb0843..e00138ea80 100644 --- a/src/sagemaker/amazon/amazon_estimator.py +++ b/src/sagemaker/amazon/amazon_estimator.py @@ -164,7 +164,7 @@ def __init__(self, role, train_instance_count, train_instance_type, algorithm, * """Initialize an AmazonAlgorithmEstimatorBase. 
Args: - algortihm (str): Use one of the supported algorithms + algorithm (str): Use one of the supported algorithms """ super(AmazonS3AlgorithmEstimatorBase, self).__init__(role, train_instance_count, train_instance_type, **kwargs) diff --git a/src/sagemaker/amazon/common.py b/src/sagemaker/amazon/common.py index 3f1d1b3f49..d9e35ee4ff 100644 --- a/src/sagemaker/amazon/common.py +++ b/src/sagemaker/amazon/common.py @@ -11,12 +11,12 @@ # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. import io +import json import struct import sys import numpy as np from scipy.sparse import issparse -import json from sagemaker.amazon.record_pb2 import Record @@ -199,7 +199,7 @@ def _write_recordio(f, data): def _read_recordio(f): - while (True): + while(True): try: read_kmagic, = struct.unpack('I', f.read(4)) except struct.error: diff --git a/src/sagemaker/amazon/image_classification.py b/src/sagemaker/amazon/image_classification.py index 311edb96f1..0535bba11c 100644 --- a/src/sagemaker/amazon/image_classification.py +++ b/src/sagemaker/amazon/image_classification.py @@ -1,4 +1,4 @@ -# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of @@ -12,10 +12,10 @@ # language governing permissions and limitations under the License. 
from sagemaker.amazon.amazon_estimator import AmazonS3AlgorithmEstimatorBase, registry from sagemaker.amazon.common import file_to_image_serializer, response_deserializer -from sagemaker.amazon.hyperparameter import Hyperparameter as hp from sagemaker.amazon.validation import gt, isin, ge, le -from sagemaker.predictor import RealTimePredictor +from sagemaker.amazon.hyperparameter import Hyperparameter as hp from sagemaker.model import Model +from sagemaker.predictor import RealTimePredictor from sagemaker.session import Session @@ -59,8 +59,7 @@ def __init__(self, role, train_instance_count, train_instance_type, num_classes, beta_2=0.999, eps=1e-8, gamma=0.9, mini_batch_size=32, image_shape='3,224,224', augmentation_type=None, top_k=None, kv_store=None, **kwargs): """ - An Image classification algorithm :class:`~sagemaker.amazon.AmazonAlgorithmEstimatorBase`. Learns a classifier - model that + An Image classification algorithm :class:`~sagemaker.amazon.AmazonAlgorithmEstimatorBase`. This Estimator may be fit via calls to :meth:`~sagemaker.amazon.amazon_estimator.AmazonS3AlgorithmEstimatorBase.fit` @@ -80,7 +79,6 @@ def __init__(self, role, train_instance_count, train_instance_type, num_classes, APIs that create Amazon SageMaker endpoints use this role to access training data and model artifacts. After the endpoint is created, the inference code might use the IAM role, if accessing AWS resource. - For more information, see ???. train_instance_count (int): Number of Amazon EC2 instances to use for training. train_instance_type (str): Type of EC2 instance to use for training, for example, 'ml.c4.xlarge'. num_classes (int): Number of output classes. This parameter defines the dimensions of the network output @@ -132,7 +130,7 @@ def __init__(self, role, train_instance_count, train_instance_type, num_classes, Range in [0, 1]. Default value: 1e-8 gamma (float): The gamma for rmsprop. A decay factor of moving average of the squared gradient. Range in [0, 1]. 
Default value: 0.9 - mini_batch_size (int): The batch size for training. In a single-machine multi-GPU setting, each GPU handles + mini_batch_size (int): The batch size for training. In a single-machine multi-GPU setting, each GPU handles mini_batch_size/num_gpu training samples. For the multi-machine training in dist_sync mode, the actual batch size is mini_batch_size*number of machines. See MXNet docs for more details. Default value: 32 @@ -142,7 +140,7 @@ def __init__(self, role, train_instance_count, train_instance_type, num_classes, be memory constraints if a larger image dimension is used. Typical image dimensions for image classification are '3, 224, 224'. This is similar to the ImageNet dataset. Default value: ‘3, 224, 224’ - augmentation_type: (str): Data augmentation type. The input images can be augmented in multiple ways as + augmentation_type (str): Data augmentation type. The input images can be augmented in multiple ways as specified below. 'crop' - Randomly crop the image and flip the image horizontally 'crop_color' - In addition to ‘crop’, three random values in the range [-36, 36], @@ -151,23 +149,23 @@ def __init__(self, role, train_instance_count, train_instance_type, num_classes, 'crop_color_transform': In addition to crop_color, random transformations, including rotation, shear, and aspect ratio variations are applied to the image. The maximum angle of rotation is 10 degrees, the maximum shear ratio is 0.1, - and the maximum aspect changing ratio is 0.25. + and the maximum aspect changing ratio is 0.25. top_k (int): Report the top-k accuracy during training. This parameter has to be greater than 1, since the top-1 training accuracy is the same as the regular training accuracy that has already been reported. kv_store (str): Weight update synchronization mode during distributed training. The weight updates can be - updated either synchronously or asynchronously across machines. 
Synchronous updates - typically provide better accuracy than asynchronous updates but can be slower. - See distributed training in MXNet for more details. This parameter is not applicable - to single machine training. - 'dist_sync' - The gradients are synchronized after every batch with all the workers. - With dist_sync, - batch-size now means the batch size used on each machine. So if there are n - machines and we use - batch size b, then dist_sync behaves like local with batch size n*b - 'dist_async'- Performs asynchronous updates. The weights are updated whenever gradients - are received from any machine and the weight updates are atomic. However, the - order is not guaranteed. + updated either synchronously or asynchronously across machines. Synchronous updates + typically provide better accuracy than asynchronous updates but can be slower. + See distributed training in MXNet for more details. This parameter is not applicable + to single machine training. + 'dist_sync' - The gradients are synchronized after every batch with all the workers. + With dist_sync, + batch-size now means the batch size used on each machine. So if there are n + machines and we use + batch size b, then dist_sync behaves like local with batch size n*b + 'dist_async'- Performs asynchronous updates. The weights are updated whenever gradients + are received from any machine and the weight updates are atomic. However, the + order is not guaranteed. **kwargs: base class keyword argument values. """ super(ImageClassification, self).__init__(role, train_instance_count, train_instance_type, @@ -211,9 +209,7 @@ class ImageClassificationPredictor(RealTimePredictor): """Assigns input vectors to their closest cluster in a ImageClassification model. The implementation of :meth:`~sagemaker.predictor.RealTimePredictor.predict` in this - `RealTimePredictor` requires a `x-image` as input. 
- - ``predict()`` returns """ + `RealTimePredictor` requires an `x-image` as input.""" def __init__(self, endpoint, sagemaker_session=None): super(ImageClassificationPredictor, self).__init__(endpoint, sagemaker_session, @@ -223,8 +219,8 @@ def __init__(self, endpoint, sagemaker_session=None): class ImageClassificationModel(Model): - """Reference KMeans s3 model data. Calling :meth:`~sagemaker.model.Model.deploy` creates an Endpoint and return - a Predictor to performs classification assignment.""" + """Reference ImageClassification s3 model data. Calling :meth:`~sagemaker.model.Model.deploy` creates an Endpoint and + returns a Predictor that performs classification.""" def __init__(self, model_data, role, sagemaker_session=None): sagemaker_session = sagemaker_session or Session() diff --git a/tests/integ/test_image_classification.py b/tests/integ/test_image_classification.py index 289eef0bda..b71f8b74a3 100644 --- a/tests/integ/test_image_classification.py +++ b/tests/integ/test_image_classification.py @@ -1,4 +1,4 @@ -# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of