diff --git a/.gitignore b/.gitignore index d67b44258a..02fbd0283e 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,4 @@ doc/_build **/.DS_Store venv/ *~ +.pytest_cache/ diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 86b55eb7fa..fc8b4c9882 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,11 +2,41 @@ CHANGELOG ========= +1.1.0 +===== + +* feature: Estimators: add support for TensorFlow-1.5.0 +* feature: Estimators: add support for MXNet-1.0.0 +* feature: Tests: use ``sagemaker_timestamp`` when creating endpoint names in integration tests +* feature: Session: print out billable seconds after training completes +* bug-fix: Estimators: fix LinearLearner and add unit tests +* bug-fix: Tests: fix timeouts for PCA async integration test +* feature: Predictors: allow ``predictor.predict()`` in the JSON serializer to accept dictionaries + +1.0.4 +===== + +* feature: Estimators: add support for Amazon Neural Topic Model(NTM) algorithm +* feature: Documentation: fix description of an argument of sagemaker.session.train +* feature: Documentation: add FM and LDA to the documentation +* feature: Estimators: add support for async fit +* bug-fix: Estimators: fix estimator role expansion + +1.0.3 +===== + +* feature: Estimators: add support for Amazon LDA algorithm +* feature: Hyperparameters: add data_type to hyperparameters +* feature: Documentation: update TensorFlow examples following API change +* feature: Session: support multi-part uploads +* feature: add new SageMaker CLI + + 1.0.2 ===== * feature: Estimators: add support for Amazon FactorizationMachines algorithm -* feature: Session: Correctly handle TooManyBuckets error_code in default_bucket method +* feature: Session: correctly handle TooManyBuckets error_code in default_bucket method * feature: Tests: add training failure tests for TF and MXNet * feature: Documentation: show how to make predictions against existing endpoint * feature: Estimators: implement write_spmatrix_to_sparse_tensor to support any scipy.sparse matrix @@ -27,4 +57,3 @@ CHANGELOG ===== * Initial commit - diff --git a/README.rst b/README.rst index e06dea8b3c..12f8cf9b8e 100644 --- a/README.rst +++ b/README.rst @@ -39,7 +39,7 @@ You can install from source by cloning this repository and issuing a pip install git clone https://github.com/aws/sagemaker-python-sdk.git python setup.py sdist - pip install dist/sagemaker-1.0.0.tar.gz + pip install dist/sagemaker-1.1.0.tar.gz Supported Python versions ~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -97,7 +97,7 @@ SageMaker Python SDK provides several high-level abstractions for working with A - **Estimators**: Encapsulate training on SageMaker. Can be ``fit()`` to run training, then the resulting model ``deploy()`` ed to a SageMaker Endpoint. - **Models**: Encapsulate built ML models. Can be ``deploy()`` ed to a SageMaker Endpoint. - **Predictors**: Provide real-time inference and transformation using Python data-types against a SageMaker Endpoint. -- **Session**: Provides a collection of convience methods for working with SageMaker resources. +- **Session**: Provides a collection of convenience methods for working with SageMaker resources. Estimator and Model implementations for MXNet, TensorFlow, and Amazon ML algorithms are included. There's also an Estimator that runs SageMaker compatible custom Docker containers, allowing you to run your own ML algorithms via SageMaker Python SDK. @@ -114,6 +114,8 @@ MXNet SageMaker Estimators With MXNet Estimators, you can train and host MXNet models on Amazon SageMaker. 
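As a minimal illustration of the Estimator workflow described above (the script name, role, and S3 paths below are placeholders, not values from this changeset), training and hosting an MXNet model looks roughly like this:

.. code:: python

    from sagemaker.mxnet import MXNet

    # 'train.py' and 'SageMakerRole' are hypothetical
    estimator = MXNet('train.py', role='SageMakerRole',
                      train_instance_count=1, train_instance_type='ml.c4.xlarge')
    estimator.fit('s3://my-bucket/my-training-data')

    # deploy() creates a SageMaker Endpoint and returns a Predictor
    predictor = estimator.deploy(initial_instance_count=1, instance_type='ml.c4.xlarge')
    result = predictor.predict([[1.0, 2.0, 3.0]])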
+Supported versions of MXNet: ``1.0.0``, ``0.12.1``. + Training with MXNet ~~~~~~~~~~~~~~~~~~~ @@ -185,7 +187,7 @@ If you want to run your training script locally via the Python interpreter, look Using MXNet and numpy ^^^^^^^^^^^^^^^^^^^^^ -You can import both ``mxnet`` and ``numpy`` in your training script. When your script runs in SageMaker, it will run with access to MXNet version 0.12 and numpy version 1.12.0. For more information on the environment your script runs in, please see `SageMaker MXNet Containers <#sagemaker-mxnet-containers>`__. +You can import both ``mxnet`` and ``numpy`` in your training script. When your script runs in SageMaker, it will run with access to MXNet version 1.0.0 and numpy version 1.13.3 by default. For more information on the environment your script runs in, please see `SageMaker MXNet Containers <#sagemaker-mxnet-containers>`__. Running an MXNet training script in SageMaker ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -682,19 +684,24 @@ When training and deploying training scripts, SageMaker runs your Python script SageMaker runs MXNet Estimator scripts in either Python 2.7 or Python 3.5. You can select the Python version by passing a ``py_version`` keyword arg to the MXNet Estimator constructor. Setting this to ``py2`` (the default) will cause your training script to be run on Python 2.7. Setting this to ``py3`` will cause your training script to be run on Python 3.5. This Python version applies to both the Training Job, created by fit, and the Endpoint, created by deploy. -Your MXNet training script will be run on version 0.12 of MXNet, built for either GPU or CPU use. The decision to use the GPU or CPU version of MXNet is made by the train_instance_type, set on the MXNet constructor. If you choose a GPU instance type, your training job will be run on a GPU version of MXNet. If you choose a CPU instance type, your training job will be run on a CPU version of MXNet. Similarly, when you call deploy, specifying a GPU or CPU deploy_instance_type, will control which MXNet build your Endpoint runs. +Your MXNet training script will be run on version 1.0.0 (by default) or 0.12 of MXNet, built for either GPU or CPU use. The decision to use the GPU or CPU version of MXNet is made by the ``train_instance_type``, set on the MXNet constructor. If you choose a GPU instance type, your training job will be run on a GPU version of MXNet. If you choose a CPU instance type, your training job will be run on a CPU version of MXNet. Similarly, when you call deploy, specifying a GPU or CPU ``deploy_instance_type`` will control which MXNet build your Endpoint runs. -Each Docker container has the following dependencies installed: +The Docker images have the following dependencies installed: -- Python 2.7 or Python 3.5, depending on the ``py_version`` argument on - the MXNet constructor. -- MXNet 0.12, built for either GPU or CPU, depending on the instance - type for training or deploying. -- CUDA 9.0 -- numpy 1.12 ++-------------------------+--------------+-------------+ +| Dependencies | MXNet 0.12.1 | MXNet 1.0.0 | ++-------------------------+--------------+-------------+ +| Python | 2.7 or 3.5 | 2.7 or 3.5 | ++-------------------------+--------------+-------------+ +| CUDA | 9.0 | 9.0 | ++-------------------------+--------------+-------------+ +| numpy | 1.13.3 | 1.13.3 | ++-------------------------+--------------+-------------+ The Docker images extend Ubuntu 16.04. 
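As a hedged sketch of the selection logic just described (the script and role names are placeholders): choosing ``py3`` and a GPU instance type yields a Python 3.5, GPU build of MXNet:

.. code:: python

    from sagemaker.mxnet import MXNet

    # py_version selects the Python runtime; the 'ml.p2.xlarge' GPU instance
    # type selects the GPU build of MXNet
    estimator = MXNet('train.py', role='SageMakerRole',
                      py_version='py3',
                      train_instance_count=1,
                      train_instance_type='ml.p2.xlarge')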
+You can select the version of MXNet by passing a ``framework_version`` keyword arg to the MXNet Estimator constructor. Currently supported versions are ``1.0.0`` and ``0.12.1``. You can also set ``framework_version`` to ``1.0`` (the default) or ``0.12``, which will cause your training script to be run on the latest supported MXNet 1.0 or 0.12 version, respectively. + TensorFlow SageMaker Estimators ------------------------------- @@ -702,6 +709,8 @@ TensorFlow SageMaker Estimators allow you to run your own TensorFlow training algorithms on SageMaker Learner, and to host your own TensorFlow models on SageMaker Hosting. +Supported versions of TensorFlow: ``1.4.1``, ``1.5.0``. + Training with TensorFlow ~~~~~~~~~~~~~~~~~~~~~~~~ @@ -735,7 +744,7 @@ Preparing the TensorFlow training script ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Your TensorFlow training script must be a **Python 2.7** source file. The current supported TensorFlow -version is **1.4.0**. This training script **must contain** the following functions: +versions are **1.5.0 (default)** and **1.4.1**. This training script **must contain** the following functions: - ``model_fn``: defines the model that will be trained. - ``train_input_fn``: preprocess and load training data. @@ -1150,6 +1159,7 @@ Optional arguments - ``wait (bool)``: Defaults to True, whether to block and wait for the training script to complete before returning. + If set to False, ``fit`` returns immediately and the training job can be attached to later. - ``logs (bool)``: Defaults to True, whether to show logs produced by training job in the Python session. Only meaningful when wait is True. - ``run_tensorboard_locally (bool)``: Defaults to False. Executes TensorBoard in a different @@ -1178,9 +1188,25 @@ the ``TensorFlow`` estimator parameter ``training_steps`` is finished or when the job execution time reaches the ``TensorFlow`` estimator parameter ``train_max_run``. When the training job finishes, a `TensorFlow serving `_ -with the result of the training is generated and saved to the S3 location define by +with the result of the training is generated and saved to the S3 location defined by the ``TensorFlow`` estimator parameter ``output_path``. + +If the ``wait=False`` flag is passed to ``fit``, then ``fit`` returns immediately. The training job will continue running +asynchronously. At a later time, a TensorFlow Estimator can be obtained by attaching to the existing training job. If +the training job is not finished, attaching will show the standard output of training and wait until it completes. +After attaching, the estimator can be deployed as usual. + +.. code:: python + + tf_estimator.fit(your_input_data, wait=False) + training_job_name = tf_estimator.latest_training_job.name + + # after some time, or in a separate python notebook, we can attach to it again. + + tf_estimator = TensorFlow.attach(training_job_name=training_job_name) + + The evaluation process """""""""""""""""""""" @@ -1244,6 +1270,8 @@ You can access TensorBoard locally at http://localhost:6006 or using your SageMaker workspace `https*workspace_base_url*proxy/6006/ `_ (TensorBoard will not work if you forget to put the slash, '/', at the end of the URL). If TensorBoard started on a different port, adjust these URLs to match. +Note that TensorBoard is not supported when passing ``wait=False`` to ``fit``. 
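To make the optional ``fit`` arguments above concrete, here is a brief sketch (the estimator construction is omitted and the input path is a placeholder):

.. code:: python

    # block until completion and run a local TensorBoard against checkpoints
    tf_estimator.fit('s3://my-bucket/my-training-data', run_tensorboard_locally=True)

    # block until completion but suppress log output in the Python session
    tf_estimator.fit('s3://my-bucket/my-training-data', logs=False)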
+ Deploying TensorFlow Serving models ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1400,27 +1428,49 @@ A example with ``input_fn`` and ``output_fn`` above can be found in SageMaker TensorFlow Docker containers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The TensorFlow Docker container supports Python 2.7. The Docker container has the following Python modules installed: -- awscli 1.12.1 -- boto3 1.4.7 -- botocore 1.5.92 -- futures 2.2.0 -- gevent 1.2.2 -- grpcio 1.7.0 -- numpy 1.13.3 -- pandas 0.21.0 -- protobuf 3.4.0 -- requests 2.14.2 -- scikit-learn 0.19.1 -- scipy 1.0.0 -- six 1.10.0 -- sklearn 0.0 -- tensorflow 1.4.0 -- tensorflow-serving-api 1.4.0 -- tensorflow-tensorboard 0.4.0rc2 +The TensorFlow Docker images support Python 2.7 and have the following Python modules installed: + ++------------------------+------------------+------------------+ +| Dependencies | tensorflow 1.4.1 | tensorflow 1.5.0 | ++------------------------+------------------+------------------+ +| awscli | 1.12.1 | 1.14.35 | ++------------------------+------------------+------------------+ +| boto3 | 1.4.7 | 1.5.22 | ++------------------------+------------------+------------------+ +| botocore | 1.5.92 | 1.8.36 | ++------------------------+------------------+------------------+ +| futures | 2.2.0 | 2.2.0 | ++------------------------+------------------+------------------+ +| gevent | 1.2.2 | 1.2.2 | ++------------------------+------------------+------------------+ +| grpcio | 1.7.0 | 1.9.0 | ++------------------------+------------------+------------------+ +| numpy | 1.13.3 | 1.14.0 | ++------------------------+------------------+------------------+ +| pandas | 0.21.0 | 0.22.0 | ++------------------------+------------------+------------------+ +| protobuf | 3.4.0 | 3.5.1 | ++------------------------+------------------+------------------+ +| requests | 2.14.2 | 2.18.4 | ++------------------------+------------------+------------------+ +| scikit-learn | 0.19.1 | 0.19.1 | ++------------------------+------------------+------------------+ +| scipy | 1.0.0 | 1.0.0 | ++------------------------+------------------+------------------+ +| six | 1.10.0 | 1.10.0 | ++------------------------+------------------+------------------+ +| sklearn | 0.0 | 0.0 | ++------------------------+------------------+------------------+ +| tensorflow | 1.4.1 | 1.5.0 | ++------------------------+------------------+------------------+ +| tensorflow-serving-api | 1.4.0 | 1.5.0 | ++------------------------+------------------+------------------+ +| tensorflow-tensorboard | 0.4.0 | 1.5.1 | ++------------------------+------------------+------------------+ The Docker images extend Ubuntu 16.04. +You can select version of TensorFlow by passing a ``framework_version`` keyword arg to the TensorFlow Estimator constructor. Currently supported versions are ``1.5.0`` and ``1.4.1``. You can also set ``framework_version`` to ``1.5 (default)`` or ``1.4`` which will cause your training script to be run on the latest supported TensorFlow 1.5 or 1.4 versions respectively. AWS SageMaker Estimators ------------------------ @@ -1428,11 +1478,11 @@ Amazon SageMaker provides several built-in machine learning algorithms that you The full list of algorithms is available on the AWS website: https://docs.aws.amazon.com/sagemaker/latest/dg/algos.html -SageMaker Python SDK includes Estimator wrappers for the AWS K-means, Principal Components Analysis, and Linear Learner algorithms. 
+SageMaker Python SDK includes Estimator wrappers for the AWS K-means, Principal Components Analysis (PCA), Linear Learner, Factorization Machines, Latent Dirichlet Allocation (LDA), and Neural Topic Model (NTM) algorithms. Definition and usage ~~~~~~~~~~~~~~~~~~~~ -Estimators that wrap Amazon's built-in algorithms define algorithm's hyperparameters with defaults. When a default is not possible you need to provide the value during construction: +Estimators that wrap Amazon's built-in algorithms define the algorithm's hyperparameters with defaults. When a default is not possible, you need to provide the value during construction, e.g.: - ``KMeans`` Estimator requires parameter ``k`` to define number of clusters - ``PCA`` Estimator requires parameter ``num_components`` to define number of principal components diff --git a/doc/conf.py b/doc/conf.py index 83105e3ab7..5ec0183dc6 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -18,7 +18,7 @@ def __getattr__(cls, name): 'tensorflow.python.framework', 'tensorflow_serving', 'tensorflow_serving.apis'] sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES) -version = '1.0' +version = '1.1.0' project = u'sagemaker' # Add any Sphinx extension module names here, as strings. They can be extensions diff --git a/doc/factorization_machines.rst b/doc/factorization_machines.rst new file mode 100644 index 0000000000..4427f0cd35 --- /dev/null +++ b/doc/factorization_machines.rst @@ -0,0 +1,22 @@ +FactorizationMachines +------------------------- + +The Amazon SageMaker Factorization Machines algorithm. + +.. autoclass:: sagemaker.FactorizationMachines + :members: + :undoc-members: + :show-inheritance: + :inherited-members: + :exclude-members: image, num_factors, predictor_type, epochs, clip_gradient, mini_batch_size, feature_dim, eps, rescale_grad, bias_lr, linear_lr, factors_lr, bias_wd, linear_wd, factors_wd, bias_init_method, bias_init_scale, bias_init_sigma, bias_init_value, linear_init_method, linear_init_scale, linear_init_sigma, linear_init_value, factors_init_method, factors_init_scale, factors_init_sigma, factors_init_value + + +.. autoclass:: sagemaker.FactorizationMachinesModel + :members: + :undoc-members: + :show-inheritance: + +.. autoclass:: sagemaker.FactorizationMachinesPredictor + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/index.rst b/doc/index.rst index 968cdb1024..9e97ecba83 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -38,7 +38,7 @@ A managed environment for TensorFlow training and hosting on Amazon SageMaker SageMaker First-Party Algorithms -------------------------------- -Amazon provides implementations of some common machine learning algortithms optimized for GPU archicture and massive datasets. +Amazon provides implementations of some common machine learning algorithms optimized for GPU architecture and massive datasets. .. toctree:: :maxdepth: 2 @@ -46,3 +46,7 @@ Amazon provides implementations of some common machine learning algortithms opti kmeans pca linear_learner + sagemaker.amazon.amazon_estimator + factorization_machines + lda + ntm diff --git a/doc/lda.rst b/doc/lda.rst new file mode 100644 index 0000000000..f6b965e577 --- /dev/null +++ b/doc/lda.rst @@ -0,0 +1,22 @@ +LDA +-------------------- + +The Amazon SageMaker LDA algorithm. + +.. autoclass:: sagemaker.LDA + :members: + :undoc-members: + :show-inheritance: + :inherited-members: + :exclude-members: image, num_topics, alpha0, max_restarts, max_iterations, mini_batch_size, feature_dim, tol + + +.. 
autoclass:: sagemaker.LDAModel + :members: + :undoc-members: + :show-inheritance: + +.. autoclass:: sagemaker.LDAPredictor + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/ntm.rst b/doc/ntm.rst new file mode 100644 index 0000000000..628cfd7de8 --- /dev/null +++ b/doc/ntm.rst @@ -0,0 +1,23 @@ +NTM +-------------------- + +The Amazon SageMaker NTM algorithm. + +.. autoclass:: sagemaker.NTM + :members: + :undoc-members: + :show-inheritance: + :inherited-members: + :exclude-members: image, num_topics, encoder_layers, epochs, encoder_layers_activation, optimizer, tolerance, + num_patience_epochs, batch_norm, rescale_gradient, clip_gradient, weight_decay, learning_rate + + +.. autoclass:: sagemaker.NTMModel + :members: + :undoc-members: + :show-inheritance: + +.. autoclass:: sagemaker.NTMPredictor + :members: + :undoc-members: + :show-inheritance: diff --git a/setup.py b/setup.py index 6b048f83fc..a98d0dc753 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ def read(fname): setup(name="sagemaker", - version="1.0.2", + version="1.1.0", description="Open source library for training and deploying models on Amazon SageMaker.", packages=find_packages('src'), package_dir={'': 'src'}, diff --git a/src/sagemaker/__init__.py b/src/sagemaker/__init__.py index 098c067f96..93a62c2a72 100644 --- a/src/sagemaker/__init__.py +++ b/src/sagemaker/__init__.py @@ -15,9 +15,11 @@ from sagemaker import estimator from sagemaker.amazon.kmeans import KMeans, KMeansModel, KMeansPredictor from sagemaker.amazon.pca import PCA, PCAModel, PCAPredictor +from sagemaker.amazon.lda import LDA, LDAModel, LDAPredictor from sagemaker.amazon.linear_learner import LinearLearner, LinearLearnerModel, LinearLearnerPredictor from sagemaker.amazon.factorization_machines import FactorizationMachines, FactorizationMachinesModel from sagemaker.amazon.factorization_machines import FactorizationMachinesPredictor +from sagemaker.amazon.ntm import NTM, NTMModel, NTMPredictor from sagemaker.model import Model from sagemaker.predictor import RealTimePredictor @@ -30,6 +32,7 @@ __all__ = [estimator, KMeans, KMeansModel, KMeansPredictor, PCA, PCAModel, PCAPredictor, LinearLearner, LinearLearnerModel, LinearLearnerPredictor, + LDA, LDAModel, LDAPredictor, FactorizationMachines, FactorizationMachinesModel, FactorizationMachinesPredictor, - Model, RealTimePredictor, Session, + Model, NTM, NTMModel, NTMPredictor, RealTimePredictor, Session, container_def, s3_input, production_variant, get_execution_role] diff --git a/src/sagemaker/amazon/amazon_estimator.py b/src/sagemaker/amazon/amazon_estimator.py index 9ed28c1894..22022c65f3 100644 --- a/src/sagemaker/amazon/amazon_estimator.py +++ b/src/sagemaker/amazon/amazon_estimator.py @@ -28,8 +28,8 @@ class AmazonAlgorithmEstimatorBase(EstimatorBase): """Base class for Amazon first-party Estimator implementations. This class isn't intended to be instantiated directly.""" - feature_dim = hp('feature_dim', (validation.isint, validation.gt(0))) - mini_batch_size = hp('mini_batch_size', (validation.isint, validation.gt(0))) + feature_dim = hp('feature_dim', validation.gt(0), data_type=int) + mini_batch_size = hp('mini_batch_size', validation.gt(0), data_type=int) def __init__(self, role, train_instance_count, train_instance_type, data_location=None, **kwargs): """Initialize an AmazonAlgorithmEstimatorBase. 
@@ -47,7 +47,8 @@ def __init__(self, role, train_instance_count, train_instance_type, data_locatio self.data_location = data_location def train_image(self): - return registry(self.sagemaker_session.boto_region_name) + "/" + type(self).repo + repo = '{}:{}'.format(type(self).repo_name, type(self).repo_version) + return '{}/{}'.format(registry(self.sagemaker_session.boto_region_name, type(self).repo_name), repo) def hyperparameters(self): return hp.serialize_all(self) @@ -64,6 +65,31 @@ def data_location(self, data_location): data_location = data_location + '/' self._data_location = data_location + @classmethod + def _prepare_init_params_from_job_description(cls, job_details): + """Convert the job description to init params that can be handled by the class constructor + + Args: + job_details: the returned job details from a describe_training_job API call. + + Returns: + dictionary: The transformed init_params + + """ + init_params = super(AmazonAlgorithmEstimatorBase, cls)._prepare_init_params_from_job_description(job_details) + + # The hyperparam names may not be the same as the class attribute that holds them, + # for instance: local_lloyd_init_method is called local_init_method. We need to map these + # and pass the correct name to the constructor. + for attribute, value in cls.__dict__.items(): + if isinstance(value, hp): + if value.name in init_params['hyperparameters']: + init_params[attribute] = init_params['hyperparameters'][value.name] + + del init_params['hyperparameters'] + del init_params['image'] + return init_params + def fit(self, records, mini_batch_size=None, **kwargs): """Fit this Estimator on serialized Record objects, stored in S3. @@ -200,12 +226,22 @@ def upload_numpy_to_s3_shards(num_shards, s3, bucket, key_prefix, array, labels= raise ex -def registry(region_name): +def registry(region_name, algorithm=None): """Return docker registry for the given AWS region""" - account_id = { - "us-east-1": "382416733822", - "us-east-2": "404615174143", - "us-west-2": "174872318107", - "eu-west-1": "438346466558" - }[region_name] + if algorithm in [None, "pca", "kmeans", "linear-learner", "factorization-machines", "ntm"]: + account_id = { + "us-east-1": "382416733822", + "us-east-2": "404615174143", + "us-west-2": "174872318107", + "eu-west-1": "438346466558" + }[region_name] + elif algorithm in ["lda"]: + account_id = { + "us-east-1": "766337827248", + "us-east-2": "999911452149", + "us-west-2": "266724342769", + "eu-west-1": "999678624901" + }[region_name] + else: + raise ValueError("Algorithm class:{} doesn't have mapping to account_id with images".format(algorithm)) return "{}.dkr.ecr.{}.amazonaws.com".format(account_id, region_name) diff --git a/src/sagemaker/amazon/factorization_machines.py b/src/sagemaker/amazon/factorization_machines.py index 5340fa11f0..5297367947 100644 --- a/src/sagemaker/amazon/factorization_machines.py +++ b/src/sagemaker/amazon/factorization_machines.py @@ -13,7 +13,7 @@ from sagemaker.amazon.amazon_estimator import AmazonAlgorithmEstimatorBase, registry from sagemaker.amazon.common import numpy_to_record_serializer, record_deserializer from sagemaker.amazon.hyperparameter import Hyperparameter as hp # noqa -from sagemaker.amazon.validation import gt, isin, isint, ge, isnumber +from sagemaker.amazon.validation import gt, isin, ge from sagemaker.predictor import RealTimePredictor from sagemaker.model import Model from sagemaker.session import Session @@ -21,36 +21,37 @@ class FactorizationMachines(AmazonAlgorithmEstimatorBase): - repo = 
'factorization-machines:1' + repo_name = 'factorization-machines' + repo_version = 1 - num_factors = hp('num_factors', (gt(0), isint), 'An integer greater than zero') + num_factors = hp('num_factors', gt(0), 'An integer greater than zero', int) predictor_type = hp('predictor_type', isin('binary_classifier', 'regressor'), - 'Value "binary_classifier" or "regressor"') - epochs = hp('epochs', (gt(0), isint), "An integer greater than 0") - clip_gradient = hp('clip_gradient', isnumber, "A float value") - eps = hp('eps', isnumber, "A float value") - rescale_grad = hp('rescale_grad', isnumber, "A float value") - bias_lr = hp('bias_lr', (ge(0), isnumber), "A non-negative float") - linear_lr = hp('linear_lr', (ge(0), isnumber), "A non-negative float") - factors_lr = hp('factors_lr', (ge(0), isnumber), "A non-negative float") - bias_wd = hp('bias_wd', (ge(0), isnumber), "A non-negative float") - linear_wd = hp('linear_wd', (ge(0), isnumber), "A non-negative float") - factors_wd = hp('factors_wd', (ge(0), isnumber), "A non-negative float") + 'Value "binary_classifier" or "regressor"', str) + epochs = hp('epochs', gt(0), "An integer greater than 0", int) + clip_gradient = hp('clip_gradient', (), "A float value", float) + eps = hp('eps', (), "A float value", float) + rescale_grad = hp('rescale_grad', (), "A float value", float) + bias_lr = hp('bias_lr', ge(0), "A non-negative float", float) + linear_lr = hp('linear_lr', ge(0), "A non-negative float", float) + factors_lr = hp('factors_lr', ge(0), "A non-negative float", float) + bias_wd = hp('bias_wd', ge(0), "A non-negative float", float) + linear_wd = hp('linear_wd', ge(0), "A non-negative float", float) + factors_wd = hp('factors_wd', ge(0), "A non-negative float", float) bias_init_method = hp('bias_init_method', isin('normal', 'uniform', 'constant'), - 'Value "normal", "uniform" or "constant"') - bias_init_scale = hp('bias_init_scale', (ge(0), isnumber), "A non-negative float") - bias_init_sigma = hp('bias_init_sigma', (ge(0), isnumber), "A non-negative float") - bias_init_value = hp('bias_init_value', isnumber, "A float value") + 'Value "normal", "uniform" or "constant"', str) + bias_init_scale = hp('bias_init_scale', ge(0), "A non-negative float", float) + bias_init_sigma = hp('bias_init_sigma', ge(0), "A non-negative float", float) + bias_init_value = hp('bias_init_value', (), "A float value", float) linear_init_method = hp('linear_init_method', isin('normal', 'uniform', 'constant'), - 'Value "normal", "uniform" or "constant"') - linear_init_scale = hp('linear_init_scale', (ge(0), isnumber), "A non-negative float") - linear_init_sigma = hp('linear_init_sigma', (ge(0), isnumber), "A non-negative float") - linear_init_value = hp('linear_init_value', isnumber, "A float value") + 'Value "normal", "uniform" or "constant"', str) + linear_init_scale = hp('linear_init_scale', ge(0), "A non-negative float", float) + linear_init_sigma = hp('linear_init_sigma', ge(0), "A non-negative float", float) + linear_init_value = hp('linear_init_value', (), "A float value", float) factors_init_method = hp('factors_init_method', isin('normal', 'uniform', 'constant'), - 'Value "normal", "uniform" or "constant"') - factors_init_scale = hp('factors_init_scale', (ge(0), isnumber), "A non-negative float") - factors_init_sigma = hp('factors_init_sigma', (ge(0), isnumber), "A non-negative float") - factors_init_value = hp('factors_init_value', isnumber, "A float value") + 'Value "normal", "uniform" or "constant"', str) + factors_init_scale = hp('factors_init_scale', ge(0), 
"A non-negative float", float) + factors_init_sigma = hp('factors_init_sigma', ge(0), "A non-negative float", float) + factors_init_value = hp('factors_init_value', (), "A float value", float) def __init__(self, role, train_instance_count, train_instance_type, num_factors, predictor_type, @@ -194,7 +195,8 @@ class FactorizationMachinesModel(Model): def __init__(self, model_data, role, sagemaker_session=None): sagemaker_session = sagemaker_session or Session() - image = registry(sagemaker_session.boto_session.region_name) + "/" + FactorizationMachines.repo + repo = '{}:{}'.format(FactorizationMachines.repo_name, FactorizationMachines.repo_version) + image = '{}/{}'.format(registry(sagemaker_session.boto_session.region_name), repo) super(FactorizationMachinesModel, self).__init__(model_data, image, role, diff --git a/src/sagemaker/amazon/hyperparameter.py b/src/sagemaker/amazon/hyperparameter.py index 0d86191474..a3ac76367c 100644 --- a/src/sagemaker/amazon/hyperparameter.py +++ b/src/sagemaker/amazon/hyperparameter.py @@ -16,7 +16,7 @@ class Hyperparameter(object): """An algorithm hyperparameter with optional validation. Implemented as a python descriptor object.""" - def __init__(self, name, validate=lambda _: True, validation_message=""): + def __init__(self, name, validate=lambda _: True, validation_message="", data_type=str): """Args: name (str): The name of this hyperparameter validate (callable[object]->[bool]): A validation function or list of validation functions. @@ -27,6 +27,7 @@ def __init__(self, name, validate=lambda _: True, validation_message=""): self.validation = validate self.validation_message = validation_message self.name = name + self.data_type = data_type try: iter(self.validation) except TypeError: @@ -35,9 +36,10 @@ def __init__(self, name, validate=lambda _: True, validation_message=""): def validate(self, value): if value is None: # We allow assignment from None, but Nones are not sent to training. return + for valid in self.validation: if not valid(value): - error_message = "Invalid hyperparameter value {}".format(value) + error_message = "Invalid hyperparameter value {} for {}".format(value, self.name) if self.validation_message: error_message = error_message + ". 
Expecting: " + self.validation_message raise ValueError(error_message) @@ -50,6 +52,7 @@ def __get__(self, obj, objtype): def __set__(self, obj, value): """Validate the supplied value and set this hyperparameter to value""" + value = None if value is None else self.data_type(value) self.validate(value) if '_hyperparameters' not in dir(obj): obj._hyperparameters = dict() diff --git a/src/sagemaker/amazon/kmeans.py b/src/sagemaker/amazon/kmeans.py index d3fb5d670e..b684b68f07 100644 --- a/src/sagemaker/amazon/kmeans.py +++ b/src/sagemaker/amazon/kmeans.py @@ -13,7 +13,7 @@ from sagemaker.amazon.amazon_estimator import AmazonAlgorithmEstimatorBase, registry from sagemaker.amazon.common import numpy_to_record_serializer, record_deserializer from sagemaker.amazon.hyperparameter import Hyperparameter as hp # noqa -from sagemaker.amazon.validation import gt, isin, isint, ge +from sagemaker.amazon.validation import gt, isin, ge from sagemaker.predictor import RealTimePredictor from sagemaker.model import Model from sagemaker.session import Session @@ -21,17 +21,18 @@ class KMeans(AmazonAlgorithmEstimatorBase): - repo = 'kmeans:1' + repo_name = 'kmeans' + repo_version = 1 - k = hp('k', (gt(1), isint), 'An integer greater-than 1') - init_method = hp('init_method', isin('random', 'kmeans++'), 'One of "random", "kmeans++"') - max_iterations = hp('local_lloyd_max_iterations', (gt(0), isint), 'An integer greater-than 0') - tol = hp('local_lloyd_tol', (gt(0), isint), 'An integer greater-than 0') - num_trials = hp('local_lloyd_num_trials', (gt(0), isint), 'An integer greater-than 0') - local_init_method = hp('local_lloyd_init_method', isin('random', 'kmeans++'), 'One of "random", "kmeans++"') - half_life_time_size = hp('half_life_time_size', (ge(0), isint), 'An integer greater-than-or-equal-to 0') - epochs = hp('epochs', (gt(0), isint), 'An integer greater-than 0') - center_factor = hp('extra_center_factor', (gt(0), isint), 'An integer greater-than 0') + k = hp('k', gt(1), 'An integer greater-than 1', int) + init_method = hp('init_method', isin('random', 'kmeans++'), 'One of "random", "kmeans++"', str) + max_iterations = hp('local_lloyd_max_iterations', gt(0), 'An integer greater-than 0', int) + tol = hp('local_lloyd_tol', gt(0), 'An integer greater-than 0', int) + num_trials = hp('local_lloyd_num_trials', gt(0), 'An integer greater-than 0', int) + local_init_method = hp('local_lloyd_init_method', isin('random', 'kmeans++'), 'One of "random", "kmeans++"', str) + half_life_time_size = hp('half_life_time_size', ge(0), 'An integer greater-than-or-equal-to 0', int) + epochs = hp('epochs', gt(0), 'An integer greater-than 0', int) + center_factor = hp('extra_center_factor', gt(0), 'An integer greater-than 0', int) def __init__(self, role, train_instance_count, train_instance_type, k, init_method=None, max_iterations=None, tol=None, num_trials=None, local_init_method=None, @@ -132,6 +133,7 @@ class KMeansModel(Model): def __init__(self, model_data, role, sagemaker_session=None): sagemaker_session = sagemaker_session or Session() - image = registry(sagemaker_session.boto_session.region_name) + "/" + KMeans.repo + repo = '{}:{}'.format(KMeans.repo_name, KMeans.repo_version) + image = '{}/{}'.format(registry(sagemaker_session.boto_session.region_name), repo) super(KMeansModel, self).__init__(model_data, image, role, predictor_cls=KMeansPredictor, sagemaker_session=sagemaker_session) diff --git a/src/sagemaker/amazon/lda.py b/src/sagemaker/amazon/lda.py new file mode 100644 index 0000000000..30367b2b0f --- /dev/null 
+++ b/src/sagemaker/amazon/lda.py @@ -0,0 +1,127 @@ +# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +from sagemaker.amazon.amazon_estimator import AmazonAlgorithmEstimatorBase, registry +from sagemaker.amazon.common import numpy_to_record_serializer, record_deserializer +from sagemaker.amazon.hyperparameter import Hyperparameter as hp # noqa +from sagemaker.amazon.validation import gt +from sagemaker.predictor import RealTimePredictor +from sagemaker.model import Model +from sagemaker.session import Session + + +class LDA(AmazonAlgorithmEstimatorBase): + + repo_name = 'lda' + repo_version = 1 + + num_topics = hp('num_topics', gt(0), 'An integer greater than zero', int) + alpha0 = hp('alpha0', gt(0), 'A positive float', float) + max_restarts = hp('max_restarts', gt(0), 'An integer greater than zero', int) + max_iterations = hp('max_iterations', gt(0), 'An integer greater than zero', int) + tol = hp('tol', gt(0), 'A positive float', float) + + def __init__(self, role, train_instance_type, num_topics, + alpha0=None, max_restarts=None, max_iterations=None, tol=None, **kwargs): + """Latent Dirichlet Allocation (LDA) is an :class:`Estimator` used for unsupervised learning. + + Amazon SageMaker Latent Dirichlet Allocation is an unsupervised learning algorithm that attempts to describe + a set of observations as a mixture of distinct categories. LDA is most commonly used to discover + a user-specified number of topics shared by documents within a text corpus. + Here each observation is a document, the features are the presence (or occurrence count) of each word, and + the categories are the topics. + + This Estimator may be fit via calls to + :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.fit`. It requires Amazon + :class:`~sagemaker.amazon.record_pb2.Record` protobuf serialized data to be stored in S3. + There is a utility :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.record_set` that + can be used to upload data to S3 and create a :class:`~sagemaker.amazon.amazon_estimator.RecordSet` to be passed + to the `fit` call. + + To learn more about the Amazon protobuf Record class and how to prepare bulk data in this format, please + consult AWS technical documentation: https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html + + After this Estimator is fit, model data is stored in S3. The model may be deployed to an Amazon SageMaker + Endpoint by invoking :meth:`~sagemaker.amazon.estimator.EstimatorBase.deploy`. As well as deploying an Endpoint, + deploy returns a :class:`~sagemaker.amazon.lda.LDAPredictor` object that can be used + for inference calls using the trained model hosted in the SageMaker Endpoint. + + LDA Estimators can be configured by setting hyperparameters. The available hyperparameters for + LDA are documented below. 
+ + For further information on the AWS LDA algorithm, + please consult AWS technical documentation: https://docs.aws.amazon.com/sagemaker/latest/dg/lda.html + + Args: + role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and + APIs that create Amazon SageMaker endpoints use this role to access + training data and model artifacts. After the endpoint is created, + the inference code might use the IAM role if accessing AWS resources. + train_instance_type (str): Type of EC2 instance to use for training, for example, 'ml.c4.xlarge'. + num_topics (int): The number of topics for LDA to find within the data. + alpha0 (float): Optional. Initial guess for the concentration parameter. + max_restarts (int): Optional. The number of restarts to perform during the Alternating Least Squares (ALS) + spectral decomposition phase of the algorithm. + max_iterations (int): Optional. The maximum number of iterations to perform during the + ALS phase of the algorithm. + tol (float): Optional. Target error tolerance for the ALS phase of the algorithm. + **kwargs: base class keyword argument values. + """ + + # this algorithm only supports single instance training + super(LDA, self).__init__(role, 1, train_instance_type, **kwargs) + self.num_topics = num_topics + self.alpha0 = alpha0 + self.max_restarts = max_restarts + self.max_iterations = max_iterations + self.tol = tol + + def create_model(self): + """Return a :class:`~sagemaker.amazon.LDAModel` referencing the latest + s3 model data produced by this Estimator.""" + + return LDAModel(self.model_data, self.role, sagemaker_session=self.sagemaker_session) + + def fit(self, records, mini_batch_size, **kwargs): + # mini_batch_size is required; prevent explicit calls with None + if mini_batch_size is None: + raise ValueError("mini_batch_size must be set") + super(LDA, self).fit(records, mini_batch_size, **kwargs) + + +class LDAPredictor(RealTimePredictor): + """Transforms input vectors to lower-dimensional representations. + + The implementation of :meth:`~sagemaker.predictor.RealTimePredictor.predict` in this + `RealTimePredictor` requires a numpy ``ndarray`` as input. The array should contain the + same number of columns as the feature-dimension of the data used to fit the model this + Predictor performs inference on. + + :meth:`predict()` returns a list of :class:`~sagemaker.amazon.record_pb2.Record` objects, one + for each row in the input ``ndarray``. The lower dimension vector result is stored in the ``projection`` + key of the ``Record.label`` field.""" + + def __init__(self, endpoint, sagemaker_session=None): + super(LDAPredictor, self).__init__(endpoint, sagemaker_session, serializer=numpy_to_record_serializer(), + deserializer=record_deserializer()) + + +class LDAModel(Model): + """Reference LDA s3 model data. 
Calling :meth:`~sagemaker.model.Model.deploy` creates an Endpoint and return + a Predictor that transforms vectors to a lower-dimensional representation.""" + + def __init__(self, model_data, role, sagemaker_session=None): + sagemaker_session = sagemaker_session or Session() + repo = '{}:{}'.format(LDA.repo_name, LDA.repo_version) + image = '{}/{}'.format(registry(sagemaker_session.boto_session.region_name, LDA.repo_name), repo) + super(LDAModel, self).__init__(model_data, image, role, predictor_cls=LDAPredictor, + sagemaker_session=sagemaker_session) diff --git a/src/sagemaker/amazon/linear_learner.py b/src/sagemaker/amazon/linear_learner.py index af336a8e6b..b265ce7576 100644 --- a/src/sagemaker/amazon/linear_learner.py +++ b/src/sagemaker/amazon/linear_learner.py @@ -13,7 +13,7 @@ from sagemaker.amazon.amazon_estimator import AmazonAlgorithmEstimatorBase, registry from sagemaker.amazon.common import numpy_to_record_serializer, record_deserializer from sagemaker.amazon.hyperparameter import Hyperparameter as hp # noqa -from sagemaker.amazon.validation import isin, gt, lt, isint, isbool, isnumber +from sagemaker.amazon.validation import isin, gt, lt from sagemaker.predictor import RealTimePredictor from sagemaker.model import Model from sagemaker.session import Session @@ -21,46 +21,48 @@ class LinearLearner(AmazonAlgorithmEstimatorBase): - repo = 'linear-learner:1' + repo_name = 'linear-learner' + repo_version = 1 DEFAULT_MINI_BATCH_SIZE = 1000 binary_classifier_model_selection_criteria = hp('binary_classifier_model_selection_criteria', isin('accuracy', 'f1', 'precision_at_target_recall', - 'recall_at_target_precision', 'cross_entropy_loss')) - target_recall = hp('target_recall', (gt(0), lt(1)), "A float in (0,1)") - target_precision = hp('target_precision', (gt(0), lt(1)), "A float in (0,1)") - positive_example_weight_mult = hp('positive_example_weight_mult', gt(0), "A float greater than 0") - epochs = hp('epochs', (gt(0), isint), "An integer greater-than 0") + 'recall_at_target_precision', 'cross_entropy_loss'), + data_type=str) + target_recall = hp('target_recall', (gt(0), lt(1)), "A float in (0,1)", float) + target_precision = hp('target_precision', (gt(0), lt(1)), "A float in (0,1)", float) + positive_example_weight_mult = hp('positive_example_weight_mult', gt(0), "A float greater than 0", float) + epochs = hp('epochs', gt(0), "An integer greater-than 0", int) predictor_type = hp('predictor_type', isin('binary_classifier', 'regressor'), - 'One of "binary_classifier" or "regressor"') - use_bias = hp('use_bias', isbool, "Either True or False") - num_models = hp('num_models', (gt(0), isint), "An integer greater-than 0") - num_calibration_samples = hp('num_calibration_samples', (gt(0), isint), "An integer greater-than 0") - init_method = hp('init_method', isin('uniform', 'normal'), 'One of "uniform" or "normal"') - init_scale = hp('init_scale', (gt(-1), lt(1)), 'A float in (-1, 1)') - init_sigma = hp('init_sigma', (gt(0), lt(1)), 'A float in (0, 1)') - init_bias = hp('init_bias', isnumber, 'A number') - optimizer = hp('optimizer', isin('sgd', 'adam', 'auto'), 'One of "sgd", "adam" or "auto') + 'One of "binary_classifier" or "regressor"', str) + use_bias = hp('use_bias', (), "Either True or False", bool) + num_models = hp('num_models', gt(0), "An integer greater-than 0", int) + num_calibration_samples = hp('num_calibration_samples', gt(0), "An integer greater-than 0", int) + init_method = hp('init_method', isin('uniform', 'normal'), 'One of "uniform" or "normal"', str) + init_scale = 
hp('init_scale', (gt(-1), lt(1)), 'A float in (-1, 1)', float) + init_sigma = hp('init_sigma', (gt(0), lt(1)), 'A float in (0, 1)', float) + init_bias = hp('init_bias', (), 'A number', float) + optimizer = hp('optimizer', isin('sgd', 'adam', 'auto'), 'One of "sgd", "adam" or "auto', str) loss = hp('loss', isin('logistic', 'squared_loss', 'absolute_loss', 'auto'), - '"logistic", "squared_loss", "absolute_loss" or"auto"') - wd = hp('wd', (gt(0), lt(1)), 'A float in (0,1)') - l1 = hp('l1', (gt(0), lt(1)), 'A float in (0,1)') - momentum = hp('momentum', (gt(0), lt(1)), 'A float in (0,1)') - learning_rate = hp('learning_rate', (gt(0), lt(1)), 'A float in (0,1)') - beta_1 = hp('beta_1', (gt(0), lt(1)), 'A float in (0,1)') - beta_2 = hp('beta_1', (gt(0), lt(1)), 'A float in (0,1)') - bias_lr_mult = hp('bias_lr_mult', gt(0), 'A float greater-than 0') - bias_wd_mult = hp('bias_wd_mult', gt(0), 'A float greater-than 0') - use_lr_scheduler = hp('use_lr_scheduler', isbool, 'A boolean') - lr_scheduler_step = hp('lr_scheduler_step', (gt(0), isint), 'An integer greater-than 0') - lr_scheduler_factor = hp('lr_scheduler_factor', (gt(0), lt(1)), 'A float in (0,1)') - lr_scheduler_minimum_lr = hp('lr_scheduler_minimum_lr', gt(0), 'A float greater-than 0') - normalize_data = hp('normalize_data', isbool, 'A boolean') - normalize_label = hp('normalize_label', isbool, 'A boolean') - unbias_data = hp('unbias_data', isbool, 'A boolean') - unbias_label = hp('unbias_label', isbool, 'A boolean') - num_point_for_scalar = hp('num_point_for_scalar', (isint, gt(0)), 'An integer greater-than 0') + '"logistic", "squared_loss", "absolute_loss" or"auto"', str) + wd = hp('wd', (gt(0), lt(1)), 'A float in (0,1)', float) + l1 = hp('l1', (gt(0), lt(1)), 'A float in (0,1)', float) + momentum = hp('momentum', (gt(0), lt(1)), 'A float in (0,1)', float) + learning_rate = hp('learning_rate', (gt(0), lt(1)), 'A float in (0,1)', float) + beta_1 = hp('beta_1', (gt(0), lt(1)), 'A float in (0,1)', float) + beta_2 = hp('beta_2', (gt(0), lt(1)), 'A float in (0,1)', float) + bias_lr_mult = hp('bias_lr_mult', gt(0), 'A float greater-than 0', float) + bias_wd_mult = hp('bias_wd_mult', gt(0), 'A float greater-than 0', float) + use_lr_scheduler = hp('use_lr_scheduler', (), 'A boolean', bool) + lr_scheduler_step = hp('lr_scheduler_step', gt(0), 'An integer greater-than 0', int) + lr_scheduler_factor = hp('lr_scheduler_factor', (gt(0), lt(1)), 'A float in (0,1)', float) + lr_scheduler_minimum_lr = hp('lr_scheduler_minimum_lr', gt(0), 'A float greater-than 0', float) + normalize_data = hp('normalize_data', (), 'A boolean', bool) + normalize_label = hp('normalize_label', (), 'A boolean', bool) + unbias_data = hp('unbias_data', (), 'A boolean', bool) + unbias_label = hp('unbias_label', (), 'A boolean', bool) + num_point_for_scaler = hp('num_point_for_scaler', gt(0), 'An integer greater-than 0', int) def __init__(self, role, train_instance_count, train_instance_type, predictor_type='binary_classifier', binary_classifier_model_selection_criteria=None, target_recall=None, target_precision=None, @@ -69,7 +71,7 @@ def __init__(self, role, train_instance_count, train_instance_type, predictor_ty optimizer=None, loss=None, wd=None, l1=None, momentum=None, learning_rate=None, beta_1=None, beta_2=None, bias_lr_mult=None, bias_wd_mult=None, use_lr_scheduler=None, lr_scheduler_step=None, lr_scheduler_factor=None, lr_scheduler_minimum_lr=None, normalize_data=None, - normalize_label=None, unbias_data=None, unbias_label=None, num_point_for_scalar=None, **kwargs): + 
normalize_label=None, unbias_data=None, unbias_label=None, num_point_for_scaler=None, **kwargs): """An :class:`Estimator` for binary classification and regression. Amazon SageMaker Linear Learner provides a solution for both classification and regression problems, allowing @@ -184,14 +186,14 @@ def __init__(self, role, train_instance_count, train_instance_type, predictor_ty self.normalize_data = normalize_data self.normalize_label = normalize_label self.unbias_data = unbias_data - self.ubias_label = unbias_label - self.num_point_for_scaler = num_point_for_scalar + self.unbias_label = unbias_label + self.num_point_for_scaler = num_point_for_scaler def create_model(self): """Return a :class:`~sagemaker.amazon.kmeans.LinearLearnerModel` referencing the latest s3 model data produced by this Estimator.""" - return LinearLearnerModel(self, self.model_data, self.role, self.sagemaker_session) + return LinearLearnerModel(self.model_data, self.role, self.sagemaker_session) def fit(self, records, mini_batch_size=None, **kwargs): # mini_batch_size can't be greater than number of records or training job fails @@ -225,7 +227,8 @@ class LinearLearnerModel(Model): def __init__(self, model_data, role, sagemaker_session=None): sagemaker_session = sagemaker_session or Session() - image = registry(sagemaker_session.boto_session.region_name) + "/" + LinearLearner.repo + repo = '{}:{}'.format(LinearLearner.repo_name, LinearLearner.repo_version) + image = '{}/{}'.format(registry(sagemaker_session.boto_session.region_name), repo) super(LinearLearnerModel, self).__init__(model_data, image, role, predictor_cls=LinearLearnerPredictor, sagemaker_session=sagemaker_session) diff --git a/src/sagemaker/amazon/ntm.py b/src/sagemaker/amazon/ntm.py new file mode 100644 index 0000000000..21f0c8f1aa --- /dev/null +++ b/src/sagemaker/amazon/ntm.py @@ -0,0 +1,146 @@ +# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
+from sagemaker.amazon.amazon_estimator import AmazonAlgorithmEstimatorBase, registry +from sagemaker.amazon.common import numpy_to_record_serializer, record_deserializer +from sagemaker.amazon.hyperparameter import Hyperparameter as hp # noqa +from sagemaker.amazon.validation import ge, le, isin +from sagemaker.predictor import RealTimePredictor +from sagemaker.model import Model +from sagemaker.session import Session + + +class NTM(AmazonAlgorithmEstimatorBase): + + repo_name = 'ntm' + repo_version = 1 + + num_topics = hp('num_topics', (ge(2), le(1000)), 'An integer in [2, 1000]', int) + encoder_layers = hp(name='encoder_layers', validation_message='A comma separated list of ' + 'positive integers', data_type=list) + epochs = hp('epochs', (ge(1), le(100)), 'An integer in [1, 100]', int) + encoder_layers_activation = hp('encoder_layers_activation', isin('sigmoid', 'tanh', 'relu'), + 'One of "sigmoid", "tanh" or "relu"', str) + optimizer = hp('optimizer', isin('adagrad', 'adam', 'rmsprop', 'sgd', 'adadelta'), + 'One of "adagrad", "adam", "rmsprop", "sgd" or "adadelta"', str) + tolerance = hp('tolerance', (ge(1e-6), le(0.1)), 'A float in [1e-6, 0.1]', float) + num_patience_epochs = hp('num_patience_epochs', (ge(1), le(10)), 'An integer in [1, 10]', int) + batch_norm = hp(name='batch_norm', validation_message='Value must be a boolean', data_type=bool) + rescale_gradient = hp('rescale_gradient', (ge(1e-3), le(1.0)), 'A float in [1e-3, 1.0]', float) + clip_gradient = hp('clip_gradient', ge(1e-3), 'A float greater than or equal to 1e-3', float) + weight_decay = hp('weight_decay', (ge(0.0), le(1.0)), 'A float in [0.0, 1.0]', float) + learning_rate = hp('learning_rate', (ge(1e-6), le(1.0)), 'A float in [1e-6, 1.0]', float) + + def __init__(self, role, train_instance_count, train_instance_type, num_topics, + encoder_layers=None, epochs=None, encoder_layers_activation=None, optimizer=None, tolerance=None, + num_patience_epochs=None, batch_norm=None, rescale_gradient=None, clip_gradient=None, + weight_decay=None, learning_rate=None, **kwargs): + """Neural Topic Model (NTM) is an :class:`Estimator` used for unsupervised learning. + + This Estimator may be fit via calls to + :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.fit`. It requires Amazon + :class:`~sagemaker.amazon.record_pb2.Record` protobuf serialized data to be stored in S3. + There is a utility :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.record_set` that + can be used to upload data to S3 and create a :class:`~sagemaker.amazon.amazon_estimator.RecordSet` to be passed + to the `fit` call. + + To learn more about the Amazon protobuf Record class and how to prepare bulk data in this format, please + consult AWS technical documentation: https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html + + After this Estimator is fit, model data is stored in S3. The model may be deployed to an Amazon SageMaker + Endpoint by invoking :meth:`~sagemaker.amazon.estimator.EstimatorBase.deploy`. As well as deploying an Endpoint, + deploy returns a :class:`~sagemaker.amazon.ntm.NTMPredictor` object that can be used + for inference calls using the trained model hosted in the SageMaker Endpoint. + + NTM Estimators can be configured by setting hyperparameters. The available hyperparameters for + NTM are documented below. 
+ + For further information on the AWS NTM algorithm, + please consult AWS technical documentation: https://docs.aws.amazon.com/sagemaker/latest/dg/ntm.html + + Args: + role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and + APIs that create Amazon SageMaker endpoints use this role to access + training data and model artifacts. After the endpoint is created, + the inference code might use the IAM role if accessing AWS resources. + train_instance_type (str): Type of EC2 instance to use for training, for example, 'ml.c4.xlarge'. + num_topics (int): Required. The number of topics for NTM to find within the data. + encoder_layers (list): Optional. Represents the number of layers in the encoder and the output size of + each layer. + epochs (int): Optional. Maximum number of passes over the training data. + encoder_layers_activation (str): Optional. Activation function to use in the encoder layers. + optimizer (str): Optional. Optimizer to use for training. + tolerance (float): Optional. Maximum relative change in the loss function within the last + num_patience_epochs number of epochs below which early stopping is triggered. + num_patience_epochs (int): Optional. Number of successive epochs over which early stopping criterion + is evaluated. + batch_norm (bool): Optional. Whether to use batch normalization during training. + rescale_gradient (float): Optional. Rescale factor for gradient. + clip_gradient (float): Optional. Maximum magnitude for each gradient component. + weight_decay (float): Optional. Weight decay coefficient. Adds L2 regularization. + learning_rate (float): Optional. Learning rate for the optimizer. + **kwargs: base class keyword argument values. + """ + + super(NTM, self).__init__(role, train_instance_count, train_instance_type, **kwargs) + self.num_topics = num_topics + self.encoder_layers = encoder_layers + self.epochs = epochs + self.encoder_layers_activation = encoder_layers_activation + self.optimizer = optimizer + self.tolerance = tolerance + self.num_patience_epochs = num_patience_epochs + self.batch_norm = batch_norm + self.rescale_gradient = rescale_gradient + self.clip_gradient = clip_gradient + self.weight_decay = weight_decay + self.learning_rate = learning_rate + + def create_model(self): + """Return a :class:`~sagemaker.amazon.NTMModel` referencing the latest + s3 model data produced by this Estimator.""" + + return NTMModel(self.model_data, self.role, sagemaker_session=self.sagemaker_session) + + def fit(self, records, mini_batch_size=None, **kwargs): + if mini_batch_size is not None and (mini_batch_size < 1 or mini_batch_size > 10000): + raise ValueError("mini_batch_size must be in [1, 10000]") + super(NTM, self).fit(records, mini_batch_size, **kwargs) + + +class NTMPredictor(RealTimePredictor): + """Transforms input vectors to lower-dimensional representations. + + The implementation of :meth:`~sagemaker.predictor.RealTimePredictor.predict` in this + `RealTimePredictor` requires a numpy ``ndarray`` as input. The array should contain the + same number of columns as the feature-dimension of the data used to fit the model this + Predictor performs inference on. + + :meth:`predict()` returns a list of :class:`~sagemaker.amazon.record_pb2.Record` objects, one + for each row in the input ``ndarray``. 
The lower dimension vector result is stored in the ``projection`` + key of the ``Record.label`` field.""" + + def __init__(self, endpoint, sagemaker_session=None): + super(NTMPredictor, self).__init__(endpoint, sagemaker_session, serializer=numpy_to_record_serializer(), + deserializer=record_deserializer()) + + +class NTMModel(Model): + """Reference NTM s3 model data. Calling :meth:`~sagemaker.model.Model.deploy` creates an Endpoint and return + a Predictor that transforms vectors to a lower-dimensional representation.""" + + def __init__(self, model_data, role, sagemaker_session=None): + sagemaker_session = sagemaker_session or Session() + repo = '{}:{}'.format(NTM.repo_name, NTM.repo_version) + image = '{}/{}'.format(registry(sagemaker_session.boto_session.region_name, NTM.repo_name), repo) + super(NTMModel, self).__init__(model_data, image, role, predictor_cls=NTMPredictor, + sagemaker_session=sagemaker_session) diff --git a/src/sagemaker/amazon/pca.py b/src/sagemaker/amazon/pca.py index 7a23f60c7c..fa0f7e7217 100644 --- a/src/sagemaker/amazon/pca.py +++ b/src/sagemaker/amazon/pca.py @@ -20,18 +20,18 @@ class PCA(AmazonAlgorithmEstimatorBase): - repo = 'pca:1' + repo_name = 'pca' + repo_version = 1 DEFAULT_MINI_BATCH_SIZE = 500 - num_components = hp(name='num_components', validate=lambda x: x > 0 and isinstance(x, int), - validation_message='Value must be an integer greater than zero') + num_components = hp(name='num_components', validate=lambda x: x > 0, + validation_message='Value must be an integer greater than zero', data_type=int) algorithm_mode = hp(name='algorithm_mode', validate=lambda x: x in ['regular', 'stable', 'randomized'], - validation_message='Value must be one of "regular", "stable", "randomized"') - subtract_mean = hp(name='subtract_mean', validate=lambda x: isinstance(x, bool), - validation_message='Value must be a boolean') - extra_components = hp(name='extra_components', validate=lambda x: x >= 0 and isinstance(x, int), - validation_message="Value must be an integer greater than or equal to 0") + validation_message='Value must be one of "regular", "stable", "randomized"', data_type=str) + subtract_mean = hp(name='subtract_mean', validation_message='Value must be a boolean', data_type=bool) + extra_components = hp(name='extra_components', validate=lambda x: x >= 0, + validation_message="Value must be an integer greater than or equal to 0", data_type=int) def __init__(self, role, train_instance_count, train_instance_type, num_components, algorithm_mode=None, subtract_mean=None, extra_components=None, **kwargs): @@ -119,6 +119,7 @@ class PCAModel(Model): def __init__(self, model_data, role, sagemaker_session=None): sagemaker_session = sagemaker_session or Session() - image = registry(sagemaker_session.boto_session.region_name) + "/" + PCA.repo + repo = '{}:{}'.format(PCA.repo_name, PCA.repo_version) + image = '{}/{}'.format(registry(sagemaker_session.boto_session.region_name), repo) super(PCAModel, self).__init__(model_data, image, role, predictor_cls=PCAPredictor, sagemaker_session=sagemaker_session) diff --git a/src/sagemaker/amazon/validation.py b/src/sagemaker/amazon/validation.py index ff3259be8f..7c7fa4f2a0 100644 --- a/src/sagemaker/amazon/validation.py +++ b/src/sagemaker/amazon/validation.py @@ -10,7 +10,6 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. 
-import numbers
 
 
 def gt(minimum):
@@ -31,6 +30,12 @@ def validate(value):
         return validate
 
 
+def le(maximum):
+    def validate(value):
+        return value <= maximum
+    return validate
+
+
 def isin(*expected):
     def validate(value):
         return value in expected
@@ -41,8 +46,3 @@ def istype(expected):
     def validate(value):
         return isinstance(value, expected)
     return validate
-
-
-isint = istype(int)
-isbool = istype(bool)
-isnumber = istype(numbers.Number)  # noqa
diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py
index 2bfce13f59..c672703315 100644
--- a/src/sagemaker/estimator.py
+++ b/src/sagemaker/estimator.py
@@ -152,8 +152,60 @@ def fit(self, inputs, wait=True, logs=True, job_name=None):
         self.latest_training_job = _TrainingJob.start_new(self, inputs)
         if wait:
             self.latest_training_job.wait(logs=logs)
-        else:
-            raise NotImplemented('Asynchronous fit not available')
+
+    @classmethod
+    def _from_training_job(cls, init_params, hyperparameters, image, sagemaker_session):
+        """Create an Estimator from existing training job data.
+
+        Args:
+            init_params (dict): The init_params the training job was created with.
+            hyperparameters (dict): The hyperparameters the training job was created with.
+            image (str): Container image (if any) the training job was created with.
+            sagemaker_session (sagemaker.session.Session): A sagemaker Session to pass to the estimator.
+
+        Returns: An instance of the calling Estimator Class.
+
+        """
+        raise NotImplementedError()
+
+    @classmethod
+    def attach(cls, training_job_name, sagemaker_session=None, job_details=None):
+        """Attach to an existing training job.
+
+        Create an Estimator bound to an existing training job. Each subclass is responsible for implementing
+        ``_prepare_init_params_from_job_description()``, as this method delegates the actual conversion of a training
+        job description to the arguments that the class constructor expects. After attaching, if the training job has a
+        Complete status, it can be ``deploy()`` ed to create a SageMaker Endpoint and return a ``Predictor``.
+
+        If the training job is in progress, attach will block and display log messages
+        from the training job, until the training job completes.
+
+        Args:
+            training_job_name (str): The name of the training job to attach to.
+            sagemaker_session (sagemaker.session.Session): Session object which manages interactions with
+                Amazon SageMaker APIs and any other AWS services needed. If not specified, the estimator creates one
+                using the default AWS configuration chain.
+            job_details (dict): The returned job details from a ``describe_training_job`` API call (default: None).
+                If not provided, they are retrieved using ``training_job_name``.
+
+        Examples:
+            >>> my_estimator.fit(wait=False)
+            >>> training_job_name = my_estimator.latest_training_job.name
+            Later on:
+            >>> attached_estimator = Estimator.attach(training_job_name)
+            >>> attached_estimator.deploy()
+
+        Returns:
+            Instance of the calling ``Estimator`` Class with the attached training job.
+        """
+        sagemaker_session = sagemaker_session or Session()
+
+        job_details = job_details or sagemaker_session.sagemaker_client.describe_training_job(
+            TrainingJobName=training_job_name)
+        init_params = cls._prepare_init_params_from_job_description(job_details)
+
+        estimator = cls(sagemaker_session=sagemaker_session, **init_params)
+        estimator.latest_training_job = _TrainingJob(sagemaker_session=sagemaker_session,
+                                                     training_job_name=init_params['base_job_name'])
+        estimator.latest_training_job.wait()
+        return estimator
 
     def deploy(self, initial_instance_count, instance_type, endpoint_name=None, **kwargs):
         """Deploy the trained model to an Amazon SageMaker endpoint and return a ``sagemaker.RealTimePredictor`` object.
@@ -202,21 +254,33 @@ def create_model(self, **kwargs): """ pass - @staticmethod - def _prepare_estimator_params_from_job_description(job_details): - estimator_params = dict() + @classmethod + def _prepare_init_params_from_job_description(cls, job_details): + """Convert the job description to init params that can be handled by the class constructor + + Args: + job_details: the returned job details from a describe_training_job API call. + + Returns: + dictionary: The transformed init_params + + """ + init_params = dict() - estimator_params['role'] = job_details['RoleArn'] - estimator_params['train_instance_count'] = job_details['ResourceConfig']['InstanceCount'] - estimator_params['train_instance_type'] = job_details['ResourceConfig']['InstanceType'] - estimator_params['train_volume_size'] = job_details['ResourceConfig']['VolumeSizeInGB'] - estimator_params['train_max_run'] = job_details['StoppingCondition']['MaxRuntimeInSeconds'] - estimator_params['input_mode'] = job_details['AlgorithmSpecification']['TrainingInputMode'] - estimator_params['base_job_name'] = job_details['TrainingJobName'] - estimator_params['output_path'] = job_details['OutputDataConfig']['S3OutputPath'] - estimator_params['output_kms_key'] = job_details['OutputDataConfig']['KmsKeyId'] + init_params['role'] = job_details['RoleArn'] + init_params['train_instance_count'] = job_details['ResourceConfig']['InstanceCount'] + init_params['train_instance_type'] = job_details['ResourceConfig']['InstanceType'] + init_params['train_volume_size'] = job_details['ResourceConfig']['VolumeSizeInGB'] + init_params['train_max_run'] = job_details['StoppingCondition']['MaxRuntimeInSeconds'] + init_params['input_mode'] = job_details['AlgorithmSpecification']['TrainingInputMode'] + init_params['base_job_name'] = job_details['TrainingJobName'] + init_params['output_path'] = job_details['OutputDataConfig']['S3OutputPath'] + init_params['output_kms_key'] = job_details['OutputDataConfig']['KmsKeyId'] - return estimator_params, job_details['HyperParameters'], job_details['AlgorithmSpecification']['TrainingImage'] + init_params['hyperparameters'] = job_details['HyperParameters'] + init_params['image'] = job_details['AlgorithmSpecification']['TrainingImage'] + + return init_params def delete_endpoint(self): """Delete an Amazon SageMaker ``Endpoint``. @@ -333,7 +397,8 @@ class Estimator(EstimatorBase): def __init__(self, image_name, role, train_instance_count, train_instance_type, train_volume_size=30, train_max_run=24 * 60 * 60, input_mode='File', - output_path=None, output_kms_key=None, base_job_name=None, sagemaker_session=None): + output_path=None, output_kms_key=None, base_job_name=None, sagemaker_session=None, + hyperparameters=None): """Initialize an ``Estimator`` instance. Args: @@ -365,9 +430,10 @@ def __init__(self, image_name, role, train_instance_count, train_instance_type, sagemaker_session (sagemaker.session.Session): Session object which manages interactions with Amazon SageMaker APIs and any other AWS services needed. If not specified, the estimator creates one using the default AWS configuration chain. + hyperparameters (dict): Dictionary containing the hyperparameters to initialize this estimator with. 
""" self.image_name = image_name - self.hyperparam_dict = {} + self.hyperparam_dict = hyperparameters.copy() if hyperparameters else {} super(Estimator, self).__init__(role, train_instance_count, train_instance_type, train_volume_size, train_max_run, input_mode, output_path, output_kms_key, base_job_name, sagemaker_session) @@ -422,6 +488,22 @@ def predict_wrapper(endpoint, session): return Model(self.model_data, image or self.train_image(), self.role, sagemaker_session=self.sagemaker_session, predictor_cls=predictor_cls, **kwargs) + @classmethod + def _prepare_init_params_from_job_description(cls, job_details): + """Convert the job description to init params that can be handled by the class constructor + + Args: + job_details: the returned job details from a describe_training_job API call. + + Returns: + dictionary: The transformed init_params + + """ + init_params = super(Estimator, cls)._prepare_init_params_from_job_description(job_details) + + init_params['image_name'] = init_params.pop('image') + return init_params + class Framework(EstimatorBase): """Base class that cannot be instantiated directly. @@ -528,12 +610,37 @@ def hyperparameters(self): return self._json_encode_hyperparameters(self._hyperparameters) @classmethod - def attach(cls, training_job_name, sagemaker_session=None, **kwargs): + def _prepare_init_params_from_job_description(cls, job_details): + """Convert the job description to init params that can be handled by the class constructor + + Args: + job_details: the returned job details from a describe_training_job API call. + + Returns: + dictionary: The transformed init_params + + """ + init_params = super(Framework, cls)._prepare_init_params_from_job_description(job_details) + + init_params['entry_point'] = json.loads(init_params['hyperparameters'].get(SCRIPT_PARAM_NAME)) + init_params['source_dir'] = json.loads(init_params['hyperparameters'].get(DIR_PARAM_NAME)) + init_params['enable_cloudwatch_metrics'] = json.loads( + init_params['hyperparameters'].get(CLOUDWATCH_METRICS_PARAM_NAME)) + init_params['container_log_level'] = json.loads( + init_params['hyperparameters'].get(CONTAINER_LOG_LEVEL_PARAM_NAME)) + + init_params['hyperparameters'] = {k: json.loads(v) for k, v in init_params['hyperparameters'].items()} + + return init_params + + @classmethod + def attach(cls, training_job_name, sagemaker_session=None): """Attach to an existing training job. - Create an Estimator bound to an existing training job. After attaching, if - the training job has a Complete status, it can be ``deploy()`` ed to create - a SageMaker Endpoint and return a ``Predictor``. + Create an Estimator bound to an existing training job, each subclass is responsible to implement + ``_prepare_init_params_from_job_description()`` as this method delegates the actual conversion of a training + job description to the arguments that the class constructor expects. After attaching, if the training job has a + Complete status, it can be ``deploy()`` ed to create a SageMaker Endpoint and return a ``Predictor``. If the training job is in progress, attach will block and display log messages from the training job, until the training job completes. @@ -543,41 +650,18 @@ def attach(cls, training_job_name, sagemaker_session=None, **kwargs): sagemaker_session (sagemaker.session.Session): Session object which manages interactions with Amazon SageMaker APIs and any other AWS services needed. If not specified, the estimator creates one using the default AWS configuration chain. 
- **kwargs: Additional kwargs passed to the :class:`~sagemaker.estimator.Estimator` constructor. + + Examples: + >>> my_estimator.fit(wait=False) + >>> training_job_name = my_estimator.latest_training_job.name + Later on: + >>> attached_estimator = Estimator.attach(training_job_name) + >>> attached_estimator.deploy() Returns: - sagemaker.estimator.Framework: ``Estimator`` with the attached training job. + Instance of the calling ``Estimator`` Class with the attached training job. """ - sagemaker_session = sagemaker_session or Session() - - if training_job_name is not None: - job_details = sagemaker_session.sagemaker_client.describe_training_job(TrainingJobName=training_job_name) - init_params, hp, _ = cls._prepare_estimator_params_from_job_description(job_details) - - else: - # this case is only valid when called from inheriting class and then the class must declare framework - if not hasattr(cls, '__framework_name__'): - raise ValueError('must specify training_job name') - init_params = dict(kwargs) - hp = init_params.pop('hyperparameters') - - # parameters for framework classes - framework_init_params = dict() - framework_init_params['entry_point'] = json.loads(hp.get(SCRIPT_PARAM_NAME)) - framework_init_params['source_dir'] = json.loads(hp.get(DIR_PARAM_NAME)) - framework_init_params['enable_cloudwatch_metrics'] = json.loads(hp.get(CLOUDWATCH_METRICS_PARAM_NAME)) - framework_init_params['container_log_level'] = json.loads(hp.get(CONTAINER_LOG_LEVEL_PARAM_NAME)) - - # drop json and remove other SageMaker specific additions - hyperparameters = {entry: json.loads(hp[entry]) for entry in hp} - framework_init_params['hyperparameters'] = hyperparameters - - init_params.update(framework_init_params) - - estimator = cls(sagemaker_session=sagemaker_session, **init_params) - estimator.latest_training_job = _TrainingJob(sagemaker_session=sagemaker_session, - training_job_name=init_params['base_job_name']) - estimator.latest_training_job.wait() + estimator = super(Framework, cls).attach(training_job_name, sagemaker_session) estimator.uploaded_code = UploadedCode(estimator.source_dir, estimator.entry_point) return estimator diff --git a/src/sagemaker/fw_utils.py b/src/sagemaker/fw_utils.py index e2c19c65d3..7d39c63119 100644 --- a/src/sagemaker/fw_utils.py +++ b/src/sagemaker/fw_utils.py @@ -1,4 +1,4 @@ -# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of @@ -28,26 +28,28 @@ """ -def create_image_uri(region, framework, instance_type, py_version='py2', tag='1.0', account='520713654638'): +def create_image_uri(region, framework, instance_type, framework_version, py_version, account='520713654638'): """Return the ECR URI of an image. Args: region (str): AWS region where the image is uploaded. framework (str): framework used by the image. instance_type (str): EC2 instance type. Used to determine whether to use the CPU image or GPU image. - py_version (str): Python version. (default: 'py2') - tag (str): ECR image tag, which denotes the image version. (default: '1.0') + framework_version (str): The version of the framework. + py_version (str): Python version. One of 'py2' or 'py3'. account (str): AWS account that contains the image. (default: '520713654638') Returns: str: The appropriate image URI based on the given parameters. 
""" - device_version = 'cpu' - # Instance types that start with G, P are GPU powered: https://aws.amazon.com/ec2/instance-types/ + device_type = 'cpu' + # Instance types that start with G, P are GPU powered: https://aws.amazon.com/sagemaker/pricing/instance-types/ if instance_type[3] in ['g', 'p']: - device_version = 'gpu' + device_type = 'gpu' + + tag = "{}-{}-{}".format(framework_version, device_type, py_version) return "{}.dkr.ecr.{}.amazonaws.com/sagemaker-{}-{}-{}:{}" \ - .format(account, region, framework, py_version, device_version, tag) + .format(account, region, framework, py_version, device_type, tag) def tar_and_upload_dir(session, bucket, s3_key_prefix, script, directory): @@ -112,21 +114,37 @@ def framework_name_from_image(image_name): tuple: A tuple containing: str: The framework name str: The Python version + str: The image tag """ # image name format: .dkr.ecr..amazonaws.com/sagemaker---: - sagemaker_pattern = re.compile('^(\d+)(\.)dkr(\.)ecr(\.)(.+)(\.)amazonaws.com(/)(.*)(:)(.*)$') + sagemaker_pattern = re.compile('^(\d+)(\.)dkr(\.)ecr(\.)(.+)(\.)amazonaws.com(/)(.*:.*)$') sagemaker_match = sagemaker_pattern.match(image_name) if sagemaker_match is None: - return None, None + return None, None, None else: - # extract framework and python version - name_pattern = re.compile('^sagemaker-(tensorflow|mxnet)-(py2|py3)-(cpu|gpu)$') + # extract framework, python version and image tag + name_pattern = re.compile('^sagemaker-(tensorflow|mxnet)-(py2|py3)-(cpu|gpu):(.*)$') + name_match = name_pattern.match(sagemaker_match.group(8)) if name_match is None: - return None, None + return None, None, None else: - return name_match.group(1), name_match.group(2) + return name_match.group(1), name_match.group(2), name_match.group(4) + + +def framework_version_from_tag(image_tag): + """Extract the framework version from the image tag. + + Args: + image_tag (str): Image tag, which should take the form '--' + + Returns: + str: The framework version. + """ + tag_pattern = re.compile('^(.*)-(cpu|gpu)-(py2|py3)$') + tag_match = tag_pattern.match(image_tag) + return None if tag_match is None else tag_match.group(1) def parse_s3_url(url): diff --git a/src/sagemaker/mxnet/__init__.py b/src/sagemaker/mxnet/__init__.py index b0c1db825a..0bfe3fb16b 100644 --- a/src/sagemaker/mxnet/__init__.py +++ b/src/sagemaker/mxnet/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of @@ -14,5 +14,3 @@ from sagemaker.mxnet.model import MXNetModel, MXNetPredictor __all__ = [MXNet, MXNetModel, MXNetPredictor] - -DOCKER_TAG = "1.0" diff --git a/src/sagemaker/mxnet/defaults.py b/src/sagemaker/mxnet/defaults.py new file mode 100644 index 0000000000..9ea27c56de --- /dev/null +++ b/src/sagemaker/mxnet/defaults.py @@ -0,0 +1,13 @@ +# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. 
See the License for the specific +# language governing permissions and limitations under the License. +MXNET_VERSION = '1.0' diff --git a/src/sagemaker/mxnet/estimator.py b/src/sagemaker/mxnet/estimator.py index b41975345c..42cf12af36 100644 --- a/src/sagemaker/mxnet/estimator.py +++ b/src/sagemaker/mxnet/estimator.py @@ -1,4 +1,4 @@ -# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of @@ -10,11 +10,10 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. -import sagemaker from sagemaker.estimator import Framework -from sagemaker.fw_utils import create_image_uri, framework_name_from_image +from sagemaker.fw_utils import create_image_uri, framework_name_from_image, framework_version_from_tag +from sagemaker.mxnet.defaults import MXNET_VERSION from sagemaker.mxnet.model import MXNetModel -from sagemaker.session import Session class MXNet(Framework): @@ -22,7 +21,8 @@ class MXNet(Framework): __framework_name__ = "mxnet" - def __init__(self, entry_point, source_dir=None, hyperparameters=None, py_version='py2', **kwargs): + def __init__(self, entry_point, source_dir=None, hyperparameters=None, py_version='py2', + framework_version=MXNET_VERSION, **kwargs): """ This ``Estimator`` executes an MXNet script in a managed MXNet execution environment, within a SageMaker Training Job. The managed MXNet environment is an Amazon-built Docker container that executes functions @@ -48,10 +48,13 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None, py_versio to convert them before training. py_version (str): Python version you want to use for executing your model training code (default: 'py2'). One of 'py2' or 'py3'. + framework_version (str): MXNet version you want to use for executing your model training code. + List of supported versions https://github.com/aws/sagemaker-python-sdk#mxnet-sagemaker-estimators **kwargs: Additional kwargs passed to the :class:`~sagemaker.estimator.Framework` constructor. """ super(MXNet, self).__init__(entry_point, source_dir, hyperparameters, **kwargs) self.py_version = py_version + self.framework_version = framework_version def train_image(self): """Return the Docker image to use for training. @@ -63,7 +66,8 @@ def train_image(self): str: The URI of the Docker image. """ return create_image_uri(self.sagemaker_session.boto_session.region_name, self.__framework_name__, - self.train_instance_type, py_version=self.py_version, tag=sagemaker.mxnet.DOCKER_TAG) + self.train_instance_type, framework_version=self.framework_version, + py_version=self.py_version) def create_model(self, model_server_workers=None): """Create a SageMaker ``MXNetModel`` object that can be deployed to an ``Endpoint``. 
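To make the new tagging scheme concrete, the following standalone sketch mirrors the ``create_image_uri`` logic above (the account ID and URI layout are taken from the diff; the helper name itself is illustrative):

```python
# Illustrative re-statement of create_image_uri's new tag construction.
def example_image_uri(region, framework, instance_type, framework_version, py_version,
                      account='520713654638'):
    # Instance families whose name starts with 'g' or 'p' (e.g. ml.p2.xlarge) are GPU-backed.
    device_type = 'gpu' if instance_type[3] in ('g', 'p') else 'cpu'
    tag = '{}-{}-{}'.format(framework_version, device_type, py_version)
    return '{}.dkr.ecr.{}.amazonaws.com/sagemaker-{}-{}-{}:{}'.format(
        account, region, framework, py_version, device_type, tag)

# -> 520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet-py2-gpu:1.0.0-gpu-py2
print(example_image_uri('us-west-2', 'mxnet', 'ml.p2.xlarge', '1.0.0', 'py2'))
```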
@@ -79,46 +83,34 @@ def create_model(self, model_server_workers=None):
         return MXNetModel(self.model_data, self.role, self.entry_point, source_dir=self.source_dir,
                           enable_cloudwatch_metrics=self.enable_cloudwatch_metrics, name=self._current_job_name,
                           container_log_level=self.container_log_level, code_location=self.code_location,
-                          py_version=self.py_version, model_server_workers=model_server_workers,
-                          sagemaker_session=self.sagemaker_session)
+                          py_version=self.py_version, framework_version=self.framework_version,
+                          model_server_workers=model_server_workers, sagemaker_session=self.sagemaker_session)
 
     @classmethod
-    def attach(cls, training_job_name, sagemaker_session=None):
-        """Attach to an existing training job.
-
-        Create an ``Estimator`` bound to an existing training job. After attaching, if
-        the training job is in a Complete status, it can be ``deploy``ed to create
-        a SageMaker ``Endpoint`` and return a ``Predictor``.
-
-        If the training job is in progress, attach will block and display log messages
-        from the training job, until the training job completes.
+    def _prepare_init_params_from_job_description(cls, job_details):
+        """Convert the job description to init params that can be handled by the class constructor.
 
         Args:
-            training_job_name (str): The name of the training job to attach to.
-            sagemaker_session (sagemaker.session.Session): Session object which manages interactions with
-                Amazon SageMaker APIs and any other AWS services needed. If not specified, the estimator creates one
-                using the default AWS configuration chain.
+            job_details: the returned job details from a describe_training_job API call.
 
         Returns:
-            sagemaker.mxnet.estimator.MXNet: ``Estimator`` with the attached training job.
+            dictionary: The transformed init_params
 
-        Raises:
-            ValueError: If `training_job_name` is None or the image name does not match the framework.
         """
-        sagemaker_session = sagemaker_session or Session()
-
-        if training_job_name is None:
-            raise ValueError("must specify training_job name")
+        init_params = super(MXNet, cls)._prepare_init_params_from_job_description(job_details)
+        framework, py_version, tag = framework_name_from_image(init_params.pop('image'))
 
-        job_details = sagemaker_session.sagemaker_client.describe_training_job(TrainingJobName=training_job_name)
-        init_params, hp, image = cls._prepare_estimator_params_from_job_description(job_details)
+        init_params['py_version'] = py_version
 
-        init_params.update({'hyperparameters': hp})
+        # We switched the image tagging scheme from a plain image version (e.g. '1.0') to a more
+        # expressive one containing the framework version, device type and Python version
+        # (e.g. '0.12-gpu-py2'). For backward compatibility, map the deprecated image tag '1.0'
+        # to framework version '0.12'; otherwise, extract the framework version from the tag itself.
+ init_params['framework_version'] = '0.12' if tag == '1.0' else framework_version_from_tag(tag) - framework, py_version = framework_name_from_image(image) - init_params.update({'py_version': py_version}) + training_job_name = init_params['base_job_name'] if framework != cls.__framework_name__: raise ValueError("Training job: {} didn't use image for requested framework".format(training_job_name)) - return super(MXNet, cls).attach(training_job_name=None, sagemaker_session=sagemaker_session, **init_params) + return init_params diff --git a/src/sagemaker/mxnet/model.py b/src/sagemaker/mxnet/model.py index 931b50676a..f0bd7b94f5 100644 --- a/src/sagemaker/mxnet/model.py +++ b/src/sagemaker/mxnet/model.py @@ -1,4 +1,4 @@ -# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of @@ -13,6 +13,7 @@ import sagemaker from sagemaker.fw_utils import create_image_uri from sagemaker.model import FrameworkModel, MODEL_SERVER_WORKERS_PARAM_NAME +from sagemaker.mxnet.defaults import MXNET_VERSION from sagemaker.predictor import RealTimePredictor, json_serializer, json_deserializer from sagemaker.utils import name_from_image @@ -20,7 +21,8 @@ class MXNetPredictor(RealTimePredictor): """A RealTimePredictor for inference against MXNet Endpoints. - This is able to serialize Python lists and numpy arrays to multidimensional tensors for MXNet inference.""" + This is able to serialize Python lists, dictionaries, and numpy arrays to multidimensional tensors for MXNet + inference.""" def __init__(self, endpoint_name, sagemaker_session=None): """Initialize an ``MXNetPredictor``. @@ -39,7 +41,7 @@ class MXNetModel(FrameworkModel): __framework_name__ = 'mxnet' - def __init__(self, model_data, role, entry_point, image=None, py_version='py2', + def __init__(self, model_data, role, entry_point, image=None, py_version='py2', framework_version=MXNET_VERSION, predictor_cls=MXNetPredictor, model_server_workers=None, **kwargs): """Initialize an MXNetModel. @@ -53,6 +55,7 @@ def __init__(self, model_data, role, entry_point, image=None, py_version='py2', as the entry point to model hosting. This should be compatible with either Python 2.7 or Python 3.5. image (str): A Docker image URI (default: None). If not specified, a default image for MXNet will be used. py_version (str): Python version you want to use for executing your model training code (default: 'py2'). + framework_version (str): MXNet version you want to use for executing your model training code. predictor_cls (callable[str, sagemaker.session.Session]): A function to call to create a predictor with an endpoint name and SageMaker ``Session``. If specified, ``deploy()`` returns the result of invoking this function on the created endpoint name. 
@@ -63,6 +66,7 @@ def __init__(self, model_data, role, entry_point, image=None, py_version='py2', super(MXNetModel, self).__init__(model_data, image, role, entry_point, predictor_cls=predictor_cls, **kwargs) self.py_version = py_version + self.framework_version = framework_version self.model_server_workers = model_server_workers def prepare_container_def(self, instance_type): @@ -77,8 +81,8 @@ def prepare_container_def(self, instance_type): deploy_image = self.image if not deploy_image: region_name = self.sagemaker_session.boto_session.region_name - deploy_image = create_image_uri(region_name, self.__framework_name__, instance_type, self.py_version, - sagemaker.mxnet.DOCKER_TAG) + deploy_image = create_image_uri(region_name, self.__framework_name__, instance_type, + self.framework_version, self.py_version) deploy_key_prefix = self.key_prefix or self.name or name_from_image(deploy_image) self._upload_code(deploy_key_prefix) deploy_env = dict(self.env) diff --git a/src/sagemaker/predictor.py b/src/sagemaker/predictor.py index 7afc3dbbc8..2e3ed0101b 100644 --- a/src/sagemaker/predictor.py +++ b/src/sagemaker/predictor.py @@ -240,7 +240,12 @@ def __call__(self, data): if isinstance(data, list): if not len(data) > 0: raise ValueError("empty array can't be serialized") - return _json_serialize_python_array(data) + return _json_serialize_python_object(data) + + if isinstance(data, dict): + if not len(data.keys()) > 0: + raise ValueError("empty dictionary can't be serialized") + return _json_serialize_python_object(data) # files and buffers if hasattr(data, 'read'): @@ -254,10 +259,10 @@ def __call__(self, data): def _json_serialize_numpy_array(data): # numpy arrays can't be serialized but we know they have uniform type - return _json_serialize_python_array(data.tolist()) + return _json_serialize_python_object(data.tolist()) -def _json_serialize_python_array(data): +def _json_serialize_python_object(data): return _json_serialize_object(data) diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py index 0a175825b3..cf7ace5c93 100644 --- a/src/sagemaker/session.py +++ b/src/sagemaker/session.py @@ -195,17 +195,9 @@ def train(self, image, input_mode, input_config, role, job_name, output_config, a directory in the Docker container. * 'Pipe' - Amazon SageMaker streams data directly from S3 to the container via a Unix-named pipe. - input_config (str or dict or sagemaker.session.s3_input): Information about the training data. - This can be one of three types: - - * (str) - the S3 location where training data is saved. - * (dict[str, str] or dict[str, sagemaker.session.s3_input]) - If using multiple channels for - training data, you can specify a dict mapping channel names - to strings or :func:`~sagemaker.session.s3_input` objects. - * (sagemaker.session.s3_input) - channel configuration for S3 data sources that can provide - additional information about the training dataset. See :func:`sagemaker.session.s3_input` - for full details. - + input_config (list): A list of Channel objects. Each channel is a named input source. Please refer to + the format details described: + https://botocore.readthedocs.io/en/latest/reference/services/sagemaker.html#SageMaker.Client.create_training_job role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs that create Amazon SageMaker endpoints use this role to access training data and model artifacts. You must grant sufficient permissions to this role. 
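For reference, a minimal example of the Channel-list shape now expected for ``input_config`` (field names follow the ``CreateTrainingJob`` API; the bucket and prefix are illustrative):

```python
# One named channel pointing at an S3 prefix; additional channels are further list entries.
input_config = [{
    'ChannelName': 'train',
    'DataSource': {
        'S3DataSource': {
            'S3DataType': 'S3Prefix',
            'S3Uri': 's3://mybucket/train',
            'S3DataDistributionType': 'FullyReplicated',
        }
    },
}]
```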
@@ -522,8 +514,8 @@ def endpoint_from_production_variants(self, name, production_variants, wait=True def expand_role(self, role): """Expand an IAM role name into an ARN. - If the role is already in the form of an ARN, then the role is simply returned. Otherwise, the role - is formatted as an ARN, using the current account as the IAM role's AWS account. + If the role is already in the form of an ARN, then the role is simply returned. Otherwise we retrieve the full + ARN and return it. Args: role (str): An AWS IAM role (either name or full ARN). @@ -534,8 +526,7 @@ def expand_role(self, role): if '/' in role: return role else: - account = self.boto_session.client('sts').get_caller_identity()['Account'] - return 'arn:aws:iam::{}:role/{}'.format(account, role) + return boto3.resource("iam").Role(role).arn def get_caller_identity_arn(self): """Returns the ARN user or role whose credentials are used to call the API. @@ -654,6 +645,9 @@ def logs_for_job(self, job_name, wait=False, poll=5): # noqa: C901 - suppress c if dot: print() print('===== Job Complete =====') + # Customers are not billed for hardware provisioning, so billable time is less than total time + billable_time = (description['TrainingEndTime'] - description['TrainingStartTime']) * instance_count + print('Billable seconds:', int(billable_time.total_seconds()) + 1) def container_def(image, model_data_url=None, env=None): diff --git a/src/sagemaker/tensorflow/__init__.py b/src/sagemaker/tensorflow/__init__.py index 8f8940cdd5..e6e93d4b01 100644 --- a/src/sagemaker/tensorflow/__init__.py +++ b/src/sagemaker/tensorflow/__init__.py @@ -28,6 +28,4 @@ from sagemaker.tensorflow.estimator import TensorFlow # noqa: E402 from sagemaker.tensorflow.model import TensorFlowModel, TensorFlowPredictor # noqa: E402 -DOCKER_TAG = "1.0" - __all__ = [TensorFlow, TensorFlowModel, TensorFlowPredictor] diff --git a/src/sagemaker/tensorflow/defaults.py b/src/sagemaker/tensorflow/defaults.py new file mode 100644 index 0000000000..8f6baf6ff1 --- /dev/null +++ b/src/sagemaker/tensorflow/defaults.py @@ -0,0 +1,13 @@ +# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +TF_VERSION = '1.5' diff --git a/src/sagemaker/tensorflow/estimator.py b/src/sagemaker/tensorflow/estimator.py index 1545b57bf2..dd4624d3f1 100644 --- a/src/sagemaker/tensorflow/estimator.py +++ b/src/sagemaker/tensorflow/estimator.py @@ -1,4 +1,4 @@ -# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of @@ -11,16 +11,15 @@ # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. 
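Stepping back to the Session change above, here is a small worked example of the billable-seconds computation, assuming the ``TrainingStartTime``/``TrainingEndTime`` values come back as ``datetime`` objects (sample values are made up):

```python
from datetime import datetime, timedelta

start = datetime(2018, 1, 1, 12, 0, 0)   # stands in for TrainingStartTime
end = start + timedelta(seconds=90)      # stands in for TrainingEndTime
instance_count = 2

# timedelta * int scales the duration across instances, matching the code above.
billable = (end - start) * instance_count
print('Billable seconds:', int(billable.total_seconds()) + 1)  # -> 181
```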
import logging +import os import subprocess import tempfile import threading -import os - -import sagemaker.tensorflow from sagemaker.estimator import Framework -from sagemaker.fw_utils import create_image_uri, framework_name_from_image -from sagemaker.session import Session +from sagemaker.fw_utils import create_image_uri, framework_name_from_image, framework_version_from_tag + +from sagemaker.tensorflow.defaults import TF_VERSION from sagemaker.tensorflow.model import TensorFlowModel logging.basicConfig() @@ -136,7 +135,8 @@ class TensorFlow(Framework): __framework_name__ = 'tensorflow' - def __init__(self, training_steps=None, evaluation_steps=None, checkpoint_path=None, py_version="py2", **kwargs): + def __init__(self, training_steps=None, evaluation_steps=None, checkpoint_path=None, py_version="py2", + framework_version=TF_VERSION, **kwargs): """Initialize an ``TensorFlow`` estimator. Args: training_steps (int): Perform this many steps of training. `None`, the default means train forever. @@ -145,11 +145,14 @@ def __init__(self, training_steps=None, evaluation_steps=None, checkpoint_path=N checkpoint_path (str): Identifies S3 location where checkpoint data during model training can be saved (default: None). For distributed model training, this parameter is required. py_version (str): Python version you want to use for executing your model training code (default: 'py2'). + framework_version (str): TensorFlow version you want to use for executing your model training code. + List of supported versions https://github.com/aws/sagemaker-python-sdk#tensorflow-sagemaker-estimators **kwargs: Additional kwargs passed to the Framework constructor. """ super(TensorFlow, self).__init__(**kwargs) self.checkpoint_path = checkpoint_path self.py_version = py_version + self.framework_version = framework_version self.training_steps = training_steps self.evaluation_steps = evaluation_steps @@ -180,6 +183,9 @@ def fit(self, inputs, wait=True, logs=True, job_name=None, run_tensorboard_local def fit_super(): super(TensorFlow, self).fit(inputs, wait, logs, job_name) + if run_tensorboard_locally and wait is False: + raise ValueError("Tensorboard is not supported with async fit") + if run_tensorboard_locally: tensorboard = Tensorboard(self) tensorboard.validate_requirements() @@ -193,48 +199,38 @@ def fit_super(): fit_super() @classmethod - def attach(cls, training_job_name, sagemaker_session=None): - """Attach to an existing training job. - - Create an ``Estimator`` bound to an existing training job. After attaching, if - the training job is in a Complete status, it can be ``deploy``ed to create - a SageMaker ``Endpoint`` and return a ``Predictor``. - - If the training job is in progress, attach will block and display log messages - from the training job, until the training job completes. + def _prepare_init_params_from_job_description(cls, job_details): + """Convert the job description to init params that can be handled by the class constructor Args: - training_job_name (str): The name of the training job to attach to. - sagemaker_session (sagemaker.session.Session): Session object which manages interactions with - Amazon SageMaker APIs and any other AWS services needed. If not specified, the estimator creates one - using the default AWS configuration chain. + job_details: the returned job details from a describe_training_job API call. Returns: - sagemaker.tensorflow.estimator.TensorFlow: ``Estimator`` with the attached training job. 
+            dictionary: The transformed init_params
 
-        Raises:
-            ValueError: If `training_job_name` is None or the image name does not match the framework.
         """
-        sagemaker_session = sagemaker_session or Session()
-
-        if training_job_name is None:
-            raise ValueError("must specify training_job name")
-
-        job_details = sagemaker_session.sagemaker_client.describe_training_job(TrainingJobName=training_job_name)
-        init_params, hp, image = cls._prepare_estimator_params_from_job_description(job_details)
+        init_params = super(TensorFlow, cls)._prepare_init_params_from_job_description(job_details)
 
-        updated_params = cls._update_init_params(hp, ['checkpoint_path', 'training_steps', 'evaluation_steps'])
-        init_params.update(updated_params)
+        # Move some of the tensorflow specific init params from hyperparameters into the main init params.
+        for argument in ['checkpoint_path', 'training_steps', 'evaluation_steps']:
+            value = init_params['hyperparameters'].pop(argument, None)
+            if value is not None:
+                init_params[argument] = value
 
-        init_params.update({'hyperparameters': hp})
+        framework, py_version, tag = framework_name_from_image(init_params.pop('image'))
+        init_params['py_version'] = py_version
 
-        framework, py_version = framework_name_from_image(image)
-        init_params.update({'py_version': py_version})
+        # We switched the image tagging scheme from a plain image version (e.g. '1.0') to a more
+        # expressive one containing the framework version, device type and Python version
+        # (e.g. '1.5-gpu-py2'). For backward compatibility, map the deprecated image tag '1.0'
+        # to framework version '1.4'; otherwise, extract the framework version from the tag itself.
+        init_params['framework_version'] = '1.4' if tag == '1.0' else framework_version_from_tag(tag)
 
+        training_job_name = init_params['base_job_name']
         if framework != cls.__framework_name__:
             raise ValueError("Training job: {} didn't use image for requested framework".format(training_job_name))
 
-        return super(TensorFlow, cls).attach(training_job_name=None, sagemaker_session=sagemaker_session, **init_params)
+        return init_params
 
     def train_image(self):
         """Return the Docker image to use for training.
@@ -246,8 +242,7 @@
             str: The URI of the Docker image.
         """
         return create_image_uri(self.sagemaker_session.boto_session.region_name, self.__framework_name__,
-                                self.train_instance_type, py_version=self.py_version,
-                                tag=sagemaker.tensorflow.DOCKER_TAG)
+                                self.train_instance_type, self.framework_version, py_version=self.py_version)
 
     def create_model(self, model_server_workers=None):
         """Create a SageMaker ``TensorFlowModel`` object that can be deployed to an ``Endpoint``.
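To illustrate the round trip, here is a hedged sketch of recovering a framework version from the new-style tags, mirroring ``framework_version_from_tag`` and the backward-compatibility mapping above (the function name is illustrative):

```python
import re

def example_version_from_tag(image_tag):
    # New-style tags look like '1.5.0-gpu-py2'.
    match = re.match(r'^(.*)-(cpu|gpu)-(py2|py3)$', image_tag)
    return None if match is None else match.group(1)

assert example_version_from_tag('1.5.0-gpu-py2') == '1.5.0'
# The deprecated bare tag '1.0' predates the scheme, so it does not parse;
# attach() maps it to '1.4' for TensorFlow (and '0.12' for MXNet) instead.
assert example_version_from_tag('1.0') is None
```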
@@ -263,8 +258,8 @@ def create_model(self, model_server_workers=None):
         return TensorFlowModel(self.model_data, self.role, self.entry_point, source_dir=self.source_dir,
                                enable_cloudwatch_metrics=self.enable_cloudwatch_metrics, name=self._current_job_name,
                                container_log_level=self.container_log_level, code_location=self.code_location,
-                               py_version=self.py_version, model_server_workers=model_server_workers,
-                               sagemaker_session=self.sagemaker_session)
+                               py_version=self.py_version, framework_version=self.framework_version,
+                               model_server_workers=model_server_workers, sagemaker_session=self.sagemaker_session)
 
     def hyperparameters(self):
         """Return hyperparameters used by your custom TensorFlow code during model training."""
diff --git a/src/sagemaker/tensorflow/model.py b/src/sagemaker/tensorflow/model.py
index 20a47188e2..e4d2a5811c 100644
--- a/src/sagemaker/tensorflow/model.py
+++ b/src/sagemaker/tensorflow/model.py
@@ -1,4 +1,4 @@
-# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"). You
 # may not use this file except in compliance with the License. A copy of
@@ -14,13 +14,16 @@
 from sagemaker.fw_utils import create_image_uri
 from sagemaker.model import FrameworkModel, MODEL_SERVER_WORKERS_PARAM_NAME
 from sagemaker.predictor import RealTimePredictor
+from sagemaker.tensorflow.defaults import TF_VERSION
 from sagemaker.tensorflow.predictor import tf_json_serializer, tf_json_deserializer
 from sagemaker.utils import name_from_image
 
 
 class TensorFlowPredictor(RealTimePredictor):
-    """A ``RealTimePredictor`` for inference against MXNet ``Endpoint``s."""
+    """A ``RealTimePredictor`` for inference against TensorFlow ``Endpoint``s.
 
+    This is able to serialize Python lists, dictionaries, and numpy arrays to multidimensional tensors for TensorFlow
+    inference."""
 
     def __init__(self, endpoint_name, sagemaker_session=None):
         """Initialize a ``TensorFlowPredictor``.
@@ -38,7 +41,7 @@ class TensorFlowModel(FrameworkModel):
 
     __framework_name__ = 'tensorflow'
 
-    def __init__(self, model_data, role, entry_point, image=None, py_version='py2',
+    def __init__(self, model_data, role, entry_point, image=None, py_version='py2', framework_version=TF_VERSION,
                  predictor_cls=TensorFlowPredictor, model_server_workers=None, **kwargs):
         """Initialize a TensorFlowModel.
 
@@ -53,6 +56,7 @@
             image (str): A Docker image URI (default: None). If not specified, a default image for TensorFlow will be used.
             py_version (str): Python version you want to use for executing your model training code (default: 'py2').
+            framework_version (str): TensorFlow version you want to use for executing your model training code.
             predictor_cls (callable[str, sagemaker.session.Session]): A function to call to create a predictor
                 with an endpoint name and SageMaker ``Session``. If specified, ``deploy()`` returns the result of
                 invoking this function on the created endpoint name.
@@ -63,6 +67,7 @@ def __init__(self, model_data, role, entry_point, image=None, py_version='py2', super(TensorFlowModel, self).__init__(model_data, image, role, entry_point, predictor_cls=predictor_cls, **kwargs) self.py_version = py_version + self.framework_version = framework_version self.model_server_workers = model_server_workers def prepare_container_def(self, instance_type): @@ -79,8 +84,8 @@ def prepare_container_def(self, instance_type): deploy_image = self.image if not deploy_image: region_name = self.sagemaker_session.boto_session.region_name - deploy_image = create_image_uri(region_name, self.__framework_name__, instance_type, self.py_version, - sagemaker.tensorflow.DOCKER_TAG) + deploy_image = create_image_uri(region_name, self.__framework_name__, instance_type, + self.framework_version, self.py_version) deploy_key_prefix = self.key_prefix or self.name or name_from_image(deploy_image) self._upload_code(deploy_key_prefix) deploy_env = dict(self.env) diff --git a/src/sagemaker/tensorflow/predictor.py b/src/sagemaker/tensorflow/predictor.py index 6d326c56ce..1b6dd260c6 100644 --- a/src/sagemaker/tensorflow/predictor.py +++ b/src/sagemaker/tensorflow/predictor.py @@ -32,7 +32,7 @@ def __init__(self): self.content_type = CONTENT_TYPE_OCTET_STREAM def __call__(self, data): - # isintance does not work here because a same protobuf message can be imported from a different module. + # isinstance does not work here because a same protobuf message can be imported from a different module. # for example sagemaker.tensorflow.tensorflow_serving.regression_pb2 and tensorflow_serving.apis.regression_pb2 predict_type = data.__class__.__name__ diff --git a/tests/component/__init__.py b/tests/component/__init__.py new file mode 100644 index 0000000000..e1e6f4571f --- /dev/null +++ b/tests/component/__init__.py @@ -0,0 +1,12 @@ +# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. diff --git a/tests/component/test_mxnet_estimator.py b/tests/component/test_mxnet_estimator.py new file mode 100644 index 0000000000..2a6bb088d3 --- /dev/null +++ b/tests/component/test_mxnet_estimator.py @@ -0,0 +1,68 @@ +# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
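Tying back to the predictor change above (the JSON serializer now accepting dictionaries), here is a minimal sketch of the accepted inputs; the function name is illustrative, not the SDK's:

```python
import json
import numpy as np

def example_json_serialize(data):
    # Mirrors the serializer's rules: numpy arrays are converted via tolist(),
    # and empty lists or dicts are rejected before serialization.
    if isinstance(data, np.ndarray):
        data = data.tolist()
    if isinstance(data, (list, dict)) and len(data) == 0:
        raise ValueError("empty input can't be serialized")
    return json.dumps(data)

print(example_json_serialize({'instances': [[1.0, 2.0]]}))  # {"instances": [[1.0, 2.0]]}
print(example_json_serialize(np.array([1, 2, 3])))          # [1, 2, 3]
```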
+import pytest
+from mock import Mock
+from sagemaker.mxnet import MXNet
+
+
+SCRIPT = 'resnet_cifar_10.py'
+TIMESTAMP = '2017-11-06-14:14:15.673'
+TIME = 1510006209.073025
+BUCKET_NAME = 'mybucket'
+INSTANCE_COUNT = 1
+INSTANCE_TYPE_GPU = 'ml.p2.xlarge'
+INSTANCE_TYPE_CPU = 'ml.m4.xlarge'
+CPU_IMAGE_NAME = 'sagemaker-mxnet-py2-cpu'
+GPU_IMAGE_NAME = 'sagemaker-mxnet-py2-gpu'
+REGION = 'us-west-2'
+IMAGE_URI_FORMAT_STRING = "520713654638.dkr.ecr.{}.amazonaws.com/{}:{}-{}-{}"
+ROLE = 'SagemakerRole'
+SOURCE_DIR = 's3://fefergerger'
+
+
+@pytest.fixture()
+def sagemaker_session():
+    boto_mock = Mock(name='boto_session', region_name=REGION)
+    ims = Mock(name='sagemaker_session', boto_session=boto_mock)
+    ims.default_bucket = Mock(name='default_bucket', return_value=BUCKET_NAME)
+    ims.expand_role = Mock(name="expand_role", return_value=ROLE)
+    ims.sagemaker_client.describe_training_job = Mock(return_value={'ModelArtifacts':
+                                                                    {'S3ModelArtifacts': 's3://m/m.tar.gz'}})
+    return ims
+
+
+# Test that we pass all necessary fields from estimator to the session when we call deploy
+def test_deploy(sagemaker_session, mxnet_version):
+    estimator = MXNet(entry_point=SCRIPT, source_dir=SOURCE_DIR, role=ROLE,
+                      framework_version=mxnet_version,
+                      train_instance_count=2, train_instance_type=INSTANCE_TYPE_GPU,
+                      sagemaker_session=sagemaker_session,
+                      base_job_name='test-cifar')
+
+    estimator.fit('s3://mybucket/train')
+    print('job succeeded: {}'.format(estimator.latest_training_job.name))
+
+    estimator.deploy(initial_instance_count=1, instance_type=INSTANCE_TYPE_CPU)
+    image = IMAGE_URI_FORMAT_STRING.format(REGION, CPU_IMAGE_NAME, mxnet_version, 'cpu', 'py2')
+    sagemaker_session.create_model.assert_called_with(
+        estimator._current_job_name,
+        ROLE,
+        {'Environment':
+         {'SAGEMAKER_ENABLE_CLOUDWATCH_METRICS': 'false',
+          'SAGEMAKER_CONTAINER_LOG_LEVEL': '20',
+          'SAGEMAKER_SUBMIT_DIRECTORY': SOURCE_DIR,
+          'SAGEMAKER_REGION': REGION,
+          'SAGEMAKER_PROGRAM': SCRIPT},
+         'Image': image,
+         'ModelDataUrl': 's3://m/m.tar.gz'})
diff --git a/tests/component/test_tf_estimator.py b/tests/component/test_tf_estimator.py
new file mode 100644
index 0000000000..5d718558a6
--- /dev/null
+++ b/tests/component/test_tf_estimator.py
@@ -0,0 +1,68 @@
+# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+import pytest
+from mock import Mock
+from sagemaker.tensorflow import TensorFlow
+
+
+SCRIPT = 'resnet_cifar_10.py'
+TIMESTAMP = '2017-11-06-14:14:15.673'
+TIME = 1510006209.073025
+BUCKET_NAME = 'mybucket'
+INSTANCE_COUNT = 1
+INSTANCE_TYPE_GPU = 'ml.p2.xlarge'
+INSTANCE_TYPE_CPU = 'ml.m4.xlarge'
+CPU_IMAGE_NAME = 'sagemaker-tensorflow-py2-cpu'
+GPU_IMAGE_NAME = 'sagemaker-tensorflow-py2-gpu'
+REGION = 'us-west-2'
+IMAGE_URI_FORMAT_STRING = "520713654638.dkr.ecr.{}.amazonaws.com/{}:{}-{}-{}"
+ROLE = 'SagemakerRole'
+SOURCE_DIR = 's3://fefergerger'
+
+
+@pytest.fixture()
+def sagemaker_session():
+    boto_mock = Mock(name='boto_session', region_name=REGION)
+    ims = Mock(name='sagemaker_session', boto_session=boto_mock)
+    ims.default_bucket = Mock(name='default_bucket', return_value=BUCKET_NAME)
+    ims.expand_role = Mock(name="expand_role", return_value=ROLE)
+    ims.sagemaker_client.describe_training_job = Mock(return_value={'ModelArtifacts':
+                                                                    {'S3ModelArtifacts': 's3://m/m.tar.gz'}})
+    return ims
+
+
+# Test that we pass all necessary fields from estimator to the session when we call deploy
+def test_deploy(sagemaker_session, tf_version):
+    estimator = TensorFlow(entry_point=SCRIPT, source_dir=SOURCE_DIR, role=ROLE,
+                           framework_version=tf_version,
+                           train_instance_count=2, train_instance_type=INSTANCE_TYPE_CPU,
+                           sagemaker_session=sagemaker_session,
+                           base_job_name='test-cifar')
+
+    estimator.fit('s3://mybucket/train')
+    print('job succeeded: {}'.format(estimator.latest_training_job.name))
+
+    estimator.deploy(initial_instance_count=1, instance_type=INSTANCE_TYPE_CPU)
+    image = IMAGE_URI_FORMAT_STRING.format(REGION, CPU_IMAGE_NAME, tf_version, 'cpu', 'py2')
+    sagemaker_session.create_model.assert_called_with(
+        estimator._current_job_name,
+        ROLE,
+        {'Environment':
+         {'SAGEMAKER_ENABLE_CLOUDWATCH_METRICS': 'false',
+          'SAGEMAKER_CONTAINER_LOG_LEVEL': '20',
+          'SAGEMAKER_SUBMIT_DIRECTORY': SOURCE_DIR,
+          'SAGEMAKER_REGION': REGION,
+          'SAGEMAKER_PROGRAM': SCRIPT},
+         'Image': image,
+         'ModelDataUrl': 's3://m/m.tar.gz'})
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000000..5c79f39272
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,33 @@
+# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+import pytest
+
+
+@pytest.fixture(scope='module', params=["1.4", "1.4.1", "1.5", "1.5.0"])
+def tf_version(request):
+    return request.param
+
+
+@pytest.fixture(scope='module', params=["0.12", "0.12.1", "1.0", "1.0.0"])
+def mxnet_version(request):
+    return request.param
+
+
+@pytest.fixture(scope='module', params=["1.4.1", "1.5.0"])
+def tf_full_version(request):
+    return request.param
+
+
+@pytest.fixture(scope='module', params=["0.12.1", "1.0.0"])
+def mxnet_full_version(request):
+    return request.param
diff --git a/tests/data/lda/nips-train_1.pbr b/tests/data/lda/nips-train_1.pbr
new file mode 100644
index 0000000000..193cc98860
Binary files /dev/null and b/tests/data/lda/nips-train_1.pbr differ
diff --git a/tests/data/ntm/nips-train_1.pbr b/tests/data/ntm/nips-train_1.pbr
new file mode 100644
index 0000000000..193cc98860
Binary files /dev/null and b/tests/data/ntm/nips-train_1.pbr differ
diff --git a/tests/integ/record_set.py b/tests/integ/record_set.py
new file mode 100644
index 0000000000..587ed88d14
--- /dev/null
+++ b/tests/integ/record_set.py
@@ -0,0 +1,23 @@
+from six.moves.urllib.parse import urlparse
+
+from sagemaker.amazon.amazon_estimator import RecordSet
+from sagemaker.utils import sagemaker_timestamp
+
+
+def prepare_record_set_from_local_files(dir_path, destination, num_records, feature_dim, sagemaker_session):
+    """Build a :class:`~RecordSet` by pointing to local files.
+
+    Args:
+        dir_path (string): Path to local directory from which the files will be uploaded.
+        destination (string): S3 path to upload the file to.
+        num_records (int): Number of records in all the files.
+        feature_dim (int): Number of features in the data set.
+        sagemaker_session (sagemaker.session.Session): Session object to manage interactions with Amazon SageMaker APIs.
+    Returns:
+        RecordSet: A RecordSet specified by S3Prefix to be used in training.
+    """
+    key_prefix = urlparse(destination).path
+    key_prefix = key_prefix + '{}-{}'.format("testfiles", sagemaker_timestamp())
+    key_prefix = key_prefix.lstrip('/')
+    uploaded_location = sagemaker_session.upload_data(path=dir_path, key_prefix=key_prefix)
+    return RecordSet(uploaded_location, num_records, feature_dim, s3_data_type='S3Prefix')
diff --git a/tests/integ/test_byo_estimator.py b/tests/integ/test_byo_estimator.py
index d0c1a18e07..71f3c86862 100644
--- a/tests/integ/test_byo_estimator.py
+++ b/tests/integ/test_byo_estimator.py
@@ -29,6 +29,13 @@
 from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name
 
 
+def fm_serializer(data):
+    js = {'instances': []}
+    for row in data:
+        js['instances'].append({'features': row.tolist()})
+    return json.dumps(js)
+
+
 def test_byo_estimator():
     """Use Factorization Machines algorithm as an example here.
@@ -79,12 +86,6 @@ def test_byo_estimator(): endpoint_name = name_from_base('byo') - def fm_serializer(data): - js = {'instances': []} - for row in data: - js['instances'].append({'features': row.tolist()}) - return json.dumps(js) - with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20): model = estimator.create_model() predictor = model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name) @@ -97,3 +98,61 @@ def fm_serializer(data): assert len(result['predictions']) == 10 for prediction in result['predictions']: assert prediction['score'] is not None + + +def test_async_byo_estimator(): + image_name = registry(REGION) + "/factorization-machines:1" + endpoint_name = name_from_base('byo') + training_job_name = "" + + with timeout(minutes=5): + sagemaker_session = sagemaker.Session(boto_session=boto3.Session(region_name=REGION)) + data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz') + pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'} + + with gzip.open(data_path, 'rb') as f: + train_set, _, _ = pickle.load(f, **pickle_args) + + # take 100 examples for faster execution + vectors = np.array([t.tolist() for t in train_set[0][:100]]).astype('float32') + labels = np.where(np.array([t.tolist() for t in train_set[1][:100]]) == 0, 1.0, 0.0).astype('float32') + + buf = io.BytesIO() + write_numpy_to_dense_tensor(buf, vectors, labels) + buf.seek(0) + + bucket = sagemaker_session.default_bucket() + prefix = 'test_byo_estimator' + key = 'recordio-pb-data' + boto3.resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'train', key)).upload_fileobj(buf) + s3_train_data = 's3://{}/{}/train/{}'.format(bucket, prefix, key) + + estimator = Estimator(image_name=image_name, + role='SageMakerRole', train_instance_count=1, + train_instance_type='ml.c4.xlarge', + sagemaker_session=sagemaker_session, base_job_name='test-byo') + + estimator.set_hyperparameters(num_factors=10, + feature_dim=784, + mini_batch_size=100, + predictor_type='binary_classifier') + + # training labels must be 'float32' + estimator.fit({'train': s3_train_data}, wait=False) + training_job_name = estimator.latest_training_job.name + + with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=30): + estimator = Estimator.attach(training_job_name=training_job_name, sagemaker_session=sagemaker_session) + model = estimator.create_model() + predictor = model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name) + predictor.serializer = fm_serializer + predictor.content_type = 'application/json' + predictor.deserializer = sagemaker.predictor.json_deserializer + + result = predictor.predict(train_set[0][:10]) + + assert len(result['predictions']) == 10 + for prediction in result['predictions']: + assert prediction['score'] is not None + + assert estimator.train_image() == image_name diff --git a/tests/integ/test_factorization_machines.py b/tests/integ/test_factorization_machines.py index 76fbb93ac7..cc04ed8d6a 100644 --- a/tests/integ/test_factorization_machines.py +++ b/tests/integ/test_factorization_machines.py @@ -13,6 +13,7 @@ import gzip import pickle import sys +import time import boto3 import os @@ -53,3 +54,45 @@ def test_factorization_machines(): assert len(result) == 10 for record in result: assert record.label["score"] is not None + + +def test_async_factorization_machines(): + + training_job_name = "" + endpoint_name = name_from_base('factorizationMachines') + sagemaker_session = 
sagemaker.Session(boto_session=boto3.Session(region_name=REGION))
+
+    with timeout(minutes=5):
+
+        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
+        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}
+
+        # Load the data into memory as numpy arrays
+        with gzip.open(data_path, 'rb') as f:
+            train_set, _, _ = pickle.load(f, **pickle_args)
+
+        fm = FactorizationMachines(role='SageMakerRole', train_instance_count=1,
+                                   train_instance_type='ml.c4.xlarge',
+                                   num_factors=10, predictor_type='regressor',
+                                   epochs=2, clip_gradient=1e2, eps=0.001, rescale_grad=1.0 / 100,
+                                   sagemaker_session=sagemaker_session, base_job_name='test-fm')
+
+        # training labels must be 'float32'
+        fm.fit(fm.record_set(train_set[0][:200], train_set[1][:200].astype('float32')), wait=False)
+        training_job_name = fm.latest_training_job.name
+
+        print("Detached from training job. Will re-attach in 20 seconds")
+        time.sleep(20)
+        print("Attaching now...")
+
+    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=35):
+        estimator = FactorizationMachines.attach(training_job_name=training_job_name,
+                                                 sagemaker_session=sagemaker_session)
+        model = FactorizationMachinesModel(estimator.model_data, role='SageMakerRole',
+                                           sagemaker_session=sagemaker_session)
+        predictor = model.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)
+        result = predictor.predict(train_set[0][:10])
+
+        assert len(result) == 10
+        for record in result:
+            assert record.label["score"] is not None
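All of the new async tests in this change follow the same detach/re-attach pattern; a minimal generic sketch of that flow, with estimator and records standing in for any of the concrete cases in this diff:

# Generic sketch of the async pattern exercised by these tests.
# `estimator` is any SageMaker estimator; `records` is its prepared input.
estimator.fit(records, wait=False)            # returns once the training job is created
job_name = estimator.latest_training_job.name

# Later, possibly from a different process, reconnect by job name.
# attach() waits for the job to complete before returning.
attached = type(estimator).attach(training_job_name=job_name,
                                  sagemaker_session=estimator.sagemaker_session)
predictor = attached.deploy(1, 'ml.c4.xlarge')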
diff --git a/tests/integ/test_kmeans.py b/tests/integ/test_kmeans.py
index 09780f69cd..bcaba3ce02 100644
--- a/tests/integ/test_kmeans.py
+++ b/tests/integ/test_kmeans.py
@@ -16,6 +16,7 @@
 import boto3
 import os
+import time
 
 import sagemaker
 from sagemaker import KMeans, KMeansModel
@@ -60,3 +61,49 @@ def test_kmeans():
     for record in result:
         assert record.label["closest_cluster"] is not None
         assert record.label["distance_to_cluster"] is not None
+
+
+def test_async_kmeans():
+
+    training_job_name = ""
+    endpoint_name = name_from_base('kmeans')
+
+    with timeout(minutes=5):
+        sagemaker_session = sagemaker.Session(boto_session=boto3.Session(region_name=REGION))
+        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
+        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}
+
+        # Load the data into memory as numpy arrays
+        with gzip.open(data_path, 'rb') as f:
+            train_set, _, _ = pickle.load(f, **pickle_args)
+
+        kmeans = KMeans(role='SageMakerRole', train_instance_count=1,
+                        train_instance_type='ml.c4.xlarge',
+                        k=10, sagemaker_session=sagemaker_session, base_job_name='test-kmeans')
+
+        kmeans.init_method = 'random'
+        kmeans.max_iterations = 1
+        kmeans.tol = 1
+        kmeans.num_trials = 1
+        kmeans.local_init_method = 'kmeans++'
+        kmeans.half_life_time_size = 1
+        kmeans.epochs = 1
+        kmeans.center_factor = 1
+
+        kmeans.fit(kmeans.record_set(train_set[0][:100]), wait=False)
+        training_job_name = kmeans.latest_training_job.name
+
+        print("Detached from training job. Will re-attach in 20 seconds")
+        time.sleep(20)
+        print("Attaching now...")
+
+    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=35):
+        estimator = KMeans.attach(training_job_name=training_job_name, sagemaker_session=sagemaker_session)
+        model = KMeansModel(estimator.model_data, role='SageMakerRole', sagemaker_session=sagemaker_session)
+        predictor = model.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)
+        result = predictor.predict(train_set[0][:10])
+
+        assert len(result) == 10
+        for record in result:
+            assert record.label["closest_cluster"] is not None
+            assert record.label["distance_to_cluster"] is not None
diff --git a/tests/integ/test_lda.py b/tests/integ/test_lda.py
new file mode 100644
index 0000000000..5e7619796e
--- /dev/null
+++ b/tests/integ/test_lda.py
@@ -0,0 +1,57 @@
+# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+import boto3
+import numpy as np
+import os
+
+import sagemaker
+from sagemaker import LDA, LDAModel
+from sagemaker.amazon.common import read_records
+from sagemaker.utils import name_from_base
+
+from tests.integ import DATA_DIR, REGION
+from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name
+from tests.integ.record_set import prepare_record_set_from_local_files
+
+
+def test_lda():
+
+    with timeout(minutes=15):
+        sagemaker_session = sagemaker.Session(boto_session=boto3.Session(region_name=REGION))
+        data_path = os.path.join(DATA_DIR, 'lda')
+        data_filename = 'nips-train_1.pbr'
+
+        with open(os.path.join(data_path, data_filename), 'rb') as f:
+            all_records = read_records(f)
+
+        # all records are assumed to share the same feature dimension
+        feature_num = int(all_records[0].features['values'].float32_tensor.shape[0])
+
+        lda = LDA(role='SageMakerRole', train_instance_type='ml.c4.xlarge', num_topics=10,
+                  sagemaker_session=sagemaker_session, base_job_name='test-lda')
+
+        record_set = prepare_record_set_from_local_files(data_path, lda.data_location,
+                                                         len(all_records), feature_num, sagemaker_session)
+        lda.fit(record_set, 100)
+
+    endpoint_name = name_from_base('lda')
+    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20):
+        model = LDAModel(lda.model_data, role='SageMakerRole', sagemaker_session=sagemaker_session)
+        predictor = model.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)
+
+        predict_input = np.random.rand(1, feature_num)
+        result = predictor.predict(predict_input)
+
+        assert len(result) == 1
+        for record in result:
+            assert record.label["topic_mixture"] is not None
diff --git a/tests/integ/test_linear_learner.py b/tests/integ/test_linear_learner.py
index 31b9f506f3..1db3534b5b 100644
--- a/tests/integ/test_linear_learner.py
+++ b/tests/integ/test_linear_learner.py
@@ -14,12 +14,14 @@
 import os
 import pickle
 import sys
+import time
 
 import pytest  # noqa
 import boto3
 import numpy as np
+
 import sagemaker
 from sagemaker.amazon.linear_learner import LinearLearner, LinearLearnerModel
-from sagemaker.utils import name_from_base
+from sagemaker.utils import name_from_base,
sagemaker_timestamp from tests.integ import DATA_DIR, REGION from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name @@ -42,7 +44,7 @@ def test_linear_learner(): ll = LinearLearner('SageMakerRole', 1, 'ml.c4.2xlarge', base_job_name='test-linear-learner', sagemaker_session=sagemaker_session) ll.binary_classifier_model_selection_criteria = 'accuracy' - ll.target_reacall = 0.5 + ll.target_recall = 0.5 ll.target_precision = 0.5 ll.positive_example_weight_mult = 0.1 ll.epochs = 1 @@ -70,13 +72,81 @@ def test_linear_learner(): ll.normalize_label = False ll.unbias_data = True ll.unbias_label = False - ll.num_point_for_scala = 10000 + ll.num_point_for_scaler = 10000 ll.fit(ll.record_set(train_set[0][:200], train_set[1][:200])) endpoint_name = name_from_base('linear-learner') with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20): - model = LinearLearnerModel(ll.model_data, role='SageMakerRole', sagemaker_session=sagemaker_session) + predictor = ll.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name) + + result = predictor.predict(train_set[0][0:100]) + assert len(result) == 100 + for record in result: + assert record.label["predicted_label"] is not None + assert record.label["score"] is not None + + +def test_async_linear_learner(): + + training_job_name = "" + endpoint_name = 'test-linear-learner-async-{}'.format(sagemaker_timestamp()) + sagemaker_session = sagemaker.Session(boto_session=boto3.Session(region_name=REGION)) + + with timeout(minutes=5): + + data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz') + pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'} + + # Load the data into memory as numpy arrays + with gzip.open(data_path, 'rb') as f: + train_set, _, _ = pickle.load(f, **pickle_args) + + train_set[1][:100] = 1 + train_set[1][100:200] = 0 + train_set = train_set[0], train_set[1].astype(np.dtype('float32')) + + ll = LinearLearner('SageMakerRole', 1, 'ml.c4.2xlarge', base_job_name='test-linear-learner', + sagemaker_session=sagemaker_session) + ll.binary_classifier_model_selection_criteria = 'accuracy' + ll.target_recall = 0.5 + ll.target_precision = 0.5 + ll.positive_example_weight_mult = 0.1 + ll.epochs = 1 + ll.predictor_type = 'binary_classifier' + ll.use_bias = True + ll.num_models = 1 + ll.num_calibration_samples = 1 + ll.init_method = 'uniform' + ll.init_scale = 0.5 + ll.init_sigma = 0.2 + ll.init_bias = 5 + ll.optimizer = 'adam' + ll.loss = 'logistic' + ll.wd = 0.5 + ll.l1 = 0.5 + ll.momentum = 0.5 + ll.learning_rate = 0.1 + ll.beta_1 = 0.1 + ll.beta_2 = 0.1 + ll.use_lr_scheduler = True + ll.lr_scheduler_step = 2 + ll.lr_scheduler_factor = 0.5 + ll.lr_scheduler_minimum_lr = 0.1 + ll.normalize_data = False + ll.normalize_label = False + ll.unbias_data = True + ll.unbias_label = False + ll.num_point_for_scaler = 10000 + ll.fit(ll.record_set(train_set[0][:200], train_set[1][:200]), wait=False) + training_job_name = ll.latest_training_job.name + + print("Waiting to re-attach to the training job: %s" % training_job_name) + time.sleep(20) + + with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=35): + estimator = LinearLearner.attach(training_job_name=training_job_name, sagemaker_session=sagemaker_session) + model = LinearLearnerModel(estimator.model_data, role='SageMakerRole', sagemaker_session=sagemaker_session) predictor = model.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name) result = predictor.predict(train_set[0][0:100]) diff --git 
a/tests/integ/test_mxnet_train.py b/tests/integ/test_mxnet_train.py index 94feb6e9e1..2508c6dd30 100644 --- a/tests/integ/test_mxnet_train.py +++ b/tests/integ/test_mxnet_train.py @@ -19,6 +19,7 @@ from sagemaker import Session from sagemaker.mxnet.estimator import MXNet from sagemaker.mxnet.model import MXNetModel +from sagemaker.utils import sagemaker_timestamp from tests.integ import DATA_DIR, REGION from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name @@ -30,12 +31,12 @@ def sagemaker_session(): @pytest.fixture(scope='module') -def mxnet_training_job(sagemaker_session): +def mxnet_training_job(sagemaker_session, mxnet_full_version): with timeout(minutes=15): script_path = os.path.join(DATA_DIR, 'mxnet_mnist', 'mnist.py') data_path = os.path.join(DATA_DIR, 'mxnet_mnist') - mx = MXNet(entry_point=script_path, role='SageMakerRole', + mx = MXNet(entry_point=script_path, role='SageMakerRole', framework_version=mxnet_full_version, train_instance_count=1, train_instance_type='ml.c4.xlarge', sagemaker_session=sagemaker_session) @@ -49,7 +50,7 @@ def mxnet_training_job(sagemaker_session): def test_attach_deploy(mxnet_training_job, sagemaker_session): - endpoint_name = 'test-mxnet-attach-deploy-{}'.format(int(time.time())) + endpoint_name = 'test-mxnet-attach-deploy-{}'.format(sagemaker_timestamp()) with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20): estimator = MXNet.attach(mxnet_training_job, sagemaker_session=sagemaker_session) @@ -58,8 +59,40 @@ def test_attach_deploy(mxnet_training_job, sagemaker_session): predictor.predict(data) +def test_async_fit(sagemaker_session, mxnet_full_version): + + training_job_name = "" + endpoint_name = 'test-mxnet-attach-deploy-{}'.format(sagemaker_timestamp()) + + with timeout(minutes=5): + script_path = os.path.join(DATA_DIR, 'mxnet_mnist', 'mnist.py') + data_path = os.path.join(DATA_DIR, 'mxnet_mnist') + + mx = MXNet(entry_point=script_path, role='SageMakerRole', framework_version=mxnet_full_version, + train_instance_count=1, train_instance_type='ml.c4.xlarge', + sagemaker_session=sagemaker_session) + + train_input = mx.sagemaker_session.upload_data(path=os.path.join(data_path, 'train'), + key_prefix='integ-test-data/mxnet_mnist/train') + test_input = mx.sagemaker_session.upload_data(path=os.path.join(data_path, 'test'), + key_prefix='integ-test-data/mxnet_mnist/test') + + mx.fit({'train': train_input, 'test': test_input}, wait=False) + training_job_name = mx.latest_training_job.name + + print("Waiting to re-attach to the training job: %s" % training_job_name) + time.sleep(20) + + with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=35): + print("Re-attaching now to: %s" % training_job_name) + estimator = MXNet.attach(training_job_name=training_job_name, sagemaker_session=sagemaker_session) + predictor = estimator.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name) + data = numpy.zeros(shape=(1, 1, 28, 28)) + predictor.predict(data) + + def test_deploy_model(mxnet_training_job, sagemaker_session): - endpoint_name = 'test-mxnet-deploy-model-{}'.format(int(time.time())) + endpoint_name = 'test-mxnet-deploy-model-{}'.format(sagemaker_timestamp()) with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20): desc = sagemaker_session.sagemaker_client.describe_training_job(TrainingJobName=mxnet_training_job) @@ -72,12 +105,12 @@ def test_deploy_model(mxnet_training_job, sagemaker_session): predictor.predict(data) -def 
test_failed_training_job(sagemaker_session):
+def test_failed_training_job(sagemaker_session, mxnet_full_version):
     with timeout(minutes=15):
         script_path = os.path.join(DATA_DIR, 'mxnet_mnist', 'failure_script.py')
         data_path = os.path.join(DATA_DIR, 'mxnet_mnist')
 
-        mx = MXNet(entry_point=script_path, role='SageMakerRole',
+        mx = MXNet(entry_point=script_path, role='SageMakerRole', framework_version=mxnet_full_version,
                    train_instance_count=1, train_instance_type='ml.c4.xlarge',
                    sagemaker_session=sagemaker_session)
diff --git a/tests/integ/test_ntm.py b/tests/integ/test_ntm.py
new file mode 100644
index 0000000000..6be0a2f3e9
--- /dev/null
+++ b/tests/integ/test_ntm.py
@@ -0,0 +1,57 @@
+# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+import boto3
+import numpy as np
+import os
+
+import sagemaker
+from sagemaker import NTM, NTMModel
+from sagemaker.amazon.common import read_records
+from sagemaker.utils import name_from_base
+
+from tests.integ import DATA_DIR, REGION
+from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name
+from tests.integ.record_set import prepare_record_set_from_local_files
+
+
+def test_ntm():
+
+    with timeout(minutes=15):
+        sagemaker_session = sagemaker.Session(boto_session=boto3.Session(region_name=REGION))
+        data_path = os.path.join(DATA_DIR, 'ntm')
+        data_filename = 'nips-train_1.pbr'
+
+        with open(os.path.join(data_path, data_filename), 'rb') as f:
+            all_records = read_records(f)
+
+        # all records are assumed to share the same feature dimension
+        feature_num = int(all_records[0].features['values'].float32_tensor.shape[0])
+
+        ntm = NTM(role='SageMakerRole', train_instance_count=1, train_instance_type='ml.c4.xlarge', num_topics=10,
+                  sagemaker_session=sagemaker_session, base_job_name='test-ntm')
+
+        record_set = prepare_record_set_from_local_files(data_path, ntm.data_location,
+                                                         len(all_records), feature_num, sagemaker_session)
+        ntm.fit(record_set, None)
+
+    endpoint_name = name_from_base('ntm')
+    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20):
+        model = NTMModel(ntm.model_data, role='SageMakerRole', sagemaker_session=sagemaker_session)
+        predictor = model.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)
+
+        predict_input = np.random.rand(1, feature_num)
+        result = predictor.predict(predict_input)
+
+        assert len(result) == 1
+        for record in result:
+            assert record.label["topic_weights"] is not None
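The LDA and NTM tests both go through the new prepare_record_set_from_local_files helper; a sketch of the intended call, where 'local-dir', all_records, and feature_num are illustrative stand-ins:

# Illustrative usage of the helper added in tests/integ/record_set.py.
record_set = prepare_record_set_from_local_files(
    dir_path='local-dir',                 # local directory of RecordIO-protobuf files
    destination=estimator.data_location,  # S3 prefix chosen by the estimator
    num_records=len(all_records),
    feature_dim=feature_num,
    sagemaker_session=sagemaker_session)
# The files land under a timestamped key prefix and come back wrapped in a
# RecordSet(s3_data_type='S3Prefix') that fit() can consume directly.
estimator.fit(record_set)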
diff --git a/tests/integ/test_pca.py b/tests/integ/test_pca.py
index adec22345e..219e722c13 100644
--- a/tests/integ/test_pca.py
+++ b/tests/integ/test_pca.py
@@ -14,8 +14,11 @@
 import os
 import pickle
 import sys
+import time
+
 import pytest  # noqa
 import boto3
+
 import sagemaker
 import sagemaker.amazon.pca
 from sagemaker.utils import name_from_base
@@ -55,3 +58,47 @@ def test_pca():
     assert len(result) == 5
     for record in result:
         assert record.label["projection"] is not None
+
+
+def test_async_pca():
+
+    training_job_name = ""
+    endpoint_name = name_from_base('pca')
+    sagemaker_session = sagemaker.Session(boto_session=boto3.Session(region_name=REGION))
+
+    with timeout(minutes=5):
+
+        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
+        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}
+
+        # Load the data into memory as numpy arrays
+        with gzip.open(data_path, 'rb') as f:
+            train_set, _, _ = pickle.load(f, **pickle_args)
+
+        pca = sagemaker.amazon.pca.PCA(role='SageMakerRole', train_instance_count=1,
+                                       train_instance_type='ml.m4.xlarge',
+                                       num_components=48, sagemaker_session=sagemaker_session, base_job_name='test-pca')
+
+        pca.algorithm_mode = 'randomized'
+        pca.subtract_mean = True
+        pca.extra_components = 5
+        pca.fit(pca.record_set(train_set[0][:100]), wait=False)
+        training_job_name = pca.latest_training_job.name
+
+        print("Detached from training job. Will re-attach in 20 seconds")
+        time.sleep(20)
+
+    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=35):
+        estimator = sagemaker.amazon.pca.PCA.attach(training_job_name=training_job_name,
+                                                    sagemaker_session=sagemaker_session)
+
+        model = sagemaker.amazon.pca.PCAModel(estimator.model_data, role='SageMakerRole',
+                                              sagemaker_session=sagemaker_session)
+        predictor = model.deploy(initial_instance_count=1, instance_type="ml.c4.xlarge",
+                                 endpoint_name=endpoint_name)
+
+        result = predictor.predict(train_set[0][:5])
+
+        assert len(result) == 5
+        for record in result:
+            assert record.label["projection"] is not None
diff --git a/tests/integ/test_tf.py b/tests/integ/test_tf.py
index bb602b83fe..0add482acf 100644
--- a/tests/integ/test_tf.py
+++ b/tests/integ/test_tf.py
@@ -1,4 +1,4 @@
-# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"). You
 # may not use this file except in compliance with the License.
A copy of @@ -13,6 +13,7 @@ import boto3 import os import pytest +import time from sagemaker import Session from sagemaker.tensorflow import TensorFlow @@ -27,12 +28,13 @@ def sagemaker_session(): return Session(boto_session=boto3.Session(region_name=REGION)) -def test_tf(sagemaker_session): +def test_tf(sagemaker_session, tf_full_version): with timeout(minutes=15): script_path = os.path.join(DATA_DIR, 'iris', 'iris-dnn-classifier.py') estimator = TensorFlow(entry_point=script_path, role='SageMakerRole', + framework_version=tf_full_version, training_steps=1, evaluation_steps=1, hyperparameters={'input_tensor_name': 'inputs'}, @@ -48,15 +50,51 @@ def test_tf(sagemaker_session): with timeout_and_delete_endpoint(estimator=estimator, minutes=20): json_predictor = estimator.deploy(initial_instance_count=1, instance_type='ml.c4.xlarge') + features = [6.4, 3.2, 4.5, 1.5] + dict_result = json_predictor.predict({'inputs': features}) + print('predict result: {}'.format(dict_result)) + list_result = json_predictor.predict(features) + print('predict result: {}'.format(list_result)) + + assert dict_result == list_result + + +def test_tf_async(sagemaker_session, tf_full_version): + + training_job_name = "" + with timeout(minutes=5): + script_path = os.path.join(DATA_DIR, 'iris', 'iris-dnn-classifier.py') + + estimator = TensorFlow(entry_point=script_path, + role='SageMakerRole', + framework_version=tf_full_version, + training_steps=1, + evaluation_steps=1, + hyperparameters={'input_tensor_name': 'inputs'}, + train_instance_count=1, + train_instance_type='ml.c4.xlarge', + sagemaker_session=sagemaker_session, + base_job_name='test-tf') + + inputs = estimator.sagemaker_session.upload_data(path=DATA_PATH, key_prefix='integ-test-data/tf_iris') + estimator.fit(inputs, wait=False) + training_job_name = estimator.latest_training_job.name + time.sleep(20) + + with timeout_and_delete_endpoint(estimator=estimator, minutes=35): + estimator = TensorFlow.attach(training_job_name=training_job_name, sagemaker_session=sagemaker_session) + json_predictor = estimator.deploy(initial_instance_count=1, instance_type='ml.c4.xlarge') + result = json_predictor.predict([6.4, 3.2, 4.5, 1.5]) print('predict result: {}'.format(result)) -def test_failed_tf_training(sagemaker_session): +def test_failed_tf_training(sagemaker_session, tf_full_version): with timeout(minutes=15): script_path = os.path.join(DATA_DIR, 'iris', 'failure_script.py') estimator = TensorFlow(entry_point=script_path, role='SageMakerRole', + framework_version=tf_full_version, training_steps=1, evaluation_steps=1, hyperparameters={'input_tensor_name': 'inputs'}, diff --git a/tests/integ/test_tf_cifar.py b/tests/integ/test_tf_cifar.py index c08426a93a..967fb24188 100644 --- a/tests/integ/test_tf_cifar.py +++ b/tests/integ/test_tf_cifar.py @@ -1,4 +1,4 @@ -# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. 
A copy of @@ -38,20 +38,20 @@ def __call__(self, data): return pickle.dumps(data, protocol=2) -def test_cifar(sagemaker_session): +def test_cifar(sagemaker_session, tf_full_version): with timeout(minutes=15): script_path = os.path.join(DATA_DIR, 'cifar_10', 'source') dataset_path = os.path.join(DATA_DIR, 'cifar_10', 'data') estimator = TensorFlow(entry_point='resnet_cifar_10.py', source_dir=script_path, role='SageMakerRole', - training_steps=20, evaluation_steps=5, + framework_version=tf_full_version, training_steps=20, evaluation_steps=5, train_instance_count=2, train_instance_type='ml.p2.xlarge', sagemaker_session=sagemaker_session, base_job_name='test-cifar') inputs = estimator.sagemaker_session.upload_data(path=dataset_path, key_prefix='data/cifar10') - estimator.fit(inputs) + estimator.fit(inputs, logs=False) print('job succeeded: {}'.format(estimator.latest_training_job.name)) with timeout_and_delete_endpoint(estimator=estimator, minutes=20): diff --git a/tests/unit/test_amazon_estimator.py b/tests/unit/test_amazon_estimator.py index a9eb15886e..005a3ee9d8 100644 --- a/tests/unit/test_amazon_estimator.py +++ b/tests/unit/test_amazon_estimator.py @@ -18,7 +18,6 @@ from sagemaker.amazon.pca import PCA from sagemaker.amazon.amazon_estimator import upload_numpy_to_s3_shards, _build_shards, registry - COMMON_ARGS = {'role': 'myrole', 'train_instance_count': 1, 'train_instance_type': 'ml.c4.xlarge'} REGION = "us-west-2" diff --git a/tests/unit/test_estimator.py b/tests/unit/test_estimator.py index eb999c4a18..1c957b8b98 100644 --- a/tests/unit/test_estimator.py +++ b/tests/unit/test_estimator.py @@ -17,7 +17,6 @@ from mock import Mock, patch from sagemaker.estimator import Estimator, Framework, _TrainingJob -from sagemaker.fw_utils import framework_name_from_image from sagemaker.session import s3_input from sagemaker.model import FrameworkModel from sagemaker.predictor import RealTimePredictor @@ -70,6 +69,12 @@ def train_image(self): def create_model(self): return DummyFrameworkModel(self.sagemaker_session) + @classmethod + def _prepare_init_params_from_job_description(cls, job_details): + init_params = super(DummyFramework, cls)._prepare_init_params_from_job_description(job_details) + init_params.pop("image", None) + return init_params + class DummyFrameworkModel(FrameworkModel): @@ -251,12 +256,6 @@ def test_attach_framework(sagemaker_session): assert framework_estimator.entry_point == 'iris-dnn-classifier.py' -def test_attach_no_job_name_framework(sagemaker_session): - with pytest.raises(ValueError) as error: - Framework.attach(training_job_name=None, sagemaker_session=sagemaker_session) - assert 'must specify training_job name' in str(error) - - def test_fit_then_fit_again(sagemaker_session): fw = DummyFramework(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE, @@ -324,18 +323,6 @@ def test_init_with_source_dir_s3(strftime, sagemaker_session): assert actual_hyperparameter == expected_hyperparameters -def test_framework_name_from_framework_image(): - framework, py_ver = framework_name_from_image('123.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet-py2-gpu:1') - assert framework == 'mxnet' - assert py_ver == 'py2' - - -def test_framework_name_from_other(): - framework, py_ver = framework_name_from_image('123.dkr.ecr.us-west-2.amazonaws.com/sagemaker-myown-py2-gpu:1') - assert framework is None - assert py_ver is None - - # _TrainingJob 'utils' def test_format_input_single_unamed_channel(): 
input_dict = _TrainingJob._format_inputs_to_input_config('s3://blah/blah')
diff --git a/tests/unit/test_fw_utils.py b/tests/unit/test_fw_utils.py
new file mode 100644
index 0000000000..c170728b4f
--- /dev/null
+++ b/tests/unit/test_fw_utils.py
@@ -0,0 +1,158 @@
+# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+import inspect
+from mock import Mock
+import os
+from sagemaker.fw_utils import create_image_uri, framework_name_from_image, framework_version_from_tag
+from sagemaker.fw_utils import tar_and_upload_dir, parse_s3_url, UploadedCode
+import pytest
+
+
+DATA_DIR = 'data_dir'
+BUCKET_NAME = 'mybucket'
+ROLE = 'Sagemaker'
+REGION = 'us-west-2'
+SCRIPT_PATH = 'script.py'
+
+
+@pytest.fixture()
+def sagemaker_session():
+    boto_mock = Mock(name='boto_session', region_name=REGION)
+    ims = Mock(name='sagemaker_session', boto_session=boto_mock)
+    ims.default_bucket = Mock(name='default_bucket', return_value=BUCKET_NAME)
+    ims.expand_role = Mock(name="expand_role", return_value=ROLE)
+    ims.sagemaker_client.describe_training_job = Mock(return_value={'ModelArtifacts':
+                                                                    {'S3ModelArtifacts': 's3://m/m.tar.gz'}})
+    return ims
+
+
+def test_create_image_uri_cpu():
+    image_uri = create_image_uri('mars-south-3', 'mlfw', 'any-non-gpu-device', '1.0rc', 'py2', '23')
+    assert image_uri == '23.dkr.ecr.mars-south-3.amazonaws.com/sagemaker-mlfw-py2-cpu:1.0rc-cpu-py2'
+
+
+def test_create_image_uri_gpu():
+    image_uri = create_image_uri('mars-south-3', 'mlfw', 'ml.p3.2xlarge', '1.0rc', 'py3', '23')
+    assert image_uri == '23.dkr.ecr.mars-south-3.amazonaws.com/sagemaker-mlfw-py3-gpu:1.0rc-gpu-py3'
+
+
+def test_create_image_uri_default_account():
+    image_uri = create_image_uri('mars-south-3', 'mlfw', 'ml.p3.2xlarge', '1.0rc', 'py3')
+    assert image_uri == '520713654638.dkr.ecr.mars-south-3.amazonaws.com/sagemaker-mlfw-py3-gpu:1.0rc-gpu-py3'
+
+
+def test_tar_and_upload_dir_s3(sagemaker_session):
+    bucket = 'mybucket'
+    s3_key_prefix = 'something/source'
+    script = 'mnist.py'
+    directory = 's3://m'
+    result = tar_and_upload_dir(sagemaker_session, bucket, s3_key_prefix, script, directory)
+    assert result == UploadedCode('s3://m', 'mnist.py')
+
+
+def test_tar_and_upload_dir_does_not_exist(sagemaker_session):
+    bucket = 'mybucket'
+    s3_key_prefix = 'something/source'
+    script = 'mnist.py'
+    directory = ' !@#$%^&*()path probably is not there.!@#$%^&*()'
+    with pytest.raises(ValueError) as error:
+        tar_and_upload_dir(sagemaker_session, bucket, s3_key_prefix, script, directory)
+    assert 'does not exist' in str(error)
+
+
+def test_tar_and_upload_dir_is_not_directory(sagemaker_session):
+    bucket = 'mybucket'
+    s3_key_prefix = 'something/source'
+    script = 'mnist.py'
+    directory = inspect.getfile(inspect.currentframe())
+    with pytest.raises(ValueError) as error:
+        tar_and_upload_dir(sagemaker_session, bucket, s3_key_prefix, script, directory)
+    assert 'is not a directory' in str(error)
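For context on the tar_and_upload_dir tests above and below, a sketch of the helper's contract as these tests pin it down; the session and paths are stand-ins:

# Sketch only; the real implementation lives in sagemaker/fw_utils.py.
# If `directory` is already an S3 URI, nothing is uploaded and the URI is
# returned unchanged; otherwise the directory is tarred and uploaded to
# s3://<bucket>/<s3_key_prefix>/sourcedir.tar.gz.
result = tar_and_upload_dir(session, 'mybucket', 'something/source', 'train.py', '/path/to/code')
assert result == UploadedCode('s3://mybucket/something/source/sourcedir.tar.gz', 'train.py')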
+def test_tar_and_upload_dir_file_not_in_dir(sagemaker_session):
+    bucket = 'mybucket'
+    s3_key_prefix = 'something/source'
+    script = ' !@#$%^&*() .myscript. !@#$%^&*() '
+    directory = '.'
+    with pytest.raises(ValueError) as error:
+        tar_and_upload_dir(sagemaker_session, bucket, s3_key_prefix, script, directory)
+    assert 'No file named' in str(error)
+
+
+def test_tar_and_upload_dir_not_s3(sagemaker_session):
+    bucket = 'mybucket'
+    s3_key_prefix = 'something/source'
+    script = os.path.basename(__file__)
+    directory = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+    result = tar_and_upload_dir(sagemaker_session, bucket, s3_key_prefix, script, directory)
+    assert result == UploadedCode('s3://{}/{}/sourcedir.tar.gz'.format(bucket, s3_key_prefix), script)
+
+
+def test_framework_name_from_framework_image():
+    image_name = '123.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet-py3-gpu:2.5.6-gpu-py2'
+    framework, py_ver, tag = framework_name_from_image(image_name)
+    assert framework == 'mxnet'
+    assert py_ver == 'py3'
+    assert tag == '2.5.6-gpu-py2'
+
+
+def test_framework_name_from_wrong_framework():
+    framework, py_ver, tag = framework_name_from_image('123.dkr.ecr.us-west-2.amazonaws.com/sagemaker-myown-py2-gpu:1')
+    assert framework is None
+    assert py_ver is None
+    assert tag is None
+
+
+def test_framework_name_from_wrong_python():
+    framework, py_ver, tag = framework_name_from_image('123.dkr.ecr.us-west-2.amazonaws.com/sagemaker-myown-py4-gpu:1')
+    assert framework is None
+    assert py_ver is None
+    assert tag is None
+
+
+def test_framework_name_from_wrong_device():
+    framework, py_ver, tag = framework_name_from_image('123.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet-py2-myown:1')
+    assert framework is None
+    assert py_ver is None
+    assert tag is None
+
+
+def test_framework_name_from_image_any_tag():
+    image_name = '123.dkr.ecr.us-west-2.amazonaws.com/sagemaker-tensorflow-py2-cpu:any-tag'
+    framework, py_ver, tag = framework_name_from_image(image_name)
+    assert framework == 'tensorflow'
+    assert py_ver == 'py2'
+    assert tag == 'any-tag'
+
+
+def test_framework_version_from_tag():
+    version = framework_version_from_tag('1.5rc-keras-gpu-py2')
+    assert version == '1.5rc-keras'
+
+
+def test_framework_version_from_tag_other():
+    version = framework_version_from_tag('weird-tag-py2')
+    assert version is None
+
+
+def test_parse_s3_url():
+    bucket, key_prefix = parse_s3_url('s3://bucket/code_location')
+    assert 'bucket' == bucket
+    assert 'code_location' == key_prefix
+
+
+def test_parse_s3_url_fail():
+    with pytest.raises(ValueError) as error:
+        parse_s3_url('t3://code_location')
+    assert 'Expecting \'s3\' scheme' in str(error)
diff --git a/tests/unit/test_hyperparameter.py b/tests/unit/test_hyperparameter.py
index c168f3275e..db7ed3f64c 100644
--- a/tests/unit/test_hyperparameter.py
+++ b/tests/unit/test_hyperparameter.py
@@ -16,9 +16,9 @@
 class Test(object):
-    blank = Hyperparameter(name="some-name")
+    blank = Hyperparameter(name="some-name", data_type=int)
     elizabeth = Hyperparameter(name='elizabeth')
-    validated = Hyperparameter(name="validated", validate=lambda value: value > 55)
+    validated = Hyperparameter(name="validated", validate=lambda value: value > 55, data_type=int)
 
 
 def test_blank_access():
@@ -55,3 +55,20 @@ def test_validated():
     x.validated = 66
     with pytest.raises(ValueError):
         x.validated = 23
+
+
+def test_data_type():
+    x = Test()
+    x.validated = 66
+    assert type(x.validated) == Test.__dict__["validated"].data_type
+
+
+def test_from_string():
+    x = Test()
+    value = 65
+
+    x.validated = value
+    from_api = str(value)
+
+    x.validated = from_api
+    assert x.validated == value
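The data_type argument added above is what lets hyperparameter values survive the round trip through the string-typed SageMaker API; a small sketch of the coercion behavior these tests pin down, reusing the Test class from this diff:

# With data_type=int, a value that arrives as a string (as it would from the
# DescribeTrainingJob API) is converted back to int on assignment, before the
# validate callable runs.
x = Test()
x.validated = '65'            # string in ...
assert x.validated == 65      # ... int out, and still > 55, so validation passes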
diff --git a/tests/unit/test_lda.py b/tests/unit/test_lda.py
new file mode 100644
index 0000000000..59618a6dd9
--- /dev/null
+++ b/tests/unit/test_lda.py
@@ -0,0 +1,224 @@
+# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+import pytest
+from mock import Mock, patch
+
+from sagemaker.amazon.lda import LDA, LDAPredictor
+from sagemaker.amazon.amazon_estimator import registry, RecordSet
+
+ROLE = 'myrole'
+TRAIN_INSTANCE_COUNT = 1
+TRAIN_INSTANCE_TYPE = 'ml.c4.xlarge'
+NUM_TOPICS = 3
+
+COMMON_TRAIN_ARGS = {'role': ROLE, 'train_instance_type': TRAIN_INSTANCE_TYPE}
+ALL_REQ_ARGS = dict({'num_topics': NUM_TOPICS}, **COMMON_TRAIN_ARGS)
+
+REGION = "us-west-2"
+BUCKET_NAME = "Some-Bucket"
+
+DESCRIBE_TRAINING_JOB_RESULT = {
+    'ModelArtifacts': {
+        'S3ModelArtifacts': "s3://bucket/model.tar.gz"
+    }
+}
+
+
+@pytest.fixture()
+def sagemaker_session():
+    boto_mock = Mock(name='boto_session', region_name=REGION)
+    sms = Mock(name='sagemaker_session', boto_session=boto_mock)
+    sms.boto_region_name = REGION
+    sms.default_bucket = Mock(name='default_bucket', return_value=BUCKET_NAME)
+    sms.sagemaker_client.describe_training_job = Mock(name='describe_training_job',
+                                                      return_value=DESCRIBE_TRAINING_JOB_RESULT)
+
+    return sms
+
+
+def test_init_required_positional(sagemaker_session):
+    lda = LDA(ROLE, TRAIN_INSTANCE_TYPE, NUM_TOPICS, sagemaker_session=sagemaker_session)
+    assert lda.role == ROLE
+    assert lda.train_instance_count == TRAIN_INSTANCE_COUNT
+    assert lda.train_instance_type == TRAIN_INSTANCE_TYPE
+    assert lda.num_topics == NUM_TOPICS
+
+
+def test_init_required_named(sagemaker_session):
+    lda = LDA(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
+
+    assert lda.role == COMMON_TRAIN_ARGS['role']
+    assert lda.train_instance_count == TRAIN_INSTANCE_COUNT
+    assert lda.train_instance_type == COMMON_TRAIN_ARGS['train_instance_type']
+    assert lda.num_topics == ALL_REQ_ARGS['num_topics']
+
+
+def test_all_hyperparameters(sagemaker_session):
+    lda = LDA(sagemaker_session=sagemaker_session,
+              alpha0=2.2, max_restarts=3, max_iterations=10, tol=3.3,
+              **ALL_REQ_ARGS)
+    assert lda.hyperparameters() == dict(
+        num_topics=str(ALL_REQ_ARGS['num_topics']),
+        alpha0='2.2',
+        max_restarts='3',
+        max_iterations='10',
+        tol='3.3',
+    )
+
+
+def test_image(sagemaker_session):
+    lda = LDA(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
+    assert lda.train_image() == registry(REGION, "lda") + '/lda:1'
+
+
+def test_num_topics_validation_fail_type(sagemaker_session):
+    with pytest.raises(ValueError):
+        LDA(num_topics='other', sagemaker_session=sagemaker_session, **COMMON_TRAIN_ARGS)
+
+
+def test_num_topics_validation_fail_value(sagemaker_session):
+    with pytest.raises(ValueError):
+        LDA(num_topics=0, sagemaker_session=sagemaker_session, **COMMON_TRAIN_ARGS)
+
+
+def test_alpha0_validation_fail_type(sagemaker_session):
+    with pytest.raises(ValueError):
+        LDA(alpha0='other', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
+def test_max_restarts_validation_fail_type(sagemaker_session):
+    with pytest.raises(ValueError):
+        LDA(max_restarts='other', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
+
+
+def test_max_restarts_validation_fail_type2(sagemaker_session):
+    with pytest.raises(ValueError):
+        LDA(max_restarts=0.1, sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
+
+
+def test_max_restarts_validation_fail_value(sagemaker_session):
+    with pytest.raises(ValueError):
+        LDA(max_restarts=0, sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
+
+
+def test_max_iterations_validation_fail_type(sagemaker_session):
+    with pytest.raises(ValueError):
+        LDA(max_iterations='other', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
+
+
+def test_max_iterations_validation_fail_value(sagemaker_session):
+    with pytest.raises(ValueError):
+        LDA(max_iterations=0, sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
+
+
+def test_tol_validation_fail_type(sagemaker_session):
+    with pytest.raises(ValueError):
+        LDA(tol='other', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
+
+
+def test_tol_validation_fail_value(sagemaker_session):
+    with pytest.raises(ValueError):
+        LDA(tol=0, sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
+
+
+PREFIX = "prefix"
+BASE_TRAIN_CALL = {
+    'hyperparameters': {},
+    'image': registry(REGION, "lda") + '/lda:1',
+    'input_config': [{
+        'DataSource': {
+            'S3DataSource': {
+                'S3DataDistributionType': 'ShardedByS3Key',
+                'S3DataType': 'ManifestFile',
+                'S3Uri': 's3://{}/{}'.format(BUCKET_NAME, PREFIX)
+            }
+        },
+        'ChannelName': 'train'
+    }],
+    'input_mode': 'File',
+    'output_config': {'S3OutputPath': 's3://{}/'.format(BUCKET_NAME)},
+    'resource_config': {
+        'InstanceCount': TRAIN_INSTANCE_COUNT,
+        'InstanceType': TRAIN_INSTANCE_TYPE,
+        'VolumeSizeInGB': 30
+    },
+    'stop_condition': {'MaxRuntimeInSeconds': 86400}
+}
+
+FEATURE_DIM = 10
+MINI_BATCH_SIZE = 200
+HYPERPARAMS = {'num_topics': NUM_TOPICS, 'feature_dim': FEATURE_DIM, 'mini_batch_size': MINI_BATCH_SIZE}
+STRINGIFIED_HYPERPARAMS = dict([(x, str(y)) for x, y in HYPERPARAMS.items()])
+HP_TRAIN_CALL = dict(BASE_TRAIN_CALL)
+HP_TRAIN_CALL.update({'hyperparameters': STRINGIFIED_HYPERPARAMS})
+
+
+@patch("sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.fit")
+def test_call_fit(base_fit, sagemaker_session):
+    lda = LDA(base_job_name="lda", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
+
+    data = RecordSet("s3://{}/{}".format(BUCKET_NAME, PREFIX), num_records=1, feature_dim=FEATURE_DIM, channel='train')
+
+    lda.fit(data, MINI_BATCH_SIZE)
+
+    base_fit.assert_called_once()
+    assert len(base_fit.call_args[0]) == 2
+    assert base_fit.call_args[0][0] == data
+    assert base_fit.call_args[0][1] == MINI_BATCH_SIZE
+
+
+def test_call_fit_none_mini_batch_size(sagemaker_session):
+    lda = LDA(base_job_name="lda", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
+
+    data = RecordSet("s3://{}/{}".format(BUCKET_NAME, PREFIX), num_records=1, feature_dim=FEATURE_DIM,
+                     channel='train')
+    with pytest.raises(ValueError):
+        lda.fit(data, None)
+
+
+def test_call_fit_wrong_type_mini_batch_size(sagemaker_session):
+    lda = LDA(base_job_name="lda", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
+
+    data = RecordSet("s3://{}/{}".format(BUCKET_NAME, PREFIX), num_records=1, feature_dim=FEATURE_DIM,
+                     channel='train')
+
+    with pytest.raises(ValueError):
+        lda.fit(data, "some")
+def test_call_fit_wrong_value_mini_batch_size(sagemaker_session):
+    lda = LDA(base_job_name="lda", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
+
+    data = RecordSet("s3://{}/{}".format(BUCKET_NAME, PREFIX), num_records=1, feature_dim=FEATURE_DIM,
+                     channel='train')
+    with pytest.raises(ValueError):
+        lda.fit(data, 0)
+
+
+def test_model_image(sagemaker_session):
+    lda = LDA(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
+    data = RecordSet("s3://{}/{}".format(BUCKET_NAME, PREFIX), num_records=1, feature_dim=FEATURE_DIM, channel='train')
+    lda.fit(data, MINI_BATCH_SIZE)
+
+    model = lda.create_model()
+    assert model.image == registry(REGION, "lda") + '/lda:1'
+
+
+def test_predictor_type(sagemaker_session):
+    lda = LDA(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
+    data = RecordSet("s3://{}/{}".format(BUCKET_NAME, PREFIX), num_records=1, feature_dim=FEATURE_DIM, channel='train')
+    lda.fit(data, MINI_BATCH_SIZE)
+    model = lda.create_model()
+    predictor = model.deploy(1, TRAIN_INSTANCE_TYPE)
+
+    assert isinstance(predictor, LDAPredictor)
diff --git a/tests/unit/test_linear_learner.py b/tests/unit/test_linear_learner.py
new file mode 100644
index 0000000000..7ab79cb91b
--- /dev/null
+++ b/tests/unit/test_linear_learner.py
@@ -0,0 +1,554 @@
+# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+import pytest
+from mock import Mock, patch
+
+from sagemaker.amazon.linear_learner import LinearLearner, LinearLearnerPredictor
+from sagemaker.amazon.amazon_estimator import registry, RecordSet
+
+ROLE = 'myrole'
+TRAIN_INSTANCE_COUNT = 1
+TRAIN_INSTANCE_TYPE = 'ml.c4.xlarge'
+
+DEFAULT_PREDICTOR_TYPE = 'binary_classifier'
+
+REQ_ARGS = {'role': ROLE, 'train_instance_count': TRAIN_INSTANCE_COUNT, 'train_instance_type': TRAIN_INSTANCE_TYPE}
+
+REGION = "us-west-2"
+BUCKET_NAME = "Some-Bucket"
+
+DESCRIBE_TRAINING_JOB_RESULT = {
+    'ModelArtifacts': {
+        'S3ModelArtifacts': "s3://bucket/model.tar.gz"
+    }
+}
+
+
+@pytest.fixture()
+def sagemaker_session():
+    boto_mock = Mock(name='boto_session', region_name=REGION)
+    sms = Mock(name='sagemaker_session', boto_session=boto_mock)
+    sms.boto_region_name = REGION
+    sms.default_bucket = Mock(name='default_bucket', return_value=BUCKET_NAME)
+    sms.sagemaker_client.describe_training_job = Mock(name='describe_training_job',
+                                                      return_value=DESCRIBE_TRAINING_JOB_RESULT)
+
+    return sms
+
+
+def test_init_required_positional(sagemaker_session):
+    lr = LinearLearner(ROLE, TRAIN_INSTANCE_COUNT, TRAIN_INSTANCE_TYPE, sagemaker_session=sagemaker_session)
+    assert lr.role == ROLE
+    assert lr.train_instance_count == TRAIN_INSTANCE_COUNT
+    assert lr.train_instance_type == TRAIN_INSTANCE_TYPE
+    assert lr.predictor_type == DEFAULT_PREDICTOR_TYPE
+
+
+def test_init_required_named(sagemaker_session):
+    lr = LinearLearner(sagemaker_session=sagemaker_session, **REQ_ARGS)
+
+    assert lr.role == REQ_ARGS['role']
+    assert lr.train_instance_count == REQ_ARGS['train_instance_count']
+    assert lr.train_instance_type == REQ_ARGS['train_instance_type']
+    assert lr.predictor_type == DEFAULT_PREDICTOR_TYPE
+
+
+def test_all_hyperparameters(sagemaker_session):
+    lr = LinearLearner(sagemaker_session=sagemaker_session,
predictor_type='regressor', binary_classifier_model_selection_criteria='accuracy', + target_recall=0.5, target_precision=0.6, + positive_example_weight_mult=0.1, epochs=1, use_bias=True, num_models=5, + num_calibration_samples=6, init_method='uniform', init_scale=-0.1, init_sigma=0.001, + init_bias=0, optimizer='sgd', loss='logistic', wd=0.4, l1=0.04, momentum=0.1, + learning_rate=0.001, beta_1=0.2, beta_2=0.03, bias_lr_mult=5.5, bias_wd_mult=6.6, + use_lr_scheduler=False, lr_scheduler_step=2, lr_scheduler_factor=0.03, + lr_scheduler_minimum_lr=0.001, normalize_data=False, normalize_label=True, + unbias_data=True, unbias_label=False, num_point_for_scaler=3, + **REQ_ARGS) + + assert lr.hyperparameters() == dict( + predictor_type='regressor', binary_classifier_model_selection_criteria='accuracy', + target_recall='0.5', target_precision='0.6', positive_example_weight_mult='0.1', epochs='1', + use_bias='True', num_models='5', num_calibration_samples='6', init_method='uniform', + init_scale='-0.1', init_sigma='0.001', init_bias='0.0', optimizer='sgd', loss='logistic', + wd='0.4', l1='0.04', momentum='0.1', learning_rate='0.001', beta_1='0.2', beta_2='0.03', + bias_lr_mult='5.5', bias_wd_mult='6.6', use_lr_scheduler='False', lr_scheduler_step='2', + lr_scheduler_factor='0.03', lr_scheduler_minimum_lr='0.001', normalize_data='False', + normalize_label='True', unbias_data='True', unbias_label='False', num_point_for_scaler='3', + ) + + +def test_image(sagemaker_session): + lr = LinearLearner(sagemaker_session=sagemaker_session, **REQ_ARGS) + assert lr.train_image() == registry(REGION, "linear-learner") + '/linear-learner:1' + + +def test_predictor_type_fail(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(predictor_type='other', sagemaker_session=sagemaker_session, **REQ_ARGS) + + +def test_binary_classifier_model_selection_criteria_fail(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(binary_classifier_model_selection_criteria='other', + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_target_recall_fail_value_low(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(target_recall=0, + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_target_recall_fail_value_high(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(target_recall=1, + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_target_recall_fail_type(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(target_recall='blah', + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_target_precision_fail_value_low(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(target_precision=0, + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_target_precision_fail_value_high(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(target_precision=1, + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_target_precision_fail_type(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(target_precision='blah', + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_positive_example_weight_mult_fail_value(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(positive_example_weight_mult=0, + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_positive_example_weight_mult_fail_type(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(positive_example_weight_mult='blah', + 
sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_epochs_fail_value(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(epochs=0, + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_epochs_fail_type(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(epochs='blah', + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_num_models_fail_value(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(num_models=0, + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_num_models_fail_type(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(num_models='blah', + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_num_calibration_samples_fail_value(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(num_calibration_samples=0, + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_num_calibration_samples_fail_type(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(num_calibration_samples='blah', + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_init_method_fail(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(init_method='other', sagemaker_session=sagemaker_session, **REQ_ARGS) + + +def test_init_scale_fail_value_low(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(init_scale=1.01, + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_init_scale_fail_value_high(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(init_scale=1, + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_init_scale_fail_type(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(init_scale='blah', + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_init_sigma_fail_value_low(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(init_sigma=0, + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_init_sigma_fail_value_high(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(init_sigma=1, + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_init_sigma_fail_type(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(init_sigma='blah', + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_init_bias_fail(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(init_bias='other', sagemaker_session=sagemaker_session, **REQ_ARGS) + + +def test_optimizer_fail(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(optimizer='other', sagemaker_session=sagemaker_session, **REQ_ARGS) + + +def test_loss_fail(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(loss='other', sagemaker_session=sagemaker_session, **REQ_ARGS) + + +def test_wd_fail_value_low(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(wd=0, + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_wd_fail_value_high(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(wd=1, + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_wd_fail_type(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(wd='blah', + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_l1_fail_value_low(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(l1=0, + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def 
test_l1_fail_value_high(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(l1=1, + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_l1_fail_type(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(l1='blah', + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_momentum_fail_value_low(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(momentum=0, + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_momentum_fail_value_high(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(momentum=1, + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_momentum_fail_type(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(momentum='blah', + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_learning_rate_fail_value_low(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(learning_rate=0, + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_learning_rate_fail_value_high(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(learning_rate=1, + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_learning_rate_fail_type(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(learning_rate='blah', + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_beta_1_fail_value_low(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(beta_1=0, + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_beta_1_fail_value_high(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(beta_1=1, + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_beta_1_fail_type(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(beta_1='blah', + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_beta_2_fail_value_low(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(beta_2=0, + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_beta_2_fail_value_high(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(beta_2=1, + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_beta_2_fail_type(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(beta_2='blah', + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_bias_lr_mult_fail_value_low(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(bias_lr_mult=0, + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_bias_lr_mult_fail_type(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(bias_lr_mult='blah', + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_bias_wd_mult_fail_value_low(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(bias_wd_mult=0, + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_bias_wd_mult_fail_type(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(bias_wd_mult='blah', + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_lr_scheduler_step_fail_value(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(lr_scheduler_step=0, + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_lr_scheduler_step_fail_type(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(lr_scheduler_step='other', + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def 
test_lr_scheduler_factor_fail_value_low(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(lr_scheduler_factor=0, + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_lr_scheduler_factor_fail_value_high(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(lr_scheduler_factor=1, + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_lr_scheduler_factor_fail_type(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(lr_scheduler_factor='blah', + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_lr_scheduler_minimum_lr_fail_value(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(lr_scheduler_minimum_lr=0, + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_lr_scheduler_minimum_lr_fail_type(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(lr_scheduler_minimum_lr='blah', + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_num_point_for_scaler_fail_value(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(num_point_for_scaler=0, + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +def test_num_point_for_scaler_fail_type(sagemaker_session): + with pytest.raises(ValueError): + LinearLearner(num_point_for_scaler='other', + sagemaker_session=sagemaker_session, + **REQ_ARGS) + + +PREFIX = "prefix" +FEATURE_DIM = 10 +DEFAULT_MINI_BATCH_SIZE = 1000 + + +@patch("sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.fit") +def test_call_fit_calculate_batch_size_1(base_fit, sagemaker_session): + lr = LinearLearner(base_job_name="lr", sagemaker_session=sagemaker_session, **REQ_ARGS) + + data = RecordSet("s3://{}/{}".format(BUCKET_NAME, PREFIX), num_records=1, feature_dim=FEATURE_DIM, channel='train') + + lr.fit(data) + + base_fit.assert_called_once() + assert len(base_fit.call_args[0]) == 2 + assert base_fit.call_args[0][0] == data + assert base_fit.call_args[0][1] == 1 + + +@patch("sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.fit") +def test_call_fit_calculate_batch_size_2(base_fit, sagemaker_session): + lr = LinearLearner(base_job_name="lr", sagemaker_session=sagemaker_session, **REQ_ARGS) + + data = RecordSet("s3://{}/{}".format(BUCKET_NAME, PREFIX), + num_records=10000, + feature_dim=FEATURE_DIM, + channel='train') + + lr.fit(data) + + base_fit.assert_called_once() + assert len(base_fit.call_args[0]) == 2 + assert base_fit.call_args[0][0] == data + assert base_fit.call_args[0][1] == DEFAULT_MINI_BATCH_SIZE + + +@patch("sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.fit") +def test_call_fit_pass_batch_size(base_fit, sagemaker_session): + lr = LinearLearner(base_job_name="lr", sagemaker_session=sagemaker_session, **REQ_ARGS) + + data = RecordSet("s3://{}/{}".format(BUCKET_NAME, PREFIX), + num_records=10000, + feature_dim=FEATURE_DIM, + channel='train') + + lr.fit(data, 10) + + base_fit.assert_called_once() + assert len(base_fit.call_args[0]) == 2 + assert base_fit.call_args[0][0] == data + assert base_fit.call_args[0][1] == 10 + + +def test_model_image(sagemaker_session): + lr = LinearLearner(sagemaker_session=sagemaker_session, **REQ_ARGS) + data = RecordSet("s3://{}/{}".format(BUCKET_NAME, PREFIX), num_records=1, feature_dim=FEATURE_DIM, channel='train') + lr.fit(data) + + model = lr.create_model() + assert model.image == registry(REGION, 'linear-learner') + '/linear-learner:1' + + +def test_predictor_type(sagemaker_session): + lr = 
LinearLearner(sagemaker_session=sagemaker_session, **REQ_ARGS) + data = RecordSet("s3://{}/{}".format(BUCKET_NAME, PREFIX), num_records=1, feature_dim=FEATURE_DIM, channel='train') + lr.fit(data) + model = lr.create_model() + predictor = model.deploy(1, TRAIN_INSTANCE_TYPE) + + assert isinstance(predictor, LinearLearnerPredictor) diff --git a/tests/unit/test_mxnet.py b/tests/unit/test_mxnet.py index 1f09b54849..ab6f99ddb4 100644 --- a/tests/unit/test_mxnet.py +++ b/tests/unit/test_mxnet.py @@ -1,4 +1,4 @@ -# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of @@ -18,9 +18,11 @@ from mock import Mock from mock import patch -from sagemaker.mxnet import MXNet, DOCKER_TAG +from sagemaker.mxnet import defaults +from sagemaker.mxnet import MXNet from sagemaker.mxnet import MXNetPredictor, MXNetModel + DATA_DIR = os.path.join(os.path.dirname(__file__), '..', 'data') SCRIPT_PATH = os.path.join(DATA_DIR, 'dummy_script.py') TIMESTAMP = '2017-11-06-14:14:15.672' @@ -28,46 +30,13 @@ BUCKET_NAME = 'mybucket' INSTANCE_COUNT = 1 INSTANCE_TYPE = 'ml.c4.4xlarge' -IMAGE_NAME = 'sagemaker-mxnet-py2-cpu' -JOB_NAME = '{}-{}'.format(IMAGE_NAME, TIMESTAMP) -FULL_IMAGE_URI = '520713654638.dkr.ecr.us-west-2.amazonaws.com/{}:{}'.format(IMAGE_NAME, DOCKER_TAG) +IMAGE_CPU_NAME = 'sagemaker-mxnet-py2-cpu' +JOB_NAME = '{}-{}'.format(IMAGE_CPU_NAME, TIMESTAMP) +FULL_IMAGE_URI = '520713654638.dkr.ecr.us-west-2.amazonaws.com/{}:{}-cpu-py2' ROLE = 'Dummy' REGION = 'us-west-2' GPU = 'ml.p2.xlarge' CPU = 'ml.c4.xlarge' -CREATE_TRAIN_JOB = {'image': FULL_IMAGE_URI, - 'input_mode': 'File', - 'input_config': [{ - 'ChannelName': 'training', - 'DataSource': { - 'S3DataSource': { - 'S3DataDistributionType': 'FullyReplicated', - 'S3DataType': 'S3Prefix' - } - } - }], - 'role': ROLE, - 'job_name': JOB_NAME, - 'output_config': { - 'S3OutputPath': 's3://{}/'.format(BUCKET_NAME), - }, - 'resource_config': { - 'InstanceType': 'ml.c4.4xlarge', - 'InstanceCount': 1, - 'VolumeSizeInGB': 30, - }, - 'hyperparameters': { - 'sagemaker_program': json.dumps('dummy_script.py'), - 'sagemaker_enable_cloudwatch_metrics': 'false', - 'sagemaker_container_log_level': str(logging.INFO), - 'sagemaker_job_name': json.dumps(JOB_NAME), - 'sagemaker_submit_directory': - json.dumps('s3://{}/{}/source/sourcedir.tar.gz'.format(BUCKET_NAME, JOB_NAME)), - 'sagemaker_region': '"us-west-2"' - }, - 'stop_condition': { - 'MaxRuntimeInSeconds': 24 * 60 * 60 - }} @pytest.fixture() @@ -81,10 +50,76 @@ def sagemaker_session(): return ims +def _get_full_image_uri(version): + return FULL_IMAGE_URI.format(IMAGE_CPU_NAME, version) + + +def _create_train_job(version): + return {'image': _get_full_image_uri(version), + 'input_mode': 'File', + 'input_config': [{ + 'ChannelName': 'training', + 'DataSource': { + 'S3DataSource': { + 'S3DataDistributionType': 'FullyReplicated', + 'S3DataType': 'S3Prefix' + } + } + }], + 'role': ROLE, + 'job_name': JOB_NAME, + 'output_config': { + 'S3OutputPath': 's3://{}/'.format(BUCKET_NAME), + }, + 'resource_config': { + 'InstanceType': 'ml.c4.4xlarge', + 'InstanceCount': 1, + 'VolumeSizeInGB': 30, + }, + 'hyperparameters': { + 'sagemaker_program': json.dumps('dummy_script.py'), + 'sagemaker_enable_cloudwatch_metrics': 'false', + 'sagemaker_container_log_level': str(logging.INFO), + 'sagemaker_job_name': 
json.dumps(JOB_NAME), + 'sagemaker_submit_directory': + json.dumps('s3://{}/{}/source/sourcedir.tar.gz'.format(BUCKET_NAME, JOB_NAME)), + 'sagemaker_region': '"us-west-2"' + }, + 'stop_condition': { + 'MaxRuntimeInSeconds': 24 * 60 * 60 + }} + + +def test_create_model(sagemaker_session, mxnet_version): + container_log_level = '"logging.INFO"' + source_dir = 's3://mybucket/source' + enable_cloudwatch_metrics = 'true' + mx = MXNet(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session, + train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE, + framework_version=mxnet_version, container_log_level=container_log_level, + base_job_name='job', source_dir=source_dir, enable_cloudwatch_metrics=enable_cloudwatch_metrics) + + job_name = 'new_name' + mx.fit(inputs='s3://mybucket/train', job_name=job_name) + model = mx.create_model() + + assert model.sagemaker_session == sagemaker_session + assert model.framework_version == mxnet_version + assert model.py_version == mx.py_version + assert model.entry_point == SCRIPT_PATH + assert model.role == ROLE + assert model.name == job_name + assert model.container_log_level == container_log_level + assert model.source_dir == source_dir + assert model.enable_cloudwatch_metrics == enable_cloudwatch_metrics + + +@patch('time.strftime', return_value=TIMESTAMP) -def test_mxnet(strftime, sagemaker_session): +def test_mxnet(strftime, sagemaker_session, mxnet_version): mx = MXNet(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session, - train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE) + train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE, + framework_version=mxnet_version) inputs = 's3://mybucket/train' @@ -95,7 +130,7 @@ def test_mxnet(strftime, sagemaker_session): boto_call_names = [c[0] for c in sagemaker_session.boto_session.method_calls] assert boto_call_names == ['resource'] - expected_train_args = CREATE_TRAIN_JOB.copy() + expected_train_args = _create_train_job(mxnet_version) expected_train_args['input_config'][0]['DataSource']['S3DataSource']['S3Uri'] = inputs actual_train_args = sagemaker_session.method_calls[0][2] @@ -103,6 +138,7 @@ def test_mxnet(strftime, sagemaker_session): model = mx.create_model() + expected_image_base = '520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet-py2-gpu:{}-gpu-py2' assert {'Environment': {'SAGEMAKER_SUBMIT_DIRECTORY': 's3://mybucket/sagemaker-mxnet-py2-cpu-{}/sourcedir.tar.gz'.format(TIMESTAMP), @@ -110,7 +146,7 @@ def test_mxnet(strftime, sagemaker_session): 'SAGEMAKER_ENABLE_CLOUDWATCH_METRICS': 'false', 'SAGEMAKER_REGION': 'us-west-2', 'SAGEMAKER_CONTAINER_LOG_LEVEL': '20'}, - 'Image': '520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet-py2-gpu:1.0', + 'Image': expected_image_base.format(mxnet_version), 'ModelDataUrl': 's3://m/m.tar.gz'} == model.prepare_container_def(GPU) assert 'cpu' in model.prepare_container_def(CPU)['Image'] @@ -119,7 +155,8 @@ def test_mxnet(strftime, sagemaker_session): def test_model(sagemaker_session): - model = MXNetModel("s3://some/data.tar.gz", role=ROLE, entry_point=SCRIPT_PATH, sagemaker_session=sagemaker_session) + model = MXNetModel("s3://some/data.tar.gz", role=ROLE, entry_point=SCRIPT_PATH, + sagemaker_session=sagemaker_session) predictor = model.deploy(1, GPU) assert isinstance(predictor, MXNetPredictor)
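+
+# NOTE (editor's illustration, not part of the original change): the flow
+# exercised by test_model above, written against a real artifact; bucket,
+# script, and role names are placeholders:
+#
+#     model = MXNetModel('s3://my-bucket/model.tar.gz', role='SageMakerRole',
+#                        entry_point='inference.py')
+#     predictor = model.deploy(initial_instance_count=1,
+#                              instance_type='ml.c4.xlarge')
@@ -128,13 +165,14 @@ def test_train_image_default(sagemaker_session): mx = MXNet(entry_point=SCRIPT_PATH, role=ROLE, 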
sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE) - assert FULL_IMAGE_URI in mx.train_image() + assert _get_full_image_uri(defaults.MXNET_VERSION) in mx.train_image() -def test_attach(sagemaker_session): +def test_attach(sagemaker_session, mxnet_version): + training_image = '1.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet-py2-cpu:{}-cpu-py2'.format(mxnet_version) returned_job_description = {'AlgorithmSpecification': {'TrainingInputMode': 'File', - 'TrainingImage': '1.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet-py2-cpu:1.0.4'}, + 'TrainingImage': training_image}, 'HyperParameters': {'sagemaker_submit_directory': '"s3://some/sourcedir.tar.gz"', 'sagemaker_program': '"iris-dnn-classifier.py"', @@ -161,6 +199,50 @@ def test_attach(sagemaker_session): estimator = MXNet.attach(training_job_name='neo', sagemaker_session=sagemaker_session) assert estimator.latest_training_job.job_name == 'neo' assert estimator.py_version == 'py2' + assert estimator.framework_version == mxnet_version + assert estimator.role == 'arn:aws:iam::366:role/SageMakerRole' + assert estimator.train_instance_count == 1 + assert estimator.train_max_run == 24 * 60 * 60 + assert estimator.input_mode == 'File' + assert estimator.base_job_name == 'neo' + assert estimator.output_path == 's3://place/output/neo' + assert estimator.output_kms_key == '' + assert estimator.hyperparameters()['training_steps'] == '100' + assert estimator.source_dir == 's3://some/sourcedir.tar.gz' + assert estimator.entry_point == 'iris-dnn-classifier.py' + + +def test_attach_old_container(sagemaker_session): + returned_job_description = {'AlgorithmSpecification': + {'TrainingInputMode': 'File', + 'TrainingImage': '1.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet-py2-cpu:1.0'}, + 'HyperParameters': + {'sagemaker_submit_directory': '"s3://some/sourcedir.tar.gz"', + 'sagemaker_program': '"iris-dnn-classifier.py"', + 'sagemaker_s3_uri_training': '"sagemaker-3/integ-test-data/tf_iris"', + 'sagemaker_enable_cloudwatch_metrics': 'false', + 'sagemaker_container_log_level': '"logging.INFO"', + 'sagemaker_job_name': '"neo"', + 'training_steps': '100', + 'sagemaker_region': '"us-west-2"'}, + 'RoleArn': 'arn:aws:iam::366:role/SageMakerRole', + 'ResourceConfig': + {'VolumeSizeInGB': 30, + 'InstanceCount': 1, + 'InstanceType': 'ml.c4.xlarge'}, + 'StoppingCondition': {'MaxRuntimeInSeconds': 24 * 60 * 60}, + 'TrainingJobName': 'neo', + 'TrainingJobStatus': 'Completed', + 'OutputDataConfig': {'KmsKeyId': '', + 'S3OutputPath': 's3://place/output/neo'}, + 'TrainingJobOutput': {'S3TrainingJobOutput': 's3://here/output.tar.gz'}} + sagemaker_session.sagemaker_client.describe_training_job = Mock(name='describe_training_job', + return_value=returned_job_description) + + estimator = MXNet.attach(training_job_name='neo', sagemaker_session=sagemaker_session) + assert estimator.latest_training_job.job_name == 'neo' + assert estimator.py_version == 'py2' + assert estimator.framework_version == '0.12' assert estimator.role == 'arn:aws:iam::366:role/SageMakerRole' assert estimator.train_instance_count == 1 assert estimator.train_max_run == 24 * 60 * 60 @@ -201,9 +283,3 @@ def test_attach_wrong_framework(sagemaker_session): with pytest.raises(ValueError) as error: MXNet.attach(training_job_name='neo', sagemaker_session=sagemaker_session) assert "didn't use image for requested framework" in str(error) - - -def test_attach_no_job_name(sagemaker_session): - with pytest.raises(ValueError) as error: - 
MXNet.attach(training_job_name=None, sagemaker_session=sagemaker_session) - assert "must specify training_job name" in str(error) diff --git a/tests/unit/test_ntm.py b/tests/unit/test_ntm.py new file mode 100644 index 0000000000..f248b73697 --- /dev/null +++ b/tests/unit/test_ntm.py @@ -0,0 +1,327 @@ +# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +import pytest +from mock import Mock, patch + +from sagemaker.amazon.ntm import NTM, NTMPredictor +from sagemaker.amazon.amazon_estimator import registry, RecordSet + +ROLE = 'myrole' +TRAIN_INSTANCE_COUNT = 1 +TRAIN_INSTANCE_TYPE = 'ml.c4.xlarge' +NUM_TOPICS = 5 + +COMMON_TRAIN_ARGS = {'role': ROLE, 'train_instance_count': TRAIN_INSTANCE_COUNT, + 'train_instance_type': TRAIN_INSTANCE_TYPE} +ALL_REQ_ARGS = dict({'num_topics': NUM_TOPICS}, **COMMON_TRAIN_ARGS) + +REGION = "us-west-2" +BUCKET_NAME = "Some-Bucket" + +DESCRIBE_TRAINING_JOB_RESULT = { + 'ModelArtifacts': { + 'S3ModelArtifacts': "s3://bucket/model.tar.gz" + } +} + + +@pytest.fixture() +def sagemaker_session(): + boto_mock = Mock(name='boto_session', region_name=REGION) + sms = Mock(name='sagemaker_session', boto_session=boto_mock) + sms.boto_region_name = REGION + sms.default_bucket = Mock(name='default_bucket', return_value=BUCKET_NAME) + sms.sagemaker_client.describe_training_job = Mock(name='describe_training_job', + return_value=DESCRIBE_TRAINING_JOB_RESULT) + + return sms + + +def test_init_required_positional(sagemaker_session): + ntm = NTM(ROLE, TRAIN_INSTANCE_COUNT, TRAIN_INSTANCE_TYPE, NUM_TOPICS, sagemaker_session=sagemaker_session) + assert ntm.role == ROLE + assert ntm.train_instance_count == TRAIN_INSTANCE_COUNT + assert ntm.train_instance_type == TRAIN_INSTANCE_TYPE + assert ntm.num_topics == NUM_TOPICS + + +def test_init_required_named(sagemaker_session): + ntm = NTM(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) + + assert ntm.role == COMMON_TRAIN_ARGS['role'] + assert ntm.train_instance_count == TRAIN_INSTANCE_COUNT + assert ntm.train_instance_type == COMMON_TRAIN_ARGS['train_instance_type'] + assert ntm.num_topics == ALL_REQ_ARGS['num_topics'] + + +def test_all_hyperparameters(sagemaker_session): + ntm = NTM(sagemaker_session=sagemaker_session, + encoder_layers=[1, 2, 3], epochs=3, encoder_layers_activation='tanh', optimizer='sgd', + tolerance=0.05, num_patience_epochs=2, batch_norm=False, rescale_gradient=0.5, clip_gradient=0.5, + weight_decay=0.5, learning_rate=0.5, **ALL_REQ_ARGS) + assert ntm.hyperparameters() == dict( + num_topics=str(ALL_REQ_ARGS['num_topics']), + encoder_layers='[1, 2, 3]', + epochs='3', + encoder_layers_activation='tanh', + optimizer='sgd', + tolerance='0.05', + num_patience_epochs='2', + batch_norm='False', + rescale_gradient='0.5', + clip_gradient='0.5', + weight_decay='0.5', + learning_rate='0.5' + ) + + +def test_image(sagemaker_session): + ntm = NTM(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) + assert ntm.train_image() == registry(REGION, "ntm") + '/ntm:1' + + +def 
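test_num_topics_required_sketch(sagemaker_session):
+    # NOTE (editor's illustration, not part of the original change):
+    # num_topics is NTM's required algorithm-specific argument (see
+    # test_init_required_positional above); omitting it altogether fails at
+    # construction time because the positional parameter has no default.
+    with pytest.raises(TypeError):
+        NTM(sagemaker_session=sagemaker_session, **COMMON_TRAIN_ARGS)
+
+
+def 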
test_num_topics_validation_fail_type(sagemaker_session): + with pytest.raises(ValueError): + NTM(num_topics='other', sagemaker_session=sagemaker_session, **COMMON_TRAIN_ARGS) + + +def test_num_topics_validation_fail_value_lower(sagemaker_session): + with pytest.raises(ValueError): + NTM(num_topics=0, sagemaker_session=sagemaker_session, **COMMON_TRAIN_ARGS) + + +def test_num_topics_validation_fail_value_upper(sagemaker_session): + with pytest.raises(ValueError): + NTM(num_topics=10000, sagemaker_session=sagemaker_session, **COMMON_TRAIN_ARGS) + + +def test_encoder_layers_validation_fail_type(sagemaker_session): + with pytest.raises(TypeError): + NTM(encoder_layers=0, sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) + + +def test_epochs_validation_fail_type(sagemaker_session): + with pytest.raises(ValueError): + NTM(epochs='other', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) + + +def test_epochs_validation_fail_value_lower(sagemaker_session): + with pytest.raises(ValueError): + NTM(epochs=0, sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) + + +def test_epochs_validation_fail_value_upper(sagemaker_session): + with pytest.raises(ValueError): + NTM(epochs=1000, sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) + + +def test_encoder_layers_activation_validation_fail_type(sagemaker_session): + with pytest.raises(ValueError): + NTM(encoder_layers_activation=0, sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) + + +def test_encoder_layers_activation_validation_fail_value(sagemaker_session): + with pytest.raises(ValueError): + NTM(encoder_layers_activation='other', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) + + +def test_optimizer_validation_fail_type(sagemaker_session): + with pytest.raises(ValueError): + NTM(optimizer=0, sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) + + +def test_optimizer_validation_fail_value(sagemaker_session): + with pytest.raises(ValueError): + NTM(optimizer='other', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) + + +def test_tolerance_validation_fail_type(sagemaker_session): + with pytest.raises(ValueError): + NTM(tolerance='other', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) + + +def test_tolerance_validation_fail_value_lower(sagemaker_session): + with pytest.raises(ValueError): + NTM(tolerance=0, sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) + + +def test_tolerance_validation_fail_value_upper(sagemaker_session): + with pytest.raises(ValueError): + NTM(tolerance=0.5, sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) + + +def test_num_patience_epochs_validation_fail_type(sagemaker_session): + with pytest.raises(ValueError): + NTM(num_patience_epochs='other', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) + + +def test_num_patience_epochs_validation_fail_value_lower(sagemaker_session): + with pytest.raises(ValueError): + NTM(num_patience_epochs=0, sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) + + +def test_num_patience_epochs_validation_fail_value_upper(sagemaker_session): + with pytest.raises(ValueError): + NTM(num_patience_epochs=100, sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) + + +def test_rescale_gradient_fail_type(sagemaker_session): + with pytest.raises(ValueError): + NTM(rescale_gradient='other', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) + + +def test_rescale_gradient_validation_fail_value_lower(sagemaker_session): + with pytest.raises(ValueError): + NTM(rescale_gradient=0, sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) + + +def 
test_rescale_gradient_validation_fail_value_upper(sagemaker_session): + with pytest.raises(ValueError): + NTM(rescale_gradient=10, sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) + + +def test_clip_gradient_fail_type(sagemaker_session): + with pytest.raises(ValueError): + NTM(clip_gradient='other', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) + + +def test_clip_gradient_validation_fail_value(sagemaker_session): + with pytest.raises(ValueError): + NTM(clip_gradient=0, sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) + + +def test_weight_decay_fail_type(sagemaker_session): + with pytest.raises(ValueError): + NTM(weight_decay='other', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) + + +def test_weight_decay_validation_fail_value_lower(sagemaker_session): + with pytest.raises(ValueError): + NTM(weight_decay=-1, sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) + + +def test_weight_decay_validation_fail_value_upper(sagemaker_session): + with pytest.raises(ValueError): + NTM(weight_decay=2, sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) + + +def test_learning_rate_fail_type(sagemaker_session): + with pytest.raises(ValueError): + NTM(learning_rate='other', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) + + +def test_learning_rate_validation_fail_value_lower(sagemaker_session): + with pytest.raises(ValueError): + NTM(learning_rate=0, sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) + + +def test_learning_rate_validation_fail_value_upper(sagemaker_session): + with pytest.raises(ValueError): + NTM(learning_rate=2, sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) + + +PREFIX = "prefix" +BASE_TRAIN_CALL = { + 'hyperparameters': {}, + 'image': registry(REGION, "ntm") + '/ntm:1', + 'input_config': [{ + 'DataSource': { + 'S3DataSource': { + 'S3DataDistributionType': 'ShardedByS3Key', + 'S3DataType': 'ManifestFile', + 'S3Uri': 's3://{}/{}'.format(BUCKET_NAME, PREFIX) + } + }, + 'ChannelName': 'train' + }], + 'input_mode': 'File', + 'output_config': {'S3OutputPath': 's3://{}/'.format(BUCKET_NAME)}, + 'resource_config': { + 'InstanceCount': TRAIN_INSTANCE_COUNT, + 'InstanceType': TRAIN_INSTANCE_TYPE, + 'VolumeSizeInGB': 30 + }, + 'stop_condition': {'MaxRuntimeInSeconds': 86400} +} + +FEATURE_DIM = 10 +MINI_BATCH_SIZE = 200 + + +@patch("sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.fit") +def test_call_fit(base_fit, sagemaker_session): + ntm = NTM(base_job_name="ntm", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) + + data = RecordSet("s3://{}/{}".format(BUCKET_NAME, PREFIX), num_records=1, feature_dim=FEATURE_DIM, channel='train') + + ntm.fit(data, MINI_BATCH_SIZE) + + base_fit.assert_called_once() + assert len(base_fit.call_args[0]) == 2 + assert base_fit.call_args[0][0] == data + assert base_fit.call_args[0][1] == MINI_BATCH_SIZE + + +def test_call_fit_none_mini_batch_size(sagemaker_session): + ntm = NTM(base_job_name="ntm", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) + + data = RecordSet("s3://{}/{}".format(BUCKET_NAME, PREFIX), num_records=1, feature_dim=FEATURE_DIM, + channel='train') + ntm.fit(data) + + +def test_call_fit_wrong_type_mini_batch_size(sagemaker_session): + ntm = NTM(base_job_name="ntm", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) + + data = RecordSet("s3://{}/{}".format(BUCKET_NAME, PREFIX), num_records=1, feature_dim=FEATURE_DIM, + channel='train') + + with pytest.raises((TypeError, ValueError)): + ntm.fit(data, "some") + + +def test_call_fit_wrong_value_lower_mini_batch_size(sagemaker_session): + ntm 
= NTM(base_job_name="ntm", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) + + data = RecordSet("s3://{}/{}".format(BUCKET_NAME, PREFIX), num_records=1, feature_dim=FEATURE_DIM, + channel='train') + with pytest.raises(ValueError): + ntm.fit(data, 0) + + +def test_call_fit_wrong_value_upper_mini_batch_size(sagemaker_session): + ntm = NTM(base_job_name="ntm", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) + + data = RecordSet("s3://{}/{}".format(BUCKET_NAME, PREFIX), num_records=1, feature_dim=FEATURE_DIM, + channel='train') + with pytest.raises(ValueError): + ntm.fit(data, 10001) + + +def test_model_image(sagemaker_session): + ntm = NTM(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) + data = RecordSet("s3://{}/{}".format(BUCKET_NAME, PREFIX), num_records=1, feature_dim=FEATURE_DIM, channel='train') + ntm.fit(data, MINI_BATCH_SIZE) + + model = ntm.create_model() + assert model.image == registry(REGION, "ntm") + '/ntm:1' + + +def test_predictor_type(sagemaker_session): + ntm = NTM(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) + data = RecordSet("s3://{}/{}".format(BUCKET_NAME, PREFIX), num_records=1, feature_dim=FEATURE_DIM, channel='train') + ntm.fit(data, MINI_BATCH_SIZE) + model = ntm.create_model() + predictor = model.deploy(1, TRAIN_INSTANCE_TYPE) + + assert isinstance(predictor, NTMPredictor) diff --git a/tests/unit/test_predictor.py b/tests/unit/test_predictor.py index 1b6acb671c..5d863467ca 100644 --- a/tests/unit/test_predictor.py +++ b/tests/unit/test_predictor.py @@ -51,12 +51,26 @@ def test_json_serializer_python_array(): assert result == '[1, 2, 3]' +def test_json_serializer_python_dictionary(): + d = {"gender": "m", "age": 22, "city": "Paris"} + + result = json_serializer(d) + + assert json.loads(result) == d + + def test_json_serializer_python_invalid_empty(): with pytest.raises(ValueError) as error: json_serializer([]) assert "empty array" in str(error) +def test_json_serializer_python_dictionary_invalid_empty(): + with pytest.raises(ValueError) as error: + json_serializer({}) + assert "empty dictionary" in str(error) + + def test_json_serializer_csv_buffer(): csv_file_path = os.path.join(DATA_DIR, "with_integers.csv") with open(csv_file_path) as csv_file: diff --git a/tests/unit/test_session.py b/tests/unit/test_session.py index 4251ffcab5..0256b54569 100644 --- a/tests/unit/test_session.py +++ b/tests/unit/test_session.py @@ -16,6 +16,7 @@ from mock import Mock, patch, call import sagemaker from sagemaker import s3_input, Session, get_execution_role +import datetime from botocore.exceptions import ClientError @@ -177,6 +178,11 @@ def test_s3_input_all_arguments(): {'ModelArtifacts': { 'S3ModelArtifacts': S3_OUTPUT + '/model/model.tar.gz' }}) +# TrainingStartTime and TrainingEndTime are for billable seconds calculation +COMPLETED_DESCRIBE_JOB_RESULT.update( + {'TrainingStartTime': datetime.datetime(2018, 2, 17, 7, 15, 0, 103000)}) +COMPLETED_DESCRIBE_JOB_RESULT.update( + {'TrainingEndTime': datetime.datetime(2018, 2, 17, 7, 19, 34, 953000)}) IN_PROGRESS_DESCRIBE_JOB_RESULT = dict(DEFAULT_EXPECTED_TRAIN_JOB_ARGS) IN_PROGRESS_DESCRIBE_JOB_RESULT.update({'TrainingJobStatus': 'InProgress'}) diff --git a/tests/unit/test_tf_estimator.py b/tests/unit/test_tf_estimator.py index dd73ac293b..e3eb2c2734 100644 --- a/tests/unit/test_tf_estimator.py +++ b/tests/unit/test_tf_estimator.py @@ -1,4 +1,4 @@ -# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of @@ -16,10 +16,10 @@ import os import pytest from mock import Mock, patch -import sagemaker from sagemaker.model import MODEL_SERVER_WORKERS_PARAM_NAME from sagemaker.session import s3_input -from sagemaker.tensorflow import TensorFlow, DOCKER_TAG +from sagemaker.tensorflow import TensorFlow +from sagemaker.tensorflow import defaults from sagemaker.fw_utils import create_image_uri from sagemaker.tensorflow import TensorFlowPredictor, TensorFlowModel @@ -35,47 +35,8 @@ JOB_NAME = '{}-{}'.format(CPU_IMAGE_NAME, TIMESTAMP) ROLE = 'Dummy' REGION = 'us-west-2' - -IMAGE_URI_FORMAT_STRING = "520713654638.dkr.ecr.{}.amazonaws.com/{}:{}" -FULL_CPU_IMAGE_URI = IMAGE_URI_FORMAT_STRING.format(REGION, CPU_IMAGE_NAME, DOCKER_TAG) -FULL_GPU_IMAGE_URI = IMAGE_URI_FORMAT_STRING.format(REGION, GPU_IMAGE_NAME, DOCKER_TAG) - -CREATE_TRAIN_JOB = {'image': FULL_CPU_IMAGE_URI, - 'input_mode': 'File', - 'input_config': [{ - 'ChannelName': 'training', - 'DataSource': { - 'S3DataSource': { - 'S3DataDistributionType': 'FullyReplicated', - 'S3DataType': 'S3Prefix' - } - } - }], - 'role': ROLE, - 'job_name': JOB_NAME, - 'output_config': { - 'S3OutputPath': 's3://{}/'.format(BUCKET_NAME), - }, - 'resource_config': { - 'InstanceType': 'ml.c4.4xlarge', - 'InstanceCount': 1, - 'VolumeSizeInGB': 30, - }, - 'hyperparameters': { - 'training_steps': '1000', - 'evaluation_steps': '10', - 'sagemaker_program': json.dumps('dummy_script.py'), - 'sagemaker_submit_directory': json.dumps('s3://{}/{}/source/sourcedir.tar.gz'.format( - BUCKET_NAME, JOB_NAME)), - 'sagemaker_enable_cloudwatch_metrics': 'false', - 'sagemaker_container_log_level': str(logging.INFO), - 'sagemaker_job_name': json.dumps(JOB_NAME), - 'checkpoint_path': json.dumps('s3://{}/{}/checkpoints'.format(BUCKET_NAME, JOB_NAME)), - 'sagemaker_region': '"us-west-2"' - }, - 'stop_condition': { - 'MaxRuntimeInSeconds': 24 * 60 * 60 - }} +DOCKER_TAG = '1.0' +IMAGE_URI_FORMAT_STRING = "520713654638.dkr.ecr.{}.amazonaws.com/{}:{}-{}-{}" @pytest.fixture() @@ -90,11 +51,60 @@ def sagemaker_session(): return ims -def _build_tf(sagemaker_session, train_instance_type=None, checkpoint_path=None, enable_cloudwatch_metrics=False, - base_job_name=None, training_steps=None, evalutation_steps=None, **kwargs): +def _get_full_cpu_image_uri(version): + return IMAGE_URI_FORMAT_STRING.format(REGION, CPU_IMAGE_NAME, version, 'cpu', 'py2') + + +def _get_full_gpu_image_uri(version): + return IMAGE_URI_FORMAT_STRING.format(REGION, GPU_IMAGE_NAME, version, 'gpu', 'py2') + + +def _create_train_job(tf_version): + return {'image': _get_full_cpu_image_uri(tf_version), + 'input_mode': 'File', + 'input_config': [{ + 'ChannelName': 'training', + 'DataSource': { + 'S3DataSource': { + 'S3DataDistributionType': 'FullyReplicated', + 'S3DataType': 'S3Prefix' + } + } + }], + 'role': ROLE, + 'job_name': JOB_NAME, + 'output_config': { + 'S3OutputPath': 's3://{}/'.format(BUCKET_NAME), + }, + 'resource_config': { + 'InstanceType': 'ml.c4.4xlarge', + 'InstanceCount': 1, + 'VolumeSizeInGB': 30, + }, + 'hyperparameters': { + 'training_steps': '1000', + 'evaluation_steps': '10', + 'sagemaker_program': json.dumps('dummy_script.py'), + 'sagemaker_submit_directory': json.dumps('s3://{}/{}/source/sourcedir.tar.gz'.format( + BUCKET_NAME, JOB_NAME)), + 'sagemaker_enable_cloudwatch_metrics': 'false', + 'sagemaker_container_log_level': str(logging.INFO), + 'sagemaker_job_name': 
json.dumps(JOB_NAME), + 'checkpoint_path': json.dumps('s3://{}/{}/checkpoints'.format(BUCKET_NAME, JOB_NAME)), + 'sagemaker_region': '"us-west-2"' + }, + 'stop_condition': { + 'MaxRuntimeInSeconds': 24 * 60 * 60 + }} + + +def _build_tf(sagemaker_session, framework_version=defaults.TF_VERSION, train_instance_type=None, + checkpoint_path=None, enable_cloudwatch_metrics=False, base_job_name=None, + training_steps=None, evaluation_steps=None, **kwargs): return TensorFlow(entry_point=SCRIPT_PATH, training_steps=training_steps, evaluation_steps=evaluation_steps, + framework_version=framework_version, role=ROLE, sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT, @@ -105,28 +115,28 @@ def _build_tf(sagemaker_session, train_instance_type=None, checkpoint_path=None, **kwargs) -def test_tf_support_cpu_instances(sagemaker_session): - tf = _build_tf(sagemaker_session, train_instance_type='ml.c2.2xlarge') +def test_tf_support_cpu_instances(sagemaker_session, tf_version): + tf = _build_tf(sagemaker_session, tf_version, train_instance_type='ml.c2.2xlarge') - assert tf.train_image() == FULL_CPU_IMAGE_URI + assert tf.train_image() == _get_full_cpu_image_uri(tf_version) - tf = _build_tf(sagemaker_session, train_instance_type='ml.c4.2xlarge') + tf = _build_tf(sagemaker_session, tf_version, train_instance_type='ml.c4.2xlarge') - assert tf.train_image() == FULL_CPU_IMAGE_URI + assert tf.train_image() == _get_full_cpu_image_uri(tf_version) - tf = _build_tf(sagemaker_session, train_instance_type='ml.m16') + tf = _build_tf(sagemaker_session, tf_version, train_instance_type='ml.m16') - assert tf.train_image() == FULL_CPU_IMAGE_URI + assert tf.train_image() == _get_full_cpu_image_uri(tf_version) -def test_tf_support_gpu_instances(sagemaker_session): - tf = _build_tf(sagemaker_session, train_instance_type='ml.g2.2xlarge') +def test_tf_support_gpu_instances(sagemaker_session, tf_version): + tf = _build_tf(sagemaker_session, tf_version, train_instance_type='ml.g2.2xlarge') - assert tf.train_image() == FULL_GPU_IMAGE_URI + assert tf.train_image() == _get_full_gpu_image_uri(tf_version) - tf = _build_tf(sagemaker_session, train_instance_type='ml.p2.2xlarge') + tf = _build_tf(sagemaker_session, tf_version, train_instance_type='ml.p2.2xlarge') - assert tf.train_image() == FULL_GPU_IMAGE_URI + assert tf.train_image() == _get_full_gpu_image_uri(tf_version) def test_tf_deploy_model_server_workers(sagemaker_session): @@ -148,11 +158,37 @@ def test_tf_deploy_model_server_workers_unset(sagemaker_session): assert MODEL_SERVER_WORKERS_PARAM_NAME.upper() not in sagemaker_session.method_calls[3][1][2]['Environment'] +def test_create_model(sagemaker_session, tf_version): + container_log_level = '"logging.INFO"' + source_dir = 's3://mybucket/source' + enable_cloudwatch_metrics = 'true' + tf = TensorFlow(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session, + training_steps=1000, evaluation_steps=10, train_instance_count=INSTANCE_COUNT, + train_instance_type=INSTANCE_TYPE, framework_version=tf_version, + container_log_level=container_log_level, base_job_name='job', + source_dir=source_dir, enable_cloudwatch_metrics=enable_cloudwatch_metrics) + + job_name = 'doing-something' + tf.fit(inputs='s3://mybucket/train', job_name=job_name) + model = tf.create_model() + + assert model.sagemaker_session == sagemaker_session + assert model.framework_version == tf_version + assert model.py_version == tf.py_version + assert model.entry_point == SCRIPT_PATH + assert model.role == ROLE + assert model.name == 
job_name + assert model.container_log_level == container_log_level + assert model.source_dir == source_dir + assert model.enable_cloudwatch_metrics == enable_cloudwatch_metrics + + @patch('time.strftime', return_value=TIMESTAMP) @patch('time.time', return_value=TIME) -def test_tf(time, strftime, sagemaker_session): - tf = TensorFlow(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session, training_steps=1000, - evaluation_steps=10, train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE) +def test_tf(time, strftime, sagemaker_session, tf_version): + tf = TensorFlow(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session, + training_steps=1000, evaluation_steps=10, train_instance_count=INSTANCE_COUNT, + train_instance_type=INSTANCE_TYPE, framework_version=tf_version) inputs = 's3://mybucket/train' @@ -163,7 +199,7 @@ def test_tf(time, strftime, sagemaker_session): boto_call_names = [c[0] for c in sagemaker_session.boto_session.method_calls] assert boto_call_names == ['resource'] - expected_train_args = CREATE_TRAIN_JOB.copy() + expected_train_args = _create_train_job(tf_version) expected_train_args['input_config'][0]['DataSource']['S3DataSource']['S3Uri'] = inputs actual_train_args = sagemaker_session.method_calls[0][2] @@ -178,8 +214,7 @@ def test_tf(time, strftime, sagemaker_session): 'SAGEMAKER_REGION': 'us-west-2', 'SAGEMAKER_CONTAINER_LOG_LEVEL': '20' }, - 'Image': create_image_uri('us-west-2', "tensorflow", GPU_IMAGE_NAME, "py2", - sagemaker.tensorflow.DOCKER_TAG), + 'Image': create_image_uri('us-west-2', "tensorflow", GPU_IMAGE_NAME, tf_version, "py2"), 'ModelDataUrl': 's3://m/m.tar.gz'} == model.prepare_container_def(GPU_IMAGE_NAME) assert 'cpu' in model.prepare_container_def(CPU_IMAGE_NAME)['Image'] @@ -202,7 +237,7 @@ def test_run_tensorboard_locally_without_tensorboard_binary(time, strftime, pope 'following command: \n pip install tensorboard' -def test_model(sagemaker_session): +def test_model(sagemaker_session, tf_version): model = TensorFlowModel("s3://some/data.tar.gz", role=ROLE, entry_point=SCRIPT_PATH, sagemaker_session=sagemaker_session) predictor = model.deploy(1, GPU_IMAGE_NAME) @@ -302,15 +337,20 @@ def test_tf_checkpoint_set(sagemaker_session): def test_train_image_default(sagemaker_session): - tf = _build_tf(sagemaker_session) + tf = TensorFlow(entry_point=SCRIPT_PATH, + role=ROLE, + sagemaker_session=sagemaker_session, + train_instance_count=INSTANCE_COUNT, + train_instance_type=INSTANCE_TYPE) - assert FULL_CPU_IMAGE_URI in tf.train_image() + assert _get_full_cpu_image_uri(defaults.TF_VERSION) in tf.train_image() -def test_attach(sagemaker_session): +def test_attach(sagemaker_session, tf_version): + training_image = '1.dkr.ecr.us-west-2.amazonaws.com/sagemaker-tensorflow-py2-cpu:{}-cpu-py2'.format(tf_version) rjd = {'AlgorithmSpecification': {'TrainingInputMode': 'File', - 'TrainingImage': '1.dkr.ecr.us-west-2.amazonaws.com/sagemaker-tensorflow-py2-cpu:1.0.4'}, + 'TrainingImage': training_image}, 'HyperParameters': {'sagemaker_submit_directory': '"s3://some/sourcedir.tar.gz"', 'checkpoint_path': '"s3://other/1508872349"', @@ -336,6 +376,54 @@ def test_attach(sagemaker_session): estimator = TensorFlow.attach(training_job_name='neo', sagemaker_session=sagemaker_session) assert estimator.latest_training_job.job_name == 'neo' assert estimator.py_version == 'py2' + assert estimator.framework_version == tf_version + assert estimator.role == 'arn:aws:iam::366:role/SageMakerRole' + assert estimator.train_instance_count == 1 + 
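# NOTE: attach() reconstructs the estimator solely from the mocked
+    # describe_training_job() response; the assertions below verify that each
+    # field survives that round trip.
+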
assert estimator.train_max_run == 24 * 60 * 60 + assert estimator.input_mode == 'File' + assert estimator.training_steps == 100 + assert estimator.evaluation_steps == 10 + assert estimator.base_job_name == 'neo' + assert estimator.output_path == 's3://place/output/neo' + assert estimator.output_kms_key == '' + assert estimator.hyperparameters()['training_steps'] == '100' + assert estimator.source_dir == 's3://some/sourcedir.tar.gz' + assert estimator.entry_point == 'iris-dnn-classifier.py' + assert estimator.checkpoint_path == 's3://other/1508872349' + + +def test_attach_old_container(sagemaker_session): + training_image = '1.dkr.ecr.us-west-2.amazonaws.com/sagemaker-tensorflow-py2-cpu:1.0' + rjd = {'AlgorithmSpecification': + {'TrainingInputMode': 'File', + 'TrainingImage': training_image}, + 'HyperParameters': + {'sagemaker_submit_directory': '"s3://some/sourcedir.tar.gz"', + 'checkpoint_path': '"s3://other/1508872349"', + 'sagemaker_program': '"iris-dnn-classifier.py"', + 'sagemaker_enable_cloudwatch_metrics': 'false', + 'sagemaker_container_log_level': '"logging.INFO"', + 'sagemaker_job_name': '"neo"', + 'training_steps': '100', + 'evaluation_steps': '10'}, + 'RoleArn': 'arn:aws:iam::366:role/SageMakerRole', + 'ResourceConfig': + {'VolumeSizeInGB': 30, + 'InstanceCount': 1, + 'InstanceType': 'ml.c4.xlarge'}, + 'StoppingCondition': {'MaxRuntimeInSeconds': 24 * 60 * 60}, + 'TrainingJobName': 'neo', + 'TrainingJobStatus': 'Completed', + 'OutputDataConfig': {'KmsKeyId': '', + 'S3OutputPath': 's3://place/output/neo'}, + 'TrainingJobOutput': {'S3TrainingJobOutput': 's3://here/output.tar.gz'}} + sagemaker_session.sagemaker_client.describe_training_job = Mock(name='describe_training_job', return_value=rjd) + + estimator = TensorFlow.attach(training_job_name='neo', sagemaker_session=sagemaker_session) + assert estimator.latest_training_job.job_name == 'neo' + assert estimator.py_version == 'py2' + assert estimator.framework_version == '1.4' assert estimator.role == 'arn:aws:iam::366:role/SageMakerRole' assert estimator.train_instance_count == 1 assert estimator.train_max_run == 24 * 60 * 60 @@ -355,7 +443,7 @@ def test_attach(sagemaker_session): def test_attach_wrong_framework(sagemaker_session): returned_job_description = {'AlgorithmSpecification': {'TrainingInputMode': 'File', - 'TrainingImage': '1.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet-py2-cpu:1.0.4'}, + 'TrainingImage': '1.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet-py2-cpu:1.0'}, 'HyperParameters': {'sagemaker_submit_directory': '"s3://some/sourcedir.tar.gz"', 'sagemaker_program': '"iris-dnn-classifier.py"', @@ -379,9 +467,3 @@ with pytest.raises(ValueError) as error: TensorFlow.attach(training_job_name='neo', sagemaker_session=sagemaker_session) assert "didn't use image for requested framework" in str(error) - - -def test_attach_no_job_name(sagemaker_session): - with pytest.raises(ValueError) as error: - TensorFlow.attach(training_job_name=None, sagemaker_session=sagemaker_session) - assert "must specify training_job name" in str(error) diff --git a/tox.ini b/tox.ini index a42fe09296..09795ca02e 100644 --- a/tox.ini +++ b/tox.ini @@ -27,7 +27,11 @@ max-complexity = 10 [testenv] # TEAMCITY_VERSION environment variable exists during build on Teamcity. teamcity-messages uses it in order to enable # reporting to TeamCity. 
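+# The AWS_* variables below are passed through so that integration tests run
+# via tox can read credentials from the invoking environment. Example
+# invocation (exported values are placeholders):
+#   export AWS_ACCESS_KEY_ID=... AWS_SECRET_ACCESS_KEY=... AWS_SESSION_TOKEN=...
+#   tox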
-passenv = TEAMCITY_VERSION +passenv = + TEAMCITY_VERSION + AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY + AWS_SESSION_TOKEN # {posargs} can be passed in by additional arguments specified when invoking tox. # Can be used to specify which tests to run, e.g.: tox -- -s commands =