From 190dbf8a9c031fb3419ca8340925d7a8042a396d Mon Sep 17 00:00:00 2001 From: Nadia Yakimakha <32335935+nadiaya@users.noreply.github.com> Date: Wed, 30 Jan 2019 17:29:18 -0800 Subject: [PATCH 1/4] Fix docstrings warnings. --- doc/conf.py | 3 ++- src/sagemaker/amazon/knn.py | 2 ++ src/sagemaker/mxnet/estimator.py | 11 +++++----- src/sagemaker/session.py | 15 +++++++++----- src/sagemaker/sparkml/model.py | 2 ++ src/sagemaker/tensorflow/estimator.py | 29 +++++++++++++++++++-------- src/sagemaker/tensorflow/model.py | 4 ++-- src/sagemaker/workflow/airflow.py | 3 +-- 8 files changed, 46 insertions(+), 23 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 3bd9c77b7b..303624a099 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -32,6 +32,7 @@ def __getattr__(cls, name): 'tensorflow.python.framework', 'tensorflow_serving', 'tensorflow_serving.apis', 'numpy', 'scipy', 'scipy.sparse'] sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES) +sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES) project = u'sagemaker' version = pkg_resources.require(project)[0].version @@ -68,7 +69,7 @@ def __getattr__(cls, name): html_theme = 'default' else: html_theme = 'haiku' -html_static_path = ['_static'] +html_static_path = [] htmlhelp_basename = '%sdoc' % project # Example configuration for intersphinx: refer to the Python standard library. diff --git a/src/sagemaker/amazon/knn.py b/src/sagemaker/amazon/knn.py index 9b59cc8519..0340f13347 100644 --- a/src/sagemaker/amazon/knn.py +++ b/src/sagemaker/amazon/knn.py @@ -60,6 +60,7 @@ def __init__(self, role, train_instance_count, train_instance_type, k, sample_si KNN are documented below. For further information on the AWS KNN algorithm, please consult AWS technical documentation: https://docs.aws.amazon.com/sagemaker/latest/dg/knn.html + Args: role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs that create Amazon SageMaker endpoints use this role to access @@ -108,6 +109,7 @@ def create_model(self, vpc_config_override=VPC_CONFIG_DEFAULT): * 'Subnets' (list[str]): List of subnet ids. * 'SecurityGroupIds' (list[str]): List of security group ids. """ + return KNNModel(self.model_data, self.role, sagemaker_session=self.sagemaker_session, vpc_config=self.get_vpc_config(vpc_config_override)) diff --git a/src/sagemaker/mxnet/estimator.py b/src/sagemaker/mxnet/estimator.py index adc056e66e..ee4b08e9ae 100644 --- a/src/sagemaker/mxnet/estimator.py +++ b/src/sagemaker/mxnet/estimator.py @@ -58,15 +58,16 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None, py_versio For convenience, this accepts other types for keys and values, but ``str()`` will be called to convert them before training. py_version (str): Python version you want to use for executing your model training code (default: 'py2'). - One of 'py2' or 'py3'. + One of 'py2' or 'py3'. framework_version (str): MXNet version you want to use for executing your model training code. List of supported versions https://github.com/aws/sagemaker-python-sdk#mxnet-sagemaker-estimators image_name (str): If specified, the estimator will use this image for training and hosting, instead of selecting the appropriate SageMaker official image based on framework_version and py_version. It can be an ECR url or dockerhub image and tag. - Examples: - 123.dkr.ecr.us-west-2.amazonaws.com/my-custom-image:1.0 - custom-image:latest. + + Examples: + 123.dkr.ecr.us-west-2.amazonaws.com/my-custom-image:1.0 + custom-image:latest. distributions (dict): A dictionary with information on how to run distributed training (default: None). **kwargs: Additional kwargs passed to the :class:`~sagemaker.estimator.Framework` constructor. @@ -126,7 +127,7 @@ def _prepare_init_params_from_job_description(cls, job_details, model_channel_na model_channel_name (str): Name of the channel where pre-trained model data will be downloaded. Returns: - dictionary: The transformed init_params + dictionary: The transformed init_params """ init_params = super(MXNet, cls)._prepare_init_params_from_job_description(job_details, model_channel_name) diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py index f0741b9c4b..a0dfae757c 100644 --- a/src/sagemaker/session.py +++ b/src/sagemaker/session.py @@ -325,7 +325,7 @@ def compile_model(self, input_model_config, output_model_config, role, Args: input_model_config (dict): the trained model and the Amazon S3 location where it is stored. - output_model_config (dict): - Identifies the Amazon S3 location where you want Amazon SageMaker Neo to save + output_model_config (dict): Identifies the Amazon S3 location where you want Amazon SageMaker Neo to save the results of compilation job role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker Neo compilation jobs use this role to access model artifacts. You must grant sufficient permissions to this role. @@ -550,6 +550,7 @@ def create_model(self, name, role, container_defs, vpc_config=None, the inference code. Amazon SageMaker uses this information to deploy the model in Amazon SageMaker. This method can also be used to create a Model for an Inference Pipeline if you pass the list of container definitions through the containers parameter. + Args: name (str): Name of the Amazon SageMaker ``Model`` to create. role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs @@ -1177,6 +1178,7 @@ def logs_for_job(self, job_name, wait=False, poll=10): # noqa: C901 - suppress def container_def(image, model_data_url=None, env=None): """Create a definition for executing a container as part of a SageMaker model. + Args: image (str): Docker image to run for this container. model_data_url (str): S3 URI of data required by this container, @@ -1286,19 +1288,22 @@ def __init__(self, s3_data, distribution='FullyReplicated', compression=None, ``s3_data`` will be used to train. If 'ManifestFile' or 'AugmentedManifestFile', then ``s3_data`` defines a single s3 manifest file or augmented manifest file (respectively), listing the s3 data to train on. Both the ManifestFile and AugmentedManifestFile formats are described in the SageMaker API - documentation: https://docs.aws.amazon.com/sagemaker/latest/dg/API_S3DataSource.html + documentation: https://docs.aws.amazon.com/sagemaker/latest/dg/API_S3DataSource.html input_mode (str): Optional override for this channel's input mode (default: None). By default, channels will use the input mode defined on ``sagemaker.estimator.EstimatorBase.input_mode``, but they will ignore that setting if this parameter is set. - * None - Amazon SageMaker will use the input mode specified in the ``Estimator``. - * 'File' - Amazon SageMaker copies the training dataset from the S3 location to a local directory. - * 'Pipe' - Amazon SageMaker streams data directly from S3 to the container via a Unix-named pipe. + + * None - Amazon SageMaker will use the input mode specified in the ``Estimator``. + * 'File' - Amazon SageMaker copies the training dataset from the S3 location to a local directory. + * 'Pipe' - Amazon SageMaker streams data directly from S3 to the container via a Unix-named pipe. + attribute_names (list[str]): A list of one or more attribute names to use that are found in a specified AugmentedManifestFile. shuffle_config (ShuffleConfig): If specified this configuration enables shuffling on this channel. See the SageMaker API documentation for more info: https://docs.aws.amazon.com/sagemaker/latest/dg/API_ShuffleConfig.html """ + self.config = { 'DataSource': { 'S3DataSource': { diff --git a/src/sagemaker/sparkml/model.py b/src/sagemaker/sparkml/model.py index 3d9c1ebe98..022a054024 100644 --- a/src/sagemaker/sparkml/model.py +++ b/src/sagemaker/sparkml/model.py @@ -37,6 +37,7 @@ def __init__(self, endpoint, sagemaker_session=None): Initializes a SparkMLPredictor which should be used with SparkMLModel to perform predictions against SparkML models serialized via MLeap. The response is returned in text/csv format which is the default response format for SparkML Serving container. + Args: endpoint (str): The name of the endpoint to perform inference on. sagemaker_session (sagemaker.session.Session): Session object which manages interactions with @@ -57,6 +58,7 @@ class SparkMLModel(Model): def __init__(self, model_data, role=None, spark_version=2.2, sagemaker_session=None, **kwargs): """Initialize a SparkMLModel.. + Args: model_data (str): The S3 location of a SageMaker model data ``.tar.gz`` file. For SparkML, this will be the output that has been produced by the Spark job after serializing the Model via MLeap. diff --git a/src/sagemaker/tensorflow/estimator.py b/src/sagemaker/tensorflow/estimator.py index 0fee94ff2f..2f2a817175 100644 --- a/src/sagemaker/tensorflow/estimator.py +++ b/src/sagemaker/tensorflow/estimator.py @@ -177,6 +177,7 @@ def __init__(self, training_steps=None, evaluation_steps=None, checkpoint_path=N framework_version=None, model_dir=None, requirements_file='', image_name=None, script_mode=False, distributions=None, **kwargs): """Initialize an ``TensorFlow`` estimator. + Args: training_steps (int): Perform this many steps of training. `None`, the default means train forever. evaluation_steps (int): Perform this many steps of evaluation. `None`, the default means that evaluation @@ -195,26 +196,36 @@ def __init__(self, training_steps=None, evaluation_steps=None, checkpoint_path=N image_name (str): If specified, the estimator will use this image for training and hosting, instead of selecting the appropriate SageMaker official image based on framework_version and py_version. It can be an ECR url or dockerhub image and tag. - Examples: - 123.dkr.ecr.us-west-2.amazonaws.com/my-custom-image:1.0 - custom-image:latest. + + Examples: + 123.dkr.ecr.us-west-2.amazonaws.com/my-custom-image:1.0 + custom-image:latest. script_mode (bool): If set to True will the estimator will use the Script Mode containers (default: False). This will be ignored if py_version is set to 'py3'. distributions (dict): A dictionary with information on how to run distributed training - (default: None). Currently we support distributed training with parameter servers and MPI. To enable - parameter server use the following setup: + (default: None). Currently we support distributed training with parameter servers and MPI. + To enable parameter server use the following setup: + + .. code:: python + + { 'parameter_server': { 'enabled': True } } + To enable MPI: + + .. code:: python + { 'mpi': { 'enabled': True } } + **kwargs: Additional kwargs passed to the Framework constructor. """ if framework_version is None: @@ -281,13 +292,15 @@ def fit(self, inputs=None, wait=True, logs=True, job_name=None, run_tensorboard_ Args: inputs (str or dict or sagemaker.session.s3_input): Information about the training data. This can be one of three types: - (str) - the S3 location where training data is saved. - (dict[str, str] or dict[str, sagemaker.session.s3_input]) - If using multiple channels for + + * (str) - the S3 location where training data is saved. + * (dict[str, str] or dict[str, sagemaker.session.s3_input]) - If using multiple channels for training data, you can specify a dict mapping channel names to strings or :func:`~sagemaker.session.s3_input` objects. - (sagemaker.session.s3_input) - channel configuration for S3 data sources that can provide + * (sagemaker.session.s3_input) - channel configuration for S3 data sources that can provide additional information as well as the path to the training dataset. See :func:`sagemaker.session.s3_input` for full details. + wait (bool): Whether the call should wait until the job completes (default: True). logs (bool): Whether to show the logs produced by the job. Only meaningful when wait is True (default: True). diff --git a/src/sagemaker/tensorflow/model.py b/src/sagemaker/tensorflow/model.py index 346eb23f7c..162cd5360b 100644 --- a/src/sagemaker/tensorflow/model.py +++ b/src/sagemaker/tensorflow/model.py @@ -21,9 +21,9 @@ class TensorFlowPredictor(RealTimePredictor): - """A ``RealTimePredictor`` for inference against TensorFlow ``Endpoint``s. + """A ``RealTimePredictor`` for inference against TensorFlow ``Endpoint``. - This is able to serialize Python lists, dictionaries, and numpy arrays to multidimensional tensors for MXNet + This is able to serialize Python lists, dictionaries, and numpy arrays to multidimensional tensors for inference""" def __init__(self, endpoint_name, sagemaker_session=None): """Initialize an ``TensorFlowPredictor``. diff --git a/src/sagemaker/workflow/airflow.py b/src/sagemaker/workflow/airflow.py index 5babc8d8c3..b38140148d 100644 --- a/src/sagemaker/workflow/airflow.py +++ b/src/sagemaker/workflow/airflow.py @@ -447,8 +447,7 @@ def model_config_from_estimator(instance_type, estimator, task_id, task_type, ro Returns: dict: Model config that can be directly used by SageMakerModelOperator in Airflow. It can also be part - of the config used by SageMakerEndpointOperator. - SageMakerTransformOperator in Airflow. + of the config used by SageMakerEndpointOperator in Airflow. """ update_estimator_from_task(estimator, task_id, task_type) if isinstance(estimator, sagemaker.estimator.Estimator): From a06c48b5cda6a158867377023f131db335bba194 Mon Sep 17 00:00:00 2001 From: Nadia Yakimakha <32335935+nadiaya@users.noreply.github.com> Date: Wed, 30 Jan 2019 23:19:08 -0800 Subject: [PATCH 2/4] Fix typo. --- doc/conf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/conf.py b/doc/conf.py index 303624a099..f6538642e4 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -32,7 +32,6 @@ def __getattr__(cls, name): 'tensorflow.python.framework', 'tensorflow_serving', 'tensorflow_serving.apis', 'numpy', 'scipy', 'scipy.sparse'] sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES) -sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES) project = u'sagemaker' version = pkg_resources.require(project)[0].version From 1e9ebf158b0513cc9381c07d3f57355d25cdfcda Mon Sep 17 00:00:00 2001 From: Nadia Yakimakha <32335935+nadiaya@users.noreply.github.com> Date: Thu, 31 Jan 2019 10:56:06 -0800 Subject: [PATCH 3/4] Address PR comments. --- src/sagemaker/sparkml/model.py | 2 +- src/sagemaker/tensorflow/estimator.py | 2 +- src/sagemaker/tensorflow/model.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/sagemaker/sparkml/model.py b/src/sagemaker/sparkml/model.py index 022a054024..c95efef38a 100644 --- a/src/sagemaker/sparkml/model.py +++ b/src/sagemaker/sparkml/model.py @@ -57,7 +57,7 @@ class SparkMLModel(Model): """ def __init__(self, model_data, role=None, spark_version=2.2, sagemaker_session=None, **kwargs): - """Initialize a SparkMLModel.. + """Initialize a SparkMLModel. Args: model_data (str): The S3 location of a SageMaker model data ``.tar.gz`` file. For SparkML, this will be the diff --git a/src/sagemaker/tensorflow/estimator.py b/src/sagemaker/tensorflow/estimator.py index 2f2a817175..3715daae22 100644 --- a/src/sagemaker/tensorflow/estimator.py +++ b/src/sagemaker/tensorflow/estimator.py @@ -176,7 +176,7 @@ class TensorFlow(Framework): def __init__(self, training_steps=None, evaluation_steps=None, checkpoint_path=None, py_version='py2', framework_version=None, model_dir=None, requirements_file='', image_name=None, script_mode=False, distributions=None, **kwargs): - """Initialize an ``TensorFlow`` estimator. + """Initialize a ``TensorFlow`` estimator. Args: training_steps (int): Perform this many steps of training. `None`, the default means train forever. diff --git a/src/sagemaker/tensorflow/model.py b/src/sagemaker/tensorflow/model.py index 162cd5360b..88b7690df8 100644 --- a/src/sagemaker/tensorflow/model.py +++ b/src/sagemaker/tensorflow/model.py @@ -21,7 +21,7 @@ class TensorFlowPredictor(RealTimePredictor): - """A ``RealTimePredictor`` for inference against TensorFlow ``Endpoint``. + """A ``RealTimePredictor`` for inference against TensorFlow endpoint. This is able to serialize Python lists, dictionaries, and numpy arrays to multidimensional tensors for inference""" From b862e953449ad299d80b086629829c42a39c454c Mon Sep 17 00:00:00 2001 From: Nadia Yakimakha <32335935+nadiaya@users.noreply.github.com> Date: Thu, 31 Jan 2019 11:19:07 -0800 Subject: [PATCH 4/4] Remove the line. --- src/sagemaker/amazon/knn.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/sagemaker/amazon/knn.py b/src/sagemaker/amazon/knn.py index 0340f13347..0951b732d6 100644 --- a/src/sagemaker/amazon/knn.py +++ b/src/sagemaker/amazon/knn.py @@ -109,7 +109,6 @@ def create_model(self, vpc_config_override=VPC_CONFIG_DEFAULT): * 'Subnets' (list[str]): List of subnet ids. * 'SecurityGroupIds' (list[str]): List of security group ids. """ - return KNNModel(self.model_data, self.role, sagemaker_session=self.sagemaker_session, vpc_config=self.get_vpc_config(vpc_config_override))