Skip to content

Commit f791330

Browse files
Merge branch 'master' into raj-bug-fix
2 parents 86e5dae + f939f89 commit f791330

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

60 files changed

+1684
-1105
lines changed

CHANGELOG.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,24 @@
11
# Changelog
22

3+
## v2.110.0 (2022-09-27)
4+
5+
### Features
6+
7+
* Support KeepAlivePeriodInSeconds for Training APIs
8+
* added ANALYSIS_CONFIG_SCHEMA_V1_0 in clarify
9+
* add model monitor image accounts for ap-southeast-3
10+
11+
### Bug Fixes and Other Changes
12+
13+
* huggingface release test
14+
* Fixing the logic to return instanceCount for heterogeneousClusters
15+
* Disable type hints in doc signature and add PipelineVariable annotations in docstring
16+
* estimator hyperparameters in script mode
17+
18+
### Documentation Changes
19+
20+
* Added link to example notebook for Pipelines local mode
21+
322
## v2.109.0 (2022-09-09)
423

524
### Features

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.109.1.dev0
1+
2.110.1.dev0

doc/conf.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,13 @@
9696
# Example configuration for intersphinx: refer to the Python standard library.
9797
intersphinx_mapping = {"http://docs.python.org/": None}
9898

99+
# -- Options for autodoc ----------------------------------------------------
100+
# https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html#configuration
101+
102+
# Automatically extract typehints when specified and place them in
103+
# descriptions of the relevant function/method.
104+
autodoc_typehints = "description"
105+
99106
# autosummary
100107
autosummary_generate = True
101108

doc/overview.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1565,6 +1565,8 @@ For detailed examples of running Docker in local mode, see:
15651565
- `TensorFlow local mode example notebook <https://github.com/awslabs/amazon-sagemaker-examples/blob/master/sagemaker-python-sdk/tensorflow_script_mode_using_shell_commands/tensorflow_script_mode_using_shell_commands.ipynb>`__.
15661566
- `MXNet local mode example notebook <https://github.com/awslabs/amazon-sagemaker-examples/blob/master/sagemaker-python-sdk/mxnet_gluon_mnist/mxnet_mnist_with_gluon_local_mode.ipynb>`__.
15671567
- `PyTorch local mode example notebook <https://github.com/awslabs/amazon-sagemaker-examples/blob/master/sagemaker-python-sdk/pytorch_cnn_cifar10/pytorch_local_mode_cifar10.ipynb>`__.
1568+
- `Pipelines local mode example notebook <https://github.com/aws/amazon-sagemaker-examples/blob/main/sagemaker-pipelines/tabular/local-mode/sagemaker-pipelines-local-mode.ipynb>`__.
1569+
15681570
15691571
You can also find these notebooks in the **SageMaker Python SDK** section of the **SageMaker Examples** section in a notebook instance.
15701572
For information about using sample notebooks in a SageMaker notebook instance, see `Use Example Notebooks <https://docs.aws.amazon.com/sagemaker/latest/dg/howitworks-nbexamples.html>`__

doc/workflows/pipelines/sagemaker.workflow.pipelines.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ Entities
4646

4747
.. autoclass:: sagemaker.workflow.entities.Expression
4848

49+
.. autoclass:: sagemaker.workflow.entities.PipelineVariable
50+
4951
Execution Variables
5052
-------------------
5153

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ def read_requirements(filename):
5858
"packaging>=20.0",
5959
"pandas",
6060
"pathos",
61+
"schema",
6162
]
6263

6364
# Specific use case dependencies

src/sagemaker/amazon/amazon_estimator.py

Lines changed: 48 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
import json
1717
import logging
1818
import tempfile
19-
from typing import Union
19+
from typing import Union, Optional, Dict
2020

2121
from six.moves.urllib.parse import urlparse
2222

@@ -30,6 +30,7 @@
3030
from sagemaker.utils import sagemaker_timestamp
3131
from sagemaker.workflow.entities import PipelineVariable
3232
from sagemaker.workflow.pipeline_context import runnable_by_pipeline
33+
from sagemaker.workflow import is_pipeline_variable
3334

3435
logger = logging.getLogger(__name__)
3536

@@ -40,18 +41,20 @@ class AmazonAlgorithmEstimatorBase(EstimatorBase):
4041
This class isn't intended to be instantiated directly.
4142
"""
4243

43-
feature_dim = hp("feature_dim", validation.gt(0), data_type=int)
44-
mini_batch_size = hp("mini_batch_size", validation.gt(0), data_type=int)
45-
repo_name = None
46-
repo_version = None
44+
feature_dim: hp = hp("feature_dim", validation.gt(0), data_type=int)
45+
mini_batch_size: hp = hp("mini_batch_size", validation.gt(0), data_type=int)
46+
repo_name: Optional[str] = None
47+
repo_version: Optional[str] = None
48+
49+
DEFAULT_MINI_BATCH_SIZE: Optional[int] = None
4750

4851
def __init__(
4952
self,
50-
role,
51-
instance_count=None,
52-
instance_type=None,
53-
data_location=None,
54-
enable_network_isolation=False,
53+
role: str,
54+
instance_count: Optional[Union[int, PipelineVariable]] = None,
55+
instance_type: Optional[Union[str, PipelineVariable]] = None,
56+
data_location: Optional[str] = None,
57+
enable_network_isolation: Union[bool, PipelineVariable] = False,
5558
**kwargs
5659
):
5760
"""Initialize an AmazonAlgorithmEstimatorBase.
@@ -62,16 +65,16 @@ def __init__(
6265
endpoints use this role to access training data and model
6366
artifacts. After the endpoint is created, the inference code
6467
might use the IAM role, if it needs to access an AWS resource.
65-
instance_count (int): Number of Amazon EC2 instances to use
68+
instance_count (int or PipelineVariable): Number of Amazon EC2 instances to use
6669
for training. Required.
67-
instance_type (str): Type of EC2 instance to use for training,
70+
instance_type (str or PipelineVariable): Type of EC2 instance to use for training,
6871
for example, 'ml.c4.xlarge'. Required.
6972
data_location (str or None): The s3 prefix to upload RecordSet
7073
objects to, expressed as an S3 url. For example
7174
"s3://example-bucket/some-key-prefix/". Objects will be saved in
7275
a unique sub-directory of the specified location. If None, a
7376
default data location will be used.
74-
enable_network_isolation (bool): Specifies whether container will
77+
enable_network_isolation (bool or PipelineVariable): Specifies whether container will
7578
run in network isolation mode. Network isolation mode restricts
7679
the container access to outside networks (such as the internet).
7780
Also known as internet-free mode (default: ``False``).
@@ -113,8 +116,14 @@ def data_location(self):
113116
return self._data_location
114117

115118
@data_location.setter
116-
def data_location(self, data_location):
119+
def data_location(self, data_location: str):
117120
"""Placeholder docstring"""
121+
if is_pipeline_variable(data_location):
122+
raise TypeError(
123+
"Invalid input: data_location should be a plain string "
124+
"rather than a pipeline variable - ({}).".format(type(data_location))
125+
)
126+
118127
if not data_location.startswith("s3://"):
119128
raise ValueError(
120129
'Expecting an S3 URL beginning with "s3://". Got "{}"'.format(data_location)
@@ -198,12 +207,12 @@ def _prepare_for_training(self, records, mini_batch_size=None, job_name=None):
198207
@runnable_by_pipeline
199208
def fit(
200209
self,
201-
records,
202-
mini_batch_size=None,
203-
wait=True,
204-
logs=True,
205-
job_name=None,
206-
experiment_config=None,
210+
records: "RecordSet",
211+
mini_batch_size: Optional[int] = None,
212+
wait: bool = True,
213+
logs: bool = True,
214+
job_name: Optional[str] = None,
215+
experiment_config: Optional[Dict[str, str]] = None,
207216
):
208217
"""Fit this Estimator on serialized Record objects, stored in S3.
209218
@@ -301,6 +310,20 @@ def record_set(self, train, labels=None, channel="train", encrypt=False):
301310
channel=channel,
302311
)
303312

313+
def _get_default_mini_batch_size(self, num_records: int):
314+
"""Generate the default mini_batch_size"""
315+
if is_pipeline_variable(self.instance_count):
316+
logger.warning(
317+
"mini_batch_size is not given in .fit() and instance_count is a "
318+
"pipeline variable (%s) which is only interpreted in pipeline execution time. "
319+
"Thus setting mini_batch_size to 1, since it can't be greater than "
320+
"number of records per instance_count, otherwise the training job fails.",
321+
type(self.instance_count),
322+
)
323+
return 1
324+
325+
return min(self.DEFAULT_MINI_BATCH_SIZE, max(1, int(num_records / self.instance_count)))
326+
304327

305328
class RecordSet(object):
306329
"""Placeholder docstring"""
@@ -316,16 +339,16 @@ def __init__(
316339
"""A collection of Amazon :class:~`Record` objects serialized and stored in S3.
317340
318341
Args:
319-
s3_data (str): The S3 location of the training data
342+
s3_data (str or PipelineVariable): The S3 location of the training data
320343
num_records (int): The number of records in the set.
321344
feature_dim (int): The dimensionality of "values" arrays in the
322345
Record features, and label (if each Record is labeled).
323-
s3_data_type (str): Valid values: 'S3Prefix', 'ManifestFile'. If
324-
'S3Prefix', ``s3_data`` defines a prefix of s3 objects to train
346+
s3_data_type (str or PipelineVariable): Valid values: 'S3Prefix', 'ManifestFile'.
347+
If 'S3Prefix', ``s3_data`` defines a prefix of s3 objects to train
325348
on. All objects with s3 keys beginning with ``s3_data`` will be
326349
used to train. If 'ManifestFile', then ``s3_data`` defines a
327350
single s3 manifest file, listing each s3 object to train on.
328-
channel (str): The SageMaker Training Job channel this RecordSet
351+
channel (str or PipelineVariable): The SageMaker Training Job channel this RecordSet
329352
should be bound to
330353
"""
331354
self.s3_data = s3_data
@@ -461,7 +484,7 @@ def upload_numpy_to_s3_shards(
461484
raise ex
462485

463486

464-
def get_image_uri(region_name, repo_name, repo_version=1):
487+
def get_image_uri(region_name, repo_name, repo_version="1"):
465488
"""Deprecated method. Please use sagemaker.image_uris.retrieve().
466489
467490
Args:

0 commit comments

Comments
 (0)