From bd409b6d509f684f88039779cf970e32e5fe29b4 Mon Sep 17 00:00:00 2001
From: Ignacio Quintero
Date: Mon, 22 Jan 2018 16:58:40 -0800
Subject: [PATCH 1/4] Add data_type to hyperparameters

When we describe a training job, the data type of the hyperparameters is
lost because we use a dict[str, str]. This adds a new optional field to
Hyperparameter so that we can convert the data types at runtime.
---
 .../amazon/factorization_machines.py   | 48 +++++++-------
 src/sagemaker/amazon/hyperparameter.py |  3 +-
 src/sagemaker/amazon/kmeans.py         | 18 ++---
 src/sagemaker/amazon/linear_learner.py | 65 ++++++++++---------
 src/sagemaker/amazon/pca.py            |  8 +--
 tests/unit/test_hyperparameter.py      | 19 +++++-
 6 files changed, 90 insertions(+), 71 deletions(-)

diff --git a/src/sagemaker/amazon/factorization_machines.py b/src/sagemaker/amazon/factorization_machines.py
index 5340fa11f0..6c1aa0e8ac 100644
--- a/src/sagemaker/amazon/factorization_machines.py
+++ b/src/sagemaker/amazon/factorization_machines.py
@@ -23,34 +23,34 @@ class FactorizationMachines(AmazonAlgorithmEstimatorBase):
 
     repo = 'factorization-machines:1'
 
-    num_factors = hp('num_factors', (gt(0), isint), 'An integer greater than zero')
+    num_factors = hp('num_factors', (gt(0), isint), 'An integer greater than zero', int)
     predictor_type = hp('predictor_type', isin('binary_classifier', 'regressor'),
-                        'Value "binary_classifier" or "regressor"')
-    epochs = hp('epochs', (gt(0), isint), "An integer greater than 0")
-    clip_gradient = hp('clip_gradient', isnumber, "A float value")
-    eps = hp('eps', isnumber, "A float value")
-    rescale_grad = hp('rescale_grad', isnumber, "A float value")
-    bias_lr = hp('bias_lr', (ge(0), isnumber), "A non-negative float")
-    linear_lr = hp('linear_lr', (ge(0), isnumber), "A non-negative float")
-    factors_lr = hp('factors_lr', (ge(0), isnumber), "A non-negative float")
-    bias_wd = hp('bias_wd', (ge(0), isnumber), "A non-negative float")
-    linear_wd = hp('linear_wd', (ge(0), isnumber), "A non-negative float")
-    factors_wd = hp('factors_wd', (ge(0), isnumber), "A non-negative float")
+                        'Value "binary_classifier" or "regressor"', str)
+    epochs = hp('epochs', (gt(0), isint), "An integer greater than 0", int)
+    clip_gradient = hp('clip_gradient', isnumber, "A float value", float)
+    eps = hp('eps', isnumber, "A float value", float)
+    rescale_grad = hp('rescale_grad', isnumber, "A float value", float)
+    bias_lr = hp('bias_lr', (ge(0), isnumber), "A non-negative float", float)
+    linear_lr = hp('linear_lr', (ge(0), isnumber), "A non-negative float", float)
+    factors_lr = hp('factors_lr', (ge(0), isnumber), "A non-negative float", float)
+    bias_wd = hp('bias_wd', (ge(0), isnumber), "A non-negative float", float)
+    linear_wd = hp('linear_wd', (ge(0), isnumber), "A non-negative float", float)
+    factors_wd = hp('factors_wd', (ge(0), isnumber), "A non-negative float", float)
     bias_init_method = hp('bias_init_method', isin('normal', 'uniform', 'constant'),
-                          'Value "normal", "uniform" or "constant"')
-    bias_init_scale = hp('bias_init_scale', (ge(0), isnumber), "A non-negative float")
-    bias_init_sigma = hp('bias_init_sigma', (ge(0), isnumber), "A non-negative float")
-    bias_init_value = hp('bias_init_value', isnumber, "A float value")
+                          'Value "normal", "uniform" or "constant"', str)
+    bias_init_scale = hp('bias_init_scale', (ge(0), isnumber), "A non-negative float", float)
+    bias_init_sigma = hp('bias_init_sigma', (ge(0), isnumber), "A non-negative float", float)
+    bias_init_value = hp('bias_init_value', isnumber, "A float value", float)
linear_init_method = hp('linear_init_method', isin('normal', 'uniform', 'constant'), - 'Value "normal", "uniform" or "constant"') - linear_init_scale = hp('linear_init_scale', (ge(0), isnumber), "A non-negative float") - linear_init_sigma = hp('linear_init_sigma', (ge(0), isnumber), "A non-negative float") - linear_init_value = hp('linear_init_value', isnumber, "A float value") + 'Value "normal", "uniform" or "constant"', str) + linear_init_scale = hp('linear_init_scale', (ge(0), isnumber), "A non-negative float", float) + linear_init_sigma = hp('linear_init_sigma', (ge(0), isnumber), "A non-negative float", float) + linear_init_value = hp('linear_init_value', isnumber, "A float value", float) factors_init_method = hp('factors_init_method', isin('normal', 'uniform', 'constant'), - 'Value "normal", "uniform" or "constant"') - factors_init_scale = hp('factors_init_scale', (ge(0), isnumber), "A non-negative float") - factors_init_sigma = hp('factors_init_sigma', (ge(0), isnumber), "A non-negative float") - factors_init_value = hp('factors_init_value', isnumber, "A float value") + 'Value "normal", "uniform" or "constant"', str) + factors_init_scale = hp('factors_init_scale', (ge(0), isnumber), "A non-negative float", float) + factors_init_sigma = hp('factors_init_sigma', (ge(0), isnumber), "A non-negative float", float) + factors_init_value = hp('factors_init_value', isnumber, "A float value", float) def __init__(self, role, train_instance_count, train_instance_type, num_factors, predictor_type, diff --git a/src/sagemaker/amazon/hyperparameter.py b/src/sagemaker/amazon/hyperparameter.py index 0d86191474..e34b1bcff7 100644 --- a/src/sagemaker/amazon/hyperparameter.py +++ b/src/sagemaker/amazon/hyperparameter.py @@ -16,7 +16,7 @@ class Hyperparameter(object): """An algorithm hyperparameter with optional validation. Implemented as a python descriptor object.""" - def __init__(self, name, validate=lambda _: True, validation_message=""): + def __init__(self, name, validate=lambda _: True, validation_message="", data_type=str): """Args: name (str): The name of this hyperparameter validate (callable[object]->[bool]): A validation function or list of validation functions. 
@@ -27,6 +27,7 @@ def __init__(self, name, validate=lambda _: True, validation_message=""): self.validation = validate self.validation_message = validation_message self.name = name + self.data_type = data_type try: iter(self.validation) except TypeError: diff --git a/src/sagemaker/amazon/kmeans.py b/src/sagemaker/amazon/kmeans.py index d3fb5d670e..bc1fb4456b 100644 --- a/src/sagemaker/amazon/kmeans.py +++ b/src/sagemaker/amazon/kmeans.py @@ -23,15 +23,15 @@ class KMeans(AmazonAlgorithmEstimatorBase): repo = 'kmeans:1' - k = hp('k', (gt(1), isint), 'An integer greater-than 1') - init_method = hp('init_method', isin('random', 'kmeans++'), 'One of "random", "kmeans++"') - max_iterations = hp('local_lloyd_max_iterations', (gt(0), isint), 'An integer greater-than 0') - tol = hp('local_lloyd_tol', (gt(0), isint), 'An integer greater-than 0') - num_trials = hp('local_lloyd_num_trials', (gt(0), isint), 'An integer greater-than 0') - local_init_method = hp('local_lloyd_init_method', isin('random', 'kmeans++'), 'One of "random", "kmeans++"') - half_life_time_size = hp('half_life_time_size', (ge(0), isint), 'An integer greater-than-or-equal-to 0') - epochs = hp('epochs', (gt(0), isint), 'An integer greater-than 0') - center_factor = hp('extra_center_factor', (gt(0), isint), 'An integer greater-than 0') + k = hp('k', (gt(1), isint), 'An integer greater-than 1', int) + init_method = hp('init_method', isin('random', 'kmeans++'), 'One of "random", "kmeans++"', str) + max_iterations = hp('local_lloyd_max_iterations', (gt(0), isint), 'An integer greater-than 0', int) + tol = hp('local_lloyd_tol', (gt(0), isint), 'An integer greater-than 0', int) + num_trials = hp('local_lloyd_num_trials', (gt(0), isint), 'An integer greater-than 0', int) + local_init_method = hp('local_lloyd_init_method', isin('random', 'kmeans++'), 'One of "random", "kmeans++"', str) + half_life_time_size = hp('half_life_time_size', (ge(0), isint), 'An integer greater-than-or-equal-to 0', int) + epochs = hp('epochs', (gt(0), isint), 'An integer greater-than 0', int) + center_factor = hp('extra_center_factor', (gt(0), isint), 'An integer greater-than 0', int) def __init__(self, role, train_instance_count, train_instance_type, k, init_method=None, max_iterations=None, tol=None, num_trials=None, local_init_method=None, diff --git a/src/sagemaker/amazon/linear_learner.py b/src/sagemaker/amazon/linear_learner.py index af336a8e6b..b78394cdad 100644 --- a/src/sagemaker/amazon/linear_learner.py +++ b/src/sagemaker/amazon/linear_learner.py @@ -27,40 +27,41 @@ class LinearLearner(AmazonAlgorithmEstimatorBase): binary_classifier_model_selection_criteria = hp('binary_classifier_model_selection_criteria', isin('accuracy', 'f1', 'precision_at_target_recall', - 'recall_at_target_precision', 'cross_entropy_loss')) - target_recall = hp('target_recall', (gt(0), lt(1)), "A float in (0,1)") - target_precision = hp('target_precision', (gt(0), lt(1)), "A float in (0,1)") - positive_example_weight_mult = hp('positive_example_weight_mult', gt(0), "A float greater than 0") - epochs = hp('epochs', (gt(0), isint), "An integer greater-than 0") + 'recall_at_target_precision', 'cross_entropy_loss'), + data_type=str) + target_recall = hp('target_recall', (gt(0), lt(1)), "A float in (0,1)", float) + target_precision = hp('target_precision', (gt(0), lt(1)), "A float in (0,1)", float) + positive_example_weight_mult = hp('positive_example_weight_mult', gt(0), "A float greater than 0", float) + epochs = hp('epochs', (gt(0), isint), "An integer greater-than 0", int) 
    predictor_type = hp('predictor_type', isin('binary_classifier', 'regressor'),
-                        'One of "binary_classifier" or "regressor"')
-    use_bias = hp('use_bias', isbool, "Either True or False")
-    num_models = hp('num_models', (gt(0), isint), "An integer greater-than 0")
-    num_calibration_samples = hp('num_calibration_samples', (gt(0), isint), "An integer greater-than 0")
-    init_method = hp('init_method', isin('uniform', 'normal'), 'One of "uniform" or "normal"')
-    init_scale = hp('init_scale', (gt(-1), lt(1)), 'A float in (-1, 1)')
-    init_sigma = hp('init_sigma', (gt(0), lt(1)), 'A float in (0, 1)')
-    init_bias = hp('init_bias', isnumber, 'A number')
-    optimizer = hp('optimizer', isin('sgd', 'adam', 'auto'), 'One of "sgd", "adam" or "auto')
+                        'One of "binary_classifier" or "regressor"', str)
+    use_bias = hp('use_bias', isbool, "Either True or False", bool)
+    num_models = hp('num_models', (gt(0), isint), "An integer greater-than 0", int)
+    num_calibration_samples = hp('num_calibration_samples', (gt(0), isint), "An integer greater-than 0", int)
+    init_method = hp('init_method', isin('uniform', 'normal'), 'One of "uniform" or "normal"', str)
+    init_scale = hp('init_scale', (gt(-1), lt(1)), 'A float in (-1, 1)', float)
+    init_sigma = hp('init_sigma', (gt(0), lt(1)), 'A float in (0, 1)', float)
+    init_bias = hp('init_bias', isnumber, 'A number', float)
+    optimizer = hp('optimizer', isin('sgd', 'adam', 'auto'), 'One of "sgd", "adam" or "auto"', str)
     loss = hp('loss', isin('logistic', 'squared_loss', 'absolute_loss', 'auto'),
-              '"logistic", "squared_loss", "absolute_loss" or"auto"')
-    wd = hp('wd', (gt(0), lt(1)), 'A float in (0,1)')
-    l1 = hp('l1', (gt(0), lt(1)), 'A float in (0,1)')
-    momentum = hp('momentum', (gt(0), lt(1)), 'A float in (0,1)')
-    learning_rate = hp('learning_rate', (gt(0), lt(1)), 'A float in (0,1)')
-    beta_1 = hp('beta_1', (gt(0), lt(1)), 'A float in (0,1)')
-    beta_2 = hp('beta_1', (gt(0), lt(1)), 'A float in (0,1)')
-    bias_lr_mult = hp('bias_lr_mult', gt(0), 'A float greater-than 0')
-    bias_wd_mult = hp('bias_wd_mult', gt(0), 'A float greater-than 0')
-    use_lr_scheduler = hp('use_lr_scheduler', isbool, 'A boolean')
-    lr_scheduler_step = hp('lr_scheduler_step', (gt(0), isint), 'An integer greater-than 0')
-    lr_scheduler_factor = hp('lr_scheduler_factor', (gt(0), lt(1)), 'A float in (0,1)')
-    lr_scheduler_minimum_lr = hp('lr_scheduler_minimum_lr', gt(0), 'A float greater-than 0')
-    normalize_data = hp('normalize_data', isbool, 'A boolean')
-    normalize_label = hp('normalize_label', isbool, 'A boolean')
-    unbias_data = hp('unbias_data', isbool, 'A boolean')
-    unbias_label = hp('unbias_label', isbool, 'A boolean')
-    num_point_for_scalar = hp('num_point_for_scalar', (isint, gt(0)), 'An integer greater-than 0')
+              '"logistic", "squared_loss", "absolute_loss" or "auto"', str)
+    wd = hp('wd', (gt(0), lt(1)), 'A float in (0,1)', float)
+    l1 = hp('l1', (gt(0), lt(1)), 'A float in (0,1)', float)
+    momentum = hp('momentum', (gt(0), lt(1)), 'A float in (0,1)', float)
+    learning_rate = hp('learning_rate', (gt(0), lt(1)), 'A float in (0,1)', float)
+    beta_1 = hp('beta_1', (gt(0), lt(1)), 'A float in (0,1)', float)
+    beta_2 = hp('beta_2', (gt(0), lt(1)), 'A float in (0,1)', float)
+    bias_lr_mult = hp('bias_lr_mult', gt(0), 'A float greater-than 0', float)
+    bias_wd_mult = hp('bias_wd_mult', gt(0), 'A float greater-than 0', float)
+    use_lr_scheduler = hp('use_lr_scheduler', isbool, 'A boolean', bool)
+    lr_scheduler_step = hp('lr_scheduler_step', (gt(0), isint), 'An integer greater-than 0', int)
+    lr_scheduler_factor = hp('lr_scheduler_factor', (gt(0), lt(1)), 'A float in (0,1)', float)
+    lr_scheduler_minimum_lr = hp('lr_scheduler_minimum_lr', gt(0), 'A float greater-than 0', float)
+    normalize_data = hp('normalize_data', isbool, 'A boolean', bool)
+    normalize_label = hp('normalize_label', isbool, 'A boolean', bool)
+    unbias_data = hp('unbias_data', isbool, 'A boolean', bool)
+    unbias_label = hp('unbias_label', isbool, 'A boolean', bool)
+    num_point_for_scalar = hp('num_point_for_scalar', (isint, gt(0)), 'An integer greater-than 0', int)
 
     def __init__(self, role, train_instance_count, train_instance_type, predictor_type='binary_classifier',
                  binary_classifier_model_selection_criteria=None, target_recall=None, target_precision=None,
diff --git a/src/sagemaker/amazon/pca.py b/src/sagemaker/amazon/pca.py
index 7a23f60c7c..19271c43e8 100644
--- a/src/sagemaker/amazon/pca.py
+++ b/src/sagemaker/amazon/pca.py
@@ -25,13 +25,13 @@ class PCA(AmazonAlgorithmEstimatorBase):
 
     DEFAULT_MINI_BATCH_SIZE = 500
 
     num_components = hp(name='num_components', validate=lambda x: x > 0 and isinstance(x, int),
-                        validation_message='Value must be an integer greater than zero')
+                        validation_message='Value must be an integer greater than zero', data_type=int)
     algorithm_mode = hp(name='algorithm_mode', validate=lambda x: x in ['regular', 'stable', 'randomized'],
-                        validation_message='Value must be one of "regular", "stable", "randomized"')
+                        validation_message='Value must be one of "regular", "stable", "randomized"', data_type=str)
     subtract_mean = hp(name='subtract_mean', validate=lambda x: isinstance(x, bool),
-                       validation_message='Value must be a boolean')
+                       validation_message='Value must be a boolean', data_type=bool)
     extra_components = hp(name='extra_components', validate=lambda x: x >= 0 and isinstance(x, int),
-                          validation_message="Value must be an integer greater than or equal to 0")
+                          validation_message="Value must be an integer greater than or equal to 0", data_type=int)
 
     def __init__(self, role, train_instance_count, train_instance_type, num_components,
                  algorithm_mode=None, subtract_mean=None, extra_components=None, **kwargs):
diff --git a/tests/unit/test_hyperparameter.py b/tests/unit/test_hyperparameter.py
index c168f3275e..69fc248e53 100644
--- a/tests/unit/test_hyperparameter.py
+++ b/tests/unit/test_hyperparameter.py
@@ -18,7 +18,7 @@ class Test(object):
     blank = Hyperparameter(name="some-name")
     elizabeth = Hyperparameter(name='elizabeth')
-    validated = Hyperparameter(name="validated", validate=lambda value: value > 55)
+    validated = Hyperparameter(name="validated", validate=lambda value: value > 55, data_type=int)
 
 
 def test_blank_access():
@@ -55,3 +55,20 @@ def test_validated():
     x.validated = 66
     with pytest.raises(ValueError):
         x.validated = 23
+
+
+def test_data_type():
+    x = Test()
+    x.validated = 66
+    assert type(x.validated) == Test.__dict__["validated"].data_type
+
+
+def test_from_string():
+    x = Test()
+    value = 65
+
+    x.validated = value
+    from_api = str(value)
+
+    x.validated = Test.__dict__["validated"].data_type(from_api)
+    assert x.validated == value

From 6834806cabe4bf2da3cf7949d81bb84303ee765b Mon Sep 17 00:00:00 2001
From: Ignacio Quintero
Date: Tue, 23 Jan 2018 14:25:17 -0800
Subject: [PATCH 2/4] Enforce an HP type when setting its value

Instead of validating with isinstance(), cast the hp value to the type it
is meant to be. This enforces a "strongly typed" value, and it makes the
string responses we deserialize from the API easier to deal with.
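A minimal, self-contained sketch of the cast-on-set behavior this commit introduces, condensed from the diffs below (the Example class and the epochs assignment are illustrative only, not part of the SDK):

    class Hyperparameter(object):
        # Condensed from src/sagemaker/amazon/hyperparameter.py as of this commit.
        def __init__(self, name, validate=lambda _: True, validation_message="", data_type=str):
            self.name = name
            self.validation = validate
            self.validation_message = validation_message
            self.data_type = data_type
            try:
                iter(self.validation)
            except TypeError:  # a single callable is wrapped into a list
                self.validation = [self.validation]

        def validate(self, value):
            for valid in self.validation:
                if not valid(value):
                    raise ValueError("Invalid hyperparameter value {} for {}".format(value, self.name))

        def __get__(self, obj, objtype):
            return obj._hyperparameters[self.name]

        def __set__(self, obj, value):
            value = self.data_type(value)  # cast first, so validators see a typed value
            self.validate(value)
            if '_hyperparameters' not in dir(obj):
                obj._hyperparameters = dict()
            obj._hyperparameters[self.name] = value

    class Example(object):
        epochs = Hyperparameter('epochs', lambda x: x > 0, 'An integer greater-than 0', int)

    e = Example()
    e.epochs = "10"   # a string, as it would arrive from the API, is cast to int before validation
    assert e.epochs == 10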
--- src/sagemaker/amazon/amazon_estimator.py | 4 +- .../amazon/factorization_machines.py | 42 +++++++++---------- src/sagemaker/amazon/hyperparameter.py | 4 +- src/sagemaker/amazon/kmeans.py | 16 +++---- src/sagemaker/amazon/linear_learner.py | 24 +++++------ src/sagemaker/amazon/pca.py | 7 ++-- 6 files changed, 49 insertions(+), 48 deletions(-) diff --git a/src/sagemaker/amazon/amazon_estimator.py b/src/sagemaker/amazon/amazon_estimator.py index 9ed28c1894..9fbdb8b631 100644 --- a/src/sagemaker/amazon/amazon_estimator.py +++ b/src/sagemaker/amazon/amazon_estimator.py @@ -28,8 +28,8 @@ class AmazonAlgorithmEstimatorBase(EstimatorBase): """Base class for Amazon first-party Estimator implementations. This class isn't intended to be instantiated directly.""" - feature_dim = hp('feature_dim', (validation.isint, validation.gt(0))) - mini_batch_size = hp('mini_batch_size', (validation.isint, validation.gt(0))) + feature_dim = hp('feature_dim', validation.gt(0), data_type=int) + mini_batch_size = hp('mini_batch_size', validation.gt(0), data_type=int) def __init__(self, role, train_instance_count, train_instance_type, data_location=None, **kwargs): """Initialize an AmazonAlgorithmEstimatorBase. diff --git a/src/sagemaker/amazon/factorization_machines.py b/src/sagemaker/amazon/factorization_machines.py index 6c1aa0e8ac..66972316ac 100644 --- a/src/sagemaker/amazon/factorization_machines.py +++ b/src/sagemaker/amazon/factorization_machines.py @@ -13,7 +13,7 @@ from sagemaker.amazon.amazon_estimator import AmazonAlgorithmEstimatorBase, registry from sagemaker.amazon.common import numpy_to_record_serializer, record_deserializer from sagemaker.amazon.hyperparameter import Hyperparameter as hp # noqa -from sagemaker.amazon.validation import gt, isin, isint, ge, isnumber +from sagemaker.amazon.validation import gt, isin, ge from sagemaker.predictor import RealTimePredictor from sagemaker.model import Model from sagemaker.session import Session @@ -23,34 +23,34 @@ class FactorizationMachines(AmazonAlgorithmEstimatorBase): repo = 'factorization-machines:1' - num_factors = hp('num_factors', (gt(0), isint), 'An integer greater than zero', int) + num_factors = hp('num_factors', gt(0), 'An integer greater than zero', int) predictor_type = hp('predictor_type', isin('binary_classifier', 'regressor'), 'Value "binary_classifier" or "regressor"', str) - epochs = hp('epochs', (gt(0), isint), "An integer greater than 0", int) - clip_gradient = hp('clip_gradient', isnumber, "A float value", float) - eps = hp('eps', isnumber, "A float value", float) - rescale_grad = hp('rescale_grad', isnumber, "A float value", float) - bias_lr = hp('bias_lr', (ge(0), isnumber), "A non-negative float", float) - linear_lr = hp('linear_lr', (ge(0), isnumber), "A non-negative float", float) - factors_lr = hp('factors_lr', (ge(0), isnumber), "A non-negative float", float) - bias_wd = hp('bias_wd', (ge(0), isnumber), "A non-negative float", float) - linear_wd = hp('linear_wd', (ge(0), isnumber), "A non-negative float", float) - factors_wd = hp('factors_wd', (ge(0), isnumber), "A non-negative float", float) + epochs = hp('epochs', gt(0), "An integer greater than 0", int) + clip_gradient = hp('clip_gradient', (), "A float value", float) + eps = hp('eps', (), "A float value", float) + rescale_grad = hp('rescale_grad', (), "A float value", float) + bias_lr = hp('bias_lr', ge(0), "A non-negative float", float) + linear_lr = hp('linear_lr', ge(0), "A non-negative float", float) + factors_lr = hp('factors_lr', ge(0), "A non-negative float", float) 
+ bias_wd = hp('bias_wd', ge(0), "A non-negative float", float) + linear_wd = hp('linear_wd', ge(0), "A non-negative float", float) + factors_wd = hp('factors_wd', ge(0), "A non-negative float", float) bias_init_method = hp('bias_init_method', isin('normal', 'uniform', 'constant'), 'Value "normal", "uniform" or "constant"', str) - bias_init_scale = hp('bias_init_scale', (ge(0), isnumber), "A non-negative float", float) - bias_init_sigma = hp('bias_init_sigma', (ge(0), isnumber), "A non-negative float", float) - bias_init_value = hp('bias_init_value', isnumber, "A float value", float) + bias_init_scale = hp('bias_init_scale', ge(0), "A non-negative float", float) + bias_init_sigma = hp('bias_init_sigma', ge(0), "A non-negative float", float) + bias_init_value = hp('bias_init_value', (), "A float value", float) linear_init_method = hp('linear_init_method', isin('normal', 'uniform', 'constant'), 'Value "normal", "uniform" or "constant"', str) - linear_init_scale = hp('linear_init_scale', (ge(0), isnumber), "A non-negative float", float) - linear_init_sigma = hp('linear_init_sigma', (ge(0), isnumber), "A non-negative float", float) - linear_init_value = hp('linear_init_value', isnumber, "A float value", float) + linear_init_scale = hp('linear_init_scale', ge(0), "A non-negative float", float) + linear_init_sigma = hp('linear_init_sigma', ge(0), "A non-negative float", float) + linear_init_value = hp('linear_init_value', (), "A float value", float) factors_init_method = hp('factors_init_method', isin('normal', 'uniform', 'constant'), 'Value "normal", "uniform" or "constant"', str) - factors_init_scale = hp('factors_init_scale', (ge(0), isnumber), "A non-negative float", float) - factors_init_sigma = hp('factors_init_sigma', (ge(0), isnumber), "A non-negative float", float) - factors_init_value = hp('factors_init_value', isnumber, "A float value", float) + factors_init_scale = hp('factors_init_scale', ge(0), "A non-negative float", float) + factors_init_sigma = hp('factors_init_sigma', ge(0), "A non-negative float", float) + factors_init_value = hp('factors_init_value', (), "A float value", float) def __init__(self, role, train_instance_count, train_instance_type, num_factors, predictor_type, diff --git a/src/sagemaker/amazon/hyperparameter.py b/src/sagemaker/amazon/hyperparameter.py index e34b1bcff7..5607194659 100644 --- a/src/sagemaker/amazon/hyperparameter.py +++ b/src/sagemaker/amazon/hyperparameter.py @@ -36,9 +36,10 @@ def __init__(self, name, validate=lambda _: True, validation_message="", data_ty def validate(self, value): if value is None: # We allow assignment from None, but Nones are not sent to training. return + for valid in self.validation: if not valid(value): - error_message = "Invalid hyperparameter value {}".format(value) + error_message = "Invalid hyperparameter value {} for {}".format(value, self.name) if self.validation_message: error_message = error_message + ". 
Expecting: " + self.validation_message raise ValueError(error_message) @@ -51,6 +52,7 @@ def __get__(self, obj, objtype): def __set__(self, obj, value): """Validate the supplied value and set this hyperparameter to value""" + value = self.data_type(value) self.validate(value) if '_hyperparameters' not in dir(obj): obj._hyperparameters = dict() diff --git a/src/sagemaker/amazon/kmeans.py b/src/sagemaker/amazon/kmeans.py index bc1fb4456b..020f496d8f 100644 --- a/src/sagemaker/amazon/kmeans.py +++ b/src/sagemaker/amazon/kmeans.py @@ -13,7 +13,7 @@ from sagemaker.amazon.amazon_estimator import AmazonAlgorithmEstimatorBase, registry from sagemaker.amazon.common import numpy_to_record_serializer, record_deserializer from sagemaker.amazon.hyperparameter import Hyperparameter as hp # noqa -from sagemaker.amazon.validation import gt, isin, isint, ge +from sagemaker.amazon.validation import gt, isin, ge from sagemaker.predictor import RealTimePredictor from sagemaker.model import Model from sagemaker.session import Session @@ -23,15 +23,15 @@ class KMeans(AmazonAlgorithmEstimatorBase): repo = 'kmeans:1' - k = hp('k', (gt(1), isint), 'An integer greater-than 1', int) + k = hp('k', gt(1), 'An integer greater-than 1', int) init_method = hp('init_method', isin('random', 'kmeans++'), 'One of "random", "kmeans++"', str) - max_iterations = hp('local_lloyd_max_iterations', (gt(0), isint), 'An integer greater-than 0', int) - tol = hp('local_lloyd_tol', (gt(0), isint), 'An integer greater-than 0', int) - num_trials = hp('local_lloyd_num_trials', (gt(0), isint), 'An integer greater-than 0', int) + max_iterations = hp('local_lloyd_max_iterations', gt(0), 'An integer greater-than 0', int) + tol = hp('local_lloyd_tol', gt(0), 'An integer greater-than 0', int) + num_trials = hp('local_lloyd_num_trials', gt(0), 'An integer greater-than 0', int) local_init_method = hp('local_lloyd_init_method', isin('random', 'kmeans++'), 'One of "random", "kmeans++"', str) - half_life_time_size = hp('half_life_time_size', (ge(0), isint), 'An integer greater-than-or-equal-to 0', int) - epochs = hp('epochs', (gt(0), isint), 'An integer greater-than 0', int) - center_factor = hp('extra_center_factor', (gt(0), isint), 'An integer greater-than 0', int) + half_life_time_size = hp('half_life_time_size', ge(0), 'An integer greater-than-or-equal-to 0', int) + epochs = hp('epochs', gt(0), 'An integer greater-than 0', int) + center_factor = hp('extra_center_factor', gt(0), 'An integer greater-than 0', int) def __init__(self, role, train_instance_count, train_instance_type, k, init_method=None, max_iterations=None, tol=None, num_trials=None, local_init_method=None, diff --git a/src/sagemaker/amazon/linear_learner.py b/src/sagemaker/amazon/linear_learner.py index b78394cdad..d59ea3eea3 100644 --- a/src/sagemaker/amazon/linear_learner.py +++ b/src/sagemaker/amazon/linear_learner.py @@ -13,7 +13,7 @@ from sagemaker.amazon.amazon_estimator import AmazonAlgorithmEstimatorBase, registry from sagemaker.amazon.common import numpy_to_record_serializer, record_deserializer from sagemaker.amazon.hyperparameter import Hyperparameter as hp # noqa -from sagemaker.amazon.validation import isin, gt, lt, isint, isbool, isnumber +from sagemaker.amazon.validation import isin, gt, lt, isbool from sagemaker.predictor import RealTimePredictor from sagemaker.model import Model from sagemaker.session import Session @@ -32,16 +32,16 @@ class LinearLearner(AmazonAlgorithmEstimatorBase): target_recall = hp('target_recall', (gt(0), lt(1)), "A float in (0,1)", float) 
    target_precision = hp('target_precision', (gt(0), lt(1)), "A float in (0,1)", float)
     positive_example_weight_mult = hp('positive_example_weight_mult', gt(0), "A float greater than 0", float)
-    epochs = hp('epochs', (gt(0), isint), "An integer greater-than 0", int)
+    epochs = hp('epochs', gt(0), "An integer greater-than 0", int)
     predictor_type = hp('predictor_type', isin('binary_classifier', 'regressor'),
                         'One of "binary_classifier" or "regressor"', str)
     use_bias = hp('use_bias', isbool, "Either True or False", bool)
-    num_models = hp('num_models', (gt(0), isint), "An integer greater-than 0", int)
-    num_calibration_samples = hp('num_calibration_samples', (gt(0), isint), "An integer greater-than 0", int)
+    num_models = hp('num_models', gt(0), "An integer greater-than 0", int)
+    num_calibration_samples = hp('num_calibration_samples', gt(0), "An integer greater-than 0", int)
     init_method = hp('init_method', isin('uniform', 'normal'), 'One of "uniform" or "normal"', str)
     init_scale = hp('init_scale', (gt(-1), lt(1)), 'A float in (-1, 1)', float)
     init_sigma = hp('init_sigma', (gt(0), lt(1)), 'A float in (0, 1)', float)
-    init_bias = hp('init_bias', isnumber, 'A number', float)
+    init_bias = hp('init_bias', (), 'A number', float)
     optimizer = hp('optimizer', isin('sgd', 'adam', 'auto'), 'One of "sgd", "adam" or "auto"', str)
     loss = hp('loss', isin('logistic', 'squared_loss', 'absolute_loss', 'auto'),
               '"logistic", "squared_loss", "absolute_loss" or "auto"', str)
@@ -53,15 +53,15 @@ class LinearLearner(AmazonAlgorithmEstimatorBase):
     beta_2 = hp('beta_2', (gt(0), lt(1)), 'A float in (0,1)', float)
     bias_lr_mult = hp('bias_lr_mult', gt(0), 'A float greater-than 0', float)
     bias_wd_mult = hp('bias_wd_mult', gt(0), 'A float greater-than 0', float)
-    use_lr_scheduler = hp('use_lr_scheduler', isbool, 'A boolean', bool)
-    lr_scheduler_step = hp('lr_scheduler_step', (gt(0), isint), 'An integer greater-than 0', int)
+    use_lr_scheduler = hp('use_lr_scheduler', (), 'A boolean', bool)
+    lr_scheduler_step = hp('lr_scheduler_step', gt(0), 'An integer greater-than 0', int)
     lr_scheduler_factor = hp('lr_scheduler_factor', (gt(0), lt(1)), 'A float in (0,1)', float)
     lr_scheduler_minimum_lr = hp('lr_scheduler_minimum_lr', gt(0), 'A float greater-than 0', float)
-    normalize_data = hp('normalize_data', isbool, 'A boolean', bool)
-    normalize_label = hp('normalize_label', isbool, 'A boolean', bool)
-    unbias_data = hp('unbias_data', isbool, 'A boolean', bool)
-    unbias_label = hp('unbias_label', isbool, 'A boolean', bool)
-    num_point_for_scalar = hp('num_point_for_scalar', (isint, gt(0)), 'An integer greater-than 0', int)
+    normalize_data = hp('normalize_data', (), 'A boolean', bool)
+    normalize_label = hp('normalize_label', (), 'A boolean', bool)
+    unbias_data = hp('unbias_data', (), 'A boolean', bool)
+    unbias_label = hp('unbias_label', (), 'A boolean', bool)
+    num_point_for_scalar = hp('num_point_for_scalar', gt(0), 'An integer greater-than 0', int)
 
     def __init__(self, role, train_instance_count, train_instance_type, predictor_type='binary_classifier',
                  binary_classifier_model_selection_criteria=None, target_recall=None, target_precision=None,
diff --git a/src/sagemaker/amazon/pca.py b/src/sagemaker/amazon/pca.py
index 19271c43e8..48f99c04d1 100644
--- a/src/sagemaker/amazon/pca.py
+++ b/src/sagemaker/amazon/pca.py
@@ -24,13 +24,12 @@ class PCA(AmazonAlgorithmEstimatorBase):
 
     DEFAULT_MINI_BATCH_SIZE = 500
 
-    num_components = hp(name='num_components', validate=lambda x: x > 0 and isinstance(x, int),
+    num_components = hp(name='num_components', validate=lambda x: x > 0,
                         validation_message='Value must be an integer greater than zero', data_type=int)
     algorithm_mode = hp(name='algorithm_mode', validate=lambda x: x in ['regular', 'stable', 'randomized'],
                         validation_message='Value must be one of "regular", "stable", "randomized"', data_type=str)
-    subtract_mean = hp(name='subtract_mean', validate=lambda x: isinstance(x, bool),
-                       validation_message='Value must be a boolean', data_type=bool)
-    extra_components = hp(name='extra_components', validate=lambda x: x >= 0 and isinstance(x, int),
+    subtract_mean = hp(name='subtract_mean', validation_message='Value must be a boolean', data_type=bool)
+    extra_components = hp(name='extra_components', validate=lambda x: x >= 0,
                           validation_message="Value must be an integer greater than or equal to 0", data_type=int)
 
     def __init__(self, role, train_instance_count, train_instance_type, num_components,

From c50b06a75c08c8a73af230751854ec8f60d9e7c7 Mon Sep 17 00:00:00 2001
From: Ignacio Quintero
Date: Tue, 23 Jan 2018 16:50:46 -0800
Subject: [PATCH 3/4] Fix broken unit tests

My previous commit broke a couple of unit tests. This fixes them.
---
 src/sagemaker/amazon/hyperparameter.py | 2 +-
 tests/unit/test_hyperparameter.py      | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/sagemaker/amazon/hyperparameter.py b/src/sagemaker/amazon/hyperparameter.py
index 5607194659..a3ac76367c 100644
--- a/src/sagemaker/amazon/hyperparameter.py
+++ b/src/sagemaker/amazon/hyperparameter.py
@@ -52,7 +52,7 @@ def __get__(self, obj, objtype):
 
     def __set__(self, obj, value):
         """Validate the supplied value and set this hyperparameter to value"""
-        value = self.data_type(value)
+        value = None if value is None else self.data_type(value)
         self.validate(value)
         if '_hyperparameters' not in dir(obj):
             obj._hyperparameters = dict()
diff --git a/tests/unit/test_hyperparameter.py b/tests/unit/test_hyperparameter.py
index 69fc248e53..db7ed3f64c 100644
--- a/tests/unit/test_hyperparameter.py
+++ b/tests/unit/test_hyperparameter.py
@@ -16,7 +16,7 @@
 
 
 class Test(object):
-    blank = Hyperparameter(name="some-name")
+    blank = Hyperparameter(name="some-name", data_type=int)
     elizabeth = Hyperparameter(name='elizabeth')
     validated = Hyperparameter(name="validated", validate=lambda value: value > 55, data_type=int)
 
@@ -70,5 +70,5 @@ def test_from_string():
     x.validated = value
     from_api = str(value)
 
-    x.validated = Test.__dict__["validated"].data_type(from_api)
+    x.validated = from_api
     assert x.validated == value

From 59ac007315b47d9df1db99b17dc6102c747f5785 Mon Sep 17 00:00:00 2001
From: Ignacio Quintero
Date: Wed, 24 Jan 2018 10:40:07 -0800
Subject: [PATCH 4/4] Remove unused validation functions

isint(), isbool(), etc. are no longer used.
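The removal is safe because, after PATCH 2/4, __set__ casts through data_type before any validator runs, so an isinstance-style check can no longer fail. A small illustration (hypothetical values; gt is the same helper kept in validation.py below):

    def gt(minimum):            # as defined in src/sagemaker/amazon/validation.py
        def validate(value):
            return value > minimum
        return validate

    raw = "3"                   # e.g. a hyperparameter string from a DescribeTrainingJob response
    value = int(raw)            # Hyperparameter.__set__ applies data_type before validating
    assert gt(0)(value)         # the range check is the only check left with any effect
    assert isinstance(value, int)  # guaranteed by the cast, which is what made isint() dead code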
--- src/sagemaker/amazon/linear_learner.py | 4 ++-- src/sagemaker/amazon/validation.py | 6 ------ 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/src/sagemaker/amazon/linear_learner.py b/src/sagemaker/amazon/linear_learner.py index d59ea3eea3..d1d9dd6cb8 100644 --- a/src/sagemaker/amazon/linear_learner.py +++ b/src/sagemaker/amazon/linear_learner.py @@ -13,7 +13,7 @@ from sagemaker.amazon.amazon_estimator import AmazonAlgorithmEstimatorBase, registry from sagemaker.amazon.common import numpy_to_record_serializer, record_deserializer from sagemaker.amazon.hyperparameter import Hyperparameter as hp # noqa -from sagemaker.amazon.validation import isin, gt, lt, isbool +from sagemaker.amazon.validation import isin, gt, lt from sagemaker.predictor import RealTimePredictor from sagemaker.model import Model from sagemaker.session import Session @@ -35,7 +35,7 @@ class LinearLearner(AmazonAlgorithmEstimatorBase): epochs = hp('epochs', gt(0), "An integer greater-than 0", int) predictor_type = hp('predictor_type', isin('binary_classifier', 'regressor'), 'One of "binary_classifier" or "regressor"', str) - use_bias = hp('use_bias', isbool, "Either True or False", bool) + use_bias = hp('use_bias', (), "Either True or False", bool) num_models = hp('num_models', gt(0), "An integer greater-than 0", int) num_calibration_samples = hp('num_calibration_samples', gt(0), "An integer greater-than 0", int) init_method = hp('init_method', isin('uniform', 'normal'), 'One of "uniform" or "normal"', str) diff --git a/src/sagemaker/amazon/validation.py b/src/sagemaker/amazon/validation.py index ff3259be8f..ede48cc9b3 100644 --- a/src/sagemaker/amazon/validation.py +++ b/src/sagemaker/amazon/validation.py @@ -10,7 +10,6 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. -import numbers def gt(minimum): @@ -41,8 +40,3 @@ def istype(expected): def validate(value): return isinstance(value, expected) return validate - - -isint = istype(int) -isbool = istype(bool) -isnumber = istype(numbers.Number) # noqa
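Taken together, the series makes it mechanical to rebuild typed hyperparameters from the dict[str, str] that a DescribeTrainingJob call returns. A sketch of that round trip, reusing the condensed Hyperparameter class from the note under PATCH 2/4 (the KMeansLike class, recover helper, and response payload are hypothetical, not SDK API):

    class KMeansLike(object):   # illustrative stand-in for a first-party estimator class
        k = Hyperparameter('k', lambda x: x > 1, 'An integer greater-than 1', int)
        epochs = Hyperparameter('epochs', lambda x: x > 0, 'An integer greater-than 0', int)

    def recover(cls, string_hps):
        # Assign the API's string values through the descriptors; __set__ casts and validates.
        obj = cls()
        for attr, descriptor in vars(cls).items():
            if isinstance(descriptor, Hyperparameter) and descriptor.name in string_hps:
                setattr(obj, attr, string_hps[descriptor.name])
        return obj

    restored = recover(KMeansLike, {'k': '10', 'epochs': '5'})   # hypothetical API payload
    assert restored.k == 10 and type(restored.k) is int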