diff --git a/evalml/pipelines/components/estimators/classifiers/catboost_classifier.py b/evalml/pipelines/components/estimators/classifiers/catboost_classifier.py index e6286c8e9e..78834a4b5e 100644 --- a/evalml/pipelines/components/estimators/classifiers/catboost_classifier.py +++ b/evalml/pipelines/components/estimators/classifiers/catboost_classifier.py @@ -1,4 +1,5 @@ import copy +import warnings import numpy as np import pandas as pd @@ -31,12 +32,17 @@ class CatBoostClassifier(Estimator): SEED_MIN = 0 SEED_MAX = SEED_BOUNDS.max_bound - def __init__(self, n_estimators=10, eta=0.03, max_depth=6, bootstrap_type=None, random_state=0, **kwargs): + def __init__(self, n_estimators=10, eta=0.03, max_depth=6, bootstrap_type=None, silent=True, + random_state=0, **kwargs): random_seed = get_random_seed(random_state, self.SEED_MIN, self.SEED_MAX) parameters = {"n_estimators": n_estimators, "eta": eta, "max_depth": max_depth, - 'bootstrap_type': bootstrap_type} + 'bootstrap_type': bootstrap_type, + 'silent': silent} + if kwargs.get('allow_writing_files', False): + warnings.warn("Parameter allow_writing_files is being set to False in CatBoostClassifier") + kwargs["allow_writing_files"] = False parameters.update(kwargs) cb_error_msg = "catboost is not installed. Please install using `pip install catboost.`" @@ -47,9 +53,7 @@ def __init__(self, n_estimators=10, eta=0.03, max_depth=6, bootstrap_type=None, if bootstrap_type is None: cb_parameters.pop('bootstrap_type') cb_classifier = catboost.CatBoostClassifier(**cb_parameters, - random_seed=random_seed, - silent=True, - allow_writing_files=False) + random_seed=random_seed) super().__init__(parameters=parameters, component_obj=cb_classifier, random_state=random_state) diff --git a/evalml/pipelines/components/estimators/classifiers/elasticnet_classifier.py b/evalml/pipelines/components/estimators/classifiers/elasticnet_classifier.py index eaf16dc7ba..6bf77b4bd3 100644 --- a/evalml/pipelines/components/estimators/classifiers/elasticnet_classifier.py +++ b/evalml/pipelines/components/estimators/classifiers/elasticnet_classifier.py @@ -1,3 +1,5 @@ +import warnings + import numpy as np from sklearn.linear_model import SGDClassifier as SKElasticNetClassifier from skopt.space import Real @@ -17,16 +19,19 @@ class ElasticNetClassifier(Estimator): model_family = ModelFamily.LINEAR_MODEL supported_problem_types = [ProblemTypes.BINARY, ProblemTypes.MULTICLASS] - def __init__(self, alpha=0.5, l1_ratio=0.5, n_jobs=-1, max_iter=1000, random_state=0, **kwargs): + def __init__(self, alpha=0.5, l1_ratio=0.5, n_jobs=-1, max_iter=1000, random_state=0, penalty='elasticnet', + **kwargs): parameters = {'alpha': alpha, 'l1_ratio': l1_ratio, 'n_jobs': n_jobs, - 'max_iter': max_iter} + 'max_iter': max_iter, + 'penalty': penalty} + if kwargs.get('loss', 'log') != 'log': + warnings.warn("Parameter loss is being set to 'log' so that ElasticNetClassifier can predict probabilities" + f". Originally received '{kwargs['loss']}'.") + kwargs["loss"] = "log" parameters.update(kwargs) - - en_classifier = SKElasticNetClassifier(loss="log", - penalty="elasticnet", - random_state=random_state, + en_classifier = SKElasticNetClassifier(random_state=random_state, **parameters) super().__init__(parameters=parameters, component_obj=en_classifier, diff --git a/evalml/pipelines/components/estimators/classifiers/logistic_regression.py b/evalml/pipelines/components/estimators/classifiers/logistic_regression.py index 9e672340f5..4b7e4ff3ad 100644 --- a/evalml/pipelines/components/estimators/classifiers/logistic_regression.py +++ b/evalml/pipelines/components/estimators/classifiers/logistic_regression.py @@ -26,8 +26,6 @@ def __init__(self, penalty="l2", C=1.0, n_jobs=-1, random_state=0, **kwargs): parameters.update(kwargs) lr_classifier = LogisticRegression(random_state=random_state, - multi_class="auto", - solver="lbfgs", **parameters) super().__init__(parameters=parameters, component_obj=lr_classifier, diff --git a/evalml/pipelines/components/estimators/regressors/catboost_regressor.py b/evalml/pipelines/components/estimators/regressors/catboost_regressor.py index 18f28780a8..0bb3d352cb 100644 --- a/evalml/pipelines/components/estimators/regressors/catboost_regressor.py +++ b/evalml/pipelines/components/estimators/regressors/catboost_regressor.py @@ -1,4 +1,5 @@ import copy +import warnings import pandas as pd from skopt.space import Integer, Real @@ -29,12 +30,17 @@ class CatBoostRegressor(Estimator): SEED_MIN = 0 SEED_MAX = SEED_BOUNDS.max_bound - def __init__(self, n_estimators=10, eta=0.03, max_depth=6, bootstrap_type=None, random_state=0, **kwargs): + def __init__(self, n_estimators=10, eta=0.03, max_depth=6, bootstrap_type=None, silent=False, + random_state=0, **kwargs): random_seed = get_random_seed(random_state, self.SEED_MIN, self.SEED_MAX) parameters = {"n_estimators": n_estimators, "eta": eta, "max_depth": max_depth, - 'bootstrap_type': bootstrap_type} + 'bootstrap_type': bootstrap_type, + 'silent': silent} + if kwargs.get('allow_writing_files', False): + warnings.warn("Parameter allow_writing_files is being set to False in CatBoostRegressor") + kwargs["allow_writing_files"] = False parameters.update(kwargs) cb_error_msg = "catboost is not installed. Please install using `pip install catboost.`" @@ -44,9 +50,7 @@ def __init__(self, n_estimators=10, eta=0.03, max_depth=6, bootstrap_type=None, if bootstrap_type is None: cb_parameters.pop('bootstrap_type') cb_regressor = catboost.CatBoostRegressor(**cb_parameters, - random_seed=random_seed, - silent=True, - allow_writing_files=False) + random_seed=random_seed) super().__init__(parameters=parameters, component_obj=cb_regressor, random_state=random_state) diff --git a/evalml/tests/component_tests/test_catboost_classifier.py b/evalml/tests/component_tests/test_catboost_classifier.py index 5d7a2f96e6..a53143ce38 100644 --- a/evalml/tests/component_tests/test_catboost_classifier.py +++ b/evalml/tests/component_tests/test_catboost_classifier.py @@ -1,5 +1,6 @@ import numpy as np import pandas as pd +import pytest from pytest import importorskip from evalml.pipelines.components import CatBoostClassifier @@ -41,3 +42,14 @@ def randint(self, min_bound, max_bound): rng = make_mock_random_state(CatBoostClassifier.SEED_MAX) clf = CatBoostClassifier(n_estimators=1, max_depth=1, random_state=rng) clf.fit(X, y) + + +def test_overwrite_allow_writing_files_parameter_in_kwargs(): + + with pytest.warns(expected_warning=UserWarning) as warnings: + cb = CatBoostClassifier(allow_writing_files=True) + + assert len(warnings) == 1 + # check that the message matches + assert warnings[0].message.args[0] == "Parameter allow_writing_files is being set to False in CatBoostClassifier" + assert not cb.parameters['allow_writing_files'] diff --git a/evalml/tests/component_tests/test_catboost_regressor.py b/evalml/tests/component_tests/test_catboost_regressor.py index 80d66f5e5f..2b6df2bc4a 100644 --- a/evalml/tests/component_tests/test_catboost_regressor.py +++ b/evalml/tests/component_tests/test_catboost_regressor.py @@ -1,5 +1,6 @@ import numpy as np import pandas as pd +import pytest from pytest import importorskip from evalml.pipelines.components import CatBoostRegressor @@ -41,3 +42,14 @@ def randint(self, min_bound, max_bound): rng = make_mock_random_state(CatBoostRegressor.SEED_MAX) clf = CatBoostRegressor(n_estimators=1, max_depth=1, random_state=rng) clf.fit(X, y) + + +def test_overwrite_allow_writing_files_parameter_in_kwargs(): + + with pytest.warns(expected_warning=UserWarning) as warnings: + cb = CatBoostRegressor(allow_writing_files=True) + + assert len(warnings) == 1 + # check that the message matches + assert warnings[0].message.args[0] == "Parameter allow_writing_files is being set to False in CatBoostRegressor" + assert not cb.parameters['allow_writing_files'] diff --git a/evalml/tests/component_tests/test_components.py b/evalml/tests/component_tests/test_components.py index a63a96fe27..4e68930125 100644 --- a/evalml/tests/component_tests/test_components.py +++ b/evalml/tests/component_tests/test_components.py @@ -139,7 +139,7 @@ def test_describe_component(): rf_regressor = RandomForestRegressor(n_estimators=10, max_depth=3) linear_regressor = LinearRegressor() assert lr_classifier.describe(return_dict=True) == {'name': 'Logistic Regression Classifier', 'parameters': {'penalty': 'l2', 'C': 1.0, 'n_jobs': -1}} - assert en_classifier.describe(return_dict=True) == {'name': 'Elastic Net Classifier', 'parameters': {'alpha': 0.5, 'l1_ratio': 0.5, 'n_jobs': -1, 'max_iter': 1000}} + assert en_classifier.describe(return_dict=True) == {'name': 'Elastic Net Classifier', 'parameters': {'alpha': 0.5, 'l1_ratio': 0.5, 'n_jobs': -1, 'max_iter': 1000, "loss": 'log', 'penalty': 'elasticnet'}} assert en_regressor.describe(return_dict=True) == {'name': 'Elastic Net Regressor', 'parameters': {'alpha': 0.5, 'l1_ratio': 0.5, 'max_iter': 1000, 'normalize': False}} assert et_classifier.describe(return_dict=True) == {'name': 'Extra Trees Classifier', 'parameters': {'n_estimators': 10, 'max_features': 'auto', 'max_depth': 6, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_jobs': -1}} assert et_regressor.describe(return_dict=True) == {'name': 'Extra Trees Regressor', 'parameters': {'n_estimators': 10, 'max_features': 'auto', 'max_depth': 6, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_jobs': -1}} @@ -769,3 +769,15 @@ def test_serialization_protocol(mock_cloudpickle_dump, tmpdir): component.save(path, pickle_protocol=42) assert len(mock_cloudpickle_dump.call_args_list) == 1 assert mock_cloudpickle_dump.call_args_list[0][1]['protocol'] == 42 + + +@pytest.mark.parametrize("estimator_class", _all_estimators()) +def test_estimators_accept_all_kwargs(estimator_class): + estimator = estimator_class() + if estimator._component_obj is None: + pytest.skip(f"Skipping {estimator_class} because does not have component object.") + params = estimator._component_obj.get_params() + if estimator_class.model_family == ModelFamily.CATBOOST: + # Deleting because we call it random_state in our api + del params["random_seed"] + estimator_class(**params) diff --git a/evalml/tests/component_tests/test_en_classifier.py b/evalml/tests/component_tests/test_en_classifier.py index 3da6d8dd0a..50cb91f7c2 100644 --- a/evalml/tests/component_tests/test_en_classifier.py +++ b/evalml/tests/component_tests/test_en_classifier.py @@ -1,4 +1,5 @@ import numpy as np +import pytest from sklearn.linear_model import SGDClassifier as SKElasticNetClassifier from evalml.model_family import ModelFamily @@ -18,7 +19,9 @@ def test_en_parameters(): "alpha": 0.75, "l1_ratio": 0.5, 'max_iter': 1000, - 'n_jobs': -1 + 'n_jobs': -1, + 'penalty': 'elasticnet', + 'loss': 'log' } assert clf.parameters == expected_parameters @@ -107,3 +110,16 @@ def test_feature_importance_multi(X_y_multi): sk_features = np.linalg.norm(sk_clf.coef_, axis=0, ord=2) np.testing.assert_almost_equal(sk_features, clf.feature_importance, decimal=5) + + +def test_overwrite_loss_parameter_in_kwargs(): + + with pytest.warns(expected_warning=UserWarning) as warnings: + en = ElasticNetClassifier(loss="hinge") + + assert len(warnings) == 1 + # check that the message matches + assert warnings[0].message.args[0] == ("Parameter loss is being set to 'log' so that ElasticNetClassifier can predict probabilities" + ". Originally received 'hinge'.") + + assert en.parameters['loss'] == 'log'