Users can now pass in all valid kwargs to LogisticRegressionClassifier, ENClassifier, and CatBoost estimators.
freddyaboulton committed Sep 11, 2020
1 parent 6d4e98e commit b7b86b4
Showing 8 changed files with 83 additions and 20 deletions.
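The changes below forward extra keyword arguments to the underlying scikit-learn and CatBoost objects instead of hardcoding values such as silent, loss, solver, and multi_class. A minimal usage sketch of the new behavior follows; the specific kwargs (border_count, average, solver="liblinear") are illustrative values assumed to be accepted by the underlying libraries, the imports follow the pattern used in the tests below, and the CatBoost example assumes catboost is installed:

from evalml.pipelines.components import (
    CatBoostClassifier,
    ElasticNetClassifier,
    LogisticRegressionClassifier,
)

# Arbitrary valid CatBoost kwargs are forwarded; allow_writing_files is forced
# back to False (with a UserWarning) so CatBoost does not write training
# artifacts to disk during fit.
cb = CatBoostClassifier(n_estimators=50, border_count=32, allow_writing_files=True)
assert cb.parameters["allow_writing_files"] is False

# The SGD-based ElasticNetClassifier keeps loss='log' so predict_proba works,
# even when a different loss is requested (a UserWarning is raised).
en = ElasticNetClassifier(alpha=0.1, average=True, loss="hinge")
assert en.parameters["loss"] == "log"

# solver and multi_class are no longer hardcoded, so they can be overridden too.
lr = LogisticRegressionClassifier(C=0.5, solver="liblinear")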
@@ -1,4 +1,5 @@
import copy
+import warnings

import numpy as np
import pandas as pd
@@ -31,12 +32,17 @@ class CatBoostClassifier(Estimator):
SEED_MIN = 0
SEED_MAX = SEED_BOUNDS.max_bound

-    def __init__(self, n_estimators=10, eta=0.03, max_depth=6, bootstrap_type=None, random_state=0, **kwargs):
+    def __init__(self, n_estimators=10, eta=0.03, max_depth=6, bootstrap_type=None, silent=True,
+                 random_state=0, **kwargs):
random_seed = get_random_seed(random_state, self.SEED_MIN, self.SEED_MAX)
parameters = {"n_estimators": n_estimators,
"eta": eta,
"max_depth": max_depth,
-                      'bootstrap_type': bootstrap_type}
+                      'bootstrap_type': bootstrap_type,
+                      'silent': silent}
+        if kwargs.get('allow_writing_files', False):
+            warnings.warn("Parameter allow_writing_files is being set to False in CatBoostClassifier")
+        kwargs["allow_writing_files"] = False
parameters.update(kwargs)

cb_error_msg = "catboost is not installed. Please install using `pip install catboost.`"
@@ -47,9 +53,7 @@ def __init__(self, n_estimators=10, eta=0.03, max_depth=6, bootstrap_type=None,
if bootstrap_type is None:
cb_parameters.pop('bootstrap_type')
cb_classifier = catboost.CatBoostClassifier(**cb_parameters,
-                                                     random_seed=random_seed,
-                                                     silent=True,
-                                                     allow_writing_files=False)
+                                                     random_seed=random_seed)
super().__init__(parameters=parameters,
component_obj=cb_classifier,
random_state=random_state)
@@ -1,3 +1,5 @@
+import warnings

import numpy as np
from sklearn.linear_model import SGDClassifier as SKElasticNetClassifier
from skopt.space import Real
@@ -17,16 +19,19 @@ class ElasticNetClassifier(Estimator):
model_family = ModelFamily.LINEAR_MODEL
supported_problem_types = [ProblemTypes.BINARY, ProblemTypes.MULTICLASS]

-    def __init__(self, alpha=0.5, l1_ratio=0.5, n_jobs=-1, max_iter=1000, random_state=0, **kwargs):
+    def __init__(self, alpha=0.5, l1_ratio=0.5, n_jobs=-1, max_iter=1000, random_state=0, penalty='elasticnet',
+                 **kwargs):
parameters = {'alpha': alpha,
'l1_ratio': l1_ratio,
'n_jobs': n_jobs,
-                      'max_iter': max_iter}
+                      'max_iter': max_iter,
+                      'penalty': penalty}
+        if kwargs.get('loss', 'log') != 'log':
+            warnings.warn("Parameter loss is being set to 'log' so that ElasticNetClassifier can predict probabilities"
+                          f". Originally received '{kwargs['loss']}'.")
+        kwargs["loss"] = "log"
parameters.update(kwargs)

-        en_classifier = SKElasticNetClassifier(loss="log",
-                                               penalty="elasticnet",
-                                               random_state=random_state,
+        en_classifier = SKElasticNetClassifier(random_state=random_state,
**parameters)
super().__init__(parameters=parameters,
component_obj=en_classifier,
@@ -26,8 +26,6 @@ def __init__(self, penalty="l2", C=1.0, n_jobs=-1, random_state=0, **kwargs):
parameters.update(kwargs)

lr_classifier = LogisticRegression(random_state=random_state,
-                                           multi_class="auto",
-                                           solver="lbfgs",
**parameters)
super().__init__(parameters=parameters,
component_obj=lr_classifier,
@@ -1,4 +1,5 @@
import copy
+import warnings

import pandas as pd
from skopt.space import Integer, Real
@@ -29,12 +30,17 @@ class CatBoostRegressor(Estimator):
SEED_MIN = 0
SEED_MAX = SEED_BOUNDS.max_bound

-    def __init__(self, n_estimators=10, eta=0.03, max_depth=6, bootstrap_type=None, random_state=0, **kwargs):
+    def __init__(self, n_estimators=10, eta=0.03, max_depth=6, bootstrap_type=None, silent=False,
+                 random_state=0, **kwargs):
random_seed = get_random_seed(random_state, self.SEED_MIN, self.SEED_MAX)
parameters = {"n_estimators": n_estimators,
"eta": eta,
"max_depth": max_depth,
-                      'bootstrap_type': bootstrap_type}
+                      'bootstrap_type': bootstrap_type,
+                      'silent': silent}
+        if kwargs.get('allow_writing_files', False):
+            warnings.warn("Parameter allow_writing_files is being set to False in CatBoostRegressor")
+        kwargs["allow_writing_files"] = False
parameters.update(kwargs)

cb_error_msg = "catboost is not installed. Please install using `pip install catboost.`"
@@ -44,9 +50,7 @@ def __init__(self, n_estimators=10, eta=0.03, max_depth=6, bootstrap_type=None,
if bootstrap_type is None:
cb_parameters.pop('bootstrap_type')
cb_regressor = catboost.CatBoostRegressor(**cb_parameters,
-                                                   random_seed=random_seed,
-                                                   silent=True,
-                                                   allow_writing_files=False)
+                                                   random_seed=random_seed)
super().__init__(parameters=parameters,
component_obj=cb_regressor,
random_state=random_state)
12 changes: 12 additions & 0 deletions evalml/tests/component_tests/test_catboost_classifier.py
@@ -1,5 +1,6 @@
import numpy as np
import pandas as pd
+import pytest
from pytest import importorskip

from evalml.pipelines.components import CatBoostClassifier
@@ -41,3 +42,14 @@ def randint(self, min_bound, max_bound):
rng = make_mock_random_state(CatBoostClassifier.SEED_MAX)
clf = CatBoostClassifier(n_estimators=1, max_depth=1, random_state=rng)
clf.fit(X, y)


def test_overwrite_allow_writing_files_parameter_in_kwargs():

with pytest.warns(expected_warning=UserWarning) as warnings:
cb = CatBoostClassifier(allow_writing_files=True)

assert len(warnings) == 1
# check that the message matches
assert warnings[0].message.args[0] == "Parameter allow_writing_files is being set to False in CatBoostClassifier"
assert not cb.parameters['allow_writing_files']
12 changes: 12 additions & 0 deletions evalml/tests/component_tests/test_catboost_regressor.py
@@ -1,5 +1,6 @@
import numpy as np
import pandas as pd
+import pytest
from pytest import importorskip

from evalml.pipelines.components import CatBoostRegressor
@@ -41,3 +42,14 @@ def randint(self, min_bound, max_bound):
rng = make_mock_random_state(CatBoostRegressor.SEED_MAX)
clf = CatBoostRegressor(n_estimators=1, max_depth=1, random_state=rng)
clf.fit(X, y)


def test_overwrite_allow_writing_files_parameter_in_kwargs():

with pytest.warns(expected_warning=UserWarning) as warnings:
cb = CatBoostRegressor(allow_writing_files=True)

assert len(warnings) == 1
# check that the message matches
assert warnings[0].message.args[0] == "Parameter allow_writing_files is being set to False in CatBoostRegressor"
assert not cb.parameters['allow_writing_files']
14 changes: 13 additions & 1 deletion evalml/tests/component_tests/test_components.py
@@ -139,7 +139,7 @@ def test_describe_component():
rf_regressor = RandomForestRegressor(n_estimators=10, max_depth=3)
linear_regressor = LinearRegressor()
assert lr_classifier.describe(return_dict=True) == {'name': 'Logistic Regression Classifier', 'parameters': {'penalty': 'l2', 'C': 1.0, 'n_jobs': -1}}
-    assert en_classifier.describe(return_dict=True) == {'name': 'Elastic Net Classifier', 'parameters': {'alpha': 0.5, 'l1_ratio': 0.5, 'n_jobs': -1, 'max_iter': 1000}}
+    assert en_classifier.describe(return_dict=True) == {'name': 'Elastic Net Classifier', 'parameters': {'alpha': 0.5, 'l1_ratio': 0.5, 'n_jobs': -1, 'max_iter': 1000, "loss": 'log', 'penalty': 'elasticnet'}}
assert en_regressor.describe(return_dict=True) == {'name': 'Elastic Net Regressor', 'parameters': {'alpha': 0.5, 'l1_ratio': 0.5, 'max_iter': 1000, 'normalize': False}}
assert et_classifier.describe(return_dict=True) == {'name': 'Extra Trees Classifier', 'parameters': {'n_estimators': 10, 'max_features': 'auto', 'max_depth': 6, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_jobs': -1}}
assert et_regressor.describe(return_dict=True) == {'name': 'Extra Trees Regressor', 'parameters': {'n_estimators': 10, 'max_features': 'auto', 'max_depth': 6, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_jobs': -1}}
@@ -769,3 +769,15 @@ def test_serialization_protocol(mock_cloudpickle_dump, tmpdir):
component.save(path, pickle_protocol=42)
assert len(mock_cloudpickle_dump.call_args_list) == 1
assert mock_cloudpickle_dump.call_args_list[0][1]['protocol'] == 42


@pytest.mark.parametrize("estimator_class", _all_estimators())
def test_estimators_accept_all_kwargs(estimator_class):
estimator = estimator_class()
if estimator._component_obj is None:
pytest.skip(f"Skipping {estimator_class} because does not have component object.")
params = estimator._component_obj.get_params()
if estimator_class.model_family == ModelFamily.CATBOOST:
# Deleting because we call it random_state in our api
del params["random_seed"]
estimator_class(**params)
18 changes: 17 additions & 1 deletion evalml/tests/component_tests/test_en_classifier.py
@@ -1,4 +1,5 @@
import numpy as np
+import pytest
from sklearn.linear_model import SGDClassifier as SKElasticNetClassifier

from evalml.model_family import ModelFamily
@@ -18,7 +19,9 @@ def test_en_parameters():
"alpha": 0.75,
"l1_ratio": 0.5,
'max_iter': 1000,
-        'n_jobs': -1
+        'n_jobs': -1,
+        'penalty': 'elasticnet',
+        'loss': 'log'
}
assert clf.parameters == expected_parameters

@@ -107,3 +110,16 @@ def test_feature_importance_multi(X_y_multi):
sk_features = np.linalg.norm(sk_clf.coef_, axis=0, ord=2)

np.testing.assert_almost_equal(sk_features, clf.feature_importance, decimal=5)


def test_overwrite_loss_parameter_in_kwargs():

with pytest.warns(expected_warning=UserWarning) as warnings:
en = ElasticNetClassifier(loss="hinge")

assert len(warnings) == 1
# check that the message matches
assert warnings[0].message.args[0] == ("Parameter loss is being set to 'log' so that ElasticNetClassifier can predict probabilities"
". Originally received 'hinge'.")

assert en.parameters['loss'] == 'log'
