Skip to content

Commit

Permalink
Change default parameters for feature selectors (#3110)
Browse files Browse the repository at this point in the history
* use mean threshold only

* docs

* lint

* Fix tests

* Make default median and add to hyperparameter ranges
  • Loading branch information
jeremyliweishih committed Dec 2, 2021
1 parent 840fc3b commit 36a50d2
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 8 deletions.
1 change: 1 addition & 0 deletions docs/source/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ Release Notes
* Enhancements
* Renamed ``DelayedFeatureTransformer`` to ``TimeSeriesFeaturizer`` and enhanced it to compute rolling features :pr:`3028`
* Fixes
* Default parameters for ``RFRegressorSelectFromModel`` and ``RFClassifierSelectFromModel`` have been fixed to avoid selecting all features :pr:`3110`
* Changes
* Documentation Changes
* Testing Changes
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
"""Component that selects top features based on importance weights using a Random Forest classifier."""
import numpy as np
from sklearn.ensemble import RandomForestClassifier as SKRandomForestClassifier
from sklearn.feature_selection import SelectFromModel as SkSelect
from skopt.space import Real
Expand Down Expand Up @@ -29,11 +28,11 @@ class RFClassifierSelectFromModel(FeatureSelector):
name = "RF Classifier Select From Model"
hyperparameter_ranges = {
"percent_features": Real(0.01, 1),
"threshold": ["mean", -np.inf],
"threshold": ["mean", "median"],
}
"""{
"percent_features": Real(0.01, 1),
"threshold": ["mean", -np.inf],
"threshold": ["mean", "median"],
}"""

def __init__(
Expand All @@ -42,7 +41,7 @@ def __init__(
n_estimators=10,
max_depth=None,
percent_features=0.5,
threshold=-np.inf,
threshold="median",
n_jobs=-1,
random_seed=0,
**kwargs,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
"""Component that selects top features based on importance weights using a Random Forest regressor."""
import numpy as np
from sklearn.ensemble import RandomForestRegressor as SKRandomForestRegressor
from sklearn.feature_selection import SelectFromModel as SkSelect
from skopt.space import Real
Expand Down Expand Up @@ -29,11 +28,11 @@ class RFRegressorSelectFromModel(FeatureSelector):
name = "RF Regressor Select From Model"
hyperparameter_ranges = {
"percent_features": Real(0.01, 1),
"threshold": ["mean", -np.inf],
"threshold": ["mean", "median"],
}
"""{
"percent_features": Real(0.01, 1),
"threshold": ["mean", -np.inf],
"threshold": ["mean", "median"],
}"""

def __init__(
Expand All @@ -42,7 +41,7 @@ def __init__(
n_estimators=10,
max_depth=None,
percent_features=0.5,
threshold=-np.inf,
threshold="median",
n_jobs=-1,
random_seed=0,
**kwargs,
Expand Down
8 changes: 8 additions & 0 deletions evalml/tests/component_tests/test_components.py
Original file line number Diff line number Diff line change
Expand Up @@ -888,6 +888,10 @@ def test_transformer_transform_output_type(X_y_binary):
component, SelectByType
):
assert transform_output.shape == (X.shape[0], 0)
elif isinstance(component, RFRegressorSelectFromModel):
assert transform_output.shape == (X.shape[0], 10)
elif isinstance(component, RFClassifierSelectFromModel):
assert transform_output.shape == (X.shape[0], 10)
elif isinstance(component, PCA) or isinstance(
component, LinearDiscriminantAnalysis
):
Expand Down Expand Up @@ -915,6 +919,10 @@ def test_transformer_transform_output_type(X_y_binary):
component, SelectByType
):
assert transform_output.shape == (X.shape[0], 0)
elif isinstance(component, RFRegressorSelectFromModel):
assert transform_output.shape == (X.shape[0], 10)
elif isinstance(component, RFClassifierSelectFromModel):
assert transform_output.shape == (X.shape[0], 10)
elif isinstance(component, PCA) or isinstance(
component, LinearDiscriminantAnalysis
):
Expand Down

0 comments on commit 36a50d2

Please sign in to comment.