Skip to content

Commit

Permalink
Fix determinism for a few preprocessing techniques (#1172)
Browse files: browse the repository at this point in the history.
* Fix determinism for a few preprocessing techniques

* Code formatting

* Fix: model deserialization when mutual_info score is used for select_percentile/rates

Co-authored-by: Rohit Agarwal <rohit.agarwal4@aexp.com>
  • Loading branch information
2 people authored and eddiebergman committed Aug 18, 2021
1 parent 34534c7 commit 90658e5
Show file tree
Hide file tree
Showing 6 changed files with 15 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ def __init__(self, n_quantiles: int, output_distribution: str,
self.preprocessor = QuantileTransformer(
n_quantiles=n_quantiles,
output_distribution=output_distribution,
copy=False
copy=False,
random_state=random_state
)

@staticmethod
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.hyperparameters import \
UniformFloatHyperparameter, CategoricalHyperparameter, Constant
from functools import partial

from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
from autosklearn.pipeline.components.feature_preprocessing.select_percentile import \
Expand All @@ -27,7 +28,8 @@ def __init__(self, percentile, score_func="chi2", random_state=None):
elif score_func == "f_classif":
self.score_func = sklearn.feature_selection.f_classif
elif score_func == "mutual_info":
self.score_func = sklearn.feature_selection.mutual_info_classif
self.score_func = partial(sklearn.feature_selection.mutual_info_classif,
random_state=self.random_state)
else:
raise ValueError("score_func must be in ('chi2, 'f_classif', 'mutual_info'), "
"but is: %s" % score_func)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformFloatHyperparameter, CategoricalHyperparameter
from functools import partial

from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
from autosklearn.pipeline.components.feature_preprocessing.select_percentile import \
Expand All @@ -24,7 +25,8 @@ def __init__(self, percentile, score_func="f_regression", random_state=None):
if score_func == "f_regression":
self.score_func = sklearn.feature_selection.f_regression
elif score_func == "mutual_info":
self.score_func = sklearn.feature_selection.mutual_info_regression
self.score_func = partial(sklearn.feature_selection.mutual_info_regression,
random_state=self.random_state)
else:
raise ValueError("Don't know this scoring function: %s" % score_func)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \
CategoricalHyperparameter
from ConfigSpace import NotEqualsCondition
from functools import partial

from autosklearn.pipeline.components.base import \
AutoSklearnPreprocessingAlgorithm
Expand All @@ -22,7 +23,8 @@ def __init__(self, alpha, mode='fpr',
elif score_func == "f_classif":
self.score_func = sklearn.feature_selection.f_classif
elif score_func == "mutual_info_classif":
self.score_func = sklearn.feature_selection.mutual_info_classif
self.score_func = partial(sklearn.feature_selection.mutual_info_classif,
random_state=self.random_state)
# mutual info classif constantly crashes without mode percentile
self.mode = 'percentile'
else:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \
CategoricalHyperparameter
from ConfigSpace import NotEqualsCondition
from functools import partial

from autosklearn.pipeline.components.base import \
AutoSklearnPreprocessingAlgorithm
Expand All @@ -20,7 +21,8 @@ def __init__(self, alpha, mode='percentile',
if score_func == "f_regression":
self.score_func = sklearn.feature_selection.f_regression
elif score_func == "mutual_info_regression":
self.score_func = sklearn.feature_selection.mutual_info_regression
self.score_func = partial(sklearn.feature_selection.mutual_info_regression,
random_state=self.random_state)
# Mutual info consistently crashes if percentile is not the mode
self.mode = 'percentile'
else:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def fit(self, X, Y):
self.target_dim = int(self.target_dim)
target_dim = min(self.target_dim, X.shape[1] - 1)
self.preprocessor = sklearn.decomposition.TruncatedSVD(
target_dim, algorithm='randomized')
target_dim, algorithm='randomized', random_state=self.random_state)
# TODO: remove when migrating to sklearn 0.16
# Circumvents a bug in sklearn
# https://github.com/scikit-learn/scikit-learn/commit/f08b8c8e52663167819f242f605db39f3b5a6d0c
Expand Down

0 comments on commit 90658e5

Please sign in to comment.