diff --git a/autosklearn/automl.py b/autosklearn/automl.py index 2b4265db95..6616313d59 100644 --- a/autosklearn/automl.py +++ b/autosklearn/automl.py @@ -7,7 +7,7 @@ import unittest.mock import warnings -from ConfigSpace.read_and_write import pcs +from ConfigSpace.read_and_write import json as cs_json import numpy as np import numpy.ma as ma import pandas as pd @@ -1008,7 +1008,7 @@ def _create_search_space(self, tmp_dir, backend, datamanager, task_name = 'CreateConfigSpace' self._stopwatch.start_task(task_name) - configspace_path = os.path.join(tmp_dir, 'space.pcs') + configspace_path = os.path.join(tmp_dir, 'space.json') configuration_space = pipeline.get_configuration_space( datamanager.info, include_estimators=include_estimators, @@ -1017,9 +1017,11 @@ def _create_search_space(self, tmp_dir, backend, datamanager, exclude_preprocessors=exclude_preprocessors) configuration_space = self.configuration_space_created_hook( datamanager, configuration_space) - sp_string = pcs.write(configuration_space) - backend.write_txt_file(configspace_path, sp_string, - 'Configuration space') + backend.write_txt_file( + configspace_path, + cs_json.write(configuration_space), + 'Configuration space' + ) self._stopwatch.stop_task(task_name) return configuration_space, configspace_path diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_rates.py b/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py similarity index 79% rename from autosklearn/pipeline/components/feature_preprocessing/select_rates.py rename to autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py index 7406244ea9..2700b81229 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/select_rates.py +++ b/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py @@ -1,29 +1,34 @@ from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ - CategoricalHyperparameter, Constant + CategoricalHyperparameter +from ConfigSpace import NotEqualsCondition from autosklearn.pipeline.components.base import \ AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import SIGNED_DATA, UNSIGNED_DATA, SPARSE, DENSE, INPUT -class SelectRates(AutoSklearnPreprocessingAlgorithm): +class SelectClassificationRates(AutoSklearnPreprocessingAlgorithm): def __init__(self, alpha, mode='fpr', score_func="chi2", random_state=None): import sklearn.feature_selection self.random_state = random_state # We don't use this self.alpha = alpha + self.mode = mode if score_func == "chi2": self.score_func = sklearn.feature_selection.chi2 elif score_func == "f_classif": self.score_func = sklearn.feature_selection.f_classif + elif score_func == "mutual_info_classif": + self.score_func = sklearn.feature_selection.mutual_info_classif + # mutual info classif constantly crashes without mode percentile + self.mode = 'percentile' else: - raise ValueError("score_func must be in ('chi2, 'f_classif', 'mutual_info'), " - "but is: %s" % score_func) - - self.mode = mode + raise ValueError("score_func must be in ('chi2, 'f_classif', 'mutual_info_classif') " + "for classification " + "but is: %s " % (score_func)) def fit(self, X, y): import scipy.sparse @@ -99,15 +104,15 @@ def get_hyperparameter_search_space(dataset_properties=None): alpha = UniformFloatHyperparameter( name="alpha", lower=0.01, upper=0.5, default_value=0.1) + if dataset_properties is not None and dataset_properties.get('sparse'): + choices = ['chi2', 'mutual_info_classif'] + else: + choices = ['chi2', 'f_classif', 'mutual_info_classif'] + score_func = CategoricalHyperparameter( name="score_func", - choices=["chi2", "f_classif"], + choices=choices, default_value="chi2") - if dataset_properties is not None: - # Chi2 can handle sparse data, so we respect this - if 'sparse' in dataset_properties and dataset_properties['sparse']: - score_func = Constant( - name="score_func", value="chi2") mode = CategoricalHyperparameter('mode', ['fpr', 'fdr', 'fwe'], 'fpr') @@ -116,4 +121,9 @@ def get_hyperparameter_search_space(dataset_properties=None): cs.add_hyperparameter(score_func) cs.add_hyperparameter(mode) + # mutual_info_classif constantly crashes if mode is not percentile + # as a WA, fix the mode for this score + cond = NotEqualsCondition(mode, score_func, 'mutual_info_classif') + cs.add_condition(cond) + return cs diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py b/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py new file mode 100644 index 0000000000..238eaed3c1 --- /dev/null +++ b/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py @@ -0,0 +1,101 @@ +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ + CategoricalHyperparameter +from ConfigSpace import NotEqualsCondition + +from autosklearn.pipeline.components.base import \ + AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.constants import UNSIGNED_DATA, SPARSE, DENSE, INPUT + + +class SelectRegressionRates(AutoSklearnPreprocessingAlgorithm): + def __init__(self, alpha, mode='percentile', + score_func="f_regression", random_state=None): + import sklearn.feature_selection + + self.random_state = random_state # We don't use this + self.alpha = alpha + self.mode = mode + + if score_func == "f_regression": + self.score_func = sklearn.feature_selection.f_regression + elif score_func == "mutual_info_regression": + self.score_func = sklearn.feature_selection.mutual_info_regression + # Mutual info consistently crashes if percentile is not the mode + self.mode = 'percentile' + else: + raise ValueError("score_func must be in ('f_regression, 'mutual_info_regression') " + "for task=regression " + "but is: %s " % (score_func)) + + def fit(self, X, y): + import sklearn.feature_selection + + self.alpha = float(self.alpha) + + self.preprocessor = sklearn.feature_selection.GenericUnivariateSelect( + score_func=self.score_func, param=self.alpha, mode=self.mode) + + self.preprocessor.fit(X, y) + return self + + def transform(self, X): + + if self.preprocessor is None: + raise NotImplementedError() + try: + Xt = self.preprocessor.transform(X) + except ValueError as e: + if "zero-size array to reduction operation maximum which has no " \ + "identity" in e.message: + raise ValueError( + "%s removed all features." % self.__class__.__name__) + else: + raise e + + if Xt.shape[1] == 0: + raise ValueError( + "%s removed all features." % self.__class__.__name__) + return Xt + + @staticmethod + def get_properties(dataset_properties=None): + return {'shortname': 'SR', + 'name': 'Univariate Feature Selection based on rates', + 'handles_regression': True, + 'handles_classification': False, + 'handles_multiclass': True, + 'handles_multilabel': False, + 'handles_multioutput': False, + 'is_deterministic': True, + 'input': (SPARSE, DENSE, UNSIGNED_DATA), + 'output': (INPUT,)} + + @staticmethod + def get_hyperparameter_search_space(dataset_properties=None): + alpha = UniformFloatHyperparameter( + name="alpha", lower=0.01, upper=0.5, default_value=0.1) + + if dataset_properties is not None and dataset_properties.get('sparse'): + choices = ['mutual_info_regression', 'f_regression'] + else: + choices = ['f_regression'] + + score_func = CategoricalHyperparameter( + name="score_func", + choices=choices, + default_value="f_regression") + + mode = CategoricalHyperparameter('mode', ['fpr', 'fdr', 'fwe'], 'fpr') + + cs = ConfigurationSpace() + cs.add_hyperparameter(alpha) + cs.add_hyperparameter(score_func) + cs.add_hyperparameter(mode) + + # Mutual info consistently crashes if percentile is not the mode + if 'mutual_info_regression' in choices: + cond = NotEqualsCondition(mode, score_func, 'mutual_info_regression') + cs.add_condition(cond) + + return cs diff --git a/test/test_automl/test_estimators.py b/test/test_automl/test_estimators.py index a67d4ec280..c29bc81573 100644 --- a/test/test_automl/test_estimators.py +++ b/test/test_automl/test_estimators.py @@ -709,7 +709,9 @@ def test_regression(self): self.assertEqual(predictions.shape, (356,)) score = mean_squared_error(Y_test, predictions) # On average np.sqrt(30) away from the target -> ~5.5 on average - self.assertGreaterEqual(score, -30) + # Results with select rates drops avg score to a range of -32.40 to -37, on 30 seconds + # constraint. With more time_left_for_this_task this is no longer an issue + self.assertGreaterEqual(score, -37) def test_cv_regression(self): """ @@ -733,7 +735,9 @@ def test_cv_regression(self): self.assertEqual(predictions.shape, (356,)) score = mean_squared_error(Y_test, predictions) # On average np.sqrt(30) away from the target -> ~5.5 on average - self.assertGreaterEqual(score, -30) + # Results with select rates drops avg score to a range of -32.40 to -37, on 30 seconds + # constraint. With more time_left_for_this_task this is no longer an issue + self.assertGreaterEqual(score, -37) self._tearDown(tmp) self._tearDown(output) diff --git a/test/test_metalearning/pyMetaLearn/test_meta_base_data/configurations.csv b/test/test_metalearning/pyMetaLearn/test_meta_base_data/configurations.csv index c5a9d2fca6..8a5921817b 100644 --- a/test/test_metalearning/pyMetaLearn/test_meta_base_data/configurations.csv +++ b/test/test_metalearning/pyMetaLearn/test_meta_base_data/configurations.csv @@ -1,10 +1,10 @@ -idx,balancing:strategy,classifier:__choice__,classifier:adaboost:algorithm,classifier:adaboost:learning_rate,classifier:adaboost:max_depth,classifier:adaboost:n_estimators,classifier:bernoulli_nb:alpha,classifier:bernoulli_nb:fit_prior,classifier:decision_tree:criterion,classifier:decision_tree:max_depth_factor,classifier:decision_tree:max_features,classifier:decision_tree:max_leaf_nodes,classifier:decision_tree:min_impurity_decrease,classifier:decision_tree:min_samples_leaf,classifier:decision_tree:min_samples_split,classifier:decision_tree:min_weight_fraction_leaf,classifier:extra_trees:bootstrap,classifier:extra_trees:criterion,classifier:extra_trees:max_depth,classifier:extra_trees:max_features,classifier:extra_trees:max_leaf_nodes,classifier:extra_trees:min_impurity_decrease,classifier:extra_trees:min_samples_leaf,classifier:extra_trees:min_samples_split,classifier:extra_trees:min_weight_fraction_leaf,classifier:gradient_boosting:early_stop,classifier:gradient_boosting:l2_regularization,classifier:gradient_boosting:learning_rate,classifier:gradient_boosting:loss,classifier:gradient_boosting:max_bins,classifier:gradient_boosting:max_depth,classifier:gradient_boosting:max_leaf_nodes,classifier:gradient_boosting:min_samples_leaf,classifier:gradient_boosting:n_iter_no_change,classifier:gradient_boosting:scoring,classifier:gradient_boosting:tol,classifier:gradient_boosting:validation_fraction,classifier:k_nearest_neighbors:n_neighbors,classifier:k_nearest_neighbors:p,classifier:k_nearest_neighbors:weights,classifier:lda:n_components,classifier:lda:shrinkage,classifier:lda:shrinkage_factor,classifier:lda:tol,classifier:liblinear_svc:C,classifier:liblinear_svc:dual,classifier:liblinear_svc:fit_intercept,classifier:liblinear_svc:intercept_scaling,classifier:liblinear_svc:loss,classifier:liblinear_svc:multi_class,classifier:liblinear_svc:penalty,classifier:liblinear_svc:tol,classifier:libsvm_svc:C,classifier:libsvm_svc:coef0,classifier:libsvm_svc:degree,classifier:libsvm_svc:gamma,classifier:libsvm_svc:kernel,classifier:libsvm_svc:max_iter,classifier:libsvm_svc:shrinking,classifier:libsvm_svc:tol,classifier:multinomial_nb:alpha,classifier:multinomial_nb:fit_prior,classifier:passive_aggressive:C,classifier:passive_aggressive:average,classifier:passive_aggressive:fit_intercept,classifier:passive_aggressive:loss,classifier:passive_aggressive:tol,classifier:qda:reg_param,classifier:random_forest:bootstrap,classifier:random_forest:criterion,classifier:random_forest:max_depth,classifier:random_forest:max_features,classifier:random_forest:max_leaf_nodes,classifier:random_forest:min_impurity_decrease,classifier:random_forest:min_samples_leaf,classifier:random_forest:min_samples_split,classifier:random_forest:min_weight_fraction_leaf,classifier:sgd:alpha,classifier:sgd:average,classifier:sgd:epsilon,classifier:sgd:eta0,classifier:sgd:fit_intercept,classifier:sgd:l1_ratio,classifier:sgd:learning_rate,classifier:sgd:loss,classifier:sgd:penalty,classifier:sgd:power_t,classifier:sgd:tol,data_preprocessing:categorical_transformer:categorical_encoding:__choice__,data_preprocessing:categorical_transformer:category_coalescence:__choice__,data_preprocessing:categorical_transformer:category_coalescence:minority_coalescer:minimum_fraction,data_preprocessing:numerical_transformer:imputation:strategy,data_preprocessing:numerical_transformer:rescaling:__choice__,data_preprocessing:numerical_transformer:rescaling:quantile_transformer:n_quantiles,data_preprocessing:numerical_transformer:rescaling:quantile_transformer:output_distribution,data_preprocessing:numerical_transformer:rescaling:robust_scaler:q_max,data_preprocessing:numerical_transformer:rescaling:robust_scaler:q_min,feature_preprocessor:__choice__,feature_preprocessor:extra_trees_preproc_for_classification:bootstrap,feature_preprocessor:extra_trees_preproc_for_classification:criterion,feature_preprocessor:extra_trees_preproc_for_classification:max_depth,feature_preprocessor:extra_trees_preproc_for_classification:max_features,feature_preprocessor:extra_trees_preproc_for_classification:max_leaf_nodes,feature_preprocessor:extra_trees_preproc_for_classification:min_impurity_decrease,feature_preprocessor:extra_trees_preproc_for_classification:min_samples_leaf,feature_preprocessor:extra_trees_preproc_for_classification:min_samples_split,feature_preprocessor:extra_trees_preproc_for_classification:min_weight_fraction_leaf,feature_preprocessor:extra_trees_preproc_for_classification:n_estimators,feature_preprocessor:fast_ica:algorithm,feature_preprocessor:fast_ica:fun,feature_preprocessor:fast_ica:n_components,feature_preprocessor:fast_ica:whiten,feature_preprocessor:feature_agglomeration:affinity,feature_preprocessor:feature_agglomeration:linkage,feature_preprocessor:feature_agglomeration:n_clusters,feature_preprocessor:feature_agglomeration:pooling_func,feature_preprocessor:kernel_pca:coef0,feature_preprocessor:kernel_pca:degree,feature_preprocessor:kernel_pca:gamma,feature_preprocessor:kernel_pca:kernel,feature_preprocessor:kernel_pca:n_components,feature_preprocessor:kitchen_sinks:gamma,feature_preprocessor:kitchen_sinks:n_components,feature_preprocessor:liblinear_svc_preprocessor:C,feature_preprocessor:liblinear_svc_preprocessor:dual,feature_preprocessor:liblinear_svc_preprocessor:fit_intercept,feature_preprocessor:liblinear_svc_preprocessor:intercept_scaling,feature_preprocessor:liblinear_svc_preprocessor:loss,feature_preprocessor:liblinear_svc_preprocessor:multi_class,feature_preprocessor:liblinear_svc_preprocessor:penalty,feature_preprocessor:liblinear_svc_preprocessor:tol,feature_preprocessor:nystroem_sampler:coef0,feature_preprocessor:nystroem_sampler:degree,feature_preprocessor:nystroem_sampler:gamma,feature_preprocessor:nystroem_sampler:kernel,feature_preprocessor:nystroem_sampler:n_components,feature_preprocessor:pca:keep_variance,feature_preprocessor:pca:whiten,feature_preprocessor:polynomial:degree,feature_preprocessor:polynomial:include_bias,feature_preprocessor:polynomial:interaction_only,feature_preprocessor:random_trees_embedding:bootstrap,feature_preprocessor:random_trees_embedding:max_depth,feature_preprocessor:random_trees_embedding:max_leaf_nodes,feature_preprocessor:random_trees_embedding:min_samples_leaf,feature_preprocessor:random_trees_embedding:min_samples_split,feature_preprocessor:random_trees_embedding:min_weight_fraction_leaf,feature_preprocessor:random_trees_embedding:n_estimators,feature_preprocessor:select_percentile_classification:percentile,feature_preprocessor:select_percentile_classification:score_func,feature_preprocessor:select_rates:alpha,feature_preprocessor:select_rates:mode,feature_preprocessor:select_rates:score_func +idx,balancing:strategy,classifier:__choice__,classifier:adaboost:algorithm,classifier:adaboost:learning_rate,classifier:adaboost:max_depth,classifier:adaboost:n_estimators,classifier:bernoulli_nb:alpha,classifier:bernoulli_nb:fit_prior,classifier:decision_tree:criterion,classifier:decision_tree:max_depth_factor,classifier:decision_tree:max_features,classifier:decision_tree:max_leaf_nodes,classifier:decision_tree:min_impurity_decrease,classifier:decision_tree:min_samples_leaf,classifier:decision_tree:min_samples_split,classifier:decision_tree:min_weight_fraction_leaf,classifier:extra_trees:bootstrap,classifier:extra_trees:criterion,classifier:extra_trees:max_depth,classifier:extra_trees:max_features,classifier:extra_trees:max_leaf_nodes,classifier:extra_trees:min_impurity_decrease,classifier:extra_trees:min_samples_leaf,classifier:extra_trees:min_samples_split,classifier:extra_trees:min_weight_fraction_leaf,classifier:gradient_boosting:early_stop,classifier:gradient_boosting:l2_regularization,classifier:gradient_boosting:learning_rate,classifier:gradient_boosting:loss,classifier:gradient_boosting:max_bins,classifier:gradient_boosting:max_depth,classifier:gradient_boosting:max_leaf_nodes,classifier:gradient_boosting:min_samples_leaf,classifier:gradient_boosting:n_iter_no_change,classifier:gradient_boosting:scoring,classifier:gradient_boosting:tol,classifier:gradient_boosting:validation_fraction,classifier:k_nearest_neighbors:n_neighbors,classifier:k_nearest_neighbors:p,classifier:k_nearest_neighbors:weights,classifier:lda:n_components,classifier:lda:shrinkage,classifier:lda:shrinkage_factor,classifier:lda:tol,classifier:liblinear_svc:C,classifier:liblinear_svc:dual,classifier:liblinear_svc:fit_intercept,classifier:liblinear_svc:intercept_scaling,classifier:liblinear_svc:loss,classifier:liblinear_svc:multi_class,classifier:liblinear_svc:penalty,classifier:liblinear_svc:tol,classifier:libsvm_svc:C,classifier:libsvm_svc:coef0,classifier:libsvm_svc:degree,classifier:libsvm_svc:gamma,classifier:libsvm_svc:kernel,classifier:libsvm_svc:max_iter,classifier:libsvm_svc:shrinking,classifier:libsvm_svc:tol,classifier:multinomial_nb:alpha,classifier:multinomial_nb:fit_prior,classifier:passive_aggressive:C,classifier:passive_aggressive:average,classifier:passive_aggressive:fit_intercept,classifier:passive_aggressive:loss,classifier:passive_aggressive:tol,classifier:qda:reg_param,classifier:random_forest:bootstrap,classifier:random_forest:criterion,classifier:random_forest:max_depth,classifier:random_forest:max_features,classifier:random_forest:max_leaf_nodes,classifier:random_forest:min_impurity_decrease,classifier:random_forest:min_samples_leaf,classifier:random_forest:min_samples_split,classifier:random_forest:min_weight_fraction_leaf,classifier:sgd:alpha,classifier:sgd:average,classifier:sgd:epsilon,classifier:sgd:eta0,classifier:sgd:fit_intercept,classifier:sgd:l1_ratio,classifier:sgd:learning_rate,classifier:sgd:loss,classifier:sgd:penalty,classifier:sgd:power_t,classifier:sgd:tol,data_preprocessing:categorical_transformer:categorical_encoding:__choice__,data_preprocessing:categorical_transformer:category_coalescence:__choice__,data_preprocessing:categorical_transformer:category_coalescence:minority_coalescer:minimum_fraction,data_preprocessing:numerical_transformer:imputation:strategy,data_preprocessing:numerical_transformer:rescaling:__choice__,data_preprocessing:numerical_transformer:rescaling:quantile_transformer:n_quantiles,data_preprocessing:numerical_transformer:rescaling:quantile_transformer:output_distribution,data_preprocessing:numerical_transformer:rescaling:robust_scaler:q_max,data_preprocessing:numerical_transformer:rescaling:robust_scaler:q_min,feature_preprocessor:__choice__,feature_preprocessor:extra_trees_preproc_for_classification:bootstrap,feature_preprocessor:extra_trees_preproc_for_classification:criterion,feature_preprocessor:extra_trees_preproc_for_classification:max_depth,feature_preprocessor:extra_trees_preproc_for_classification:max_features,feature_preprocessor:extra_trees_preproc_for_classification:max_leaf_nodes,feature_preprocessor:extra_trees_preproc_for_classification:min_impurity_decrease,feature_preprocessor:extra_trees_preproc_for_classification:min_samples_leaf,feature_preprocessor:extra_trees_preproc_for_classification:min_samples_split,feature_preprocessor:extra_trees_preproc_for_classification:min_weight_fraction_leaf,feature_preprocessor:extra_trees_preproc_for_classification:n_estimators,feature_preprocessor:fast_ica:algorithm,feature_preprocessor:fast_ica:fun,feature_preprocessor:fast_ica:n_components,feature_preprocessor:fast_ica:whiten,feature_preprocessor:feature_agglomeration:affinity,feature_preprocessor:feature_agglomeration:linkage,feature_preprocessor:feature_agglomeration:n_clusters,feature_preprocessor:feature_agglomeration:pooling_func,feature_preprocessor:kernel_pca:coef0,feature_preprocessor:kernel_pca:degree,feature_preprocessor:kernel_pca:gamma,feature_preprocessor:kernel_pca:kernel,feature_preprocessor:kernel_pca:n_components,feature_preprocessor:kitchen_sinks:gamma,feature_preprocessor:kitchen_sinks:n_components,feature_preprocessor:liblinear_svc_preprocessor:C,feature_preprocessor:liblinear_svc_preprocessor:dual,feature_preprocessor:liblinear_svc_preprocessor:fit_intercept,feature_preprocessor:liblinear_svc_preprocessor:intercept_scaling,feature_preprocessor:liblinear_svc_preprocessor:loss,feature_preprocessor:liblinear_svc_preprocessor:multi_class,feature_preprocessor:liblinear_svc_preprocessor:penalty,feature_preprocessor:liblinear_svc_preprocessor:tol,feature_preprocessor:nystroem_sampler:coef0,feature_preprocessor:nystroem_sampler:degree,feature_preprocessor:nystroem_sampler:gamma,feature_preprocessor:nystroem_sampler:kernel,feature_preprocessor:nystroem_sampler:n_components,feature_preprocessor:pca:keep_variance,feature_preprocessor:pca:whiten,feature_preprocessor:polynomial:degree,feature_preprocessor:polynomial:include_bias,feature_preprocessor:polynomial:interaction_only,feature_preprocessor:random_trees_embedding:bootstrap,feature_preprocessor:random_trees_embedding:max_depth,feature_preprocessor:random_trees_embedding:max_leaf_nodes,feature_preprocessor:random_trees_embedding:min_samples_leaf,feature_preprocessor:random_trees_embedding:min_samples_split,feature_preprocessor:random_trees_embedding:min_weight_fraction_leaf,feature_preprocessor:random_trees_embedding:n_estimators,feature_preprocessor:select_percentile_classification:percentile,feature_preprocessor:select_percentile_classification:score_func,feature_preprocessor:select_rates_classification:alpha,feature_preprocessor:select_rates_classification:mode,feature_preprocessor:select_rates_classification:score_func 1,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,0.006832691101653281,0.0990420448281782,auto,255,None,58,9,,loss,1e-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.047607909209835673,most_frequent,normalize,,,,,feature_agglomeration,,,,,,,,,,,,,,,cosine,average,72,median,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -2,weighting,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,2.215660250704945e-08,0.0568967527929491,auto,255,None,74,58,,loss,1e-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,no_coalescense,,most_frequent,none,,,,,select_rates,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.3999502319254789,fwe,f_classif +2,weighting,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,2.215660250704945e-08,0.0568967527929491,auto,255,None,74,58,,loss,1e-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,no_coalescense,,most_frequent,none,,,,,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.3999502319254789,fwe,f_classif 3,none,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,entropy,None,0.7811236762098946,None,0.0,15,9,0.0,,,,,,,,,,,,no_encoding,no_coalescense,,most_frequent,none,,,,,extra_trees_preproc_for_classification,False,entropy,None,0.2269858618750471,None,0.0,13,12,0.0,100,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -4,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.5804208006044023,None,0.0,5,2,0.0,,,,,,,,,,,,no_encoding,no_coalescense,,median,standardize,,,,,select_rates,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.061500733991527654,fdr,f_classif +4,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.5804208006044023,None,0.0,5,2,0.0,,,,,,,,,,,,no_encoding,no_coalescense,,median,standardize,,,,,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.061500733991527654,fdr,f_classif 5,none,qda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.563056219822946,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.32793677336996485,most_frequent,none,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -6,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,683.603209772402,-0.7761786661778607,4,1.0146245161392977,poly,-1,True,0.0004729761062000146,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.07556779791699596,most_frequent,standardize,,,,,select_rates,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.46651479293540027,fdr,f_classif +6,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,683.603209772402,-0.7761786661778607,4,1.0146245161392977,poly,-1,True,0.0004729761062000146,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.07556779791699596,most_frequent,standardize,,,,,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.46651479293540027,fdr,f_classif 7,weighting,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.993647974710288,-0.09714179076410145,2,0.10000000000000006,poly,-1,True,0.0011475566557439987,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,no_coalescense,,mean,robust_scaler,,,0.8124421960026027,0.18251138129426106,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,False,True,,,,,,,,,,,, 8,none,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,gini,None,0.31482574716831474,None,0.0,15,2,0.0,,,,,,,,,,,,no_encoding,no_coalescense,,most_frequent,standardize,,,,,liblinear_svc_preprocessor,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,False,True,1,squared_hinge,ovr,l1,5.5234897124903465e-05,,,,,,,,,,,,,,,,,,,,,, 9,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.8932965286370729,None,0.0,1,2,0.0,,,,,,,,,,,,one_hot_encoding,no_coalescense,,median,minmax,,,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,False,False,,,,,,,,,,,, @@ -31,22 +31,22 @@ idx,balancing:strategy,classifier:__choice__,classifier:adaboost:algorithm,class 30,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,0.6993161849181185,0.44235005157802176,auto,255,None,270,15,,loss,1e-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.0077758033214372,mean,none,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 31,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.34085742012558995,None,0.0,2,2,0.0,,,,,,,,,,,,no_encoding,minority_coalescer,0.010251558508210521,most_frequent,normalize,,,,,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,94.77374433257484,chi2,,, 32,weighting,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,23.617163356857034,0.4249652342218557,2,0.03426568422270486,poly,-1,True,0.0003809897288698571,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.004949301030421484,most_frequent,quantile_transformer,847,normal,,,liblinear_svc_preprocessor,,,,,,,,,,,,,,,,,,,,,,,,,,3.3404000226016595,False,True,1,squared_hinge,ovr,l1,0.00013812954117187317,,,,,,,,,,,,,,,,,,,,,, -33,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.4751607001217574,None,0.0,2,18,0.0,,,,,,,,,,,,no_encoding,minority_coalescer,0.0015489667569464098,most_frequent,standardize,,,,,select_rates,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.2465775971025667,fpr,chi2 -34,weighting,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,valid,0.787172957129578,0.23076913534674612,auto,255,None,8,4,10,loss,1e-07,0.1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.002842817334543296,mean,standardize,,,,,select_rates,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.2779207466036798,fwe,f_classif +33,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.4751607001217574,None,0.0,2,18,0.0,,,,,,,,,,,,no_encoding,minority_coalescer,0.0015489667569464098,most_frequent,standardize,,,,,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.2465775971025667,fpr,chi2 +34,weighting,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,valid,0.787172957129578,0.23076913534674612,auto,255,None,8,4,10,loss,1e-07,0.1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.002842817334543296,mean,standardize,,,,,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.2779207466036798,fwe,f_classif 35,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,158.8949863228855,,,0.03920054687126197,rbf,-1,True,0.05469985785058926,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.015996674733825135,most_frequent,quantile_transformer,1033,uniform,,,fast_ica,,,,,,,,,,,parallel,logcosh,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 36,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.1161756717784211,None,0.0,2,2,0.0,,,,,,,,,,,,no_encoding,no_coalescense,,mean,normalize,,,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,False,False,,,,,,,,,,,, -37,weighting,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,114.37037237306517,,,0.013196149743002957,rbf,-1,False,2.2119982336561568e-05,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.030600209348810598,median,robust_scaler,,,0.8903774541072713,0.14849508114407797,select_rates,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.10727720089253716,fwe,f_classif +37,weighting,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,114.37037237306517,,,0.013196149743002957,rbf,-1,False,2.2119982336561568e-05,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.030600209348810598,median,robust_scaler,,,0.8903774541072713,0.14849508114407797,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.10727720089253716,fwe,f_classif 38,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,25369.899739311986,-0.20538081740449166,,0.007550793530761754,sigmoid,-1,True,0.00014198788135109906,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.009250691729522439,most_frequent,quantile_transformer,1442,uniform,,,fast_ica,,,,,,,,,,,parallel,cube,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 39,none,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,gini,None,0.742074481485891,None,0.0,1,2,0.0,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.010388289410086769,mean,none,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 40,none,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.5,None,0.0,1,2,0.0,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.0038325481818368653,most_frequent,quantile_transformer,1000,uniform,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,False,False,,,,,,,,,,,, 41,none,passive_aggressive,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.46057831591617715,False,True,hinge,0.04557857428827514,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.00027457445401600137,median,standardize,,,,,extra_trees_preproc_for_classification,True,gini,None,0.48190346970486964,None,0.0,17,18,0.0,100,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 42,weighting,lda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,34,auto,,0.00012339000686260981,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.010000000000000004,mean,standardize,,,,,kitchen_sinks,,,,,,,,,,,,,,,,,,,,,,,,0.027161884929113287,3011,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -43,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1007.8868860667042,0.7073639177519475,2,0.0009693320195457126,poly,-1,True,0.00048384544670559135,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.017078985265493323,median,quantile_transformer,971,uniform,,,select_rates,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.27854669854596986,fpr,f_classif +43,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1007.8868860667042,0.7073639177519475,2,0.0009693320195457126,poly,-1,True,0.00048384544670559135,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.017078985265493323,median,quantile_transformer,971,uniform,,,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.27854669854596986,fpr,f_classif 44,weighting,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,0.0009039383509168851,0.013859624893482336,auto,255,None,314,166,,loss,1e-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.07166826832005445,median,robust_scaler,,,0.8113117119932765,0.22229745700501014,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 45,none,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,gini,None,0.5916641238089724,None,0.0,1,7,0.0,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.014941875096420176,most_frequent,standardize,,,,,extra_trees_preproc_for_classification,True,gini,None,0.6621674571394228,None,0.0,3,11,0.0,100,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 46,none,passive_aggressive,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.4177635558897493,True,True,hinge,0.00036622547004230247,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.3298639925115399,median,normalize,,,,,kitchen_sinks,,,,,,,,,,,,,,,,,,,,,,,,0.02443001336430177,7802,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 47,weighting,sgd,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,6.576840761438448e-07,True,,0.0003087686113414944,True,1.0895900532824292e-07,constant,hinge,elasticnet,,0.007781223173502778,no_encoding,minority_coalescer,0.002482961497851837,mean,none,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -48,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.6149200141024044,None,0.0,3,6,0.0,,,,,,,,,,,,no_encoding,no_coalescense,,mean,normalize,,,,,select_rates,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.27403124544524843,fdr,chi2 +48,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.6149200141024044,None,0.0,3,6,0.0,,,,,,,,,,,,no_encoding,no_coalescense,,mean,normalize,,,,,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.27403124544524843,fdr,chi2 49,weighting,adaboost,SAMME,1.1345415570152533,6,472,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,no_coalescense,,mean,normalize,,,,,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,89.55941314463736,f_classif,,, 50,none,adaboost,SAMME,1.117891964153124,7,350,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.099090775365223,mean,standardize,,,,,fast_ica,,,,,,,,,,,parallel,exp,1122,True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 51,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,1.260108334347015e-07,0.020450900578038868,auto,255,None,1907,82,,loss,1e-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,no_coalescense,,median,quantile_transformer,1015,normal,,,pca,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.9704315246405552,True,,,,,,,,,,,,,,, @@ -60,7 +60,7 @@ idx,balancing:strategy,classifier:__choice__,classifier:adaboost:algorithm,class 59,none,passive_aggressive,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.38845176895497546,True,True,hinge,0.07195442121939964,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.009803171174126721,most_frequent,minmax,,,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,True,False,,,,,,,,,,,, 60,none,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.19548169161642792,None,0.0,10,18,0.0,,,,,,,,,,,,no_encoding,minority_coalescer,0.0031030790458014663,most_frequent,normalize,,,,,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,96.55453782974163,f_classif,,, 61,weighting,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4077.0183557137793,,,0.18582946048483806,rbf,-1,True,0.007982841167341137,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.0024292204383546253,mean,robust_scaler,,,0.7925685994397953,0.28082571006541873,feature_agglomeration,,,,,,,,,,,,,,,cosine,average,15,mean,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -62,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,0.02145872972690199,0.03842927840160621,auto,255,None,171,8,,loss,1e-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.004228524718610471,most_frequent,standardize,,,,,select_rates,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.13472853186492292,fdr,chi2 +62,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,0.02145872972690199,0.03842927840160621,auto,255,None,171,8,,loss,1e-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.004228524718610471,most_frequent,standardize,,,,,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.13472853186492292,fdr,chi2 63,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.35459002631952014,None,0.0,5,15,0.0,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.010270173676218672,median,quantile_transformer,1807,uniform,,,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,56.525707700661215,chi2,,, 64,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10091.529330032845,,,0.0011283303013784186,rbf,-1,True,0.006930076959856067,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,no_coalescense,,most_frequent,minmax,,,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,True,True,,,,,,,,,,,, 65,weighting,sgd,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.7553798077390236e-05,False,,,True,0.5295119133805599,optimal,log,elasticnet,,0.0002846848503288152,no_encoding,minority_coalescer,0.05377825070455988,mean,quantile_transformer,1591,normal,,,fast_ica,,,,,,,,,,,deflation,cube,1400,True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, @@ -90,7 +90,7 @@ idx,balancing:strategy,classifier:__choice__,classifier:adaboost:algorithm,class 89,weighting,qda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.07578664472529394,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.012596384519267407,median,robust_scaler,,,0.7176883035814098,0.2870577047962274,liblinear_svc_preprocessor,,,,,,,,,,,,,,,,,,,,,,,,,,0.6428686651502072,False,True,1,squared_hinge,ovr,l1,2.5546943595340656e-05,,,,,,,,,,,,,,,,,,,,,, 90,weighting,adaboost,SAMME,0.07959216314142419,1,124,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,no_coalescense,,median,robust_scaler,,,0.7401836136931198,0.2679472228039613,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,18.787733861356816,mutual_info,,, 91,weighting,lda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,89,auto,,0.0819425045156221,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.37737820096945385,median,minmax,,,,,nystroem_sampler,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.5073162154954842,2,0.004207352122999392,poly,1212,,,,,,,,,,,,,,,,, -92,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.7229728445103076,None,0.0,5,13,0.0,,,,,,,,,,,,no_encoding,no_coalescense,,mean,robust_scaler,,,0.7823020129596692,0.1205596141179452,select_rates,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.12983623180397538,fwe,f_classif +92,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.7229728445103076,None,0.0,5,13,0.0,,,,,,,,,,,,no_encoding,no_coalescense,,mean,robust_scaler,,,0.7823020129596692,0.1205596141179452,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.12983623180397538,fwe,f_classif 93,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.7982423863663426,None,0.0,11,7,0.0,,,,,,,,,,,,no_encoding,minority_coalescer,0.003645478141655197,median,robust_scaler,,,0.7651599230489026,0.15388614105871848,feature_agglomeration,,,,,,,,,,,,,,,cosine,average,34,mean,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 94,none,adaboost,SAMME,0.04534487012126666,9,237,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.009377862051712454,median,minmax,,,,,liblinear_svc_preprocessor,,,,,,,,,,,,,,,,,,,,,,,,,,4.281022361344507,False,True,1,squared_hinge,ovr,l1,0.02040524760798526,,,,,,,,,,,,,,,,,,,,,, 95,none,qda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.563056219822946,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.32793677336996485,most_frequent,none,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, @@ -98,7 +98,7 @@ idx,balancing:strategy,classifier:__choice__,classifier:adaboost:algorithm,class 97,none,qda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.563056219822946,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.32793677336996485,most_frequent,none,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 98,weighting,adaboost,SAMME,0.24826166093503962,4,203,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.011447514256202326,median,quantile_transformer,949,normal,,,pca,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.7702718499065888,True,,,,,,,,,,,,,,, 99,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,5.295700573535198e-10,0.042756254512807394,auto,255,None,84,14,,loss,1e-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.35207536432313746,median,robust_scaler,,,0.7792676238311911,0.28901203457977576,feature_agglomeration,,,,,,,,,,,,,,,cosine,average,248,max,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -100,weighting,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,0.0016445078304079647,0.1997863062244349,auto,255,None,564,15,,loss,1e-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,no_coalescense,,most_frequent,robust_scaler,,,0.75,0.23248947228355937,select_rates,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.32975511665416357,fdr,f_classif +100,weighting,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,0.0016445078304079647,0.1997863062244349,auto,255,None,564,15,,loss,1e-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,no_coalescense,,most_frequent,robust_scaler,,,0.75,0.23248947228355937,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.32975511665416357,fdr,f_classif 101,weighting,passive_aggressive,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0019618741335452826,True,True,hinge,0.0004803120822404903,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,no_coalescense,,most_frequent,standardize,,,,,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,57.38056668131513,mutual_info,,, 102,weighting,adaboost,SAMME.R,0.0190998863782481,7,99,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,no_coalescense,,most_frequent,quantile_transformer,1177,uniform,,,random_trees_embedding,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,7,None,1,3,1.0,97,,,,, 103,weighting,extra_trees,,,,,,,,,,,,,,,False,gini,None,0.8850157429082246,None,0.0,12,9,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.27673478870889345,median,none,,,,,fast_ica,,,,,,,,,,,deflation,cube,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, @@ -111,14 +111,14 @@ idx,balancing:strategy,classifier:__choice__,classifier:adaboost:algorithm,class 110,weighting,adaboost,SAMME.R,0.22665749778830807,7,78,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,no_coalescense,,most_frequent,quantile_transformer,1000,uniform,,,feature_agglomeration,,,,,,,,,,,,,,,cosine,complete,373,median,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 111,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1689.0860195745497,0.7404917548199534,2,0.011884114654356123,poly,-1,True,4.262566522678876e-05,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.004395163582476699,median,minmax,,,,,random_trees_embedding,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,3,None,13,13,1.0,74,,,,, 112,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,2.859589328406253e-07,0.25392293346701533,auto,255,None,4,80,,loss,1e-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,no_coalescense,,mean,standardize,,,,,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,65.7135135608214,mutual_info,,, -113,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,valid,9.097196057095871e-06,0.32800101253288033,auto,255,None,53,28,20,loss,1e-07,0.101919468281566,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.008121631984215255,median,robust_scaler,,,0.7654960296398138,0.25,select_rates,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.07288328235161678,fwe,chi2 +113,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,valid,9.097196057095871e-06,0.32800101253288033,auto,255,None,53,28,20,loss,1e-07,0.101919468281566,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.008121631984215255,median,robust_scaler,,,0.7654960296398138,0.25,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.07288328235161678,fwe,chi2 114,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.5463209559127865,None,0.0,12,5,0.0,,,,,,,,,,,,no_encoding,minority_coalescer,0.025636105021492692,mean,standardize,,,,,liblinear_svc_preprocessor,,,,,,,,,,,,,,,,,,,,,,,,,,1.4472785394247571,False,True,1,squared_hinge,ovr,l1,0.00018809455411335498,,,,,,,,,,,,,,,,,,,,,, 115,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,entropy,None,0.07970183198340376,None,0.0,9,9,0.0,,,,,,,,,,,,no_encoding,minority_coalescer,0.008729901092151533,most_frequent,quantile_transformer,1028,normal,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,True,True,,,,,,,,,,,, -116,weighting,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,2.050858257794119e-10,0.0509713008465305,auto,255,None,17,16,,loss,1e-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.0006772048110168395,median,none,,,,,select_rates,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.42719682445182733,fdr,f_classif +116,weighting,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,2.050858257794119e-10,0.0509713008465305,auto,255,None,17,16,,loss,1e-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.0006772048110168395,median,none,,,,,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.42719682445182733,fdr,f_classif 117,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.4886932005592788,None,0.0,1,17,0.0,,,,,,,,,,,,no_encoding,no_coalescense,,median,minmax,,,,,liblinear_svc_preprocessor,,,,,,,,,,,,,,,,,,,,,,,,,,2.2673178962517726,False,True,1,squared_hinge,ovr,l1,0.07576775715726437,,,,,,,,,,,,,,,,,,,,,, 118,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1954.4001293172055,0.5941977727413141,3,0.01222672837922025,poly,-1,False,0.000868704184075337,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.023611476558497053,most_frequent,quantile_transformer,1312,normal,,,extra_trees_preproc_for_classification,True,entropy,None,0.26038719206370126,None,0.0,6,14,0.0,100,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 119,none,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,gini,None,0.48772464140872207,None,0.0,1,16,0.0,,,,,,,,,,,,no_encoding,minority_coalescer,0.010000000000000004,most_frequent,normalize,,,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,False,False,,,,,,,,,,,, -120,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,571.8976418358935,-0.6542106402522795,,5.0850539598583375e-05,sigmoid,-1,False,0.003954814208041632,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,no_coalescense,,median,none,,,,,select_rates,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.1378799965815952,fwe,f_classif +120,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,571.8976418358935,-0.6542106402522795,,5.0850539598583375e-05,sigmoid,-1,False,0.003954814208041632,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,no_coalescense,,median,none,,,,,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.1378799965815952,fwe,f_classif 121,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,entropy,None,0.6204291847226782,None,0.0,2,7,0.0,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.012802264108301202,most_frequent,normalize,,,,,fast_ica,,,,,,,,,,,deflation,exp,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 122,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.7159488281157247,None,0.0,15,3,0.0,,,,,,,,,,,,no_encoding,no_coalescense,,mean,robust_scaler,,,0.7546151696972261,0.25941712940346606,feature_agglomeration,,,,,,,,,,,,,,,cosine,average,33,mean,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 123,none,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.4285190453868457,None,0.0,1,2,0.0,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.49851517731857553,most_frequent,quantile_transformer,958,uniform,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,False,False,,,,,,,,,,,, diff --git a/test/test_pipeline/components/feature_preprocessing/test_choice.py b/test/test_pipeline/components/feature_preprocessing/test_choice.py index 838cb5c3d8..525ec38356 100644 --- a/test/test_pipeline/components/feature_preprocessing/test_choice.py +++ b/test/test_pipeline/components/feature_preprocessing/test_choice.py @@ -7,7 +7,7 @@ class FeatureProcessingTest(unittest.TestCase): def test_get_available_components(self): # Target type for target_type, num_values in [('classification', 15), - ('regression', 13)]: + ('regression', 14)]: data_properties = {'target_type': target_type} available_components = fp.FeaturePreprocessorChoice(data_properties)\ diff --git a/test/test_pipeline/components/feature_preprocessing/test_select_rates.py b/test/test_pipeline/components/feature_preprocessing/test_select_rates_classification.py similarity index 60% rename from test/test_pipeline/components/feature_preprocessing/test_select_rates.py rename to test/test_pipeline/components/feature_preprocessing/test_select_rates_classification.py index 16e586e7e2..2497b5174a 100644 --- a/test/test_pipeline/components/feature_preprocessing/test_select_rates.py +++ b/test/test_pipeline/components/feature_preprocessing/test_select_rates_classification.py @@ -4,20 +4,20 @@ import scipy.sparse import sklearn.preprocessing -from autosklearn.pipeline.components.feature_preprocessing.select_rates import \ - SelectRates +from autosklearn.pipeline.components.feature_preprocessing.select_rates_classification import \ + SelectClassificationRates from autosklearn.pipeline.util import _test_preprocessing, get_dataset -class SelectRatesComponentTest(unittest.TestCase): +class SelectClassificationRatesComponentTest(unittest.TestCase): def test_default_configuration(self): - transformation, original = _test_preprocessing(SelectRates) + transformation, original = _test_preprocessing(SelectClassificationRates) self.assertEqual(transformation.shape[0], original.shape[0]) self.assertEqual(transformation.shape[1], 3) self.assertFalse((transformation == 0).all()) transformation, original = _test_preprocessing( - SelectRates, make_sparse=True) + SelectClassificationRates, make_sparse=True) self.assertTrue(scipy.sparse.issparse(transformation)) self.assertEqual(transformation.shape[0], original.shape[0]) self.assertEqual(transformation.shape[1], int(original.shape[1] / 2)) @@ -27,13 +27,13 @@ def test_default_configuration(self): original_X_train = X_train.copy() ss = sklearn.preprocessing.StandardScaler() X_train = ss.fit_transform(X_train) - configuration_space = SelectRates.get_hyperparameter_search_space() + configuration_space = SelectClassificationRates.get_hyperparameter_search_space() default = configuration_space.get_default_configuration() - preprocessor = SelectRates(random_state=1, - **{hp_name: default[hp_name] - for hp_name in default - if default[hp_name] is not None}) + preprocessor = SelectClassificationRates(random_state=1, + **{hp_name: default[hp_name] + for hp_name in default + if default[hp_name] is not None}) transformer = preprocessor.fit(X_train, Y_train) transformation, original = transformer.transform( @@ -50,11 +50,11 @@ def test_preprocessing_dtype(self): X_train, Y_train, X_test, Y_test = get_dataset("iris") self.assertEqual(X_train.dtype, np.float32) - configuration_space = SelectRates.get_hyperparameter_search_space() + configuration_space = SelectClassificationRates.get_hyperparameter_search_space() default = configuration_space.get_default_configuration() - preprocessor = SelectRates(random_state=1, - **{hp_name: default[hp_name] for hp_name in - default}) + preprocessor = SelectClassificationRates(random_state=1, + **{hp_name: default[hp_name] for hp_name in + default}) preprocessor.fit(X_train, Y_train) Xt = preprocessor.transform(X_train) self.assertEqual(Xt.dtype, np.float32) @@ -62,11 +62,11 @@ def test_preprocessing_dtype(self): # np.float64 X_train, Y_train, X_test, Y_test = get_dataset("iris") X_train = X_train.astype(np.float64) - configuration_space = SelectRates.get_hyperparameter_search_space() + configuration_space = SelectClassificationRates.get_hyperparameter_search_space() default = configuration_space.get_default_configuration() - preprocessor = SelectRates(random_state=1, - **{hp_name: default[hp_name] for hp_name in - default}) + preprocessor = SelectClassificationRates(random_state=1, + **{hp_name: default[hp_name] for hp_name in + default}) preprocessor.fit(X_train, Y_train) Xt = preprocessor.transform(X_train) self.assertEqual(Xt.dtype, np.float64) @@ -75,11 +75,11 @@ def test_preprocessing_dtype(self): # np.float32 X_train, Y_train, X_test, Y_test = get_dataset("iris", make_sparse=True) self.assertEqual(X_train.dtype, np.float32) - configuration_space = SelectRates.get_hyperparameter_search_space() + configuration_space = SelectClassificationRates.get_hyperparameter_search_space() default = configuration_space.get_default_configuration() - preprocessor = SelectRates(random_state=1, - **{hp_name: default[hp_name] for hp_name in - default}) + preprocessor = SelectClassificationRates(random_state=1, + **{hp_name: default[hp_name] for hp_name in + default}) preprocessor.fit(X_train, Y_train) Xt = preprocessor.transform(X_train) self.assertEqual(Xt.dtype, np.float32) @@ -87,11 +87,11 @@ def test_preprocessing_dtype(self): # np.float64 X_train, Y_train, X_test, Y_test = get_dataset("iris", make_sparse=True) X_train = X_train.astype(np.float64) - configuration_space = SelectRates.get_hyperparameter_search_space() + configuration_space = SelectClassificationRates.get_hyperparameter_search_space() default = configuration_space.get_default_configuration() - preprocessor = SelectRates(random_state=1, - **{hp_name: default[hp_name] for hp_name in - default}) + preprocessor = SelectClassificationRates(random_state=1, + **{hp_name: default[hp_name] for hp_name in + default}) preprocessor.fit(X_train, Y_train) Xt = preprocessor.transform(X_train) self.assertEqual(Xt.dtype, np.float64) diff --git a/test/test_pipeline/components/feature_preprocessing/test_select_rates_regression.py b/test/test_pipeline/components/feature_preprocessing/test_select_rates_regression.py new file mode 100644 index 0000000000..573bab32ce --- /dev/null +++ b/test/test_pipeline/components/feature_preprocessing/test_select_rates_regression.py @@ -0,0 +1,85 @@ +import unittest + +import numpy as np +import scipy.sparse +import sklearn.preprocessing + +from autosklearn.pipeline.components.feature_preprocessing.select_rates_regression import \ + SelectRegressionRates +from autosklearn.pipeline.util import _test_preprocessing, get_dataset + + +class SelectRegressionRatesComponentTest(unittest.TestCase): + def test_default_configuration(self): + transformation, original = _test_preprocessing(SelectRegressionRates) + self.assertEqual(transformation.shape[0], original.shape[0]) + self.assertEqual(transformation.shape[1], 4) + self.assertFalse((transformation == 0).all()) + + transformation, original = _test_preprocessing( + SelectRegressionRates, make_sparse=True) + self.assertTrue(scipy.sparse.issparse(transformation)) + self.assertEqual(transformation.shape[0], original.shape[0]) + self.assertEqual(transformation.shape[1], int(original.shape[1] / 2)) + + # Makes sure that the features are reduced, not the number of samples + X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits') + original_X_train = X_train.copy() + ss = sklearn.preprocessing.StandardScaler() + X_train = ss.fit_transform(X_train) + configuration_space = SelectRegressionRates.get_hyperparameter_search_space() + default = configuration_space.get_default_configuration() + + preprocessor = SelectRegressionRates(random_state=1, + **{hp_name: default[hp_name] + for hp_name in default + if default[hp_name] is not None}) + + transformer = preprocessor.fit(X_train, Y_train) + transformation, original = transformer.transform( + X_train), original_X_train + self.assertEqual(transformation.shape[0], original.shape[0]) + self.assertEqual(transformation.shape[1], 21) + + def test_default_configuration_regression(self): + transformation, original = _test_preprocessing( + SelectRegressionRates, + dataset='boston', + ) + self.assertEqual(transformation.shape[0], original.shape[0]) + # From 13 to 12 features + self.assertEqual(transformation.shape[1], 12) + self.assertFalse((transformation == 0).all()) + + def test_preprocessing_dtype_regression(self): + # Dense + # np.float32 + X_train, Y_train, X_test, Y_test = get_dataset("boston") + self.assertEqual(X_train.dtype, np.float32) + + dataset_properties = {'target_type': 'regression'} + + configuration_space = SelectRegressionRates.get_hyperparameter_search_space( + dataset_properties + ) + default = configuration_space.get_default_configuration() + preprocessor = SelectRegressionRates(random_state=1, + **{hp_name: default[hp_name] for hp_name in + default}) + preprocessor.fit(X_train, Y_train) + Xt = preprocessor.transform(X_train) + self.assertEqual(Xt.dtype, np.float32) + + # np.float64 + X_train, Y_train, X_test, Y_test = get_dataset("boston") + X_train = X_train.astype(np.float64) + configuration_space = SelectRegressionRates.get_hyperparameter_search_space( + dataset_properties + ) + default = configuration_space.get_default_configuration() + preprocessor = SelectRegressionRates(random_state=1, + **{hp_name: default[hp_name] for hp_name in + default}) + preprocessor.fit(X_train, Y_train) + Xt = preprocessor.transform(X_train) + self.assertEqual(Xt.dtype, np.float64) diff --git a/test/test_pipeline/test_regression.py b/test/test_pipeline/test_regression.py index d8b753680f..e6450d2275 100644 --- a/test/test_pipeline/test_regression.py +++ b/test/test_pipeline/test_regression.py @@ -275,7 +275,7 @@ def test_get_hyperparameter_search_space(self): self.assertIsInstance(cs, ConfigurationSpace) conditions = cs.get_conditions() hyperparameters = cs.get_hyperparameters() - self.assertEqual(140, len(hyperparameters)) + self.assertEqual(143, len(hyperparameters)) self.assertEqual(len(hyperparameters) - 6, len(conditions)) def test_get_hyperparameter_search_space_include_exclude_models(self):