In [67]:
from sklearn.naive_bayes import *
from sklearn.tree import *
from sklearn.svm import *
from sklearn.cluster import *
from sklearn.kernel_approximation import *
from sklearn.gaussian_process import *
import sklearn.discriminant_analysis
import sklearn.neural_network
from sklearn.ensemble import *
import autosklearn
import autosklearn.classification
import sklearn.datasets
import ConfigSpace
import sklearn.linear_model
from ConfigSpace.read_and_write import json as config_json
import json

In [68]:
import autosklearn.pipeline.components.classification as algos_class
import autosklearn.pipeline.components.data_preprocessing as algos_data_preprocessing
import autosklearn.pipeline.components.feature_preprocessing as algos_feature_preprocessing
import autosklearn.pipeline.components.regression as algos_regression

In [69]:
# Peek at the hyperparameter search space of a single auto-sklearn component
# (the BernoulliNB wrapper). The bare expression stays last in the cell so the
# notebook renders the returned configuration space as the cell output.
algos_class.bernoulli_nb.BernoulliNB.get_hyperparameter_search_space()

Configuration space object:
  Hyperparameters:
    alpha, Type: UniformFloat, Range: [0.01, 100.0], Default: 1.0, on log-scale
    fit_prior, Type: Categorical, Choices: {True, False}, Default: True

In [70]:
def is_pipeline_forbidden(pl, forbidden_combos=None):
    """Tell whether a pipeline matches one of the forbidden step combinations.

    Parameters
    ----------
    pl : object with a ``steps`` attribute
        ``steps`` must be an iterable of ``(step_name, estimator)`` pairs,
        e.g. a ``sklearn.pipeline.Pipeline``.
    forbidden_combos : iterable of dict, optional
        Each dict maps a step name to the exact estimator class that has to
        sit at that step for the combination to apply. When omitted, the
        built-in rule list is used (FastICA as "feature-pre-processor"
        together with MultinomialNB as "classifier").

    Returns
    -------
    bool
        True if every (step name, class) pair of at least one combination is
        present in the pipeline, False otherwise.
    """
    if forbidden_combos is None:
        # Built lazily so the sklearn classes are only looked up when the
        # default rule list is actually needed.
        forbidden_combos = [
            {"feature-pre-processor": sklearn.decomposition.FastICA, "classifier": sklearn.naive_bayes.MultinomialNB}
        ]

    # Exact class per step name; a repeated step name keeps the last estimator.
    representation = {step_name: obj.__class__ for step_name, obj in pl.steps}

    # Classes are compared for equality on purpose: a subclass of a forbidden
    # estimator does not count as a match.
    return any(
        all(key in representation and representation[key] == val for key, val in combo.items())
        for combo in forbidden_combos
    )
    
# Smoke test: FastICA feeding MultinomialNB is the built-in forbidden pair,
# so the expression below is expected to display True.
pl = sklearn.pipeline.Pipeline(
    steps=[
        ("feature-pre-processor", sklearn.decomposition.FastICA()),
        ("classifier", sklearn.naive_bayes.MultinomialNB()),
    ]
)
is_pipeline_forbidden(pl)

True

In [71]:
def get_autosklearn_searchspace_classification():
    """Export auto-sklearn's classification search space as plain JSON-able data.

    Every auto-sklearn component wrapper listed below is paired with the
    scikit-learn class it stands for. For each pair, the wrapper's
    ConfigSpace is serialized with ``ConfigSpace.read_and_write.json`` and
    parsed back into a dict.

    Returns
    -------
    list of dict
        Three entries, in this order, named ``"data-pre-processor"``,
        ``"feature-pre-processor"`` and ``"learner"``. Each entry has a
        ``"components"`` list whose items are
        ``{"class": "<module>.<ClassName>", "params": <ConfigSpace as dict>}``.
    """

    def describe_component(sklearn_cls, config_space):
        """Build one search-space entry from an sklearn class and a ConfigSpace."""
        return {
            "class": sklearn_cls.__module__ + "." + sklearn_cls.__name__,
            # config_json.write() yields a JSON string; parse it so the caller
            # can embed it in a larger JSON document.
            "params": json.loads(config_json.write(config_space)),
        }

    def describe_components(mapping):
        """Build the entries for a {auto-sklearn wrapper: sklearn class} mapping, in dict order."""
        return [
            describe_component(sklearn_cls, wrapper.get_hyperparameter_search_space())
            for wrapper, sklearn_cls in mapping.items()
        ]

    # NOTE(review): get_autosklearn_searchspace_regression maps its gradient
    # boosting wrapper to HistGradientBoostingRegressor; confirm that
    # GradientBoostingClassifier (rather than the Hist variant) is intended here.
    classifiers = {
        algos_class.bernoulli_nb.BernoulliNB: sklearn.naive_bayes.BernoulliNB,
        algos_class.decision_tree.DecisionTree: sklearn.tree.DecisionTreeClassifier,
        algos_class.extra_trees.ExtraTreesClassifier: sklearn.ensemble.ExtraTreesClassifier,
        algos_class.gaussian_nb.GaussianNB: sklearn.naive_bayes.GaussianNB,
        algos_class.gradient_boosting.GradientBoostingClassifier: sklearn.ensemble.GradientBoostingClassifier,
        algos_class.k_nearest_neighbors.KNearestNeighborsClassifier: sklearn.neighbors.KNeighborsClassifier,
        algos_class.lda.LDA: sklearn.discriminant_analysis.LinearDiscriminantAnalysis,
        #algos_class.liblinear_svc.LibLinear_SVC: sklearn.svm.LinearSVC,
        algos_class.libsvm_svc.LibSVM_SVC: sklearn.svm.SVC,
        algos_class.mlp.MLPClassifier: sklearn.neural_network.MLPClassifier,
        algos_class.multinomial_nb.MultinomialNB: sklearn.naive_bayes.MultinomialNB,
        #algos_class.passive_aggressive.PassiveAggressive: sklearn.linear_model.PassiveAggressiveClassifier,
        algos_class.qda.QDA: sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis,
        algos_class.random_forest.RandomForest: sklearn.ensemble.RandomForestClassifier,
        #algos_class.sgd.SGD: sklearn.linear_model.SGDClassifier
    }
    data_preprocessors = {
        #algos_data_preprocessing.minority_coalescense.minority_coalescer.MinorityCoalescer: algos_data_preprocessing.minority_coalescense.minority_coalescer.MinorityCoalescer,
        algos_data_preprocessing.rescaling.minmax.MinMaxScalerComponent: sklearn.preprocessing.MinMaxScaler,
        algos_data_preprocessing.rescaling.normalize.NormalizerComponent: sklearn.preprocessing.Normalizer,
        algos_data_preprocessing.rescaling.power_transformer.PowerTransformerComponent: sklearn.preprocessing.PowerTransformer,
        algos_data_preprocessing.rescaling.quantile_transformer.QuantileTransformerComponent: sklearn.preprocessing.QuantileTransformer,
        algos_data_preprocessing.rescaling.robust_scaler.RobustScalerComponent: sklearn.preprocessing.RobustScaler,
        algos_data_preprocessing.rescaling.standardize.StandardScalerComponent: sklearn.preprocessing.StandardScaler,
        algos_data_preprocessing.variance_threshold.variance_threshold.VarianceThreshold: sklearn.feature_selection.VarianceThreshold
    }
    feature_preprocessors = {
        algos_feature_preprocessing.fast_ica.FastICA: sklearn.decomposition.FastICA,
        algos_feature_preprocessing.feature_agglomeration.FeatureAgglomeration: sklearn.cluster.FeatureAgglomeration,
        algos_feature_preprocessing.kernel_pca.KernelPCA: sklearn.decomposition.KernelPCA,
        algos_feature_preprocessing.kitchen_sinks.RandomKitchenSinks: sklearn.kernel_approximation.RBFSampler,
        algos_feature_preprocessing.nystroem_sampler.Nystroem: sklearn.kernel_approximation.Nystroem,
        algos_feature_preprocessing.pca.PCA: sklearn.decomposition.PCA,
        algos_feature_preprocessing.polynomial.PolynomialFeatures: sklearn.preprocessing.PolynomialFeatures,
        algos_feature_preprocessing.select_percentile_classification.SelectPercentileClassification: sklearn.feature_selection.SelectPercentile,
        algos_feature_preprocessing.select_rates_classification.SelectClassificationRates: sklearn.feature_selection.GenericUnivariateSelect
    }

    # classifiers
    classifier_search_space = []
    for wrapper, sklearn_cls in classifiers.items():
        if wrapper == algos_class.libsvm_svc.LibSVM_SVC:
            # The SVC is unrolled into one entry per kernel: "linear" plus the
            # wrapper's own kernel choices. In each entry the kernel is pinned
            # as a Constant, "degree" is kept only for the poly kernel and
            # "coef0" only for the poly and sigmoid kernels.
            config_space = wrapper.get_hyperparameter_search_space()
            kernel_pm = config_space.get_hyperparameter("kernel")
            for kernel in ["linear"] + list(kernel_pm.choices):
                config_space_for_kernel = ConfigSpace.ConfigurationSpace()
                for hp in config_space.get_hyperparameters():
                    if hp.name == "kernel":
                        continue
                    if hp.name == "degree" and kernel != "poly":
                        continue
                    if hp.name == "coef0" and kernel not in ["poly", "sigmoid"]:
                        continue
                    config_space_for_kernel.add_hyperparameter(hp)
                config_space_for_kernel.add_hyperparameter(ConfigSpace.hyperparameters.Constant("kernel", kernel))
                classifier_search_space.append(describe_component(sklearn_cls, config_space_for_kernel))
        else:
            classifier_search_space.append(
                describe_component(sklearn_cls, wrapper.get_hyperparameter_search_space())
            )

    return [
        {"name": "data-pre-processor", "components": describe_components(data_preprocessors)},
        {"name": "feature-pre-processor", "components": describe_components(feature_preprocessors)},
        {"name": "learner", "components": classifier_search_space},
    ]
                
# Build the classification search space and persist it, pretty-printed with a
# four-space indent, next to the other files in singularity/.
search_space = get_autosklearn_searchspace_classification()
with open('singularity/searchspace-classification.json', mode='w') as outfile:
    outfile.write(json.dumps(search_space, indent=4))

In [84]:
def get_autosklearn_searchspace_regression():
    """Export auto-sklearn's regression search space as plain JSON-able data.

    Every auto-sklearn component wrapper listed below is paired with the
    scikit-learn class it stands for. For each pair, the wrapper's
    ConfigSpace is serialized with ``ConfigSpace.read_and_write.json`` and
    parsed back into a dict.

    Returns
    -------
    list of dict
        Three entries, in this order, named ``"data-pre-processor"``,
        ``"feature-pre-processor"`` and ``"learner"``. Each entry has a
        ``"components"`` list whose items are
        ``{"class": "<module>.<ClassName>", "params": <ConfigSpace as dict>}``.
    """

    def describe_component(sklearn_cls, config_space):
        """Build one search-space entry from an sklearn class and a ConfigSpace."""
        return {
            "class": sklearn_cls.__module__ + "." + sklearn_cls.__name__,
            # config_json.write() yields a JSON string; parse it so the caller
            # can embed it in a larger JSON document.
            "params": json.loads(config_json.write(config_space)),
        }

    def describe_components(mapping):
        """Build the entries for a {auto-sklearn wrapper: sklearn class} mapping, in dict order."""
        return [
            describe_component(sklearn_cls, wrapper.get_hyperparameter_search_space())
            for wrapper, sklearn_cls in mapping.items()
        ]

    regressors = {
        algos_regression.adaboost.AdaboostRegressor: sklearn.ensemble.AdaBoostRegressor,
        algos_regression.ard_regression.ARDRegression: sklearn.linear_model.ARDRegression,
        algos_regression.decision_tree.DecisionTree: sklearn.tree.DecisionTreeRegressor,
        algos_regression.extra_trees.ExtraTreesRegressor: sklearn.ensemble.ExtraTreesRegressor,
        algos_regression.gaussian_process.GaussianProcess: sklearn.gaussian_process.GaussianProcessRegressor,
        algos_regression.gradient_boosting.GradientBoosting: sklearn.ensemble.HistGradientBoostingRegressor,
        algos_regression.k_nearest_neighbors.KNearestNeighborsRegressor : sklearn.neighbors.KNeighborsRegressor,
        algos_regression.liblinear_svr.LibLinear_SVR: sklearn.svm.LinearSVR,
        algos_regression.libsvm_svr.LibSVM_SVR: sklearn.svm.SVR,
        algos_regression.mlp.MLPRegressor: sklearn.neural_network.MLPRegressor,
        algos_regression.random_forest.RandomForest: sklearn.ensemble.RandomForestRegressor,
        algos_regression.sgd.SGD: sklearn.linear_model.SGDRegressor
    }
    data_preprocessors = {
        algos_data_preprocessing.rescaling.minmax.MinMaxScalerComponent: sklearn.preprocessing.MinMaxScaler,
        algos_data_preprocessing.rescaling.normalize.NormalizerComponent: sklearn.preprocessing.Normalizer,
        algos_data_preprocessing.rescaling.power_transformer.PowerTransformerComponent: sklearn.preprocessing.PowerTransformer,
        algos_data_preprocessing.rescaling.quantile_transformer.QuantileTransformerComponent: sklearn.preprocessing.QuantileTransformer,
        algos_data_preprocessing.rescaling.robust_scaler.RobustScalerComponent: sklearn.preprocessing.RobustScaler,
        algos_data_preprocessing.rescaling.standardize.StandardScalerComponent: sklearn.preprocessing.StandardScaler,
        algos_data_preprocessing.variance_threshold.variance_threshold.VarianceThreshold: sklearn.feature_selection.VarianceThreshold
    }
    feature_preprocessors = {
        algos_feature_preprocessing.fast_ica.FastICA: sklearn.decomposition.FastICA,
        algos_feature_preprocessing.feature_agglomeration.FeatureAgglomeration: sklearn.cluster.FeatureAgglomeration,
        algos_feature_preprocessing.kernel_pca.KernelPCA: sklearn.decomposition.KernelPCA,
        algos_feature_preprocessing.kitchen_sinks.RandomKitchenSinks: sklearn.kernel_approximation.RBFSampler,
        algos_feature_preprocessing.nystroem_sampler.Nystroem: sklearn.kernel_approximation.Nystroem,
        algos_feature_preprocessing.pca.PCA: sklearn.decomposition.PCA,
        algos_feature_preprocessing.polynomial.PolynomialFeatures: sklearn.preprocessing.PolynomialFeatures,
        # Fixed: this used to reference the *classification* percentile
        # selector, so the regression search space advertised that wrapper's
        # hyperparameters.
        # NOTE(review): assumes the installed auto-sklearn ships
        # select_percentile_regression.SelectPercentileRegression - confirm.
        algos_feature_preprocessing.select_percentile_regression.SelectPercentileRegression: sklearn.feature_selection.SelectPercentile,
    }

    return [
        {"name": "data-pre-processor", "components": describe_components(data_preprocessors)},
        {"name": "feature-pre-processor", "components": describe_components(feature_preprocessors)},
        {"name": "learner", "components": describe_components(regressors)},
    ]
                
# Build the regression search space and persist it, pretty-printed with a
# four-space indent, next to the other files in singularity/.
search_space = get_autosklearn_searchspace_regression()
with open('singularity/searchspace-regression.json', mode='w') as outfile:
    outfile.write(json.dumps(search_space, indent=4))

In [24]:
from sklearn.model_selection import BaseCrossValidator

class KCV(BaseCrossValidator):
    """Cross-validator stub that reports a fixed number of splits.

    NOTE(review): only ``get_n_splits`` is defined here. No code that
    generates train/test indices is visible in this class, so ``split``
    depends entirely on what the base class provides - confirm before use.
    """

    def __init__(self, k):
        """Remember the number of splits ``k``."""
        self.k = k

    def get_n_splits(self, X=None, y=None, groups=None):
        """Return the configured number of splits; ``X``, ``y`` and ``groups`` are ignored."""
        # Fixed: this used to return the bare name ``k``, which is not defined
        # in this scope; the value lives on the instance.
        return self.k

In [33]:
from sklearn.model_selection import LeaveOneOut
# Fit an AutoSklearnClassifier on a train split of the breast-cancer data,
# passing the LeaveOneOut class itself as the resampling strategy.
# NOTE(review): the output recorded for this cell is a crash. Its traceback
# shows auto-sklearn calling get_n_splits() without ``X``, which the splitter
# passed here requires. A different resampling strategy is needed before this
# cell can run to completion.
automl = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=120,
    per_run_time_limit=30,
    #tmp_folder='/tmp/autosklearn_resampling_example_tmp',
    #output_folder='/tmp/autosklearn_resampling_example_out',
    #disable_evaluator_output=False,
    resampling_strategy=LeaveOneOut
    #resampling_strategy_arguments={'k': 10},
)

# Fixed random_state so the train/test partition is the same on every run.
X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = \
    sklearn.model_selection.train_test_split(X, y, random_state=1)
automl.fit(X_train, y_train)


Perhaps you already have a cluster running?
Hosting the HTTP server on port 46823 instead
  http_address["port"], self.http_server.port


ValueError: Dummy prediction failed with run state StatusType.CRASHED and additional output: {'traceback': 'Traceback (most recent call last):\n  File "/home/felix/anaconda3/lib/python3.7/site-packages/autosklearn/evaluation/__init__.py", line 32, in fit_predict_try_except_decorator\n    return ta(queue=queue, **kwargs)\n  File "/home/felix/anaconda3/lib/python3.7/site-packages/autosklearn/evaluation/train_evaluator.py", line 1256, in eval_cv\n    budget_type=budget_type,\n  File "/home/felix/anaconda3/lib/python3.7/site-packages/autosklearn/evaluation/train_evaluator.py", line 185, in __init__\n    groups=self.resampling_strategy_args.get(\'groups\')\nTypeError: get_n_splits() missing 1 required positional argument: \'X\'\n', 'error': 'TypeError("get_n_splits() missing 1 required positional argument: \'X\'")', 'configuration_origin': 'DUMMY'}.

In [54]:
import numpy as np
# Iris data with a reproducible train/test partition.
X, y = sklearn.datasets.load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, random_state=1
)

# Defined for experimentation only: neither value is handed to the classifier
# below, so auto-sklearn runs with its default resampling.
resampling_strategy = sklearn.model_selection.LeaveOneOut
resampling_strategy_arguments = dict()

automl = autosklearn.classification.AutoSklearnClassifier(
    disable_evaluator_output=False,
    per_run_time_limit=30,
    time_left_for_this_task=120,
)
print("Running auto-sklearn")
automl.fit(X_train, y_train)

Running auto-sklearn


TypeError: ('not an exception', None)

In [53]:
# Fit auto-sklearn with a user-defined split: the PredefinedSplit class is
# passed together with the keyword arguments to build it from.
resampling_strategy = sklearn.model_selection.PredefinedSplit
# Fixed: test_fold needs ONE fold id per training sample. The previous value,
# np.where(...)[0], was the list of row positions where the condition holds,
# which has the wrong length and the wrong meaning. Here every sample is
# labelled 1 if its first feature is below the column mean and 0 otherwise,
# so each of the two groups serves once as the validation fold.
resampling_strategy_arguments = {
    'test_fold': (X_train[:, 0] < np.mean(X_train[:, 0])).astype(int)
}

# NOTE(review): X_train / y_train come from an earlier cell. In file order
# that cell loads iris, while dataset_name below says 'breast_cancer' -
# confirm which dataset is meant.
# NOTE(review): the recorded failure "TypeError: ('not an exception', None)"
# also appears for the cell that passes no resampling strategy at all, so it
# is presumably unrelated to the split definition - verify in a fresh kernel.
automl = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=120,
    per_run_time_limit=30,
    tmp_folder='/tmp/autosklearn_resampling_example_tmp',
    output_folder='/tmp/autosklearn_resampling_example_out',
    disable_evaluator_output=False,
    resampling_strategy=resampling_strategy,
    resampling_strategy_arguments=resampling_strategy_arguments,
)
automl.fit(X_train, y_train, dataset_name='breast_cancer')

TypeError: ('not an exception', None)