In [2]:
# Wildcard imports stay first and in their original order: a later wildcard
# import may shadow names brought in by an earlier one.
from sklearn.naive_bayes import *
from sklearn.tree import *
from sklearn.svm import *
from sklearn.cluster import *
from sklearn.kernel_approximation import *
from sklearn.ensemble import *

import json
import os

import autosklearn
import autosklearn.classification
import ConfigSpace
from ConfigSpace.read_and_write import json as config_json
# Every sklearn submodule that is accessed as `sklearn.<submodule>` below is
# imported explicitly instead of relying on another package having loaded it.
import sklearn.cluster
import sklearn.datasets
import sklearn.decomposition
import sklearn.discriminant_analysis
import sklearn.ensemble
import sklearn.feature_selection
import sklearn.kernel_approximation
import sklearn.linear_model
import sklearn.naive_bayes
import sklearn.neighbors
import sklearn.neural_network
import sklearn.preprocessing
import sklearn.svm
import sklearn.tree

In [3]:
import autosklearn.pipeline.components.classification as algos_class
import autosklearn.pipeline.components.data_preprocessing as algos_data_preprocessing
import autosklearn.pipeline.components.feature_preprocessing as algos_feature_preprocessing

In [4]:
def get_autosklearn_searchspace():
    """Describe the auto-sklearn component search space as plain Python data.

    Three pipeline stages are emitted, in this order: ``"data-pre-processor"``,
    ``"feature-pre-processor"`` and ``"classifier"``.  Each stage is a dict
    ``{"name": <stage name>, "components": [<component>, ...]}`` and each
    component is a dict with two keys:

    * ``"class"``  -- dotted path (``module.ClassName``) of the scikit-learn
      class that the auto-sklearn component is mapped to below;
    * ``"params"`` -- the result of ``config_json.write`` applied to the
      auto-sklearn component's hyperparameter search space.

    Components appear in the order in which they are listed in the mappings.

    ``sklearn.svm.SVC`` is special-cased: instead of a single component with a
    ``kernel`` hyperparameter, it is listed once per kernel (``"linear"`` plus
    every choice of the wrapper's own ``kernel`` hyperparameter, without
    duplicates).  In each of those entries ``kernel`` is a constant, ``degree``
    is kept only for ``"poly"`` and ``coef0`` only for ``"poly"``/``"sigmoid"``.

    Returns:
        list[dict]: one entry per pipeline stage, as described above.
    """

    def dotted_path(sk_cls):
        """Return ``module.ClassName`` for the given scikit-learn class."""
        return sk_cls.__module__ + "." + sk_cls.__name__

    def get_config_space(cls):
        """Serialise the hyperparameter space of an auto-sklearn component."""
        return config_json.write(cls.get_hyperparameter_search_space())

    def component(sk_cls, params):
        """Build one component entry of a stage."""
        return {"class": dotted_path(sk_cls), "params": params}

    def components_of(mapping):
        """Build the component entries for a whole wrapper -> sklearn mapping."""
        return [
            component(sk_cls, get_config_space(wrapper))
            for wrapper, sk_cls in mapping.items()
        ]

    def svc_components_per_kernel(wrapper, sk_cls):
        """Build one SVC component per kernel, each with ``kernel`` held constant."""
        full_space = wrapper.get_hyperparameter_search_space()
        hyperparameters = full_space.get_hyperparameters()
        offered = full_space.get_hyperparameter("kernel").choices
        # "linear" always comes first; filtering it out of the offered choices
        # prevents a duplicate component if the wrapper ever lists it itself.
        # NOTE(review): presumably "linear" is added here because the
        # LibLinear_SVC mapping is disabled below -- confirm.
        kernels = ["linear"] + [k for k in offered if k != "linear"]

        entries = []
        for kernel in kernels:
            kernel_space = ConfigSpace.ConfigurationSpace()
            for hp in hyperparameters:
                if hp.name == "kernel":
                    # re-added below as a constant
                    continue
                if hp.name == "degree" and kernel != "poly":
                    continue
                if hp.name == "coef0" and kernel not in ("poly", "sigmoid"):
                    continue
                kernel_space.add_hyperparameter(hp)
            kernel_space.add_hyperparameter(
                ConfigSpace.hyperparameters.Constant("kernel", kernel)
            )
            entries.append(component(sk_cls, config_json.write(kernel_space)))
        return entries

    # auto-sklearn component -> scikit-learn estimator it is reported as
    classifiers = {
        algos_class.bernoulli_nb.BernoulliNB: sklearn.naive_bayes.BernoulliNB,
        algos_class.decision_tree.DecisionTree: sklearn.tree.DecisionTreeClassifier,
        algos_class.extra_trees.ExtraTreesClassifier: sklearn.ensemble.ExtraTreesClassifier,
        algos_class.gaussian_nb.GaussianNB: sklearn.naive_bayes.GaussianNB,
        algos_class.gradient_boosting.GradientBoostingClassifier: sklearn.ensemble.GradientBoostingClassifier,
        algos_class.k_nearest_neighbors.KNearestNeighborsClassifier: sklearn.neighbors.KNeighborsClassifier,
        algos_class.lda.LDA: sklearn.discriminant_analysis.LinearDiscriminantAnalysis,
        #algos_class.liblinear_svc.LibLinear_SVC: sklearn.svm.LinearSVC,
        algos_class.libsvm_svc.LibSVM_SVC: sklearn.svm.SVC,
        algos_class.mlp.MLPClassifier: sklearn.neural_network.MLPClassifier,
        algos_class.multinomial_nb.MultinomialNB: sklearn.naive_bayes.MultinomialNB,
        algos_class.passive_aggressive.PassiveAggressive: sklearn.linear_model.PassiveAggressiveClassifier,
        algos_class.qda.QDA: sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis,
        algos_class.random_forest.RandomForest: sklearn.ensemble.RandomForestClassifier,
        algos_class.sgd.SGD: sklearn.linear_model.SGDClassifier
    }
    data_preprocessors = {
        #algos_data_preprocessing.minority_coalescense.minority_coalescer.MinorityCoalescer: algos_data_preprocessing.minority_coalescense.minority_coalescer.MinorityCoalescer,
        algos_data_preprocessing.rescaling.minmax.MinMaxScalerComponent: sklearn.preprocessing.MinMaxScaler,
        algos_data_preprocessing.rescaling.normalize.NormalizerComponent: sklearn.preprocessing.Normalizer,
        algos_data_preprocessing.rescaling.power_transformer.PowerTransformerComponent: sklearn.preprocessing.PowerTransformer,
        algos_data_preprocessing.rescaling.quantile_transformer.QuantileTransformerComponent: sklearn.preprocessing.QuantileTransformer,
        algos_data_preprocessing.rescaling.robust_scaler.RobustScalerComponent: sklearn.preprocessing.RobustScaler,
        algos_data_preprocessing.rescaling.standardize.StandardScalerComponent: sklearn.preprocessing.StandardScaler,
        algos_data_preprocessing.variance_threshold.variance_threshold.VarianceThreshold: sklearn.feature_selection.VarianceThreshold
    }
    feature_preprocessors = {
        algos_feature_preprocessing.fast_ica.FastICA: sklearn.decomposition.FastICA,
        algos_feature_preprocessing.feature_agglomeration.FeatureAgglomeration: sklearn.cluster.FeatureAgglomeration,
        algos_feature_preprocessing.kernel_pca.KernelPCA: sklearn.decomposition.KernelPCA,
        algos_feature_preprocessing.kitchen_sinks.RandomKitchenSinks: sklearn.kernel_approximation.RBFSampler,
        algos_feature_preprocessing.nystroem_sampler.Nystroem: sklearn.kernel_approximation.Nystroem,
        algos_feature_preprocessing.pca.PCA: sklearn.decomposition.PCA,
        algos_feature_preprocessing.polynomial.PolynomialFeatures: sklearn.preprocessing.PolynomialFeatures,
        algos_feature_preprocessing.select_percentile_classification.SelectPercentileClassification: sklearn.feature_selection.SelectPercentile,
        algos_feature_preprocessing.select_rates_classification.SelectClassificationRates: sklearn.feature_selection.GenericUnivariateSelect
    }

    # Classifiers need a loop of their own because SVC expands to several entries.
    svc_wrapper = algos_class.libsvm_svc.LibSVM_SVC
    classifier_components = []
    for wrapper, sk_cls in classifiers.items():
        if wrapper == svc_wrapper:
            classifier_components.extend(svc_components_per_kernel(wrapper, sk_cls))
        else:
            classifier_components.append(component(sk_cls, get_config_space(wrapper)))

    return [
        {"name": "data-pre-processor", "components": components_of(data_preprocessors)},
        {"name": "feature-pre-processor", "components": components_of(feature_preprocessors)},
        {"name": "classifier", "components": classifier_components},
    ]
                
# Build the search space description and write it to singularity/searchspace.json.
SEARCHSPACE_PATH = 'singularity/searchspace.json'

search_space = get_autosklearn_searchspace()
# Create the target directory first; open(..., 'w') raises FileNotFoundError
# when the parent directory is missing.
os.makedirs(os.path.dirname(SEARCHSPACE_PATH), exist_ok=True)
with open(SEARCHSPACE_PATH, 'w') as outfile:
    json.dump(search_space, outfile)