From 193c9007c2176c3dcac430ff507243891d7acfb1 Mon Sep 17 00:00:00 2001 From: "luka.pecnik" Date: Tue, 17 Nov 2020 13:10:29 +0100 Subject: [PATCH 1/4] bug fixes, code cleanup, pipeline updates --- niaaml/feature_selection_algorithms/jDEFSTH.py | 18 +++++++----------- niaaml/pipeline.py | 18 +++++++++++++++++- niaaml/pipeline_optimizer.py | 8 +++++--- 3 files changed, 29 insertions(+), 15 deletions(-) diff --git a/niaaml/feature_selection_algorithms/jDEFSTH.py b/niaaml/feature_selection_algorithms/jDEFSTH.py index fe576c2..cba9d39 100644 --- a/niaaml/feature_selection_algorithms/jDEFSTH.py +++ b/niaaml/feature_selection_algorithms/jDEFSTH.py @@ -4,7 +4,6 @@ from NiaPy.benchmarks import Benchmark from sklearn.model_selection import train_test_split from sklearn.linear_model import LogisticRegression -from sklearn.datasets import load_wine __all__ = [ 'jDEFSTH' @@ -39,7 +38,7 @@ def _set_parameters(self, **kwargs): """ return - def final_output(self, sol): + def __final_output(self, sol): selected = [] threshold = sol[len(sol)-1] for i in range(len(sol)-1): @@ -50,14 +49,16 @@ def final_output(self, sol): return selected def select_features(self, x, y, **kwargs): - num_features = X.shape[1] + num_features = x.shape[1] algo = SelfAdaptiveDifferentialEvolution(NP=10, F=0.5, F_l=0.0, F_u=2.0, Tao1=0.9, CR=0.5, Tao2=0.45) - task = StoppingTask(D=num_features+1, nFES=1000, benchmark=FeatureSelectionThreshold(X, y)) + task = StoppingTask(D=num_features+1, nFES=1000, benchmark=FeatureSelectionThreshold(x, y)) best = algo.run(task) - return self.final_output(best[0]) - + return self.__final_output(best[0]) class FeatureSelectionThreshold(Benchmark): + r"""TODO + """ + def __init__(self, X, y): Benchmark.__init__(self, 0.0, 1.0) self.train_X, self.test_X, self.train_y, self.test_y = train_test_split( @@ -93,8 +94,3 @@ def evaluate(D, sol): return fitness return evaluate - -#test -#X, y = load_wine(return_X_y=True) -#a = jDEFSTH() -#a.select_features(X,y) diff --git a/niaaml/pipeline.py b/niaaml/pipeline.py index dc16ba3..622652c 100644 --- a/niaaml/pipeline.py +++ b/niaaml/pipeline.py @@ -1,3 +1,5 @@ +from sklearn.model_selection import train_test_split + __all__ = [ 'Pipeline' ] @@ -42,4 +44,18 @@ def _set_parameters(self, data, feature_selection_algorithm, preprocessing_algor self.__data = data self.__feature_selection_algorithm = feature_selection_algorithm self.__preprocessing_algorithm = preprocessing_algorithm - self.__classifier = classifier \ No newline at end of file + self.__classifier = classifier + + def optimize(self, population_size, number_of_evaluations): + r"""TODO + """ + X = self.__feature_selection_algorithm.select_features(self.__data.get_x(), self.__data.get_y()) + + if self.__preprocessing_algorithm is not None: + X = self.__preprocessing_algorithm.process(X) + + train_X, test_X, train_y, test_y = train_test_split( + X, self.__data.get_y(), test_size=0.2) + + self.__classifier.fit(train_X, train_y) + self.__classifier.predict(test_X) diff --git a/niaaml/pipeline_optimizer.py b/niaaml/pipeline_optimizer.py index 50006bd..b2f5c96 100644 --- a/niaaml/pipeline_optimizer.py +++ b/niaaml/pipeline_optimizer.py @@ -101,9 +101,11 @@ def __float_to_instance(self, value, collection, factory): name = collection[np.int(np.round(value * (len(collection) - 1)))] return factory.get_result(name) if name is not None else None - def optimize_pipeline(self, n_p, n_fes): - algo = ParticleSwarmOptimization(NP=n_p) # TODO define InitPopFunc - task = StoppingTask(D=3, nFES=n_fes, benchmark=PipelineOptimizerBenchmark()) + def run(self, pipeline_population_size, pipeline_classifier_population_size, number_of_pipeline_evaluations, number_of_classifier_evaluations): + r"""TODO + """ + algo = ParticleSwarmOptimization(NP=pipeline_population_size) # TODO define InitPopFunc + task = StoppingTask(D=3, nFES=number_of_pipeline_evaluations, benchmark=PipelineOptimizerBenchmark()) best = algo.run(task) return best From 8d9606a77b2a349e2bc20ad889a3a75afdc4e2ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luka=20Pe=C4=8Dnik?= Date: Tue, 17 Nov 2020 19:11:19 +0100 Subject: [PATCH 2/4] pipeline classes progress --- niaaml/classifiers/ada_boost.py | 2 +- niaaml/classifiers/bagging.py | 2 +- .../classifiers/extremely_randomized_trees.py | 2 +- niaaml/classifiers/linear_svc_classifier.py | 2 +- niaaml/classifiers/multi_layer_perceptron.py | 2 +- .../classifiers/random_forest_classifier.py | 2 +- .../feature_selection_algorithms/jDEFSTH.py | 4 +- niaaml/pipeline.py | 13 ++- niaaml/pipeline_optimizer.py | 97 ++++++++++--------- 9 files changed, 71 insertions(+), 55 deletions(-) diff --git a/niaaml/classifiers/ada_boost.py b/niaaml/classifiers/ada_boost.py index 6bc0dd5..2c55520 100644 --- a/niaaml/classifiers/ada_boost.py +++ b/niaaml/classifiers/ada_boost.py @@ -56,4 +56,4 @@ def predict(self, x, **kwargs): Returns: numpy.array[int]: n predicted classes. """ - self.__adaBoost.predict(x) + return self.__adaBoost.predict(x) diff --git a/niaaml/classifiers/bagging.py b/niaaml/classifiers/bagging.py index 042ac1e..1788090 100644 --- a/niaaml/classifiers/bagging.py +++ b/niaaml/classifiers/bagging.py @@ -57,4 +57,4 @@ def predict(self, x, **kwargs): Returns: numpy.array[int]: n predicted classes. """ - self.__baggingClassifier.predict(x) + return self.__baggingClassifier.predict(x) diff --git a/niaaml/classifiers/extremely_randomized_trees.py b/niaaml/classifiers/extremely_randomized_trees.py index f60e71a..b85ecf7 100644 --- a/niaaml/classifiers/extremely_randomized_trees.py +++ b/niaaml/classifiers/extremely_randomized_trees.py @@ -57,4 +57,4 @@ def predict(self, x, **kwargs): Returns: numpy.array[int]: n predicted classes. """ - self.__extraTreesClassifier.predict(x) + return self.__extraTreesClassifier.predict(x) diff --git a/niaaml/classifiers/linear_svc_classifier.py b/niaaml/classifiers/linear_svc_classifier.py index 0325c1c..fdf86fc 100644 --- a/niaaml/classifiers/linear_svc_classifier.py +++ b/niaaml/classifiers/linear_svc_classifier.py @@ -56,4 +56,4 @@ def predict(self, x, **kwargs): Returns: numpy.array[int]: n predicted classes. """ - self.__linearSVC.predict(x) + return self.__linearSVC.predict(x) diff --git a/niaaml/classifiers/multi_layer_perceptron.py b/niaaml/classifiers/multi_layer_perceptron.py index 2b30fa3..2fad8dd 100644 --- a/niaaml/classifiers/multi_layer_perceptron.py +++ b/niaaml/classifiers/multi_layer_perceptron.py @@ -57,4 +57,4 @@ def predict(self, x, **kwargs): Returns: numpy.array[int]: n predicted classes. """ - self.__multiLayerPerceptron.predict(x) + return self.__multiLayerPerceptron.predict(x) diff --git a/niaaml/classifiers/random_forest_classifier.py b/niaaml/classifiers/random_forest_classifier.py index 999be2a..1a1556c 100644 --- a/niaaml/classifiers/random_forest_classifier.py +++ b/niaaml/classifiers/random_forest_classifier.py @@ -55,4 +55,4 @@ def predict(self, x, **kwargs): Returns: numpy.array[int]: n predicted classes. """ - self.__randomForestClassifier.predict(x) + return self.__randomForestClassifier.predict(x) diff --git a/niaaml/feature_selection_algorithms/jDEFSTH.py b/niaaml/feature_selection_algorithms/jDEFSTH.py index cba9d39..66eb15e 100644 --- a/niaaml/feature_selection_algorithms/jDEFSTH.py +++ b/niaaml/feature_selection_algorithms/jDEFSTH.py @@ -51,11 +51,11 @@ def __final_output(self, sol): def select_features(self, x, y, **kwargs): num_features = x.shape[1] algo = SelfAdaptiveDifferentialEvolution(NP=10, F=0.5, F_l=0.0, F_u=2.0, Tao1=0.9, CR=0.5, Tao2=0.45) - task = StoppingTask(D=num_features+1, nFES=1000, benchmark=FeatureSelectionThreshold(x, y)) + task = StoppingTask(D=num_features+1, nFES=1000, benchmark=_FeatureSelectionThreshold(x, y)) best = algo.run(task) return self.__final_output(best[0]) -class FeatureSelectionThreshold(Benchmark): +class _FeatureSelectionThreshold(Benchmark): r"""TODO """ diff --git a/niaaml/pipeline.py b/niaaml/pipeline.py index 622652c..f8d549d 100644 --- a/niaaml/pipeline.py +++ b/niaaml/pipeline.py @@ -1,4 +1,5 @@ from sklearn.model_selection import train_test_split +from sklearn.metrics import accuracy_score __all__ = [ 'Pipeline' @@ -49,13 +50,19 @@ def _set_parameters(self, data, feature_selection_algorithm, preprocessing_algor def optimize(self, population_size, number_of_evaluations): r"""TODO """ - X = self.__feature_selection_algorithm.select_features(self.__data.get_x(), self.__data.get_y()) + # TODO implement optimization process + X = self.__data.get_x() + + if self.__feature_selection_algorithm is not None: + X = self.__feature_selection_algorithm.select_features(self.__data.get_x(), self.__data.get_y()) if self.__preprocessing_algorithm is not None: - X = self.__preprocessing_algorithm.process(X) + X = self.__preprocessing_algorithm.process(X) train_X, test_X, train_y, test_y = train_test_split( X, self.__data.get_y(), test_size=0.2) self.__classifier.fit(train_X, train_y) - self.__classifier.predict(test_X) + predictions = self.__classifier.predict(test_X) + + return accuracy_score(test_y, predictions) diff --git a/niaaml/pipeline_optimizer.py b/niaaml/pipeline_optimizer.py index b2f5c96..374b8d0 100644 --- a/niaaml/pipeline_optimizer.py +++ b/niaaml/pipeline_optimizer.py @@ -3,15 +3,21 @@ from niaaml.classifiers import ClassifierFactory from niaaml.feature_selection_algorithms import FeatureSelectionAlgorithmFactory from niaaml.preprocessing_algorithms import PreprocessingAlgorithmFactory -from NiaPy.task import StoppingTask +from NiaPy.task import StoppingTask, OptimizationType from NiaPy.benchmarks import Benchmark from NiaPy.algorithms.basic import ParticleSwarmOptimization __all__ = [ - 'PipelineOptimizer', - 'PipelineOptimizerBenchmark' + 'PipelineOptimizer' ] +def _initialize_population(task, NP, rnd=np.random, **kwargs): + r"""TODO + """ + pop = np.random.uniform(size=(NP, 3)) + fpop = np.apply_along_axis(task.eval, 1, pop) + return pop, fpop + class PipelineOptimizer(): r"""Optimization task that finds the best classification pipeline according to the given input. @@ -31,7 +37,6 @@ class PipelineOptimizer(): __classifiers (Iterable[Classifier]): Array of possible classifiers. __pipelines_numeric (numpy.ndarray[float]): Numeric representation of pipelines. __pipelines (Iterable[Pipeline]): Actual pipelines. - __pop_size (int): Number of individuals in the pipeline optimizer's population. __classifier_factory (ClassifierFactory): Factory for classifier instances. __preprocessing_algorithm_factory (ClassifierFactory): Factory for preprocessing algorithm instances. @@ -42,21 +47,15 @@ class PipelineOptimizer(): __preprocessing_algorithms = None __classifiers = None - __pop_size = None __pipelines_numeric = None __pipelines = None - __classifier_factory = ClassifierFactory() - __preprocessing_algorithm_factory = PreprocessingAlgorithmFactory() - __feature_selection_algorithm_factory = FeatureSelectionAlgorithmFactory() - def __init__(self, **kwargs): r"""Initialize task. """ self._set_parameters(**kwargs) - self.__initialize_population(self.__pop_size) - def _set_parameters(self, data, feature_selection_algorithms, preprocessing_algorithms, classifiers, pop_size, **kwargs): + def _set_parameters(self, data, feature_selection_algorithms, preprocessing_algorithms, classifiers, **kwargs): r"""Set the parameters/arguments of the task. Arguments: @@ -64,57 +63,67 @@ def _set_parameters(self, data, feature_selection_algorithms, preprocessing_algo feature_selection_algorithms (Iterable[FeatureSelectionAlgorithm]): Array of possible feature selection algorithms. preprocessing_algorithms (Iterable[PreprocessingAlgorithm]): Array of possible preprocessing algorithms. classifiers (Iterable[Classificator]): Array of possible classifiers. - pop_size (int): Number of individuals in the pipeline optimizer's population. """ self.__data = data self.__preprocessing_algorithms = preprocessing_algorithms - try: - self.__preprocessing_algorithms.index(None) - except: - self.__preprocessing_algorithms.insert(0, None) + if self.__preprocessing_algorithms is not None: + try: + self.__preprocessing_algorithms.index(None) + except: + self.__preprocessing_algorithms.insert(0, None) self.__classifiers = classifiers self.__feature_selection_algorithms = feature_selection_algorithms - self.__pop_size = pop_size - def __initialize_population(self, pop_size): - r"""Initialize population of pipelines to find the best setup. + def run(self, pipeline_population_size, pipeline_classifier_population_size, number_of_pipeline_evaluations, number_of_classifier_evaluations): + r"""TODO + """ + algo = ParticleSwarmOptimization(NP=pipeline_population_size, InitPopFunc=_initialize_population) + task = StoppingTask(D=3, nFES=number_of_pipeline_evaluations, benchmark=_PipelineOptimizerBenchmark(self.__data, self.__feature_selection_algorithms, self.__preprocessing_algorithms, self.__classifiers), optType=OptimizationType.MAXIMIZATION) + best = algo.run(task) + return best + +class _PipelineOptimizerBenchmark(Benchmark): + r"""TODO + """ + __data = None + __feature_selection_algorithms = None + __preprocessing_algorithms = None + __classifiers = None + + __classifier_factory = ClassifierFactory() + __preprocessing_algorithm_factory = PreprocessingAlgorithmFactory() + __feature_selection_algorithm_factory = FeatureSelectionAlgorithmFactory() - Arguments: - pop_size (int): Number of individuals. + def __init__(self, data, feature_selection_algorithms, preprocessing_algorithms, classifiers): + r"""TODO """ + self.__data = data + self.__feature_selection_algorithms = feature_selection_algorithms + self.__preprocessing_algorithms = preprocessing_algorithms + self.__classifiers = classifiers + Benchmark.__init__(self, 0.0, 1.0) - self.__pipelines_numeric = np.random.uniform(size=(pop_size, 3)) - self.__pipelines = [ - Pipeline( - data=self.__data, - feature_selection_algorithm=self.__float_to_instance(i[0], self.__feature_selection_algorithms, self.__feature_selection_algorithm_factory) if self.__feature_selection_algorithms is not None and len(self.__feature_selection_algorithms) > 0 else None, - preprocessing_algorithm=self.__float_to_instance(i[1], self.__preprocessing_algorithms, self.__preprocessing_algorithm_factory) if self.__preprocessing_algorithms is not None and len(self.__preprocessing_algorithms) > 0 else None, - classifier=self.__float_to_instance(i[2], self.__classifiers, self.__classifier_factory) - ) for i in self.__pipelines_numeric - ] - def __float_to_instance(self, value, collection, factory): r"""TODO """ name = collection[np.int(np.round(value * (len(collection) - 1)))] return factory.get_result(name) if name is not None else None - - def run(self, pipeline_population_size, pipeline_classifier_population_size, number_of_pipeline_evaluations, number_of_classifier_evaluations): - r"""TODO - """ - algo = ParticleSwarmOptimization(NP=pipeline_population_size) # TODO define InitPopFunc - task = StoppingTask(D=3, nFES=number_of_pipeline_evaluations, benchmark=PipelineOptimizerBenchmark()) - best = algo.run(task) - return best - -class PipelineOptimizerBenchmark(Benchmark): - def __init__(self): - Benchmark.__init__(self, 0.0, 1.0) def function(self): + r"""TODO + """ # TODO def evaluate(D, sol): - return 0.0 + r"""TODO + """ + pipeline = Pipeline( + data=self.__data, + feature_selection_algorithm=self.__float_to_instance(sol[0], self.__feature_selection_algorithms, self.__feature_selection_algorithm_factory) if self.__feature_selection_algorithms is not None and len(self.__feature_selection_algorithms) > 0 else None, + preprocessing_algorithm=self.__float_to_instance(sol[1], self.__preprocessing_algorithms, self.__preprocessing_algorithm_factory) if self.__preprocessing_algorithms is not None and len(self.__preprocessing_algorithms) > 0 else None, + classifier=self.__float_to_instance(sol[2], self.__classifiers, self.__classifier_factory) + ) + return pipeline.optimize(0, 0) + return evaluate \ No newline at end of file From 117d1b5d7e5b4245c88723972d5442d7591135dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luka=20Pe=C4=8Dnik?= Date: Wed, 18 Nov 2020 10:07:36 +0100 Subject: [PATCH 3/4] factories fix, get opt. algorithm using name --- niaaml/classifiers/utility.py | 13 ++++++- .../feature_selection_algorithms/utility.py | 13 ++++++- niaaml/pipeline_optimizer.py | 34 ++++++++++++++----- niaaml/preprocessing_algorithms/utility.py | 13 ++++++- niaaml/utilities.py | 6 ++-- 5 files changed, 65 insertions(+), 14 deletions(-) diff --git a/niaaml/classifiers/utility.py b/niaaml/classifiers/utility.py index c46669d..db6cde0 100644 --- a/niaaml/classifiers/utility.py +++ b/niaaml/classifiers/utility.py @@ -25,4 +25,15 @@ def _set_parameters(self, **kwargs): 'LinearSVCClassifier': classifiers.LinearSVCClassifier, 'MultiLayerPerceptron': classifiers.MultiLayerPerceptron, 'RandomForestClassifier': classifiers.RandomForestClassifier - } \ No newline at end of file + } + + def get_result(self, name): + r"""Get the resulting classifier. + + Arguments: + name (str): String that represents the classifier. + + Returns: + Classifier: Classifier according to the given name. + """ + return Factory.get_result(self, name).getRandomInstance() diff --git a/niaaml/feature_selection_algorithms/utility.py b/niaaml/feature_selection_algorithms/utility.py index 8126e1b..f984874 100644 --- a/niaaml/feature_selection_algorithms/utility.py +++ b/niaaml/feature_selection_algorithms/utility.py @@ -23,4 +23,15 @@ def _set_parameters(self, **kwargs): 'SelectKBestChi2': feature_selection_algorithms.SelectKBestChi2, 'SelectPercentileChi2': feature_selection_algorithms.SelectPercentileChi2, 'VarianceThresholdFeatureSelection': feature_selection_algorithms.VarianceThresholdFeatureSelection - } \ No newline at end of file + } + + def get_result(self, name): + r"""Get the resulting feature selection algorithm. + + Arguments: + name (str): String that represents the feature selection algorithm. + + Returns: + FeatureSelectionAlgorithm: FeatureSelectionAlgorithm according to the given name. + """ + return Factory.get_result(self, name)() \ No newline at end of file diff --git a/niaaml/pipeline_optimizer.py b/niaaml/pipeline_optimizer.py index 374b8d0..004de9f 100644 --- a/niaaml/pipeline_optimizer.py +++ b/niaaml/pipeline_optimizer.py @@ -5,7 +5,7 @@ from niaaml.preprocessing_algorithms import PreprocessingAlgorithmFactory from NiaPy.task import StoppingTask, OptimizationType from NiaPy.benchmarks import Benchmark -from NiaPy.algorithms.basic import ParticleSwarmOptimization +from NiaPy.algorithms.utility import AlgorithmUtility __all__ = [ 'PipelineOptimizer' @@ -18,7 +18,7 @@ def _initialize_population(task, NP, rnd=np.random, **kwargs): fpop = np.apply_along_axis(task.eval, 1, pop) return pop, fpop -class PipelineOptimizer(): +class PipelineOptimizer: r"""Optimization task that finds the best classification pipeline according to the given input. Date: @@ -41,6 +41,9 @@ class PipelineOptimizer(): __classifier_factory (ClassifierFactory): Factory for classifier instances. __preprocessing_algorithm_factory (ClassifierFactory): Factory for preprocessing algorithm instances. __feature_selection_algorithm_factory (ClassifierFactory): Factory for feature selection algorithm instances. + + __optimization_algorithm (str): Name of the optimization algorithm to use. + __niapy_algorithm_utility (AlgorithmUtility): Utility class used to get an optimization algorithm. """ __data = None __feature_selection_algorithms = None @@ -50,12 +53,15 @@ class PipelineOptimizer(): __pipelines_numeric = None __pipelines = None + __optimization_algorithm = None + __niapy_algorithm_utility = AlgorithmUtility() + def __init__(self, **kwargs): r"""Initialize task. """ self._set_parameters(**kwargs) - def _set_parameters(self, data, feature_selection_algorithms, preprocessing_algorithms, classifiers, **kwargs): + def _set_parameters(self, data, feature_selection_algorithms, preprocessing_algorithms, classifiers, optimization_algorithm, **kwargs): r"""Set the parameters/arguments of the task. Arguments: @@ -63,8 +69,10 @@ def _set_parameters(self, data, feature_selection_algorithms, preprocessing_algo feature_selection_algorithms (Iterable[FeatureSelectionAlgorithm]): Array of possible feature selection algorithms. preprocessing_algorithms (Iterable[PreprocessingAlgorithm]): Array of possible preprocessing algorithms. classifiers (Iterable[Classificator]): Array of possible classifiers. + optimization_algorithm (str): Name of the optimization algorithm to use. """ self.__data = data + self.__optimization_algorithm = optimization_algorithm self.__preprocessing_algorithms = preprocessing_algorithms if self.__preprocessing_algorithms is not None: @@ -76,11 +84,19 @@ def _set_parameters(self, data, feature_selection_algorithms, preprocessing_algo self.__classifiers = classifiers self.__feature_selection_algorithms = feature_selection_algorithms - def run(self, pipeline_population_size, pipeline_classifier_population_size, number_of_pipeline_evaluations, number_of_classifier_evaluations): + def run(self, pipeline_population_size, classifier_population_size, number_of_pipeline_evaluations, number_of_classifier_evaluations): r"""TODO """ - algo = ParticleSwarmOptimization(NP=pipeline_population_size, InitPopFunc=_initialize_population) - task = StoppingTask(D=3, nFES=number_of_pipeline_evaluations, benchmark=_PipelineOptimizerBenchmark(self.__data, self.__feature_selection_algorithms, self.__preprocessing_algorithms, self.__classifiers), optType=OptimizationType.MAXIMIZATION) + algo = self.__niapy_algorithm_utility.get_algorithm(self.__optimization_algorithm) + algo.NP = pipeline_population_size + algo.InitPopFunc = _initialize_population + + task = StoppingTask( + D=3, + nFES=number_of_pipeline_evaluations, + benchmark=_PipelineOptimizerBenchmark(self.__data, self.__feature_selection_algorithms, self.__preprocessing_algorithms, self.__classifiers, classifier_population_size, number_of_classifier_evaluations), + optType=OptimizationType.MAXIMIZATION + ) best = algo.run(task) return best @@ -96,13 +112,15 @@ class _PipelineOptimizerBenchmark(Benchmark): __preprocessing_algorithm_factory = PreprocessingAlgorithmFactory() __feature_selection_algorithm_factory = FeatureSelectionAlgorithmFactory() - def __init__(self, data, feature_selection_algorithms, preprocessing_algorithms, classifiers): + def __init__(self, data, feature_selection_algorithms, preprocessing_algorithms, classifiers, classifier_population_size, number_of_classifier_evaluations): r"""TODO """ self.__data = data self.__feature_selection_algorithms = feature_selection_algorithms self.__preprocessing_algorithms = preprocessing_algorithms self.__classifiers = classifiers + self.__classifier_population_size = classifier_population_size + self.__number_of_classifier_evaluations = number_of_classifier_evaluations Benchmark.__init__(self, 0.0, 1.0) def __float_to_instance(self, value, collection, factory): @@ -124,6 +142,6 @@ def evaluate(D, sol): preprocessing_algorithm=self.__float_to_instance(sol[1], self.__preprocessing_algorithms, self.__preprocessing_algorithm_factory) if self.__preprocessing_algorithms is not None and len(self.__preprocessing_algorithms) > 0 else None, classifier=self.__float_to_instance(sol[2], self.__classifiers, self.__classifier_factory) ) - return pipeline.optimize(0, 0) + return pipeline.optimize(self.__classifier_population_size, self.__number_of_classifier_evaluations) return evaluate \ No newline at end of file diff --git a/niaaml/preprocessing_algorithms/utility.py b/niaaml/preprocessing_algorithms/utility.py index 85467b9..7840f1f 100644 --- a/niaaml/preprocessing_algorithms/utility.py +++ b/niaaml/preprocessing_algorithms/utility.py @@ -21,4 +21,15 @@ def _set_parameters(self, **kwargs): self._entities = { 'Normalizer': preprocessing_algorithms.Normalizer, 'StandardScaler': preprocessing_algorithms.StandardScaler - } \ No newline at end of file + } + + def get_result(self, name): + r"""Get the resulting preprocessing algorithm. + + Arguments: + name (str): String that represents the preprocessing algorithm. + + Returns: + PreprocessingAlgorithm: PreprocessingAlgorithm according to the given name. + """ + return Factory.get_result(self, name)() \ No newline at end of file diff --git a/niaaml/utilities.py b/niaaml/utilities.py index 796f222..49f07cb 100644 --- a/niaaml/utilities.py +++ b/niaaml/utilities.py @@ -77,7 +77,7 @@ class Factory: r"""Base class with string mappings to entities. Attributes: - __entities (Dict[str, any]): Dictionary to map from strings to an instance of anything. + _entities (Dict[str, any]): Dictionary to map from strings to an instance of anything. """ _entities = None @@ -94,13 +94,13 @@ def get_result(self, name): r"""Get the resulting entity. Arguments: - name (Dictionary[str, any]): String that represents the entity. + name (str): String that represents the entity. Returns: any: Entity according to the given name. """ if name in self._entities: - return self._entities[name]() + return self._entities[name] else: raise TypeError('Passed entity is not defined! --> %s' % name) \ No newline at end of file From 5846890b3814180cb791d28db16b08fc212c97d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luka=20Pe=C4=8Dnik?= Date: Wed, 18 Nov 2020 12:07:27 +0100 Subject: [PATCH 4/4] updated pipeline --- niaaml/classifiers/ada_boost.py | 2 +- niaaml/classifiers/bagging.py | 6 ++-- .../classifiers/extremely_randomized_trees.py | 2 +- niaaml/classifiers/linear_svc_classifier.py | 2 +- niaaml/classifiers/multi_layer_perceptron.py | 7 +++-- niaaml/pipeline.py | 29 +++++++++++-------- niaaml/utilities.py | 2 +- 7 files changed, 28 insertions(+), 22 deletions(-) diff --git a/niaaml/classifiers/ada_boost.py b/niaaml/classifiers/ada_boost.py index 2c55520..5697128 100644 --- a/niaaml/classifiers/ada_boost.py +++ b/niaaml/classifiers/ada_boost.py @@ -24,7 +24,7 @@ class AdaBoost(Classifier): __adaBoost = AdaBoostClassifier() _params = dict( n_estimators = ParameterDefinition(MinMax(min=10, max=150), np.uint), - algorithm = ParameterDefinition(['SAMME', 'SAMME.R'], None) + algorithm = ParameterDefinition(['SAMME', 'SAMME.R']) ) def _set_parameters(self, **kwargs): diff --git a/niaaml/classifiers/bagging.py b/niaaml/classifiers/bagging.py index 1788090..2381ac4 100644 --- a/niaaml/classifiers/bagging.py +++ b/niaaml/classifiers/bagging.py @@ -23,9 +23,9 @@ class Bagging(Classifier): """ __baggingClassifier = BaggingClassifier() _params = dict( - n_estimators = ParameterDefinition(MinMax(min=5, max=30), np.uint), - bootstrap = ParameterDefinition([True, False], None), - bootstrap_features = ParameterDefinition([True, False], None) + n_estimators = ParameterDefinition(MinMax(min=10, max=150), np.uint), + bootstrap = ParameterDefinition([True, False]), + bootstrap_features = ParameterDefinition([True, False]) ) def _set_parameters(self, **kwargs): diff --git a/niaaml/classifiers/extremely_randomized_trees.py b/niaaml/classifiers/extremely_randomized_trees.py index b85ecf7..a65b70d 100644 --- a/niaaml/classifiers/extremely_randomized_trees.py +++ b/niaaml/classifiers/extremely_randomized_trees.py @@ -24,7 +24,7 @@ class ExtremelyRandomizedTrees(Classifier): __extraTreesClassifier = ExtraTreesClassifier() _params = dict( n_estimators = ParameterDefinition(MinMax(min=10, max=200), np.uint), - criterion = ParameterDefinition(['gini', 'entropy'], None), + criterion = ParameterDefinition(['gini', 'entropy']), min_samples_split = ParameterDefinition(MinMax(min=2, max=10), np.uint) ) diff --git a/niaaml/classifiers/linear_svc_classifier.py b/niaaml/classifiers/linear_svc_classifier.py index fdf86fc..13658ee 100644 --- a/niaaml/classifiers/linear_svc_classifier.py +++ b/niaaml/classifiers/linear_svc_classifier.py @@ -23,7 +23,7 @@ class LinearSVCClassifier(Classifier): """ __linearSVC = LinearSVC() _params = dict( - penalty = ParameterDefinition(['l1', 'l2'], None), + penalty = ParameterDefinition(['l1', 'l2']), max_iter = ParameterDefinition(MinMax(min=300, max=2000), np.uint) ) diff --git a/niaaml/classifiers/multi_layer_perceptron.py b/niaaml/classifiers/multi_layer_perceptron.py index 2fad8dd..cd0db27 100644 --- a/niaaml/classifiers/multi_layer_perceptron.py +++ b/niaaml/classifiers/multi_layer_perceptron.py @@ -23,9 +23,10 @@ class MultiLayerPerceptron(Classifier): """ __multiLayerPerceptron = MLPClassifier() _params = dict( - activation = ParameterDefinition(['identity', 'logistic', 'tanh', 'relu'], None), - solver = ParameterDefinition(['lbfgs', 'sgd', 'adam'], None), - max_iter = ParameterDefinition(MinMax(min=200, max=500), np.uint) + activation = ParameterDefinition(['identity', 'logistic', 'tanh', 'relu']), + solver = ParameterDefinition(['lbfgs', 'sgd', 'adam']), + max_iter = ParameterDefinition(MinMax(min=200, max=500), np.uint), + learning_rate = ParameterDefinition(['constant', 'invscaling', 'adaptive']) ) def _set_parameters(self, **kwargs): diff --git a/niaaml/pipeline.py b/niaaml/pipeline.py index f8d549d..0403e81 100644 --- a/niaaml/pipeline.py +++ b/niaaml/pipeline.py @@ -51,18 +51,23 @@ def optimize(self, population_size, number_of_evaluations): r"""TODO """ # TODO implement optimization process - X = self.__data.get_x() + try: + X = self.__data.get_x() - if self.__feature_selection_algorithm is not None: - X = self.__feature_selection_algorithm.select_features(self.__data.get_x(), self.__data.get_y()) - - if self.__preprocessing_algorithm is not None: - X = self.__preprocessing_algorithm.process(X) - - train_X, test_X, train_y, test_y = train_test_split( - X, self.__data.get_y(), test_size=0.2) + if self.__feature_selection_algorithm is not None: + X = self.__feature_selection_algorithm.select_features(self.__data.get_x(), self.__data.get_y()) + + if self.__preprocessing_algorithm is not None: + X = self.__preprocessing_algorithm.process(X) + + train_X, test_X, train_y, test_y = train_test_split( + X, self.__data.get_y(), test_size=0.2) - self.__classifier.fit(train_X, train_y) - predictions = self.__classifier.predict(test_X) + self.__classifier.fit(train_X, train_y) + predictions = self.__classifier.predict(test_X) - return accuracy_score(test_y, predictions) + return accuracy_score(test_y, predictions) + except: + # infeasible solution as it causes some kind of error + # return negative infinity as we are looking for maximum accuracy in the optimization process + return -float('inf') diff --git a/niaaml/utilities.py b/niaaml/utilities.py index 49f07cb..5feec0b 100644 --- a/niaaml/utilities.py +++ b/niaaml/utilities.py @@ -63,7 +63,7 @@ class ParameterDefinition: value = None paramType = None - def __init__(self, value, paramType): + def __init__(self, value, paramType = None): r"""Initialize instance. Arguments: