From c0a52c01a2d86c922dd24b271a631be67e866def Mon Sep 17 00:00:00 2001 From: "luka.pecnik" Date: Mon, 16 Nov 2020 13:15:51 +0100 Subject: [PATCH 1/3] PipelineOptimizer implementation progress --- niaaml/__init__.py | 5 +- niaaml/classifiers/__init__.py | 4 +- niaaml/classifiers/utility.py | 4 +- .../feature_selection_algorithms/__init__.py | 4 +- .../feature_selection_algorithms/utility.py | 4 +- niaaml/pipeline.py | 45 +++++++++ niaaml/pipeline_optimizer.py | 96 +++++++++++++++++++ niaaml/preprocessing_algorithms/__init__.py | 4 +- niaaml/preprocessing_algorithms/utility.py | 4 +- niaaml/task.py | 58 ----------- 10 files changed, 157 insertions(+), 71 deletions(-) create mode 100644 niaaml/pipeline_optimizer.py delete mode 100644 niaaml/task.py diff --git a/niaaml/__init__.py b/niaaml/__init__.py index 32135d6..2aa8501 100644 --- a/niaaml/__init__.py +++ b/niaaml/__init__.py @@ -7,6 +7,8 @@ from niaaml.utilities import ParameterDefinition from niaaml.utilities import Factory from niaaml.task import Task +from niaaml.pipeline_optimizer import PipelineOptimizer +from niaaml.pipeline import Pipeline __all__ = [ 'data', @@ -17,7 +19,8 @@ 'MinMax', 'ParameterDefinition', 'Factory', - 'Task' + 'PipelineOptimizer', + 'Pipeline' ] __project__ = 'niaaml' __version__ = '0.1.0' diff --git a/niaaml/classifiers/__init__.py b/niaaml/classifiers/__init__.py index 7a7e79d..472f18d 100644 --- a/niaaml/classifiers/__init__.py +++ b/niaaml/classifiers/__init__.py @@ -5,7 +5,7 @@ from niaaml.classifiers.ada_boost import AdaBoost from niaaml.classifiers.extremely_randomized_trees import ExtremelyRandomizedTrees from niaaml.classifiers.bagging import Bagging -from niaaml.classifiers.utility import ClassifierUtility +from niaaml.classifiers.utility import ClassifierFactory __all__ = [ 'Classifier', @@ -15,5 +15,5 @@ 'AdaBoost', 'Bagging', 'ExtremelyRandomizedTrees', - 'ClassifierUtility' + 'ClassifierFactory' ] \ No newline at end of file diff --git a/niaaml/classifiers/utility.py b/niaaml/classifiers/utility.py index 98069f0..c46669d 100644 --- a/niaaml/classifiers/utility.py +++ b/niaaml/classifiers/utility.py @@ -2,10 +2,10 @@ from niaaml import classifiers __all__ = [ - 'ClassifierUtility' + 'ClassifierFactory' ] -class ClassifierUtility(Factory): +class ClassifierFactory(Factory): r"""Class with string mappings to classifiers. Attributes: diff --git a/niaaml/feature_selection_algorithms/__init__.py b/niaaml/feature_selection_algorithms/__init__.py index e795476..9d5b525 100644 --- a/niaaml/feature_selection_algorithms/__init__.py +++ b/niaaml/feature_selection_algorithms/__init__.py @@ -3,7 +3,7 @@ from niaaml.feature_selection_algorithms.jDEFSTH import jDEFSTH from niaaml.feature_selection_algorithms.select_percentile_chi2 import SelectPercentileChi2 from niaaml.feature_selection_algorithms.select_k_best_chi2 import SelectKBestChi2 -from niaaml.feature_selection_algorithms.utility import FeatureSelectionAlgorithmUtility +from niaaml.feature_selection_algorithms.utility import FeatureSelectionAlgorithmFactory __all__ = [ 'FeatureSelectionAlgorithm', @@ -11,5 +11,5 @@ 'jDEFSTH', 'SelectPercentileChi2', 'SelectKBestChi2', - 'FeatureSelectionAlgorithmUtility' + 'FeatureSelectionAlgorithmFactory' ] diff --git a/niaaml/feature_selection_algorithms/utility.py b/niaaml/feature_selection_algorithms/utility.py index 7c4bcd8..8126e1b 100644 --- a/niaaml/feature_selection_algorithms/utility.py +++ b/niaaml/feature_selection_algorithms/utility.py @@ -2,10 +2,10 @@ from niaaml import feature_selection_algorithms __all__ = [ - 'FeatureSelectionAlgorithmUtility' + 'FeatureSelectionAlgorithmFactory' ] -class FeatureSelectionAlgorithmUtility(Factory): +class FeatureSelectionAlgorithmFactory(Factory): r"""Class with string mappings to feature selection algorithms. Attributes: diff --git a/niaaml/pipeline.py b/niaaml/pipeline.py index e69de29..dc16ba3 100644 --- a/niaaml/pipeline.py +++ b/niaaml/pipeline.py @@ -0,0 +1,45 @@ +__all__ = [ + 'Pipeline' +] + +class Pipeline: + r"""Classification pipeline defined by optional preprocessing steps, feature selection algorithm and classifier. + + Date: + 2020 + + Author + Luka Pečnik + + License: + MIT + + Attributes: + __data (DataReader): Instance of any DataReader implementation. + __feature_selection_algorithm (FeatureSelectionAlgorithm): Feature selection algorithm implementation. + __preprocessing_algorithm (PreprocessingAlgorithm): Preprocessing algorithm implementation (optional). + __classifier (Classifier): Classifier implementation. + """ + __data = None + __feature_selection_algorithm = None + __preprocessing_algorithm = None + __classifier = None + + def __init__(self, **kwargs): + r"""Initialize task. + """ + self._set_parameters(**kwargs) + + def _set_parameters(self, data, feature_selection_algorithm, preprocessing_algorithm, classifier, **kwargs): + r"""Set the parameters/arguments of the task. + + Arguments: + data (DataReader): Instance of any DataReader implementation. + feature_selection_algorithm (FeatureSelectionAlgorithm): Feature selection algorithm implementation. + preprocessing_algorithm (PreprocessingAlgorithm): Preprocessing algorithm implementation (optional). + classifier (Classifier): Classifier implementation. + """ + self.__data = data + self.__feature_selection_algorithm = feature_selection_algorithm + self.__preprocessing_algorithm = preprocessing_algorithm + self.__classifier = classifier \ No newline at end of file diff --git a/niaaml/pipeline_optimizer.py b/niaaml/pipeline_optimizer.py new file mode 100644 index 0000000..49de343 --- /dev/null +++ b/niaaml/pipeline_optimizer.py @@ -0,0 +1,96 @@ +import numpy as np +from niaaml import Pipeline +from niaaml.classifiers import ClassifierFactory +from niaaml.feature_selection_algorithms import FeatureSelectionAlgorithmFactory +from niaaml.preprocessing_algorithms import PreprocessingAlgorithmFactory + +__all__ = [ + 'PipelineOptimizer' +] + +class PipelineOptimizer: + r"""Optimization task that finds the best classification pipeline according to the given input. + + Date: + 2020 + + Author + Luka Pečnik + + License: + MIT + + Attributes: + __data (DataReader): Instance of any DataReader implementation. + __feature_selection_algorithms (Iterable[FeatureSelectionAlgorithm]): Array of possible feature selection algorithms. + __preprocessing_algorithms (Iterable[PreprocessingAlgorithm]): Array of possible preprocessing algorithms. + __classifiers (Iterable[Classifier]): Array of possible classifiers. + __pipelines_numeric (numpy.ndarray[float]): Numeric representation of pipelines. + __pipelines (Iterable[Pipeline]): Actual pipelines. + __pop_size (int): Number of individuals in the pipeline optimizer's population. + """ + __data = None + __feature_selection_algorithms = None + __preprocessing_algorithms = None + __classifiers = None + + __pop_size = None + __pipelines_numeric = None + __pipelines = None + + __classifier_factory = ClassifierFactory() + __preprocessing_algorithm_factory = PreprocessingAlgorithmFactory() + __feature_selection_algorithm_factory = FeatureSelectionAlgorithmFactory() + + def __init__(self, **kwargs): + r"""Initialize task. + """ + self._set_parameters(**kwargs) + self.__initialize_population(self.__pop_size) + + def _set_parameters(self, data, feature_selection_algorithms, preprocessing_algorithms, classifiers, pop_size, **kwargs): + r"""Set the parameters/arguments of the task. + + Arguments: + data (DataReader): Instance of any DataReader implementation. + feature_selection_algorithms (Iterable[FeatureSelectionAlgorithm]): Array of possible feature selection algorithms. + preprocessing_algorithms (Iterable[PreprocessingAlgorithm]): Array of possible preprocessing algorithms. + classifiers (Iterable[Classificator]): Array of possible classifiers. + pop_size (int): Number of individuals in the pipeline optimizer's population. + """ + self.__data = data + + self.__feature_selection_algorithms = feature_selection_algorithms + self.__feature_selection_algorithms.insert(0, None) + + self.__preprocessing_algorithms = preprocessing_algorithms + self.__classifiers = classifiers + self.__pop_size = pop_size + + def __initialize_population(self, pop_size): + r"""Initialize population of pipelines to find the best setup. + + Arguments: + pop_size (int): Number of individuals. + """ + dims = 1 + if(self.__preprocessing_algorithms is not None and len(self.__preprocessing_algorithms) > 0 ): + dims += 1 + if(self.__feature_selection_algorithms is not None and len(self.__feature_selection_algorithms) > 0 ): + dims += 1 + + self.__pipelines_numeric = np.random.uniform(size=(pop_size, dims)) + self.__pipelines = [ + Pipeline( + data=self.__data, + feature_selection_algorithm=self.__float_to_instance(i[0], self.__feature_selection_algorithms, self.__feature_selection_algorithm_factory) if self.__feature_selection_algorithms is not None and len(self.__feature_selection_algorithms) > 0 else None, + preprocessing_algorithm=self.__float_to_instance(i[1], self.__preprocessing_algorithms, self.__preprocessing_algorithm_factory) if self.__preprocessing_algorithms is not None and len(self.__preprocessing_algorithms) > 0 else None, + classifier=self.__float_to_instance(i[2], self.__classifiers, self.__classifier_factory) + ) for i in self.__pipelines_numeric + ] + + def __float_to_instance(self, value, collection, factory): + r"""TODO + """ + name = collection[np.int(np.floor(value / len(collection)))] + return factory.get_result(name) if name is not None else None \ No newline at end of file diff --git a/niaaml/preprocessing_algorithms/__init__.py b/niaaml/preprocessing_algorithms/__init__.py index cfada32..2477c12 100644 --- a/niaaml/preprocessing_algorithms/__init__.py +++ b/niaaml/preprocessing_algorithms/__init__.py @@ -1,11 +1,11 @@ from niaaml.preprocessing_algorithms.preprocessing_algorithm import PreprocessingAlgorithm from niaaml.preprocessing_algorithms.normalizer import Normalizer from niaaml.preprocessing_algorithms.standard_scaler import StandardScaler -from niaaml.preprocessing_algorithms.utility import PreprocessingAlgorithmUtility +from niaaml.preprocessing_algorithms.utility import PreprocessingAlgorithmFactory __all__ = [ 'PreprocessingAlgorithm', 'Normalizer', 'StandardScaler', - 'PreprocessingAlgorithmUtility' + 'PreprocessingAlgorithmFactory' ] diff --git a/niaaml/preprocessing_algorithms/utility.py b/niaaml/preprocessing_algorithms/utility.py index fb374f8..85467b9 100644 --- a/niaaml/preprocessing_algorithms/utility.py +++ b/niaaml/preprocessing_algorithms/utility.py @@ -2,10 +2,10 @@ from niaaml import preprocessing_algorithms __all__ = [ - 'PreprocessingAlgorithmUtility' + 'PreprocessingAlgorithmFactory' ] -class PreprocessingAlgorithmUtility(Factory): +class PreprocessingAlgorithmFactory(Factory): r"""Class with string mappings to preprocessing algorithms. Attributes: diff --git a/niaaml/task.py b/niaaml/task.py deleted file mode 100644 index 82cea9c..0000000 --- a/niaaml/task.py +++ /dev/null @@ -1,58 +0,0 @@ -import numpy as np - -__all__ = [ - 'Task' -] - -class Task: - r"""Task that finds the best classification pipeline according to the given input. - - Date: - 2020 - - Author - Luka Pečnik - - License: - MIT - - Attributes: - __data (DataReader): Instance of any DataReader implementation. - __feature_selection_algorithms (Iterable[FeatureSelectionAlgorithm]): Array of possible feature selection algorithms. - __preprocessing_algorithms (Iterable[PreprocessingAlgorithm]): Array of possible preprocessing algorithms. - __classifiers (Iterable[Classificator]): Array of possible classifiers. - """ - __data = None - __feature_selection_algorithms = None - __preprocessing_algorithms = None - __classifiers = None - - def __init__(self, **kwargs): - r"""Initialize task. - """ - self._set_parameters(**kwargs) - - def _set_parameters(self, data, feature_selection_algorithms, preprocessing_algorithms, classifiers, **kwargs): - r"""Set the parameters/arguments of the task. - - Arguments: - data (DataReader): Instance of any DataReader implementation. - feature_selection_algorithms (Iterable[FeatureSelectionAlgorithm]): Array of possible feature selection algorithms. - preprocessing_algorithms (Iterable[PreprocessingAlgorithm]): Array of possible preprocessing algorithms. - classifiers (Iterable[Classificator]): Array of possible classifiers. - """ - self.__data = data - self.__feature_selection_algorithms = feature_selection_algorithms - self.__preprocessing_algorithms = preprocessing_algorithms - self.__classifiers = classifiers - - def __initialize_population(self, pop_size): - r"""Initialize population of pipelines to find the best setup. - - Arguments: - pop_size (int): Number of individuals. - - Returns: - TODO - """ - return np.random.uniform(size=(pop_size, 3)) \ No newline at end of file From aaf0e2783e57e8efe09f32eef2db176fec80a21d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luka=20Pe=C4=8Dnik?= Date: Mon, 16 Nov 2020 18:27:18 +0100 Subject: [PATCH 2/3] PipelineOptimizer initialization --- niaaml/__init__.py | 2 +- niaaml/pipeline_optimizer.py | 24 +++++++++++++----------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/niaaml/__init__.py b/niaaml/__init__.py index 2aa8501..0a3fa36 100644 --- a/niaaml/__init__.py +++ b/niaaml/__init__.py @@ -3,10 +3,10 @@ from niaaml import preprocessing_algorithms from niaaml import feature_selection_algorithms from niaaml.utilities import get_label_encoder +from niaaml.utilities import float_converter from niaaml.utilities import MinMax from niaaml.utilities import ParameterDefinition from niaaml.utilities import Factory -from niaaml.task import Task from niaaml.pipeline_optimizer import PipelineOptimizer from niaaml.pipeline import Pipeline diff --git a/niaaml/pipeline_optimizer.py b/niaaml/pipeline_optimizer.py index 49de343..1d542ca 100644 --- a/niaaml/pipeline_optimizer.py +++ b/niaaml/pipeline_optimizer.py @@ -1,5 +1,5 @@ import numpy as np -from niaaml import Pipeline +from niaaml.pipeline import Pipeline from niaaml.classifiers import ClassifierFactory from niaaml.feature_selection_algorithms import FeatureSelectionAlgorithmFactory from niaaml.preprocessing_algorithms import PreprocessingAlgorithmFactory @@ -28,6 +28,10 @@ class PipelineOptimizer: __pipelines_numeric (numpy.ndarray[float]): Numeric representation of pipelines. __pipelines (Iterable[Pipeline]): Actual pipelines. __pop_size (int): Number of individuals in the pipeline optimizer's population. + + __classifier_factory (ClassifierFactory): Factory for classifier instances. + __preprocessing_algorithm_factory (ClassifierFactory): Factory for preprocessing algorithm instances. + __feature_selection_algorithm_factory (ClassifierFactory): Factory for feature selection algorithm instances. """ __data = None __feature_selection_algorithms = None @@ -60,11 +64,14 @@ def _set_parameters(self, data, feature_selection_algorithms, preprocessing_algo """ self.__data = data - self.__feature_selection_algorithms = feature_selection_algorithms - self.__feature_selection_algorithms.insert(0, None) - self.__preprocessing_algorithms = preprocessing_algorithms + try: + self.__preprocessing_algorithms.index(None) + except: + self.__preprocessing_algorithms.insert(0, None) + self.__classifiers = classifiers + self.__feature_selection_algorithms = feature_selection_algorithms self.__pop_size = pop_size def __initialize_population(self, pop_size): @@ -73,13 +80,8 @@ def __initialize_population(self, pop_size): Arguments: pop_size (int): Number of individuals. """ - dims = 1 - if(self.__preprocessing_algorithms is not None and len(self.__preprocessing_algorithms) > 0 ): - dims += 1 - if(self.__feature_selection_algorithms is not None and len(self.__feature_selection_algorithms) > 0 ): - dims += 1 - self.__pipelines_numeric = np.random.uniform(size=(pop_size, dims)) + self.__pipelines_numeric = np.random.uniform(size=(pop_size, 3)) self.__pipelines = [ Pipeline( data=self.__data, @@ -92,5 +94,5 @@ def __initialize_population(self, pop_size): def __float_to_instance(self, value, collection, factory): r"""TODO """ - name = collection[np.int(np.floor(value / len(collection)))] + name = collection[np.int(np.round(value * (len(collection) - 1)))] return factory.get_result(name) if name is not None else None \ No newline at end of file From 9c6cceb658c1e8cd618a482ff3af7548cde761f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luka=20Pe=C4=8Dnik?= Date: Mon, 16 Nov 2020 22:02:08 +0100 Subject: [PATCH 3/3] PipelineOptimizer progress --- niaaml/pipeline_optimizer.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/niaaml/pipeline_optimizer.py b/niaaml/pipeline_optimizer.py index 1d542ca..50006bd 100644 --- a/niaaml/pipeline_optimizer.py +++ b/niaaml/pipeline_optimizer.py @@ -3,12 +3,16 @@ from niaaml.classifiers import ClassifierFactory from niaaml.feature_selection_algorithms import FeatureSelectionAlgorithmFactory from niaaml.preprocessing_algorithms import PreprocessingAlgorithmFactory +from NiaPy.task import StoppingTask +from NiaPy.benchmarks import Benchmark +from NiaPy.algorithms.basic import ParticleSwarmOptimization __all__ = [ - 'PipelineOptimizer' + 'PipelineOptimizer', + 'PipelineOptimizerBenchmark' ] -class PipelineOptimizer: +class PipelineOptimizer(): r"""Optimization task that finds the best classification pipeline according to the given input. Date: @@ -95,4 +99,20 @@ def __float_to_instance(self, value, collection, factory): r"""TODO """ name = collection[np.int(np.round(value * (len(collection) - 1)))] - return factory.get_result(name) if name is not None else None \ No newline at end of file + return factory.get_result(name) if name is not None else None + + def optimize_pipeline(self, n_p, n_fes): + algo = ParticleSwarmOptimization(NP=n_p) # TODO define InitPopFunc + task = StoppingTask(D=3, nFES=n_fes, benchmark=PipelineOptimizerBenchmark()) + best = algo.run(task) + return best + +class PipelineOptimizerBenchmark(Benchmark): + def __init__(self): + Benchmark.__init__(self, 0.0, 1.0) + + def function(self): + # TODO + def evaluate(D, sol): + return 0.0 + return evaluate \ No newline at end of file