Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions niaaml/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@
from niaaml import preprocessing_algorithms
from niaaml import feature_selection_algorithms
from niaaml.utilities import get_label_encoder
from niaaml.utilities import float_converter
from niaaml.utilities import MinMax
from niaaml.utilities import ParameterDefinition
from niaaml.utilities import Factory
from niaaml.task import Task
from niaaml.pipeline_optimizer import PipelineOptimizer
from niaaml.pipeline import Pipeline

__all__ = [
'data',
Expand All @@ -17,7 +19,8 @@
'MinMax',
'ParameterDefinition',
'Factory',
'Task'
'PipelineOptimizer',
'Pipeline'
]
__project__ = 'niaaml'
__version__ = '0.1.0'
4 changes: 2 additions & 2 deletions niaaml/classifiers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from niaaml.classifiers.ada_boost import AdaBoost
from niaaml.classifiers.extremely_randomized_trees import ExtremelyRandomizedTrees
from niaaml.classifiers.bagging import Bagging
from niaaml.classifiers.utility import ClassifierUtility
from niaaml.classifiers.utility import ClassifierFactory

__all__ = [
'Classifier',
Expand All @@ -15,5 +15,5 @@
'AdaBoost',
'Bagging',
'ExtremelyRandomizedTrees',
'ClassifierUtility'
'ClassifierFactory'
]
4 changes: 2 additions & 2 deletions niaaml/classifiers/utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
from niaaml import classifiers

__all__ = [
'ClassifierUtility'
'ClassifierFactory'
]

class ClassifierUtility(Factory):
class ClassifierFactory(Factory):
r"""Class with string mappings to classifiers.

Attributes:
Expand Down
4 changes: 2 additions & 2 deletions niaaml/feature_selection_algorithms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
from niaaml.feature_selection_algorithms.jDEFSTH import jDEFSTH
from niaaml.feature_selection_algorithms.select_percentile_chi2 import SelectPercentileChi2
from niaaml.feature_selection_algorithms.select_k_best_chi2 import SelectKBestChi2
from niaaml.feature_selection_algorithms.utility import FeatureSelectionAlgorithmUtility
from niaaml.feature_selection_algorithms.utility import FeatureSelectionAlgorithmFactory

__all__ = [
'FeatureSelectionAlgorithm',
'VarianceThresholdFeatureSelection',
'jDEFSTH',
'SelectPercentileChi2',
'SelectKBestChi2',
'FeatureSelectionAlgorithmUtility'
'FeatureSelectionAlgorithmFactory'
]
4 changes: 2 additions & 2 deletions niaaml/feature_selection_algorithms/utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
from niaaml import feature_selection_algorithms

__all__ = [
'FeatureSelectionAlgorithmUtility'
'FeatureSelectionAlgorithmFactory'
]

class FeatureSelectionAlgorithmUtility(Factory):
class FeatureSelectionAlgorithmFactory(Factory):
r"""Class with string mappings to feature selection algorithms.

Attributes:
Expand Down
45 changes: 45 additions & 0 deletions niaaml/pipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Public names exported by this module on a wildcard import.
__all__ = [
'Pipeline'
]

class Pipeline:
    r"""Classification pipeline defined by optional preprocessing steps, feature selection algorithm and classifier.

    Date:
        2020

    Author:
        Luka Pečnik

    License:
        MIT

    Attributes:
        __data (DataReader): Instance of any DataReader implementation.
        __feature_selection_algorithm (FeatureSelectionAlgorithm): Feature selection algorithm implementation.
        __preprocessing_algorithm (PreprocessingAlgorithm): Preprocessing algorithm implementation (optional).
        __classifier (Classifier): Classifier implementation.
    """
    __data = None
    __feature_selection_algorithm = None
    __preprocessing_algorithm = None
    __classifier = None

    def __init__(self, **kwargs):
        r"""Initialize pipeline.

        Arguments:
            **kwargs: Keyword arguments forwarded to :func:`_set_parameters`.
                ``preprocessing_algorithm`` may be omitted, in which case it is None.

        Raises:
            TypeError: If any of the remaining required arguments is missing.
        """
        # The preprocessing step is documented as optional, so a caller must be
        # able to leave it out instead of being forced to pass None explicitly.
        kwargs.setdefault('preprocessing_algorithm', None)
        self._set_parameters(**kwargs)

    def _set_parameters(self, data, feature_selection_algorithm, preprocessing_algorithm, classifier, **kwargs):
        r"""Set the parameters/arguments of the pipeline.

        Arguments:
            data (DataReader): Instance of any DataReader implementation.
            feature_selection_algorithm (FeatureSelectionAlgorithm): Feature selection algorithm implementation.
            preprocessing_algorithm (PreprocessingAlgorithm): Preprocessing algorithm implementation (optional, may be None).
            classifier (Classifier): Classifier implementation.
            **kwargs: Additional keyword arguments; accepted and ignored.
        """
        self.__data = data
        self.__feature_selection_algorithm = feature_selection_algorithm
        self.__preprocessing_algorithm = preprocessing_algorithm
        self.__classifier = classifier
118 changes: 118 additions & 0 deletions niaaml/pipeline_optimizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
import numpy as np
from niaaml.pipeline import Pipeline
from niaaml.classifiers import ClassifierFactory
from niaaml.feature_selection_algorithms import FeatureSelectionAlgorithmFactory
from niaaml.preprocessing_algorithms import PreprocessingAlgorithmFactory
from NiaPy.task import StoppingTask
from NiaPy.benchmarks import Benchmark
from NiaPy.algorithms.basic import ParticleSwarmOptimization

# Public names exported by this module on a wildcard import.
__all__ = [
'PipelineOptimizer',
'PipelineOptimizerBenchmark'
]

class PipelineOptimizer():
    r"""Optimization task that finds the best classification pipeline according to the given input.

    Date:
        2020

    Author:
        Luka Pečnik

    License:
        MIT

    Attributes:
        __data (DataReader): Instance of any DataReader implementation.
        __feature_selection_algorithms (Iterable[FeatureSelectionAlgorithm]): Array of possible feature selection algorithms.
        __preprocessing_algorithms (Iterable[PreprocessingAlgorithm]): Array of possible preprocessing algorithms.
        __classifiers (Iterable[Classifier]): Array of possible classifiers.
        __pipelines_numeric (numpy.ndarray[float]): Numeric representation of pipelines.
        __pipelines (Iterable[Pipeline]): Actual pipelines.
        __pop_size (int): Number of individuals in the pipeline optimizer's population.

        __classifier_factory (ClassifierFactory): Factory for classifier instances.
        __preprocessing_algorithm_factory (PreprocessingAlgorithmFactory): Factory for preprocessing algorithm instances.
        __feature_selection_algorithm_factory (FeatureSelectionAlgorithmFactory): Factory for feature selection algorithm instances.
    """
    __data = None
    __feature_selection_algorithms = None
    __preprocessing_algorithms = None
    __classifiers = None

    __pop_size = None
    __pipelines_numeric = None
    __pipelines = None

    __classifier_factory = ClassifierFactory()
    __preprocessing_algorithm_factory = PreprocessingAlgorithmFactory()
    __feature_selection_algorithm_factory = FeatureSelectionAlgorithmFactory()

    def __init__(self, **kwargs):
        r"""Initialize the optimizer and build its initial population of pipelines.

        Arguments:
            **kwargs: Keyword arguments forwarded to :func:`_set_parameters`.
        """
        self._set_parameters(**kwargs)
        self.__initialize_population(self.__pop_size)

    def _set_parameters(self, data, feature_selection_algorithms, preprocessing_algorithms, classifiers, pop_size, **kwargs):
        r"""Set the parameters/arguments of the optimizer.

        Arguments:
            data (DataReader): Instance of any DataReader implementation.
            feature_selection_algorithms (Iterable[FeatureSelectionAlgorithm]): Array of possible feature selection algorithms.
            preprocessing_algorithms (Iterable[PreprocessingAlgorithm]): Array of possible preprocessing algorithms. May be None.
            classifiers (Iterable[Classifier]): Array of possible classifiers.
            pop_size (int): Number of individuals in the pipeline optimizer's population.
            **kwargs: Additional keyword arguments; accepted and ignored.
        """
        self.__data = data

        # Work on a private copy so the caller's collection is never modified,
        # and treat a missing collection as an empty one.
        candidates = list(preprocessing_algorithms) if preprocessing_algorithms is not None else []
        # None stands for "no preprocessing" and must always be selectable.
        if None not in candidates:
            candidates.insert(0, None)
        self.__preprocessing_algorithms = candidates

        self.__classifiers = classifiers
        self.__feature_selection_algorithms = feature_selection_algorithms
        self.__pop_size = pop_size

    def __initialize_population(self, pop_size):
        r"""Initialize population of pipelines to find the best setup.

        Each individual is a vector of three uniformly drawn floats that select, in
        order, the feature selection algorithm, the preprocessing algorithm and the
        classifier of one pipeline.

        Arguments:
            pop_size (int): Number of individuals.
        """
        selectors = self.__feature_selection_algorithms
        preprocessors = self.__preprocessing_algorithms

        # A missing or empty collection means the corresponding step is left out.
        has_selectors = selectors is not None and len(selectors) > 0
        has_preprocessors = preprocessors is not None and len(preprocessors) > 0

        self.__pipelines_numeric = np.random.uniform(size=(pop_size, 3))
        self.__pipelines = [
            Pipeline(
                data=self.__data,
                feature_selection_algorithm=(
                    self.__float_to_instance(genes[0], selectors, self.__feature_selection_algorithm_factory)
                    if has_selectors else None
                ),
                preprocessing_algorithm=(
                    self.__float_to_instance(genes[1], preprocessors, self.__preprocessing_algorithm_factory)
                    if has_preprocessors else None
                ),
                classifier=self.__float_to_instance(genes[2], self.__classifiers, self.__classifier_factory)
            )
            for genes in self.__pipelines_numeric
        ]

    def __float_to_instance(self, value, collection, factory):
        r"""Map a float onto an entry of a collection and instantiate it through a factory.

        The value is scaled by ``len(collection) - 1`` and rounded to obtain an index.

        Arguments:
            value (float): Selector value; the population draws it from numpy's uniform distribution.
            collection (Sequence): Candidate entries; an entry may be None.
            factory (Factory): Object whose ``get_result`` turns an entry into an instance.

        Returns:
            Result of ``factory.get_result`` for the selected entry, or None if that entry is None.
        """
        # The built-in int replaces np.int, which newer NumPy releases no longer provide.
        index = int(np.round(value * (len(collection) - 1)))
        name = collection[index]
        return factory.get_result(name) if name is not None else None

    def optimize_pipeline(self, n_p, n_fes):
        r"""Run particle swarm optimization on the pipeline benchmark.

        Arguments:
            n_p (int): Passed to the algorithm as ``NP``.
            n_fes (int): Passed to the stopping task as ``nFES``.

        Returns:
            Whatever ``ParticleSwarmOptimization.run`` returns for the task.
        """
        algo = ParticleSwarmOptimization(NP=n_p) # TODO define InitPopFunc
        task = StoppingTask(D=3, nFES=n_fes, benchmark=PipelineOptimizerBenchmark())
        best = algo.run(task)
        return best

class PipelineOptimizerBenchmark(Benchmark):
    r"""Benchmark used by the stopping task that ``PipelineOptimizer.optimize_pipeline`` creates.

    The fitness function is still a placeholder that rates every solution 0.0.
    """

    def __init__(self):
        r"""Initialize the base benchmark with the values 0.0 and 1.0.

        NOTE(review): presumably these are the lower and upper search bounds - confirm against NiaPy.
        """
        super().__init__(0.0, 1.0)

    def function(self):
        r"""Return the evaluation function of the benchmark.

        Returns:
            Callable[[Any, Any], float]: Function taking ``D`` and ``sol`` that always returns 0.0.
        """
        # TODO
        def _placeholder_fitness(D, sol):
            return 0.0

        return _placeholder_fitness
4 changes: 2 additions & 2 deletions niaaml/preprocessing_algorithms/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from niaaml.preprocessing_algorithms.preprocessing_algorithm import PreprocessingAlgorithm
from niaaml.preprocessing_algorithms.normalizer import Normalizer
from niaaml.preprocessing_algorithms.standard_scaler import StandardScaler
from niaaml.preprocessing_algorithms.utility import PreprocessingAlgorithmUtility
from niaaml.preprocessing_algorithms.utility import PreprocessingAlgorithmFactory

__all__ = [
'PreprocessingAlgorithm',
'Normalizer',
'StandardScaler',
'PreprocessingAlgorithmUtility'
'PreprocessingAlgorithmFactory'
]
4 changes: 2 additions & 2 deletions niaaml/preprocessing_algorithms/utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
from niaaml import preprocessing_algorithms

__all__ = [
'PreprocessingAlgorithmUtility'
'PreprocessingAlgorithmFactory'
]

class PreprocessingAlgorithmUtility(Factory):
class PreprocessingAlgorithmFactory(Factory):
r"""Class with string mappings to preprocessing algorithms.

Attributes:
Expand Down
58 changes: 0 additions & 58 deletions niaaml/task.py

This file was deleted.