4 changes: 2 additions & 2 deletions niaaml/classifiers/ada_boost.py
@@ -24,7 +24,7 @@ class AdaBoost(Classifier):
__adaBoost = AdaBoostClassifier()
_params = dict(
n_estimators = ParameterDefinition(MinMax(min=10, max=150), np.uint),
algorithm = ParameterDefinition(['SAMME', 'SAMME.R'], None)
algorithm = ParameterDefinition(['SAMME', 'SAMME.R'])
)

def _set_parameters(self, **kwargs):
@@ -56,4 +56,4 @@ def predict(self, x, **kwargs):
Returns:
numpy.array[int]: n predicted classes.
"""
self.__adaBoost.predict(x)
return self.__adaBoost.predict(x)
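
The second hunk fixes a real bug: `predict` computed the predictions but never returned them, so every call yielded `None`. A minimal sketch of the failure mode, using a hypothetical wrapper rather than the NiaAML class:

    from sklearn.ensemble import AdaBoostClassifier

    class BrokenWrapper:
        """Hypothetical wrapper reproducing the pre-fix behaviour."""
        def __init__(self):
            self._model = AdaBoostClassifier()

        def fit(self, x, y):
            self._model.fit(x, y)

        def predict(self, x):
            self._model.predict(x)  # result is computed, then silently discarded

    w = BrokenWrapper()
    w.fit([[0], [1], [2], [3]], [0, 0, 1, 1])
    print(w.predict([[1]]))  # prints None instead of array([0])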
8 changes: 4 additions & 4 deletions niaaml/classifiers/bagging.py
@@ -23,9 +23,9 @@ class Bagging(Classifier):
"""
__baggingClassifier = BaggingClassifier()
_params = dict(
n_estimators = ParameterDefinition(MinMax(min=5, max=30), np.uint),
bootstrap = ParameterDefinition([True, False], None),
bootstrap_features = ParameterDefinition([True, False], None)
n_estimators = ParameterDefinition(MinMax(min=10, max=150), np.uint),
bootstrap = ParameterDefinition([True, False]),
bootstrap_features = ParameterDefinition([True, False])
)

def _set_parameters(self, **kwargs):
@@ -57,4 +57,4 @@ def predict(self, x, **kwargs):
Returns:
numpy.array[int]: n predicted classes.
"""
self.__baggingClassifier.predict(x)
return self.__baggingClassifier.predict(x)
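
Both the `MinMax` ranges and the discrete option lists define the search space an optimizer samples from. A hedged sketch of how a continuous gene in [0, 1] could be decoded against these definitions (the discrete mapping mirrors `__float_to_instance` further down in this PR; the numeric scaling is an assumption):

    import numpy as np

    def decode_min_max(gene, min_, max_, dtype=np.uint32):
        # scale a gene from [0, 1] onto the closed interval [min_, max_]
        return dtype(min_ + gene * (max_ - min_))

    def decode_choice(gene, options):
        # map a gene from [0, 1] to one of the discrete options
        return options[int(round(gene * (len(options) - 1)))]

    print(decode_min_max(0.5, 10, 150))        # 80
    print(decode_choice(0.2, [True, False]))   # True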
4 changes: 2 additions & 2 deletions niaaml/classifiers/extremely_randomized_trees.py
@@ -24,7 +24,7 @@ class ExtremelyRandomizedTrees(Classifier):
__extraTreesClassifier = ExtraTreesClassifier()
_params = dict(
n_estimators = ParameterDefinition(MinMax(min=10, max=200), np.uint),
criterion = ParameterDefinition(['gini', 'entropy'], None),
criterion = ParameterDefinition(['gini', 'entropy']),
min_samples_split = ParameterDefinition(MinMax(min=2, max=10), np.uint)
)

@@ -57,4 +57,4 @@ def predict(self, x, **kwargs):
Returns:
numpy.array[int]: n predicted classes.
"""
self.__extraTreesClassifier.predict(x)
return self.__extraTreesClassifier.predict(x)
4 changes: 2 additions & 2 deletions niaaml/classifiers/linear_svc_classifier.py
@@ -23,7 +23,7 @@ class LinearSVCClassifier(Classifier):
"""
__linearSVC = LinearSVC()
_params = dict(
penalty = ParameterDefinition(['l1', 'l2'], None),
penalty = ParameterDefinition(['l1', 'l2']),
max_iter = ParameterDefinition(MinMax(min=300, max=2000), np.uint)
)

@@ -56,4 +56,4 @@ def predict(self, x, **kwargs):
Returns:
numpy.array[int]: n predicted classes.
"""
self.__linearSVC.predict(x)
return self.__linearSVC.predict(x)
9 changes: 5 additions & 4 deletions niaaml/classifiers/multi_layer_perceptron.py
@@ -23,9 +23,10 @@ class MultiLayerPerceptron(Classifier):
"""
__multiLayerPerceptron = MLPClassifier()
_params = dict(
activation = ParameterDefinition(['identity', 'logistic', 'tanh', 'relu'], None),
solver = ParameterDefinition(['lbfgs', 'sgd', 'adam'], None),
max_iter = ParameterDefinition(MinMax(min=200, max=500), np.uint)
activation = ParameterDefinition(['identity', 'logistic', 'tanh', 'relu']),
solver = ParameterDefinition(['lbfgs', 'sgd', 'adam']),
max_iter = ParameterDefinition(MinMax(min=200, max=500), np.uint),
learning_rate = ParameterDefinition(['constant', 'invscaling', 'adaptive'])
)

def _set_parameters(self, **kwargs):
@@ -57,4 +57,4 @@ def predict(self, x, **kwargs):
Returns:
numpy.array[int]: n predicted classes.
"""
self.__multiLayerPerceptron.predict(x)
return self.__multiLayerPerceptron.predict(x)
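
The parameter names in `_params` line up one-to-one with `sklearn.neural_network.MLPClassifier` keyword arguments, which is presumably what lets `_set_parameters` forward sampled values directly. One concrete point from the search space defined above:

    from sklearn.neural_network import MLPClassifier

    model = MLPClassifier(activation='tanh', solver='adam',
                          max_iter=300, learning_rate='adaptive')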
2 changes: 1 addition & 1 deletion niaaml/classifiers/random_forest_classifier.py
@@ -55,4 +55,4 @@ def predict(self, x, **kwargs):
Returns:
numpy.array[int]: n predicted classes.
"""
self.__randomForestClassifier.predict(x)
return self.__randomForestClassifier.predict(x)
13 changes: 12 additions & 1 deletion niaaml/classifiers/utility.py
@@ -25,4 +25,15 @@ def _set_parameters(self, **kwargs):
'LinearSVCClassifier': classifiers.LinearSVCClassifier,
'MultiLayerPerceptron': classifiers.MultiLayerPerceptron,
'RandomForestClassifier': classifiers.RandomForestClassifier
}
}

def get_result(self, name):
r"""Get the resulting classifier.

Arguments:
name (str): String that represents the classifier.

Returns:
Classifier: Classifier according to the given name.
"""
return Factory.get_result(self, name).getRandomInstance()
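
With this addition, `ClassifierFactory.get_result` no longer returns the class itself but a randomly parameterized instance via `getRandomInstance()` (presumably sampling each `ParameterDefinition` in the classifier's `_params`). A hedged usage sketch, assuming the package's public import path and a key visible in the mapping above:

    from niaaml.classifiers import ClassifierFactory

    factory = ClassifierFactory()
    clf = factory.get_result('RandomForestClassifier')  # instance with random hyperparameters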
20 changes: 8 additions & 12 deletions niaaml/feature_selection_algorithms/jDEFSTH.py
@@ -4,7 +4,6 @@
from NiaPy.benchmarks import Benchmark
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_wine

__all__ = [
'jDEFSTH'
@@ -39,7 +38,7 @@ def _set_parameters(self, **kwargs):
"""
return

def final_output(self, sol):
def __final_output(self, sol):
selected = []
threshold = sol[len(sol)-1]
for i in range(len(sol)-1):
@@ -50,14 +49,16 @@ def final_output(self, sol):
return selected

def select_features(self, x, y, **kwargs):
num_features = X.shape[1]
num_features = x.shape[1]
algo = SelfAdaptiveDifferentialEvolution(NP=10, F=0.5, F_l=0.0, F_u=2.0, Tao1=0.9, CR=0.5, Tao2=0.45)
task = StoppingTask(D=num_features+1, nFES=1000, benchmark=FeatureSelectionThreshold(X, y))
task = StoppingTask(D=num_features+1, nFES=1000, benchmark=_FeatureSelectionThreshold(x, y))
best = algo.run(task)
return self.final_output(best[0])
return self.__final_output(best[0])


class FeatureSelectionThreshold(Benchmark):
class _FeatureSelectionThreshold(Benchmark):
r"""TODO
"""

def __init__(self, X, y):
Benchmark.__init__(self, 0.0, 1.0)
self.train_X, self.test_X, self.train_y, self.test_y = train_test_split(
@@ -93,8 +94,3 @@ def evaluate(D, sol):
return fitness

return evaluate

#test
#X, y = load_wine(return_X_y=True)
#a = jDEFSTH()
#a.select_features(X,y)
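
The encoding behind `__final_output` is worth spelling out: for a dataset with D features, each candidate solution carries D+1 genes, the last gene acts as a selection threshold, and feature i is kept when gene i clears it (the loop body is collapsed in this view, so the comparison direction is an assumption). The deleted ad-hoc test at the bottom doubles as a usage example:

    import numpy as np
    from sklearn.datasets import load_wine
    from niaaml.feature_selection_algorithms import jDEFSTH  # assumed import path

    def final_output(sol):
        # sketch of the assumed threshold decoding: last gene is the threshold
        sol = np.asarray(sol)
        return sol[:-1] > sol[-1]

    print(final_output([0.2, 0.8, 0.5, 0.4]))  # [False  True  True]

    # end-to-end, as in the removed test snippet
    X, y = load_wine(return_X_y=True)
    selected = jDEFSTH().select_features(X, y)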
13 changes: 12 additions & 1 deletion niaaml/feature_selection_algorithms/utility.py
@@ -23,4 +23,15 @@ def _set_parameters(self, **kwargs):
'SelectKBestChi2': feature_selection_algorithms.SelectKBestChi2,
'SelectPercentileChi2': feature_selection_algorithms.SelectPercentileChi2,
'VarianceThresholdFeatureSelection': feature_selection_algorithms.VarianceThresholdFeatureSelection
}
}

def get_result(self, name):
r"""Get the resulting feature selection algorithm.

Arguments:
name (str): String that represents the feature selection algorithm.

Returns:
FeatureSelectionAlgorithm: FeatureSelectionAlgorithm according to the given name.
"""
return Factory.get_result(self, name)()
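
Note the asymmetry with the classifier factory: here `get_result` instantiates with a plain call rather than `getRandomInstance()`, presumably because these selection algorithms expose no tunable parameters (`jDEFSTH._set_parameters` above is a no-op). Usage is otherwise the same:

    from niaaml.feature_selection_algorithms import FeatureSelectionAlgorithmFactory

    factory = FeatureSelectionAlgorithmFactory()
    fsa = factory.get_result('jDEFSTH')  # a fresh jDEFSTH instance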
30 changes: 29 additions & 1 deletion niaaml/pipeline.py
@@ -1,3 +1,6 @@
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

__all__ = [
'Pipeline'
]
@@ -42,4 +45,29 @@ def _set_parameters(self, data, feature_selection_algorithm, preprocessing_algor
self.__data = data
self.__feature_selection_algorithm = feature_selection_algorithm
self.__preprocessing_algorithm = preprocessing_algorithm
self.__classifier = classifier
self.__classifier = classifier

def optimize(self, population_size, number_of_evaluations):
r"""TODO
"""
# TODO implement optimization process
try:
X = self.__data.get_x()

if self.__feature_selection_algorithm is not None:
X = self.__feature_selection_algorithm.select_features(self.__data.get_x(), self.__data.get_y())

if self.__preprocessing_algorithm is not None:
X = self.__preprocessing_algorithm.process(X)

train_X, test_X, train_y, test_y = train_test_split(
X, self.__data.get_y(), test_size=0.2)

self.__classifier.fit(train_X, train_y)
predictions = self.__classifier.predict(test_X)

return accuracy_score(test_y, predictions)
except:
# infeasible solution as it causes some kind of error
# return negative infinity as we are looking for maximum accuracy in the optimization process
return -float('inf')
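
In outline, `optimize` scores one candidate pipeline: select features, preprocess, hold out 20% of the data, fit, and report accuracy, with any exception treated as an infeasible pipeline worth negative infinity so the maximizing optimizer discards it. A self-contained sketch of the same evaluation flow (plain sklearn, hypothetical names, no NiaAML types):

    from sklearn.datasets import load_wine
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import accuracy_score
    from sklearn.model_selection import train_test_split

    def evaluate_candidate(X, y, classifier):
        try:
            train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2)
            classifier.fit(train_X, train_y)
            return accuracy_score(test_y, classifier.predict(test_X))
        except Exception:
            return -float('inf')  # infeasible candidate in a maximization task

    X, y = load_wine(return_X_y=True)
    print(evaluate_candidate(X, y, RandomForestClassifier()))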
115 changes: 72 additions & 43 deletions niaaml/pipeline_optimizer.py
@@ -3,16 +3,22 @@
from niaaml.classifiers import ClassifierFactory
from niaaml.feature_selection_algorithms import FeatureSelectionAlgorithmFactory
from niaaml.preprocessing_algorithms import PreprocessingAlgorithmFactory
from NiaPy.task import StoppingTask
from NiaPy.task import StoppingTask, OptimizationType
from NiaPy.benchmarks import Benchmark
from NiaPy.algorithms.basic import ParticleSwarmOptimization
from NiaPy.algorithms.utility import AlgorithmUtility

__all__ = [
'PipelineOptimizer',
'PipelineOptimizerBenchmark'
'PipelineOptimizer'
]

class PipelineOptimizer():
def _initialize_population(task, NP, rnd=np.random, **kwargs):
r"""TODO
"""
pop = rnd.uniform(size=(NP, 3))  # use the supplied RNG rather than the global np.random
fpop = np.apply_along_axis(task.eval, 1, pop)
return pop, fpop

class PipelineOptimizer:
r"""Optimization task that finds the best classification pipeline according to the given input.

Date:
@@ -31,88 +37,111 @@ class PipelineOptimizer():
__classifiers (Iterable[Classifier]): Array of possible classifiers.
__pipelines_numeric (numpy.ndarray[float]): Numeric representation of pipelines.
__pipelines (Iterable[Pipeline]): Actual pipelines.
__pop_size (int): Number of individuals in the pipeline optimizer's population.

__classifier_factory (ClassifierFactory): Factory for classifier instances.
__preprocessing_algorithm_factory (PreprocessingAlgorithmFactory): Factory for preprocessing algorithm instances.
__feature_selection_algorithm_factory (FeatureSelectionAlgorithmFactory): Factory for feature selection algorithm instances.

__optimization_algorithm (str): Name of the optimization algorithm to use.
__niapy_algorithm_utility (AlgorithmUtility): Utility class used to get an optimization algorithm.
"""
__data = None
__feature_selection_algorithms = None
__preprocessing_algorithms = None
__classifiers = None

__pop_size = None
__pipelines_numeric = None
__pipelines = None

__classifier_factory = ClassifierFactory()
__preprocessing_algorithm_factory = PreprocessingAlgorithmFactory()
__feature_selection_algorithm_factory = FeatureSelectionAlgorithmFactory()
__optimization_algorithm = None
__niapy_algorithm_utility = AlgorithmUtility()

def __init__(self, **kwargs):
r"""Initialize task.
"""
self._set_parameters(**kwargs)
self.__initialize_population(self.__pop_size)

def _set_parameters(self, data, feature_selection_algorithms, preprocessing_algorithms, classifiers, pop_size, **kwargs):
def _set_parameters(self, data, feature_selection_algorithms, preprocessing_algorithms, classifiers, optimization_algorithm, **kwargs):
r"""Set the parameters/arguments of the task.

Arguments:
data (DataReader): Instance of any DataReader implementation.
feature_selection_algorithms (Iterable[FeatureSelectionAlgorithm]): Array of possible feature selection algorithms.
preprocessing_algorithms (Iterable[PreprocessingAlgorithm]): Array of possible preprocessing algorithms.
classifiers (Iterable[Classifier]): Array of possible classifiers.
pop_size (int): Number of individuals in the pipeline optimizer's population.
optimization_algorithm (str): Name of the optimization algorithm to use.
"""
self.__data = data
self.__optimization_algorithm = optimization_algorithm

self.__preprocessing_algorithms = preprocessing_algorithms
try:
self.__preprocessing_algorithms.index(None)
except:
self.__preprocessing_algorithms.insert(0, None)
if self.__preprocessing_algorithms is not None:
try:
self.__preprocessing_algorithms.index(None)
except:
self.__preprocessing_algorithms.insert(0, None)

self.__classifiers = classifiers
self.__feature_selection_algorithms = feature_selection_algorithms
self.__pop_size = pop_size

def __initialize_population(self, pop_size):
r"""Initialize population of pipelines to find the best setup.
def run(self, pipeline_population_size, classifier_population_size, number_of_pipeline_evaluations, number_of_classifier_evaluations):
r"""TODO
"""
algo = self.__niapy_algorithm_utility.get_algorithm(self.__optimization_algorithm)
algo.NP = pipeline_population_size
algo.InitPopFunc = _initialize_population

task = StoppingTask(
D=3,
nFES=number_of_pipeline_evaluations,
benchmark=_PipelineOptimizerBenchmark(self.__data, self.__feature_selection_algorithms, self.__preprocessing_algorithms, self.__classifiers, classifier_population_size, number_of_classifier_evaluations),
optType=OptimizationType.MAXIMIZATION
)
best = algo.run(task)
return best

class _PipelineOptimizerBenchmark(Benchmark):
r"""TODO
"""
__data = None
__feature_selection_algorithms = None
__preprocessing_algorithms = None
__classifiers = None

__classifier_factory = ClassifierFactory()
__preprocessing_algorithm_factory = PreprocessingAlgorithmFactory()
__feature_selection_algorithm_factory = FeatureSelectionAlgorithmFactory()

Arguments:
pop_size (int): Number of individuals.
def __init__(self, data, feature_selection_algorithms, preprocessing_algorithms, classifiers, classifier_population_size, number_of_classifier_evaluations):
r"""TODO
"""
self.__data = data
self.__feature_selection_algorithms = feature_selection_algorithms
self.__preprocessing_algorithms = preprocessing_algorithms
self.__classifiers = classifiers
self.__classifier_population_size = classifier_population_size
self.__number_of_classifier_evaluations = number_of_classifier_evaluations
Benchmark.__init__(self, 0.0, 1.0)

self.__pipelines_numeric = np.random.uniform(size=(pop_size, 3))
self.__pipelines = [
Pipeline(
data=self.__data,
feature_selection_algorithm=self.__float_to_instance(i[0], self.__feature_selection_algorithms, self.__feature_selection_algorithm_factory) if self.__feature_selection_algorithms is not None and len(self.__feature_selection_algorithms) > 0 else None,
preprocessing_algorithm=self.__float_to_instance(i[1], self.__preprocessing_algorithms, self.__preprocessing_algorithm_factory) if self.__preprocessing_algorithms is not None and len(self.__preprocessing_algorithms) > 0 else None,
classifier=self.__float_to_instance(i[2], self.__classifiers, self.__classifier_factory)
) for i in self.__pipelines_numeric
]

def __float_to_instance(self, value, collection, factory):
r"""TODO
"""
name = collection[np.int(np.round(value * (len(collection) - 1)))]
return factory.get_result(name) if name is not None else None

def optimize_pipeline(self, n_p, n_fes):
algo = ParticleSwarmOptimization(NP=n_p) # TODO define InitPopFunc
task = StoppingTask(D=3, nFES=n_fes, benchmark=PipelineOptimizerBenchmark())
best = algo.run(task)
return best

class PipelineOptimizerBenchmark(Benchmark):
def __init__(self):
Benchmark.__init__(self, 0.0, 1.0)

def function(self):
r"""TODO
"""
# TODO
def evaluate(D, sol):
return 0.0
r"""TODO
"""
pipeline = Pipeline(
data=self.__data,
feature_selection_algorithm=self.__float_to_instance(sol[0], self.__feature_selection_algorithms, self.__feature_selection_algorithm_factory) if self.__feature_selection_algorithms is not None and len(self.__feature_selection_algorithms) > 0 else None,
preprocessing_algorithm=self.__float_to_instance(sol[1], self.__preprocessing_algorithms, self.__preprocessing_algorithm_factory) if self.__preprocessing_algorithms is not None and len(self.__preprocessing_algorithms) > 0 else None,
classifier=self.__float_to_instance(sol[2], self.__classifiers, self.__classifier_factory)
)
return pipeline.optimize(self.__classifier_population_size, self.__number_of_classifier_evaluations)

return evaluate
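
The genotype here is three floats in [0, 1]: one gene each for the feature selection algorithm, the preprocessing algorithm, and the classifier, decoded by `__float_to_instance` into a component name via `round(value * (len(collection) - 1))`. A worked sketch of that decoding, plus a hedged usage of the optimizer (the data reader and the NiaPy algorithm name are assumptions, so the call is left commented out):

    import numpy as np

    def float_to_name(value, collection):
        # same index arithmetic as __float_to_instance
        return collection[int(np.round(value * (len(collection) - 1)))]

    classifiers = ['LinearSVCClassifier', 'MultiLayerPerceptron', 'RandomForestClassifier']
    print(float_to_name(0.7, classifiers))  # 'MultiLayerPerceptron' (round(1.4) == 1)

    # hedged end-to-end sketch; my_data_reader and the algorithm name are assumptions
    # optimizer = PipelineOptimizer(
    #     data=my_data_reader,
    #     feature_selection_algorithms=['jDEFSTH'],
    #     preprocessing_algorithms=[],
    #     classifiers=classifiers,
    #     optimization_algorithm='ParticleSwarmAlgorithm',
    # )
    # best = optimizer.run(pipeline_population_size=10, classifier_population_size=10,
    #                      number_of_pipeline_evaluations=100, number_of_classifier_evaluations=100)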