In [1]:
### PYMOO
from pymoo.core.problem import Problem
from pymoo.core.problem import ElementwiseProblem
from pymoo.algorithms.moo.nsga2 import NSGA2
from pymoo.optimize import minimize
from pymoo.operators.sampling.rnd import BinaryRandomSampling
from pymoo.operators.crossover.pntx import TwoPointCrossover
from pymoo.operators.mutation.bitflip import BitflipMutation
from pymoo.core.sampling import Sampling
from pymoo.core.mutation import Mutation

#multiprocessamento e pymoo
import os
import multiprocessing
from pymoo.core.problem import StarmapParallelization

#Pandas, SKLearn e etc.
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
import plotly.express as px
from sklearn import svm
from sklearn.model_selection import StratifiedKFold
from sklearn import metrics
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt

In [2]:
data = pd.read_pickle('Data/data_normalized.pkl')

In [3]:
colunas = list(data.columns)
colunas.remove('samples')
colunas.remove('type')

X = data[colunas]
y = data['type']

Funções de Avaliação

In [4]:
def _init_worker():
    global pid_, X_worker, y_worker, colunas_worker
    pid_ = os.getpid()
    X_worker = X.copy()
    y_worker = y.copy()
    colunas_worker = colunas.copy()
    print(pid_)

In [5]:
class GeneSelection(ElementwiseProblem):
    def __init__(self, X, y, runner):
        #self.X = X_worker
        #self.y = y_worker
        self.n_features = X.shape[1]
        self.eval_dict = {'n_features':[], 'f1_score':[]}
        super().__init__(   n_var=self.n_features,
    						n_obj=2,
    						n_constr=0,
    						xl=np.zeros(self.n_features),
    						xu=np.ones(self.n_features),
    						elementwise_evaluation=True,
                            type_var=bool,
                            save_history=True,
                            elementwise_runner=runner)

    def _evaluate(self, x, out, *args, **kwargs):
    	# seleciona as features de acordo com o vetor binário
        selected_features = np.where(x == 1)[-1]
        X_selected = X_worker.iloc[:,selected_features]
    	
        # Kfolding usado para separar em treino e teste
        skf = StratifiedKFold(n_splits=10, random_state=42, shuffle=True)
        #X_train, X_valid, y_train, y_valid = train_test_split(X_selected, self.y, test_size=0.1, random_state=100)
    	
    	# treino usando modelo SVM
        clf = svm.SVC(kernel='linear')

        # compute f_1 and AUC on validation set
        f_1 = np.mean(cross_val_score(clf, X_selected, y_worker, cv=skf, scoring='f1_macro'))
        n_features = len(selected_features)
        # salvar os resultados
        self.eval_dict['n_features'].append(n_features)
        self.eval_dict['f1_score'].append(f_1)
    	
    	# define os objetivos a serem minimizados
        out["F"] = [n_features, -f_1]

In [6]:
class BinaryDistributedRandomSampling(Sampling):

    def _do(self, problem, n_samples, **kwargs):
        population = []
        max = 1000
        ns = []
        for i in range(n_samples):
            trues = np.random.randint(1, max + 1)
            individual = np.array([True] * trues + [False] * (problem.n_var - trues))
            np.random.shuffle(individual)
            population.append(individual)
            ns.append((individual.sum()))
        #print(population)
        print(sorted(ns))
        return population

class BitflipMutation(Mutation):

    def _do(self, problem, X, **kwargs):
        prob_var = self.get_prob_var(problem, size=(len(X), 1))
        Xp = np.copy(X)
        flip = np.random.random(X.shape) < prob_var
        Xp[flip] = ~X[flip]
        return Xp


In [7]:
# initialize the thread pool and create the runner
n_proccess = 48
pool = multiprocessing.Pool(n_proccess, initializer=_init_worker)
runner = StarmapParallelization(pool.starmap)

2556425567

2556225563

25573
2556625575

25561
25565
25569
25571
25568
25579
2557225570

25577
25574
25576
25580
25578
25581
25589
25584
25587
25594
25582
25585
25598
25586
25591
25600
25596
25588
25590
25602
25595
2559925606

25597
2560325611

25605
2560125592

25604
25593
25623
25618


In [8]:
problem = GeneSelection(X,y.copy().values, runner)
algorithm = NSGA2(pop_size=96,
				  sampling=BinaryDistributedRandomSampling(),
				  crossover=TwoPointCrossover(),
				  mutation=BitflipMutation(),
                  save_history = True)

In [9]:
res = minimize(problem,  # problem class
			    algorithm,  # NSGA2 algorithm
                ("n_gen", 500), # number of iteration for eval problem class
			    verbose=True)

[6, 7, 19, 44, 80, 106, 115, 135, 138, 172, 180, 180, 191, 203, 214, 233, 236, 251, 263, 267, 272, 275, 302, 323, 329, 335, 339, 347, 352, 353, 354, 359, 364, 367, 377, 387, 395, 418, 418, 427, 433, 443, 444, 452, 460, 519, 520, 525, 559, 564, 580, 593, 596, 619, 630, 630, 634, 651, 653, 663, 664, 679, 692, 696, 705, 706, 724, 734, 776, 797, 811, 814, 815, 816, 823, 836, 848, 852, 854, 881, 882, 893, 900, 916, 920, 926, 928, 950, 951, 953, 965, 979, 985, 991, 991, 995]
n_gen  |  n_eval  | n_nds  |      eps      |   indicator  
     1 |       96 |      9 |             - |             -
     2 |      192 |     12 |  0.0119947605 |         ideal
     3 |      288 |     11 |  0.0110075954 |             f
     4 |      384 |     16 |  2.1284046693 |         nadir
     5 |      480 |     17 |  0.0275174526 |             f
     6 |      576 |     21 |  0.0236300354 |             f
     7 |      672 |     19 |  0.3627602979 |         nadir
     8 |      768 |     18 |  0.0078431373 |         n

ValueError: 
All the 10 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
10 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/base.py", line 1152, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/svm/_base.py", line 190, in fit
    X, y = self._validate_data(
  File "/usr/local/lib/python3.10/dist-packages/sklearn/base.py", line 622, in _validate_data
    X, y = check_X_y(X, y, **check_params)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/utils/validation.py", line 1146, in check_X_y
    X = check_array(
  File "/usr/local/lib/python3.10/dist-packages/sklearn/utils/validation.py", line 795, in check_array
    dtype_orig = np.result_type(*dtypes_orig)
  File "<__array_function__ internals>", line 200, in result_type
ValueError: at least one array or dtype is required


In [None]:
n_evals = np.array([e.evaluator.n_eval for e in res.history])
opt = np.array([e.opt[0].F for e in res.history])

X_res, F_res = res.opt.get("X", "F")

hist = res.history
print(len(hist))
n_evals = []             # corresponding number of function evaluations\
hist_F = []              # the objective space values in each generation
hist_cv = []             # constraint violation in each generation
hist_cv_avg = []         # average constraint violation in the whole population

for algo in hist:

    # store the number of function evaluations
    n_evals.append(algo.evaluator.n_eval)

    # retrieve the optimum from the algorithm
    opt = algo.opt

    # store the least contraint violation and the average in each population
    hist_cv.append(opt.get("CV").min())
    hist_cv_avg.append(algo.pop.get("CV").mean())

    # filter out only the feasible and append and objective space values
    feas = np.where(opt.get("feasible"))[0]
    hist_F.append(opt.get("F")[feas])
approx_ideal = F_res.min(axis=0)
approx_nadir = F_res.max(axis=0)
from pymoo.indicators.hv import Hypervolume

metric = Hypervolume(ref_point= np.array([1.1, 1.1]),
                     norm_ref_point=False,
                     zero_to_one=True,
                     ideal=approx_ideal,
                     nadir=approx_nadir)

hv = [metric.do(_F) for _F in hist_F]

plt.figure(figsize=(7, 5))
plt.plot(n_evals, hv,  color='black', lw=0.7, label="Avg. CV of Pop")
plt.scatter(n_evals, hv,  facecolor="none", edgecolor='black', marker="p")
plt.title("Convergence")
plt.xlabel("Function Evaluations")
plt.ylabel("Hypervolume")
plt.show()

In [None]:
plt.figure(figsize=(7, 5))
plt.scatter(F_res[:, 0], F_res[:, 1], s=30, facecolors='none', edgecolors='blue')
plt.title("Objective Space")
plt.show()