**Algoritmo Differential Evolution em Python**

**Autor**: Iran Freitas Ribeiro

**Disciplina**: Computação Natural

**Professor**: Renato A. Krohling

Implementação baseada no artigo: *A clustering method combining differential evolution with the K-means algorithm*

In [8]:
import numpy as np
import functions as f
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import rand_score
from sklearn.model_selection import train_test_split

In [3]:
def DE(X, N=100, n_gen = 100, n_clusters=3, bounds=None, F=0.01, CR=0.01, lograte = 10):
    """
    Run the Differential Evolution algorithm.

    Params:
        - X: data set; forwarded to the population initialiser, to
          `f.predict` and to `f.eval_individual`
        - N: population size (number of individuals)
        - n_gen: number of generations
        - n_clusters: number of clusters
        - bounds: search-space limits; defaults to [0, 1] when omitted
        - F: currently unused. Each individual carries its own factor,
          drawn from [0.05, 0.40) and replaced by the value returned by
          `f.reproducao_crossover` whenever its trial individual wins.
          Kept so existing positional calls keep working.
        - CR: rate forwarded to `f.reproducao_crossover`
          (presumably the crossover rate -- confirm in `functions`)
        - lograte: print the best result every `lograte` generations;
          a value <= 0 disables printing

    return:
        - the individual stored at the best index of the final population
        - best_score: lowest score seen (np.inf if nothing was evaluated)
        - historico_geracoes: best score recorded after each generation
    """
    # A fresh list per call avoids sharing one mutable default between calls.
    if bounds is None:
        bounds = [0, 1]

    # initialise the population
    U = f.inicia_populacao(N, bounds, n_clusters, X)
    # index and score of the best individual found so far
    best = 0
    best_score = np.inf
    historico_geracoes = []
    # one factor per individual of the original population
    Fu = [0.05 + np.random.uniform()*0.35 for _ in range(N)]

    for g in range(n_gen):
        # build the temporary (trial) population and its factors
        U_t, Fy = f.reproducao_crossover(U, Fu, CR)

        # compare every individual with its trial counterpart
        for i, trial in enumerate(U_t):
            # score of the trial individual
            score_ut = f.eval_individual(trial, f.predict(X, trial), X)

            # score of the current individual
            # NOTE(review): re-evaluated every generation; it could be cached
            # if f.eval_individual is deterministic and U is not modified
            # elsewhere -- confirm before changing.
            score_u = f.eval_individual(U[i], f.predict(X, U[i]), X)

            # greedy selection: the trial replaces the parent only if strictly better
            if score_ut < score_u:
                U[i] = trial
                Fu[i] = Fy[i]
                survivor_score = score_ut
            else:
                survivor_score = score_u

            # only the survivor can improve the global best
            if survivor_score < best_score:
                best_score = survivor_score
                best = i

        # record the best score of this generation
        historico_geracoes.append(best_score)
        if lograte > 0 and g % lograte == 0:
            print ("{} - best: {} SSE: {:.4}".format(g, best,best_score))

    return U[best], best_score, historico_geracoes

In [14]:
def experimentos(n_testes, exp_name = "", args=(), save=False):
    """
    Run the experiments: repeated train/test splits followed by a DE run.

    Params:
        - n_testes: number of independent runs
        - exp_name: experiment name, used in the names of the saved files
        - args: sequence
          [data, labels, N, n_gen, n_clusters, bounds, F, CR];
          the first two entries are split 80/20 into train/test, the
          remaining six are forwarded positionally to `DE`
        - save: when True, write the results as .npy files under
          results/DE_Kmeans/ (the directory is created if missing)

    return:
        - best_exp: individual with the lowest DE score over all runs
          (None when n_testes is 0)
        - best_historico: list with the per-generation score history of
          every run, in execution order
    """
    import os

    best_exp = None
    best_score_exp = np.inf
    best_historico = []
    list_scores = []
    rand_index  = []
    for _ in range(n_testes):
        X_treino, X_teste, _, y_teste = train_test_split(args[0], args[1], test_size=0.2)

        best, best_score, historico = DE(X_treino,args[2],args[3],args[4],args[5],args[6],args[7],lograte=-1)
        list_scores.append(best_score)
        best_historico.append(historico)

        # keep the best individual across runs (previously never updated,
        # so the function always returned and saved None)
        if best_score < best_score_exp:
            best_score_exp = best_score
            best_exp = best

        # evaluate the run on the held-out split
        pred_labels = f.predict(X_teste, best)
        ri = rand_score(y_teste, pred_labels)
        rand_index.append(ri)
        print ("Rand index {:.4f}".format(ri))

    print ("Media rand_index {:.4f}".format(np.mean(rand_index)))
    if save:
        # np.save does not create missing directories
        os.makedirs("results/DE_Kmeans", exist_ok=True)
        np.save("results/DE_Kmeans/rand_index_{}.npy".format(exp_name), np.array(rand_index))
        np.save('results/DE_Kmeans/best_historico_{}.npy'.format(exp_name), np.array(best_historico))
        np.save('results/DE_Kmeans/list_scores_{}.npy'.format(exp_name), np.array(list_scores))
        np.save('results/DE_Kmeans/best_exp_{}.npy'.format(exp_name),best_exp)
    return best_exp, best_historico

# IRIS

In [4]:
# Load the Iris data; columns 0-3 are used as the data matrix and
# column 4 as the reference label of each row.
irisdata, targets = f.load_iris()
X_iris = irisdata.loc[:, [0, 1, 2, 3]].values
labels = irisdata.loc[:, 4].values

In [12]:
# Experiment arguments for Iris, in the order expected by `experimentos`:
# [data, labels, N, n_gen, n_clusters, bounds, F, CR]
bounds = [0.1, 7.9]
n_clusters, N, n_gen = 3, 100, 200
F, CR = 0.2, 0.4
args_iris = [
    X_iris,
    labels,
    N,
    n_gen,
    n_clusters,
    bounds,
    F,
    CR,
]

In [16]:
# Ten independent runs on Iris; results are not written to disk (save defaults to False).
best, historicoDE = experimentos(
    n_testes=10,
    exp_name="DE_iris",
    args=args_iris,
)

Rand index 0.7793
Rand index 0.9632
Rand index 0.8529
Rand index 0.8276
Rand index 0.9126
Rand index 0.9218
Rand index 0.9080
Rand index 0.8621
Rand index 0.8621
Rand index 0.8989
Media rand_index 0.8789


# Wine Dataset

In [17]:
# Load the wine data and preview its first five rows.
wine_data, wine_targets = f.load_wine()
wine_data.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,1,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,1,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
3,1,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
4,1,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


In [18]:
# Rescale the 13 columns labelled 1..13 to the [0, 1] range.
scaler = MinMaxScaler(feature_range=(0, 1))
X_wine = scaler.fit_transform(wine_data[list(range(1, 14))])

In [19]:
# Column 0 is used as the reference label of each wine sample.
labels_wine = wine_data.loc[:, 0].values

In [21]:
# definição dos argumentos
bounds = [np.min(X_wine),np.max(X_wine)]
n_clusters = len(wine_targets)
N = 200
n_gen = 100
F = 0.2
CR = 0.4
args_wine=[X_wine, labels_wine, N, 200, n_clusters,bounds,0.2,CR]
#best_wine, best_score_wine, _ = DE(args[0],args[1],args[2],args[3], args[4],args[5],args[6],lograte=10)
best, historicoDE = experimentos(10, exp_name="DE_wine",args=args_wine)

Rand index 0.8921
Rand index 0.9238
Rand index 0.8429
Rand index 0.8921
Rand index 0.8952
Rand index 0.8952
Rand index 0.9222
Rand index 0.8921
Rand index 0.9095
Rand index 0.8571
Media rand_index 0.8922


ValueError: not enough values to unpack (expected 3, got 2)

# Breast Cancer

In [22]:
breast_cancer, targets_breast = f.load_breast_cancer()

# reference label of every row
labels_breast = breast_cancer.loc[:, 'diagnosis'].values
# keep every column except the first one and extract the raw values
# (presumably the first column is 'diagnosis' -- confirm against the loader)
colunas = breast_cancer.columns
data_breast_cancer = breast_cancer.loc[:, colunas[1:]]
X_data_breast = data_breast_cancer.values

In [None]:
# Display summary statistics (count, mean, std, min, quartiles, max) of the selected columns.
data_breast_cancer.describe()

Unnamed: 0,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,fractal_dimension_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
count,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,...,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0
mean,14.127292,19.289649,91.969033,654.889104,0.09636,0.104341,0.088799,0.048919,0.181162,0.062798,...,16.26919,25.677223,107.261213,880.583128,0.132369,0.254265,0.272188,0.114606,0.290076,0.083946
std,3.524049,4.301036,24.298981,351.914129,0.014064,0.052813,0.07972,0.038803,0.027414,0.00706,...,4.833242,6.146258,33.602542,569.356993,0.022832,0.157336,0.208624,0.065732,0.061867,0.018061
min,6.981,9.71,43.79,143.5,0.05263,0.01938,0.0,0.0,0.106,0.04996,...,7.93,12.02,50.41,185.2,0.07117,0.02729,0.0,0.0,0.1565,0.05504
25%,11.7,16.17,75.17,420.3,0.08637,0.06492,0.02956,0.02031,0.1619,0.0577,...,13.01,21.08,84.11,515.3,0.1166,0.1472,0.1145,0.06493,0.2504,0.07146
50%,13.37,18.84,86.24,551.1,0.09587,0.09263,0.06154,0.0335,0.1792,0.06154,...,14.97,25.41,97.66,686.5,0.1313,0.2119,0.2267,0.09993,0.2822,0.08004
75%,15.78,21.8,104.1,782.7,0.1053,0.1304,0.1307,0.074,0.1957,0.06612,...,18.79,29.72,125.4,1084.0,0.146,0.3391,0.3829,0.1614,0.3179,0.09208
max,28.11,39.28,188.5,2501.0,0.1634,0.3454,0.4268,0.2012,0.304,0.09744,...,36.04,49.54,251.2,4254.0,0.2226,1.058,1.252,0.291,0.6638,0.2075


In [23]:
# transforma os dados para o intervalo [0,1]
# Fit the scaler on the data, then map every column to the [0, 1] range.
scaler_breast = MinMaxScaler(feature_range=(0, 1))
X_data_breast = scaler_breast.fit(X_data_breast).transform(X_data_breast)

In [24]:
# definição dos argumentos
bounds = [np.min(X_data_breast),np.max(X_data_breast)]
n_clusters = len(targets)
N = 200
n_gen = 200
F = 0.2
CR = 0.4
args_breast=[X_data_breast, labels_breast ,N,n_gen,n_clusters,bounds,0.2,CR]
best, historicoDE = experimentos(10, exp_name="DE_breast",args=args_breast)

Rand index 0.8432
Rand index 0.6940
Rand index 0.7429
Rand index 0.6730
Rand index 0.8738
Rand index 0.7463
Rand index 0.7035
Rand index 0.7272
Rand index 0.7541
Rand index 0.8031
Media rand_index 0.7561
