# Summary
This notebook applies the proposed method (GMM clustering) to a set of microarray datasets.

In [1]:
import sys
sys.path.append("..")

# GPU configuration: build a TensorFlow session with custom options and hand it
# to the Keras backend so Keras uses it instead of creating its own.
# NOTE(review): tf.ConfigProto / tf.Session and keras.backend.tensorflow_backend
# look like TensorFlow 1.x-era APIs — confirm the pinned TensorFlow/Keras versions.
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
config = tf.ConfigProto()
config.gpu_options.allow_growth = True  # grow GPU memory usage on demand instead of reserving it up front
config.log_device_placement = True  # log which device each operation is placed on (verbose output)
sess = tf.Session(config=config)
set_session(sess)  # make this session the one used by the Keras TensorFlow backend


import random
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.metrics.cluster import adjusted_rand_score, normalized_mutual_info_score
from sklearn.metrics import silhouette_score
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scripts.data_generator as data_generator
import scripts.feature_ranking as feature_ranking
import scripts.features_2d as features_2d
import scripts.ga as ga
import scripts.preprocess as preprocess
import scripts.ga_evaluation as ga_evaluation
import scripts.bio_analysis as bio_analysis
import tensorflow as tf
from IPython import get_ipython
from tqdm import tqdm
from collections import Counter
import seaborn as sns
import time


# Switch matplotlib to interactive mode.
plt.ion()
plt.show()

# Seed Python's and NumPy's global random generators.
# NOTE(review): the following cell re-seeds both generators with 0, so when the
# notebook is run top to bottom the value 1 set here does not stay in effect.
random_state=1
random.seed( random_state )
np.random.seed(random_state)


# Automatically reload imported modules before each cell runs, so edits to the
# local `scripts` package are picked up without restarting the kernel.
# (Keep comments on their own lines: a line magic takes the rest of its line as arguments.)
%load_ext autoreload
%autoreload 2

Device mapping:
/job:localhost/replica:0/task:0/device:XLA_CPU:0 -> device: XLA_CPU device
/job:localhost/replica:0/task:0/device:XLA_GPU:0 -> device: XLA_GPU device
/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: GeForce RTX 2060, pci bus id: 0000:01:00.0, compute capability: 7.5



Using TensorFlow backend.

In a future version of Scanpy, `scanpy.api` will be removed.
Simply use `import scanpy as sc` and `import scanpy.external as sce` instead.



# Steps
- 1D clustering to select the most discriminant features

- 2D clustering to identify redundant, close, and outlier features

In [2]:
# Re-seed Python's and NumPy's global random generators with 0 before the
# experiments. This replaces the seed of 1 set in the setup cell above.
random_state=0
random.seed( random_state )
np.random.seed(random_state)

In [6]:
import os  # local import: only needed to make sure the report folder exists

# For every microarray dataset: rank the features (1D step), run the genetic
# algorithm with the selected clustering method, attach timing and feature-count
# metadata to the returned solutions, and checkpoint the accumulated results to
# an Excel report after each dataset.
results = None
filenames = np.array(['alon', 'borovecki', 'chiaretti', 'christensen', 'golub', 'gordon',
       'khan', 'sorlie', 'su', 'yeoh', 'west'])
clustering = "gmm"
path = '../data/microarray/'
method = "adapted_ratkowsky_lance"
imp_f = np.arange(20)  # not read in this cell; kept in case a later cell relies on it

# Fail fast: each dataset takes minutes to process, so make sure the report
# folder exists before starting instead of crashing at the first to_excel().
os.makedirs("../reports", exist_ok=True)

for name in filenames:
    t1 = time.time()
    # NOTE(security): unpickling can execute arbitrary code — only load trusted files.
    data = pd.read_pickle(f"{path}{name}.pkl")
    # The "truth" column holds the reference labels; everything else is the feature matrix.
    truth = data["truth"].values
    data = data.drop("truth", axis=1).values

    # The number of clusters is taken from the number of distinct reference labels.
    n_clusters = len(np.unique(truth))

    # Built from `path` so the Z-file location always follows the dataset folder.
    z_file = f"{path}Z_{name}_correlation.npy"
    print(f"\n##########  {name}, {data.shape}")

    # Clustering 1D
    meta_features = feature_ranking.rank_features(
        data,
        nb_bins=20,
        rank_threshold=90,
        z_file=z_file,
        metric='correlation',
        redundant_threshold=0.4,
    )
    t2 = time.time()
    print(f"TIME: 1d Features : {(t2-t1)/60} min")

    # Clustering 2D is currently disabled (it used features_2d.run with the model
    # file '../models/gmm_arl.h5' and add_close_population=False). t3 and t4 are
    # still recorded so the timing columns of the report keep the same layout;
    # the reported "2d scores" duration is therefore close to zero.
    t3 = time.time()
    t4 = time.time()
    print(f"TIME: 2d scores: {(t4-t3)/60} min")
    round_size = 3
    epochs = 10*round_size

    # Per-source sampling weights handed to the GA.
    # NOTE(review): presumably the share of the population drawn from each source
    # ("ga") and its upper bound ("max") — confirm against scripts/ga.py.
    # Previously tried weights: ARCHIVE2D 0.25, CLOSE 0.4, IMP1D 0.25, RANDOM 0.1.
    sampling = {
        "ARCHIVE2D": {"ga": 0, "max": 0},
        "CLOSE": {"ga": 0.35, "max": 0.35},
        "IMP1D": {"ga": 0.35, "max": 0.35},
        "RANDOM": {"ga": 0.3, "max": 0.3},
    }
    params = ga.ga_parameters(
        n_clusters,
        data.shape[1],
        truth,
        meta_features,
        method=method,
        truth_methods=['ari'],
        archive_2d=None,  # was population[:data.shape[1] // 4] when the 2D step was enabled
        epochs=epochs,
        sampling=sampling,
        round_size=round_size,
        allow_subspace_overlap=True,
        improvement_per_mutation_report=False,
        clustering=clustering,
    )
    solutions, archive = ga.run(data, params)
    solutions["dataset_name"] = name

    t5 = time.time()
    n_total = t5-t1
    print(f"TIME: GA: {(t5-t4)/60} min")
    print(f"TIME: Total: {(t5-t1)/60} min")

    # Stage durations in minutes, rounded to one decimal.
    solutions["total_time"] = round((t5-t1)/60, 1)
    solutions["t(feature_sel)"] = round((t2-t1)/60, 1)
    solutions["t(cnn)"] = round((t3-t2)/60, 1)
    solutions["t(clustering2d)"] = round((t4-t3)/60, 1)
    solutions["t(ga)"] = round((t5-t4)/60, 1)

    solutions["n_ga"] = archive.shape[0]
    # NOTE(review): n_total is the total elapsed time in seconds, not a count;
    # the column name suggests a number of CNN evaluations — confirm what is intended.
    solutions["n_cnn"] = n_total
    solutions["input_size"] = data.shape[1]

    # NOTE(review): this counts rows where "redundant" == 1 but stores them under
    # "n_non_redundant" — verify the meaning of the flag in feature_ranking.
    solutions["n_non_redundant"] = meta_features[meta_features["redundant"] == 1].shape[0]
    solutions["n_imp"] = meta_features[meta_features["relevance"] != 0].shape[0]
    solutions["n_imp4"] = meta_features[meta_features["relevance"] == 4].shape[0]
    solutions["n_imp3"] = meta_features[meta_features["relevance"] == 3].shape[0]

    if results is None:
        results = solutions
    else:
        results = pd.concat([results, solutions], ignore_index=True)
    # Checkpoint after every dataset so a later failure does not lose finished runs.
    results.to_excel(f"../reports/microarray_{clustering}_{method}.xlsx")
    


##########  singh, (102, 12600)
*** Computing 1D feature ranking ...
Dispersion tests took 0.42 sec
Entropy computation 1.9 sec
KNN computation 9.59 sec
Sorting and thresholds 0.04 sec
Performing hierarchical clustering...
Hierarchical clustering 6.96 sec


  0%|          | 0/31 [00:00<?, ?it/s]

Handle redundant features 1.51 sec
Returning 6021 redundant features and  735 important features
TIME: 1d Features : 0.34079992373784385 min
TIME: 2d scores: 7.947285970052083e-09 min
*** Optimization algorithm 
Non redundant features 6579, orig size 12600, nb imp : 735
adding 50 random population


 10%|▉         | 3/31 [00:09<01:27,  3.14s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.32,0.04,"[240, 658, 690, 716, 888, 933, 1146, 1175, 126...",0.06,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.18,0.66,165.0,"Counter({0: 82, 1: 20})"


adding 6 random population


 19%|█▉        | 6/31 [00:44<03:14,  7.79s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.32,0.04,"[240, 658, 690, 716, 888, 933, 1146, 1175, 126...",0.06,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.18,0.66,165.0,"Counter({0: 82, 1: 20})"
1,0.32,0.05,"[69, 254, 562, 670, 705, 812, 855, 1007, 1015,...",0.07,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.16,0.65,172.0,"Counter({0: 81, 1: 21})"


adding 39 random population


 29%|██▉       | 9/31 [01:21<03:23,  9.26s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.32,0.04,"[240, 658, 690, 716, 888, 933, 1146, 1175, 126...",0.06,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.18,0.66,165.0,"Counter({0: 82, 1: 20})"
1,0.32,0.05,"[69, 254, 562, 670, 705, 812, 855, 1007, 1015,...",0.07,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.16,0.65,172.0,"Counter({0: 81, 1: 21})"
2,0.31,0.04,"[7, 37, 160, 212, 302, 505, 524, 605, 668, 948...",0.06,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.29,0.65,201.0,"Counter({0: 82, 1: 20})"


adding 50 random population


 39%|███▊      | 12/31 [02:01<03:06,  9.80s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.32,0.04,"[240, 658, 690, 716, 888, 933, 1146, 1175, 126...",0.06,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.18,0.66,165.0,"Counter({0: 82, 1: 20})"
1,0.32,0.05,"[69, 254, 562, 670, 705, 812, 855, 1007, 1015,...",0.07,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.16,0.65,172.0,"Counter({0: 81, 1: 21})"
2,0.31,0.04,"[7, 37, 160, 212, 302, 505, 524, 605, 668, 948...",0.06,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.29,0.65,201.0,"Counter({0: 82, 1: 20})"
3,0.33,0.05,"[103, 218, 387, 570, 668, 690, 720, 754, 888, ...",0.07,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.18,0.66,168.0,"Counter({0: 81, 1: 21})"


adding 11 random population


 48%|████▊     | 15/31 [02:36<02:33,  9.59s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.32,0.04,"[240, 658, 690, 716, 888, 933, 1146, 1175, 126...",0.06,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.18,0.66,165.0,"Counter({0: 82, 1: 20})"
1,0.32,0.05,"[69, 254, 562, 670, 705, 812, 855, 1007, 1015,...",0.07,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.16,0.65,172.0,"Counter({0: 81, 1: 21})"
2,0.31,0.04,"[7, 37, 160, 212, 302, 505, 524, 605, 668, 948...",0.06,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.29,0.65,201.0,"Counter({0: 82, 1: 20})"
3,0.33,0.05,"[103, 218, 387, 570, 668, 690, 720, 754, 888, ...",0.07,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.18,0.66,168.0,"Counter({0: 81, 1: 21})"
4,0.32,0.05,"[102, 160, 190, 276, 467, 507, 528, 561, 592, ...",0.07,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.17,0.65,175.0,"Counter({0: 81, 1: 21})"


adding 33 random population


 58%|█████▊    | 18/31 [03:16<02:13, 10.28s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.32,0.04,"[240, 658, 690, 716, 888, 933, 1146, 1175, 126...",0.06,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.18,0.66,165.0,"Counter({0: 82, 1: 20})"
1,0.32,0.05,"[69, 254, 562, 670, 705, 812, 855, 1007, 1015,...",0.07,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.16,0.65,172.0,"Counter({0: 81, 1: 21})"
2,0.31,0.04,"[7, 37, 160, 212, 302, 505, 524, 605, 668, 948...",0.06,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.29,0.65,201.0,"Counter({0: 82, 1: 20})"
3,0.33,0.05,"[103, 218, 387, 570, 668, 690, 720, 754, 888, ...",0.07,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.18,0.66,168.0,"Counter({0: 81, 1: 21})"
4,0.32,0.05,"[102, 160, 190, 276, 467, 507, 528, 561, 592, ...",0.07,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.17,0.65,175.0,"Counter({0: 81, 1: 21})"
5,0.3,0.05,"[125, 169, 201, 210, 233, 546, 593, 670, 675, ...",0.07,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.22,0.64,202.0,"Counter({0: 81, 1: 21})"


adding 50 random population


 68%|██████▊   | 21/31 [03:56<01:40, 10.04s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.32,0.04,"[240, 658, 690, 716, 888, 933, 1146, 1175, 126...",0.06,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.18,0.66,165.0,"Counter({0: 82, 1: 20})"
1,0.32,0.05,"[69, 254, 562, 670, 705, 812, 855, 1007, 1015,...",0.07,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.16,0.65,172.0,"Counter({0: 81, 1: 21})"
2,0.31,0.04,"[7, 37, 160, 212, 302, 505, 524, 605, 668, 948...",0.06,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.29,0.65,201.0,"Counter({0: 82, 1: 20})"
3,0.33,0.05,"[103, 218, 387, 570, 668, 690, 720, 754, 888, ...",0.07,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.18,0.66,168.0,"Counter({0: 81, 1: 21})"
4,0.32,0.05,"[102, 160, 190, 276, 467, 507, 528, 561, 592, ...",0.07,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.17,0.65,175.0,"Counter({0: 81, 1: 21})"
5,0.3,0.05,"[125, 169, 201, 210, 233, 546, 593, 670, 675, ...",0.07,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.22,0.64,202.0,"Counter({0: 81, 1: 21})"
6,0.31,0.05,"[124, 128, 147, 177, 400, 681, 687, 1134, 1142...",0.07,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.19,0.65,183.0,"Counter({0: 81, 1: 21})"


adding 29 random population


 77%|███████▋  | 24/31 [04:34<01:08,  9.83s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.32,0.04,"[240, 658, 690, 716, 888, 933, 1146, 1175, 126...",0.06,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.18,0.66,165.0,"Counter({0: 82, 1: 20})"
1,0.32,0.05,"[69, 254, 562, 670, 705, 812, 855, 1007, 1015,...",0.07,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.16,0.65,172.0,"Counter({0: 81, 1: 21})"
2,0.31,0.04,"[7, 37, 160, 212, 302, 505, 524, 605, 668, 948...",0.06,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.29,0.65,201.0,"Counter({0: 82, 1: 20})"
3,0.33,0.05,"[103, 218, 387, 570, 668, 690, 720, 754, 888, ...",0.07,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.18,0.66,168.0,"Counter({0: 81, 1: 21})"
4,0.32,0.05,"[102, 160, 190, 276, 467, 507, 528, 561, 592, ...",0.07,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.17,0.65,175.0,"Counter({0: 81, 1: 21})"
5,0.3,0.05,"[125, 169, 201, 210, 233, 546, 593, 670, 675, ...",0.07,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.22,0.64,202.0,"Counter({0: 81, 1: 21})"
6,0.31,0.05,"[124, 128, 147, 177, 400, 681, 687, 1134, 1142...",0.07,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.19,0.65,183.0,"Counter({0: 81, 1: 21})"
7,0.3,0.05,"[102, 149, 318, 693, 715, 860, 942, 966, 1101,...",0.07,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.21,0.64,201.0,"Counter({0: 81, 1: 21})"


adding 41 random population


 87%|████████▋ | 27/31 [05:14<00:40, 10.09s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.32,0.04,"[240, 658, 690, 716, 888, 933, 1146, 1175, 126...",0.06,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.18,0.66,165.0,"Counter({0: 82, 1: 20})"
1,0.32,0.05,"[69, 254, 562, 670, 705, 812, 855, 1007, 1015,...",0.07,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.16,0.65,172.0,"Counter({0: 81, 1: 21})"
2,0.31,0.04,"[7, 37, 160, 212, 302, 505, 524, 605, 668, 948...",0.06,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.29,0.65,201.0,"Counter({0: 82, 1: 20})"
3,0.33,0.05,"[103, 218, 387, 570, 668, 690, 720, 754, 888, ...",0.07,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.18,0.66,168.0,"Counter({0: 81, 1: 21})"
4,0.32,0.05,"[102, 160, 190, 276, 467, 507, 528, 561, 592, ...",0.07,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.17,0.65,175.0,"Counter({0: 81, 1: 21})"
5,0.3,0.05,"[125, 169, 201, 210, 233, 546, 593, 670, 675, ...",0.07,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.22,0.64,202.0,"Counter({0: 81, 1: 21})"
6,0.31,0.05,"[124, 128, 147, 177, 400, 681, 687, 1134, 1142...",0.07,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.19,0.65,183.0,"Counter({0: 81, 1: 21})"
7,0.3,0.05,"[102, 149, 318, 693, 715, 860, 942, 966, 1101,...",0.07,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.21,0.64,201.0,"Counter({0: 81, 1: 21})"
8,0.31,0.04,"[261, 352, 658, 888, 983, 1030, 1238, 1247, 15...",0.06,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.11,0.65,165.0,"Counter({0: 82, 1: 20})"


adding 22 random population


 97%|█████████▋| 30/31 [05:47<00:09,  9.38s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.32,0.04,"[240, 658, 690, 716, 888, 933, 1146, 1175, 126...",0.06,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.18,0.66,165.0,"Counter({0: 82, 1: 20})"
1,0.32,0.05,"[69, 254, 562, 670, 705, 812, 855, 1007, 1015,...",0.07,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.16,0.65,172.0,"Counter({0: 81, 1: 21})"
2,0.31,0.04,"[7, 37, 160, 212, 302, 505, 524, 605, 668, 948...",0.06,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.29,0.65,201.0,"Counter({0: 82, 1: 20})"
3,0.33,0.05,"[103, 218, 387, 570, 668, 690, 720, 754, 888, ...",0.07,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.18,0.66,168.0,"Counter({0: 81, 1: 21})"
4,0.32,0.05,"[102, 160, 190, 276, 467, 507, 528, 561, 592, ...",0.07,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.17,0.65,175.0,"Counter({0: 81, 1: 21})"
5,0.3,0.05,"[125, 169, 201, 210, 233, 546, 593, 670, 675, ...",0.07,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.22,0.64,202.0,"Counter({0: 81, 1: 21})"
6,0.31,0.05,"[124, 128, 147, 177, 400, 681, 687, 1134, 1142...",0.07,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.19,0.65,183.0,"Counter({0: 81, 1: 21})"
7,0.3,0.05,"[102, 149, 318, 693, 715, 860, 942, 966, 1101,...",0.07,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.21,0.64,201.0,"Counter({0: 81, 1: 21})"
8,0.31,0.04,"[261, 352, 658, 888, 983, 1030, 1238, 1247, 15...",0.06,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.11,0.65,165.0,"Counter({0: 82, 1: 20})"
9,0.3,0.04,"[124, 143, 353, 363, 599, 778, 781, 919, 942, ...",0.06,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...",2.11,0.63,190.0,"Counter({0: 82, 1: 20})"


adding 47 random population


100%|██████████| 31/31 [06:16<00:00, 12.16s/it]


TIME: GA: 6.2816555261611935 min
TIME: Total: 6.622455875078837 min

##########  chiaretti, (128, 12625)
*** Computing 1D feature ranking ...
Dispersion tests took 0.49 sec
Entropy computation 2.13 sec
KNN computation 12.41 sec
Sorting and thresholds 0.04 sec
Performing hierarchical clustering...
Hierarchical clustering 7.62 sec


  0%|          | 0/31 [00:00<?, ?it/s]

Handle redundant features 1.88 sec
Returning 3395 redundant features and  346 important features
TIME: 1d Features : 0.40981199343999225 min
TIME: 2d scores: 7.947285970052083e-09 min
*** Optimization algorithm 
Non redundant features 9230, orig size 12625, nb imp : 346
adding 50 random population


 10%|▉         | 3/31 [00:14<02:12,  4.73s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.1,0.04,"[41, 214, 249, 319, 458, 538, 539, 646, 716, 7...",0.18,"[3, 5, 2, 0, 2, 3, 0, 2, 0, 5, 3, 2, 3, 3, 5, ...",0.37,0.13,45.0,"Counter({2: 41, 1: 20, 0: 19, 5: 18, 3: 17, 4:..."


adding 4 random population


 19%|█▉        | 6/31 [01:07<04:40, 11.21s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.1,0.04,"[41, 214, 249, 319, 458, 538, 539, 646, 716, 7...",0.18,"[3, 5, 2, 0, 2, 3, 0, 2, 0, 5, 3, 2, 3, 3, 5, ...",0.37,0.13,45.0,"Counter({2: 41, 1: 20, 0: 19, 5: 18, 3: 17, 4:..."
1,0.1,0.02,"[15, 41, 102, 249, 423, 538, 578, 716, 764, 81...",0.19,"[3, 0, 3, 0, 2, 0, 3, 3, 0, 3, 3, 3, 3, 3, 3, ...",0.45,0.13,53.0,"Counter({3: 49, 2: 25, 0: 21, 5: 16, 4: 11, 1:..."


adding 12 random population


 29%|██▉       | 9/31 [02:12<05:23, 14.71s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.1,0.04,"[41, 214, 249, 319, 458, 538, 539, 646, 716, 7...",0.18,"[3, 5, 2, 0, 2, 3, 0, 2, 0, 5, 3, 2, 3, 3, 5, ...",0.37,0.13,45.0,"Counter({2: 41, 1: 20, 0: 19, 5: 18, 3: 17, 4:..."
1,0.1,0.02,"[15, 41, 102, 249, 423, 538, 578, 716, 764, 81...",0.19,"[3, 0, 3, 0, 2, 0, 3, 3, 0, 3, 3, 3, 3, 3, 3, ...",0.45,0.13,53.0,"Counter({3: 49, 2: 25, 0: 21, 5: 16, 4: 11, 1:..."
2,0.1,0.05,"[458, 509, 538, 646, 716, 764, 765, 817, 829, ...",0.26,"[5, 0, 5, 0, 5, 5, 3, 5, 0, 5, 5, 3, 0, 5, 3, ...",0.46,0.13,64.0,"Counter({3: 37, 0: 35, 5: 23, 2: 15, 4: 12, 1:..."


adding 19 random population


 39%|███▊      | 12/31 [04:04<07:14, 22.87s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.1,0.04,"[41, 214, 249, 319, 458, 538, 539, 646, 716, 7...",0.18,"[3, 5, 2, 0, 2, 3, 0, 2, 0, 5, 3, 2, 3, 3, 5, ...",0.37,0.13,45.0,"Counter({2: 41, 1: 20, 0: 19, 5: 18, 3: 17, 4:..."
1,0.1,0.02,"[15, 41, 102, 249, 423, 538, 578, 716, 764, 81...",0.19,"[3, 0, 3, 0, 2, 0, 3, 3, 0, 3, 3, 3, 3, 3, 3, ...",0.45,0.13,53.0,"Counter({3: 49, 2: 25, 0: 21, 5: 16, 4: 11, 1:..."
2,0.1,0.05,"[458, 509, 538, 646, 716, 764, 765, 817, 829, ...",0.26,"[5, 0, 5, 0, 5, 5, 3, 5, 0, 5, 5, 3, 0, 5, 3, ...",0.46,0.13,64.0,"Counter({3: 37, 0: 35, 5: 23, 2: 15, 4: 12, 1:..."
3,0.1,0.08,"[182, 249, 281, 410, 458, 538, 646, 716, 796, ...",0.13,"[1, 5, 2, 0, 2, 1, 5, 2, 1, 3, 1, 2, 1, 1, 5, ...",0.37,0.2,61.0,"Counter({2: 32, 4: 29, 1: 20, 5: 18, 3: 17, 0:..."


adding 24 random population


 48%|████▊     | 15/31 [05:00<04:34, 17.17s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.1,0.04,"[41, 214, 249, 319, 458, 538, 539, 646, 716, 7...",0.18,"[3, 5, 2, 0, 2, 3, 0, 2, 0, 5, 3, 2, 3, 3, 5, ...",0.37,0.13,45.0,"Counter({2: 41, 1: 20, 0: 19, 5: 18, 3: 17, 4:..."
1,0.1,0.02,"[15, 41, 102, 249, 423, 538, 578, 716, 764, 81...",0.19,"[3, 0, 3, 0, 2, 0, 3, 3, 0, 3, 3, 3, 3, 3, 3, ...",0.45,0.13,53.0,"Counter({3: 49, 2: 25, 0: 21, 5: 16, 4: 11, 1:..."
2,0.1,0.05,"[458, 509, 538, 646, 716, 764, 765, 817, 829, ...",0.26,"[5, 0, 5, 0, 5, 5, 3, 5, 0, 5, 5, 3, 0, 5, 3, ...",0.46,0.13,64.0,"Counter({3: 37, 0: 35, 5: 23, 2: 15, 4: 12, 1:..."
3,0.1,0.08,"[182, 249, 281, 410, 458, 538, 646, 716, 796, ...",0.13,"[1, 5, 2, 0, 2, 1, 5, 2, 1, 3, 1, 2, 1, 1, 5, ...",0.37,0.2,61.0,"Counter({2: 32, 4: 29, 1: 20, 5: 18, 3: 17, 0:..."
4,0.1,0.09,"[18, 25, 92, 249, 320, 321, 423, 817, 1157, 14...",0.3,"[5, 0, 2, 5, 2, 5, 0, 2, 5, 2, 5, 5, 5, 5, 0, ...",0.37,0.13,53.0,"Counter({5: 30, 0: 30, 2: 27, 4: 17, 1: 16, 3:..."


adding 15 random population


 58%|█████▊    | 18/31 [06:17<03:59, 18.44s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.1,0.04,"[41, 214, 249, 319, 458, 538, 539, 646, 716, 7...",0.18,"[3, 5, 2, 0, 2, 3, 0, 2, 0, 5, 3, 2, 3, 3, 5, ...",0.37,0.13,45.0,"Counter({2: 41, 1: 20, 0: 19, 5: 18, 3: 17, 4:..."
1,0.1,0.02,"[15, 41, 102, 249, 423, 538, 578, 716, 764, 81...",0.19,"[3, 0, 3, 0, 2, 0, 3, 3, 0, 3, 3, 3, 3, 3, 3, ...",0.45,0.13,53.0,"Counter({3: 49, 2: 25, 0: 21, 5: 16, 4: 11, 1:..."
2,0.1,0.05,"[458, 509, 538, 646, 716, 764, 765, 817, 829, ...",0.26,"[5, 0, 5, 0, 5, 5, 3, 5, 0, 5, 5, 3, 0, 5, 3, ...",0.46,0.13,64.0,"Counter({3: 37, 0: 35, 5: 23, 2: 15, 4: 12, 1:..."
3,0.1,0.08,"[182, 249, 281, 410, 458, 538, 646, 716, 796, ...",0.13,"[1, 5, 2, 0, 2, 1, 5, 2, 1, 3, 1, 2, 1, 1, 5, ...",0.37,0.2,61.0,"Counter({2: 32, 4: 29, 1: 20, 5: 18, 3: 17, 0:..."
4,0.1,0.09,"[18, 25, 92, 249, 320, 321, 423, 817, 1157, 14...",0.3,"[5, 0, 2, 5, 2, 5, 0, 2, 5, 2, 5, 5, 5, 5, 0, ...",0.37,0.13,53.0,"Counter({5: 30, 0: 30, 2: 27, 4: 17, 1: 16, 3:..."
5,0.1,0.03,"[102, 182, 221, 321, 383, 458, 516, 538, 539, ...",0.16,"[3, 0, 1, 0, 5, 0, 0, 5, 3, 5, 3, 5, 3, 1, 0, ...",0.47,0.14,114.0,"Counter({3: 27, 1: 27, 0: 21, 5: 20, 4: 20, 2:..."


adding 15 random population


 68%|██████▊   | 21/31 [08:59<05:14, 31.46s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.1,0.04,"[41, 214, 249, 319, 458, 538, 539, 646, 716, 7...",0.18,"[3, 5, 2, 0, 2, 3, 0, 2, 0, 5, 3, 2, 3, 3, 5, ...",0.37,0.13,45.0,"Counter({2: 41, 1: 20, 0: 19, 5: 18, 3: 17, 4:..."
1,0.1,0.02,"[15, 41, 102, 249, 423, 538, 578, 716, 764, 81...",0.19,"[3, 0, 3, 0, 2, 0, 3, 3, 0, 3, 3, 3, 3, 3, 3, ...",0.45,0.13,53.0,"Counter({3: 49, 2: 25, 0: 21, 5: 16, 4: 11, 1:..."
2,0.1,0.05,"[458, 509, 538, 646, 716, 764, 765, 817, 829, ...",0.26,"[5, 0, 5, 0, 5, 5, 3, 5, 0, 5, 5, 3, 0, 5, 3, ...",0.46,0.13,64.0,"Counter({3: 37, 0: 35, 5: 23, 2: 15, 4: 12, 1:..."
3,0.1,0.08,"[182, 249, 281, 410, 458, 538, 646, 716, 796, ...",0.13,"[1, 5, 2, 0, 2, 1, 5, 2, 1, 3, 1, 2, 1, 1, 5, ...",0.37,0.2,61.0,"Counter({2: 32, 4: 29, 1: 20, 5: 18, 3: 17, 0:..."
4,0.1,0.09,"[18, 25, 92, 249, 320, 321, 423, 817, 1157, 14...",0.3,"[5, 0, 2, 5, 2, 5, 0, 2, 5, 2, 5, 5, 5, 5, 0, ...",0.37,0.13,53.0,"Counter({5: 30, 0: 30, 2: 27, 4: 17, 1: 16, 3:..."
5,0.1,0.03,"[102, 182, 221, 321, 383, 458, 516, 538, 539, ...",0.16,"[3, 0, 1, 0, 5, 0, 0, 5, 3, 5, 3, 5, 3, 1, 0, ...",0.47,0.14,114.0,"Counter({3: 27, 1: 27, 0: 21, 5: 20, 4: 20, 2:..."
6,0.1,0.01,"[102, 187, 290, 368, 530, 561, 866, 1094, 1219...",0.06,"[5, 0, 5, 0, 5, 4, 0, 5, 3, 5, 5, 5, 5, 3, 4, ...",0.48,0.08,151.0,"Counter({5: 40, 2: 32, 3: 29, 0: 12, 4: 9, 1: 6})"


adding 5 random population


 77%|███████▋  | 24/31 [11:15<03:45, 32.27s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.1,0.04,"[41, 214, 249, 319, 458, 538, 539, 646, 716, 7...",0.18,"[3, 5, 2, 0, 2, 3, 0, 2, 0, 5, 3, 2, 3, 3, 5, ...",0.37,0.13,45.0,"Counter({2: 41, 1: 20, 0: 19, 5: 18, 3: 17, 4:..."
1,0.1,0.02,"[15, 41, 102, 249, 423, 538, 578, 716, 764, 81...",0.19,"[3, 0, 3, 0, 2, 0, 3, 3, 0, 3, 3, 3, 3, 3, 3, ...",0.45,0.13,53.0,"Counter({3: 49, 2: 25, 0: 21, 5: 16, 4: 11, 1:..."
2,0.1,0.05,"[458, 509, 538, 646, 716, 764, 765, 817, 829, ...",0.26,"[5, 0, 5, 0, 5, 5, 3, 5, 0, 5, 5, 3, 0, 5, 3, ...",0.46,0.13,64.0,"Counter({3: 37, 0: 35, 5: 23, 2: 15, 4: 12, 1:..."
3,0.1,0.08,"[182, 249, 281, 410, 458, 538, 646, 716, 796, ...",0.13,"[1, 5, 2, 0, 2, 1, 5, 2, 1, 3, 1, 2, 1, 1, 5, ...",0.37,0.2,61.0,"Counter({2: 32, 4: 29, 1: 20, 5: 18, 3: 17, 0:..."
4,0.1,0.09,"[18, 25, 92, 249, 320, 321, 423, 817, 1157, 14...",0.3,"[5, 0, 2, 5, 2, 5, 0, 2, 5, 2, 5, 5, 5, 5, 0, ...",0.37,0.13,53.0,"Counter({5: 30, 0: 30, 2: 27, 4: 17, 1: 16, 3:..."
5,0.1,0.03,"[102, 182, 221, 321, 383, 458, 516, 538, 539, ...",0.16,"[3, 0, 1, 0, 5, 0, 0, 5, 3, 5, 3, 5, 3, 1, 0, ...",0.47,0.14,114.0,"Counter({3: 27, 1: 27, 0: 21, 5: 20, 4: 20, 2:..."
6,0.1,0.01,"[102, 187, 290, 368, 530, 561, 866, 1094, 1219...",0.06,"[5, 0, 5, 0, 5, 4, 0, 5, 3, 5, 5, 5, 5, 3, 4, ...",0.48,0.08,151.0,"Counter({5: 40, 2: 32, 3: 29, 0: 12, 4: 9, 1: 6})"
7,0.1,0.09,"[41, 161, 538, 539, 764, 765, 1320, 1425, 1439...",0.26,"[2, 0, 3, 5, 2, 2, 0, 3, 2, 3, 2, 2, 2, 3, 0, ...",0.32,0.12,43.0,"Counter({5: 26, 3: 24, 2: 23, 0: 22, 4: 21, 1:..."


adding 8 random population


 87%|████████▋ | 27/31 [12:00<01:16, 19.17s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.1,0.04,"[41, 214, 249, 319, 458, 538, 539, 646, 716, 7...",0.18,"[3, 5, 2, 0, 2, 3, 0, 2, 0, 5, 3, 2, 3, 3, 5, ...",0.37,0.13,45.0,"Counter({2: 41, 1: 20, 0: 19, 5: 18, 3: 17, 4:..."
1,0.1,0.02,"[15, 41, 102, 249, 423, 538, 578, 716, 764, 81...",0.19,"[3, 0, 3, 0, 2, 0, 3, 3, 0, 3, 3, 3, 3, 3, 3, ...",0.45,0.13,53.0,"Counter({3: 49, 2: 25, 0: 21, 5: 16, 4: 11, 1:..."
2,0.1,0.05,"[458, 509, 538, 646, 716, 764, 765, 817, 829, ...",0.26,"[5, 0, 5, 0, 5, 5, 3, 5, 0, 5, 5, 3, 0, 5, 3, ...",0.46,0.13,64.0,"Counter({3: 37, 0: 35, 5: 23, 2: 15, 4: 12, 1:..."
3,0.1,0.08,"[182, 249, 281, 410, 458, 538, 646, 716, 796, ...",0.13,"[1, 5, 2, 0, 2, 1, 5, 2, 1, 3, 1, 2, 1, 1, 5, ...",0.37,0.2,61.0,"Counter({2: 32, 4: 29, 1: 20, 5: 18, 3: 17, 0:..."
4,0.1,0.09,"[18, 25, 92, 249, 320, 321, 423, 817, 1157, 14...",0.3,"[5, 0, 2, 5, 2, 5, 0, 2, 5, 2, 5, 5, 5, 5, 0, ...",0.37,0.13,53.0,"Counter({5: 30, 0: 30, 2: 27, 4: 17, 1: 16, 3:..."
5,0.1,0.03,"[102, 182, 221, 321, 383, 458, 516, 538, 539, ...",0.16,"[3, 0, 1, 0, 5, 0, 0, 5, 3, 5, 3, 5, 3, 1, 0, ...",0.47,0.14,114.0,"Counter({3: 27, 1: 27, 0: 21, 5: 20, 4: 20, 2:..."
6,0.1,0.01,"[102, 187, 290, 368, 530, 561, 866, 1094, 1219...",0.06,"[5, 0, 5, 0, 5, 4, 0, 5, 3, 5, 5, 5, 5, 3, 4, ...",0.48,0.08,151.0,"Counter({5: 40, 2: 32, 3: 29, 0: 12, 4: 9, 1: 6})"
7,0.1,0.09,"[41, 161, 538, 539, 764, 765, 1320, 1425, 1439...",0.26,"[2, 0, 3, 5, 2, 2, 0, 3, 2, 3, 2, 2, 2, 3, 0, ...",0.32,0.12,43.0,"Counter({5: 26, 3: 24, 2: 23, 0: 22, 4: 21, 1:..."
8,0.1,0.04,"[102, 164, 249, 281, 354, 458, 538, 566, 716, ...",0.17,"[3, 0, 3, 0, 2, 3, 2, 5, 3, 2, 3, 5, 3, 5, 2, ...",0.47,0.15,86.0,"Counter({2: 36, 5: 25, 3: 19, 1: 17, 4: 16, 0:..."


adding 48 random population


 97%|█████████▋| 30/31 [13:18<00:18, 18.92s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.1,0.04,"[41, 214, 249, 319, 458, 538, 539, 646, 716, 7...",0.18,"[3, 5, 2, 0, 2, 3, 0, 2, 0, 5, 3, 2, 3, 3, 5, ...",0.37,0.13,45.0,"Counter({2: 41, 1: 20, 0: 19, 5: 18, 3: 17, 4:..."
1,0.1,0.02,"[15, 41, 102, 249, 423, 538, 578, 716, 764, 81...",0.19,"[3, 0, 3, 0, 2, 0, 3, 3, 0, 3, 3, 3, 3, 3, 3, ...",0.45,0.13,53.0,"Counter({3: 49, 2: 25, 0: 21, 5: 16, 4: 11, 1:..."
2,0.1,0.05,"[458, 509, 538, 646, 716, 764, 765, 817, 829, ...",0.26,"[5, 0, 5, 0, 5, 5, 3, 5, 0, 5, 5, 3, 0, 5, 3, ...",0.46,0.13,64.0,"Counter({3: 37, 0: 35, 5: 23, 2: 15, 4: 12, 1:..."
3,0.1,0.08,"[182, 249, 281, 410, 458, 538, 646, 716, 796, ...",0.13,"[1, 5, 2, 0, 2, 1, 5, 2, 1, 3, 1, 2, 1, 1, 5, ...",0.37,0.2,61.0,"Counter({2: 32, 4: 29, 1: 20, 5: 18, 3: 17, 0:..."
4,0.1,0.09,"[18, 25, 92, 249, 320, 321, 423, 817, 1157, 14...",0.3,"[5, 0, 2, 5, 2, 5, 0, 2, 5, 2, 5, 5, 5, 5, 0, ...",0.37,0.13,53.0,"Counter({5: 30, 0: 30, 2: 27, 4: 17, 1: 16, 3:..."
5,0.1,0.03,"[102, 182, 221, 321, 383, 458, 516, 538, 539, ...",0.16,"[3, 0, 1, 0, 5, 0, 0, 5, 3, 5, 3, 5, 3, 1, 0, ...",0.47,0.14,114.0,"Counter({3: 27, 1: 27, 0: 21, 5: 20, 4: 20, 2:..."
6,0.1,0.01,"[102, 187, 290, 368, 530, 561, 866, 1094, 1219...",0.06,"[5, 0, 5, 0, 5, 4, 0, 5, 3, 5, 5, 5, 5, 3, 4, ...",0.48,0.08,151.0,"Counter({5: 40, 2: 32, 3: 29, 0: 12, 4: 9, 1: 6})"
7,0.1,0.09,"[41, 161, 538, 539, 764, 765, 1320, 1425, 1439...",0.26,"[2, 0, 3, 5, 2, 2, 0, 3, 2, 3, 2, 2, 2, 3, 0, ...",0.32,0.12,43.0,"Counter({5: 26, 3: 24, 2: 23, 0: 22, 4: 21, 1:..."
8,0.1,0.04,"[102, 164, 249, 281, 354, 458, 538, 566, 716, ...",0.17,"[3, 0, 3, 0, 2, 3, 2, 5, 3, 2, 3, 5, 3, 5, 2, ...",0.47,0.15,86.0,"Counter({2: 36, 5: 25, 3: 19, 1: 17, 4: 16, 0:..."
9,0.1,0.16,"[0, 15, 41, 249, 320, 764, 765, 974, 1157, 132...",0.39,"[5, 0, 5, 3, 5, 0, 0, 0, 0, 5, 5, 0, 0, 5, 5, ...",0.48,0.16,51.0,"Counter({5: 48, 0: 35, 1: 13, 3: 12, 2: 11, 4:..."


adding 7 random population


100%|██████████| 31/31 [14:10<00:00, 27.42s/it]


TIME: GA: 14.168665905793508 min
TIME: Total: 14.578481117884317 min

##########  yeoh, (248, 12625)
*** Computing 1D feature ranking ...
Dispersion tests took 0.82 sec
Entropy computation 3.12 sec
KNN computation 23.87 sec
Sorting and thresholds 0.04 sec
Performing hierarchical clustering...
Hierarchical clustering 13.79 sec


  0%|          | 0/31 [00:00<?, ?it/s]

Handle redundant features 0.85 sec
Returning 1357 redundant features and  330 important features
TIME: 1d Features : 0.7086523572603861 min
TIME: 2d scores: 7.947285970052083e-09 min
*** Optimization algorithm 
Non redundant features 11268, orig size 12625, nb imp : 330
adding 50 random population


 10%|▉         | 3/31 [00:15<02:26,  5.25s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.1,0.6,"[0, 1, 3, 4, 9, 15, 19, 21, 23, 26, 31, 50, 51...",0.77,"[3, 3, 5, 5, 3, 3, 3, 5, 3, 3, 5, 5, 3, 3, 3, ...",0.46,0.25,90.0,"Counter({3: 63, 4: 50, 1: 44, 5: 35, 0: 29, 2:..."


adding 13 random population


 19%|█▉        | 6/31 [03:57<16:26, 39.45s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.1,0.6,"[0, 1, 3, 4, 9, 15, 19, 21, 23, 26, 31, 50, 51...",0.77,"[3, 3, 5, 5, 3, 3, 3, 5, 3, 3, 5, 5, 3, 3, 3, ...",0.46,0.25,90.0,"Counter({3: 63, 4: 50, 1: 44, 5: 35, 0: 29, 2:..."
1,0.08,0.4,"[1, 4, 78, 278, 311, 722, 771, 787, 991, 1105,...",0.57,"[2, 4, 2, 0, 0, 4, 0, 2, 0, 4, 2, 2, 4, 0, 0, ...",0.32,0.18,65.0,"Counter({4: 76, 2: 55, 0: 41, 1: 32, 3: 26, 5:..."


adding 8 random population


 29%|██▉       | 9/31 [06:52<14:50, 40.47s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.1,0.6,"[0, 1, 3, 4, 9, 15, 19, 21, 23, 26, 31, 50, 51...",0.77,"[3, 3, 5, 5, 3, 3, 3, 5, 3, 3, 5, 5, 3, 3, 3, ...",0.46,0.25,90.0,"Counter({3: 63, 4: 50, 1: 44, 5: 35, 0: 29, 2:..."
1,0.08,0.4,"[1, 4, 78, 278, 311, 722, 771, 787, 991, 1105,...",0.57,"[2, 4, 2, 0, 0, 4, 0, 2, 0, 4, 2, 2, 4, 0, 0, ...",0.32,0.18,65.0,"Counter({4: 76, 2: 55, 0: 41, 1: 32, 3: 26, 5:..."
2,0.09,0.49,"[0, 4, 15, 17, 26, 165, 466, 539, 587, 705, 77...",0.61,"[3, 3, 3, 3, 3, 3, 3, 3, 0, 4, 5, 5, 3, 3, 3, ...",0.39,0.2,82.0,"Counter({4: 60, 1: 55, 3: 47, 2: 43, 5: 22, 0:..."


adding 8 random population


 39%|███▊      | 12/31 [08:32<09:22, 29.62s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.1,0.6,"[0, 1, 3, 4, 9, 15, 19, 21, 23, 26, 31, 50, 51...",0.77,"[3, 3, 5, 5, 3, 3, 3, 5, 3, 3, 5, 5, 3, 3, 3, ...",0.46,0.25,90.0,"Counter({3: 63, 4: 50, 1: 44, 5: 35, 0: 29, 2:..."
1,0.08,0.4,"[1, 4, 78, 278, 311, 722, 771, 787, 991, 1105,...",0.57,"[2, 4, 2, 0, 0, 4, 0, 2, 0, 4, 2, 2, 4, 0, 0, ...",0.32,0.18,65.0,"Counter({4: 76, 2: 55, 0: 41, 1: 32, 3: 26, 5:..."
2,0.09,0.49,"[0, 4, 15, 17, 26, 165, 466, 539, 587, 705, 77...",0.61,"[3, 3, 3, 3, 3, 3, 3, 3, 0, 4, 5, 5, 3, 3, 3, ...",0.39,0.2,82.0,"Counter({4: 60, 1: 55, 3: 47, 2: 43, 5: 22, 0:..."
3,0.1,0.6,"[1, 3, 4, 9, 15, 18, 19, 26, 34, 51, 75, 78, 8...",0.77,"[3, 3, 5, 0, 3, 3, 3, 3, 3, 3, 5, 5, 3, 3, 3, ...",0.49,0.22,125.0,"Counter({3: 60, 4: 48, 1: 43, 5: 38, 0: 32, 2:..."


adding 5 random population


 48%|████▊     | 15/31 [13:35<14:57, 56.08s/it] 

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.1,0.6,"[0, 1, 3, 4, 9, 15, 19, 21, 23, 26, 31, 50, 51...",0.77,"[3, 3, 5, 5, 3, 3, 3, 5, 3, 3, 5, 5, 3, 3, 3, ...",0.46,0.25,90.0,"Counter({3: 63, 4: 50, 1: 44, 5: 35, 0: 29, 2:..."
1,0.08,0.4,"[1, 4, 78, 278, 311, 722, 771, 787, 991, 1105,...",0.57,"[2, 4, 2, 0, 0, 4, 0, 2, 0, 4, 2, 2, 4, 0, 0, ...",0.32,0.18,65.0,"Counter({4: 76, 2: 55, 0: 41, 1: 32, 3: 26, 5:..."
2,0.09,0.49,"[0, 4, 15, 17, 26, 165, 466, 539, 587, 705, 77...",0.61,"[3, 3, 3, 3, 3, 3, 3, 3, 0, 4, 5, 5, 3, 3, 3, ...",0.39,0.2,82.0,"Counter({4: 60, 1: 55, 3: 47, 2: 43, 5: 22, 0:..."
3,0.1,0.6,"[1, 3, 4, 9, 15, 18, 19, 26, 34, 51, 75, 78, 8...",0.77,"[3, 3, 5, 0, 3, 3, 3, 3, 3, 3, 5, 5, 3, 3, 3, ...",0.49,0.22,125.0,"Counter({3: 60, 4: 48, 1: 43, 5: 38, 0: 32, 2:..."
4,0.1,0.31,"[3, 4, 15, 19, 71, 305, 306, 465, 539, 576, 77...",0.45,"[2, 0, 3, 3, 0, 0, 0, 3, 5, 5, 3, 3, 5, 0, 0, ...",0.32,0.15,41.0,"Counter({5: 63, 0: 57, 3: 43, 2: 41, 4: 31, 1:..."


adding 2 random population


 58%|█████▊    | 18/31 [14:29<06:08, 28.31s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.1,0.6,"[0, 1, 3, 4, 9, 15, 19, 21, 23, 26, 31, 50, 51...",0.77,"[3, 3, 5, 5, 3, 3, 3, 5, 3, 3, 5, 5, 3, 3, 3, ...",0.46,0.25,90.0,"Counter({3: 63, 4: 50, 1: 44, 5: 35, 0: 29, 2:..."
1,0.08,0.4,"[1, 4, 78, 278, 311, 722, 771, 787, 991, 1105,...",0.57,"[2, 4, 2, 0, 0, 4, 0, 2, 0, 4, 2, 2, 4, 0, 0, ...",0.32,0.18,65.0,"Counter({4: 76, 2: 55, 0: 41, 1: 32, 3: 26, 5:..."
2,0.09,0.49,"[0, 4, 15, 17, 26, 165, 466, 539, 587, 705, 77...",0.61,"[3, 3, 3, 3, 3, 3, 3, 3, 0, 4, 5, 5, 3, 3, 3, ...",0.39,0.2,82.0,"Counter({4: 60, 1: 55, 3: 47, 2: 43, 5: 22, 0:..."
3,0.1,0.6,"[1, 3, 4, 9, 15, 18, 19, 26, 34, 51, 75, 78, 8...",0.77,"[3, 3, 5, 0, 3, 3, 3, 3, 3, 3, 5, 5, 3, 3, 3, ...",0.49,0.22,125.0,"Counter({3: 60, 4: 48, 1: 43, 5: 38, 0: 32, 2:..."
4,0.1,0.31,"[3, 4, 15, 19, 71, 305, 306, 465, 539, 576, 77...",0.45,"[2, 0, 3, 3, 0, 0, 0, 3, 5, 5, 3, 3, 5, 0, 0, ...",0.32,0.15,41.0,"Counter({5: 63, 0: 57, 3: 43, 2: 41, 4: 31, 1:..."
5,0.09,0.68,"[1, 3, 9, 14, 15, 18, 19, 21, 22, 23, 26, 31, ...",0.75,"[5, 5, 2, 5, 5, 5, 5, 5, 5, 5, 2, 2, 5, 5, 5, ...",0.55,0.23,148.0,"Counter({0: 71, 5: 59, 1: 39, 2: 32, 3: 29, 4:..."


adding 5 random population


 68%|██████▊   | 21/31 [20:07<10:05, 60.60s/it] 

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.1,0.6,"[0, 1, 3, 4, 9, 15, 19, 21, 23, 26, 31, 50, 51...",0.77,"[3, 3, 5, 5, 3, 3, 3, 5, 3, 3, 5, 5, 3, 3, 3, ...",0.46,0.25,90.0,"Counter({3: 63, 4: 50, 1: 44, 5: 35, 0: 29, 2:..."
1,0.08,0.4,"[1, 4, 78, 278, 311, 722, 771, 787, 991, 1105,...",0.57,"[2, 4, 2, 0, 0, 4, 0, 2, 0, 4, 2, 2, 4, 0, 0, ...",0.32,0.18,65.0,"Counter({4: 76, 2: 55, 0: 41, 1: 32, 3: 26, 5:..."
2,0.09,0.49,"[0, 4, 15, 17, 26, 165, 466, 539, 587, 705, 77...",0.61,"[3, 3, 3, 3, 3, 3, 3, 3, 0, 4, 5, 5, 3, 3, 3, ...",0.39,0.2,82.0,"Counter({4: 60, 1: 55, 3: 47, 2: 43, 5: 22, 0:..."
3,0.1,0.6,"[1, 3, 4, 9, 15, 18, 19, 26, 34, 51, 75, 78, 8...",0.77,"[3, 3, 5, 0, 3, 3, 3, 3, 3, 3, 5, 5, 3, 3, 3, ...",0.49,0.22,125.0,"Counter({3: 60, 4: 48, 1: 43, 5: 38, 0: 32, 2:..."
4,0.1,0.31,"[3, 4, 15, 19, 71, 305, 306, 465, 539, 576, 77...",0.45,"[2, 0, 3, 3, 0, 0, 0, 3, 5, 5, 3, 3, 5, 0, 0, ...",0.32,0.15,41.0,"Counter({5: 63, 0: 57, 3: 43, 2: 41, 4: 31, 1:..."
5,0.09,0.68,"[1, 3, 9, 14, 15, 18, 19, 21, 22, 23, 26, 31, ...",0.75,"[5, 5, 2, 5, 5, 5, 5, 5, 5, 5, 2, 2, 5, 5, 5, ...",0.55,0.23,148.0,"Counter({0: 71, 5: 59, 1: 39, 2: 32, 3: 29, 4:..."
6,0.09,0.6,"[3, 19, 31, 70, 78, 265, 305, 465, 466, 520, 5...",0.67,"[5, 5, 3, 3, 3, 5, 5, 3, 3, 5, 3, 3, 5, 3, 3, ...",0.42,0.2,94.0,"Counter({0: 67, 3: 52, 5: 49, 2: 33, 1: 33, 4:..."


adding 3 random population


 77%|███████▋  | 24/31 [24:46<07:21, 63.11s/it] 

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.1,0.6,"[0, 1, 3, 4, 9, 15, 19, 21, 23, 26, 31, 50, 51...",0.77,"[3, 3, 5, 5, 3, 3, 3, 5, 3, 3, 5, 5, 3, 3, 3, ...",0.46,0.25,90.0,"Counter({3: 63, 4: 50, 1: 44, 5: 35, 0: 29, 2:..."
1,0.08,0.4,"[1, 4, 78, 278, 311, 722, 771, 787, 991, 1105,...",0.57,"[2, 4, 2, 0, 0, 4, 0, 2, 0, 4, 2, 2, 4, 0, 0, ...",0.32,0.18,65.0,"Counter({4: 76, 2: 55, 0: 41, 1: 32, 3: 26, 5:..."
2,0.09,0.49,"[0, 4, 15, 17, 26, 165, 466, 539, 587, 705, 77...",0.61,"[3, 3, 3, 3, 3, 3, 3, 3, 0, 4, 5, 5, 3, 3, 3, ...",0.39,0.2,82.0,"Counter({4: 60, 1: 55, 3: 47, 2: 43, 5: 22, 0:..."
3,0.1,0.6,"[1, 3, 4, 9, 15, 18, 19, 26, 34, 51, 75, 78, 8...",0.77,"[3, 3, 5, 0, 3, 3, 3, 3, 3, 3, 5, 5, 3, 3, 3, ...",0.49,0.22,125.0,"Counter({3: 60, 4: 48, 1: 43, 5: 38, 0: 32, 2:..."
4,0.1,0.31,"[3, 4, 15, 19, 71, 305, 306, 465, 539, 576, 77...",0.45,"[2, 0, 3, 3, 0, 0, 0, 3, 5, 5, 3, 3, 5, 0, 0, ...",0.32,0.15,41.0,"Counter({5: 63, 0: 57, 3: 43, 2: 41, 4: 31, 1:..."
5,0.09,0.68,"[1, 3, 9, 14, 15, 18, 19, 21, 22, 23, 26, 31, ...",0.75,"[5, 5, 2, 5, 5, 5, 5, 5, 5, 5, 2, 2, 5, 5, 5, ...",0.55,0.23,148.0,"Counter({0: 71, 5: 59, 1: 39, 2: 32, 3: 29, 4:..."
6,0.09,0.6,"[3, 19, 31, 70, 78, 265, 305, 465, 466, 520, 5...",0.67,"[5, 5, 3, 3, 3, 5, 5, 3, 3, 5, 3, 3, 5, 3, 3, ...",0.42,0.2,94.0,"Counter({0: 67, 3: 52, 5: 49, 2: 33, 1: 33, 4:..."
7,0.1,0.15,"[4, 19, 26, 129, 166, 278, 305, 325, 327, 378,...",0.31,"[5, 0, 2, 5, 5, 5, 5, 5, 5, 5, 2, 5, 5, 5, 5, ...",0.43,0.18,78.0,"Counter({5: 75, 0: 69, 2: 49, 3: 22, 1: 17, 4:..."


adding 5 random population


 87%|████████▋ | 27/31 [29:09<04:06, 61.62s/it] 

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.1,0.6,"[0, 1, 3, 4, 9, 15, 19, 21, 23, 26, 31, 50, 51...",0.77,"[3, 3, 5, 5, 3, 3, 3, 5, 3, 3, 5, 5, 3, 3, 3, ...",0.46,0.25,90.0,"Counter({3: 63, 4: 50, 1: 44, 5: 35, 0: 29, 2:..."
1,0.08,0.4,"[1, 4, 78, 278, 311, 722, 771, 787, 991, 1105,...",0.57,"[2, 4, 2, 0, 0, 4, 0, 2, 0, 4, 2, 2, 4, 0, 0, ...",0.32,0.18,65.0,"Counter({4: 76, 2: 55, 0: 41, 1: 32, 3: 26, 5:..."
2,0.09,0.49,"[0, 4, 15, 17, 26, 165, 466, 539, 587, 705, 77...",0.61,"[3, 3, 3, 3, 3, 3, 3, 3, 0, 4, 5, 5, 3, 3, 3, ...",0.39,0.2,82.0,"Counter({4: 60, 1: 55, 3: 47, 2: 43, 5: 22, 0:..."
3,0.1,0.6,"[1, 3, 4, 9, 15, 18, 19, 26, 34, 51, 75, 78, 8...",0.77,"[3, 3, 5, 0, 3, 3, 3, 3, 3, 3, 5, 5, 3, 3, 3, ...",0.49,0.22,125.0,"Counter({3: 60, 4: 48, 1: 43, 5: 38, 0: 32, 2:..."
4,0.1,0.31,"[3, 4, 15, 19, 71, 305, 306, 465, 539, 576, 77...",0.45,"[2, 0, 3, 3, 0, 0, 0, 3, 5, 5, 3, 3, 5, 0, 0, ...",0.32,0.15,41.0,"Counter({5: 63, 0: 57, 3: 43, 2: 41, 4: 31, 1:..."
5,0.09,0.68,"[1, 3, 9, 14, 15, 18, 19, 21, 22, 23, 26, 31, ...",0.75,"[5, 5, 2, 5, 5, 5, 5, 5, 5, 5, 2, 2, 5, 5, 5, ...",0.55,0.23,148.0,"Counter({0: 71, 5: 59, 1: 39, 2: 32, 3: 29, 4:..."
6,0.09,0.6,"[3, 19, 31, 70, 78, 265, 305, 465, 466, 520, 5...",0.67,"[5, 5, 3, 3, 3, 5, 5, 3, 3, 5, 3, 3, 5, 3, 3, ...",0.42,0.2,94.0,"Counter({0: 67, 3: 52, 5: 49, 2: 33, 1: 33, 4:..."
7,0.1,0.15,"[4, 19, 26, 129, 166, 278, 305, 325, 327, 378,...",0.31,"[5, 0, 2, 5, 5, 5, 5, 5, 5, 5, 2, 5, 5, 5, 5, ...",0.43,0.18,78.0,"Counter({5: 75, 0: 69, 2: 49, 3: 22, 1: 17, 4:..."
8,0.09,0.27,"[70, 71, 2361, 2774, 5325, 5953, 6413]",0.33,"[4, 5, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 5, 0, ...",0.13,0.17,7.0,"Counter({1: 76, 0: 67, 5: 35, 4: 33, 3: 29, 2:..."


adding 4 random population


 97%|█████████▋| 30/31 [29:52<00:28, 28.60s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.1,0.6,"[0, 1, 3, 4, 9, 15, 19, 21, 23, 26, 31, 50, 51...",0.77,"[3, 3, 5, 5, 3, 3, 3, 5, 3, 3, 5, 5, 3, 3, 3, ...",0.46,0.25,90.0,"Counter({3: 63, 4: 50, 1: 44, 5: 35, 0: 29, 2:..."
1,0.08,0.4,"[1, 4, 78, 278, 311, 722, 771, 787, 991, 1105,...",0.57,"[2, 4, 2, 0, 0, 4, 0, 2, 0, 4, 2, 2, 4, 0, 0, ...",0.32,0.18,65.0,"Counter({4: 76, 2: 55, 0: 41, 1: 32, 3: 26, 5:..."
2,0.09,0.49,"[0, 4, 15, 17, 26, 165, 466, 539, 587, 705, 77...",0.61,"[3, 3, 3, 3, 3, 3, 3, 3, 0, 4, 5, 5, 3, 3, 3, ...",0.39,0.2,82.0,"Counter({4: 60, 1: 55, 3: 47, 2: 43, 5: 22, 0:..."
3,0.1,0.6,"[1, 3, 4, 9, 15, 18, 19, 26, 34, 51, 75, 78, 8...",0.77,"[3, 3, 5, 0, 3, 3, 3, 3, 3, 3, 5, 5, 3, 3, 3, ...",0.49,0.22,125.0,"Counter({3: 60, 4: 48, 1: 43, 5: 38, 0: 32, 2:..."
4,0.1,0.31,"[3, 4, 15, 19, 71, 305, 306, 465, 539, 576, 77...",0.45,"[2, 0, 3, 3, 0, 0, 0, 3, 5, 5, 3, 3, 5, 0, 0, ...",0.32,0.15,41.0,"Counter({5: 63, 0: 57, 3: 43, 2: 41, 4: 31, 1:..."
5,0.09,0.68,"[1, 3, 9, 14, 15, 18, 19, 21, 22, 23, 26, 31, ...",0.75,"[5, 5, 2, 5, 5, 5, 5, 5, 5, 5, 2, 2, 5, 5, 5, ...",0.55,0.23,148.0,"Counter({0: 71, 5: 59, 1: 39, 2: 32, 3: 29, 4:..."
6,0.09,0.6,"[3, 19, 31, 70, 78, 265, 305, 465, 466, 520, 5...",0.67,"[5, 5, 3, 3, 3, 5, 5, 3, 3, 5, 3, 3, 5, 3, 3, ...",0.42,0.2,94.0,"Counter({0: 67, 3: 52, 5: 49, 2: 33, 1: 33, 4:..."
7,0.1,0.15,"[4, 19, 26, 129, 166, 278, 305, 325, 327, 378,...",0.31,"[5, 0, 2, 5, 5, 5, 5, 5, 5, 5, 2, 5, 5, 5, 5, ...",0.43,0.18,78.0,"Counter({5: 75, 0: 69, 2: 49, 3: 22, 1: 17, 4:..."
8,0.09,0.27,"[70, 71, 2361, 2774, 5325, 5953, 6413]",0.33,"[4, 5, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 5, 0, ...",0.13,0.17,7.0,"Counter({1: 76, 0: 67, 5: 35, 4: 33, 3: 29, 2:..."
9,0.09,0.67,"[0, 4, 9, 14, 15, 19, 21, 22, 26, 34, 51, 71, ...",0.71,"[4, 2, 3, 3, 2, 2, 2, 2, 2, 2, 3, 3, 2, 2, 2, ...",0.52,0.23,119.0,"Counter({0: 71, 2: 62, 3: 50, 1: 34, 4: 21, 5:..."


adding 4 random population


100%|██████████| 31/31 [33:20<00:00, 64.53s/it]


TIME: GA: 33.338376780351005 min
TIME: Total: 34.04702957868576 min

##########  tian, (173, 12625)
*** Computing 1D feature ranking ...
Dispersion tests took 0.59 sec
Entropy computation 2.59 sec
KNN computation 17.15 sec
Sorting and thresholds 0.04 sec
Performing hierarchical clustering...
Hierarchical clustering 10.16 sec


  0%|          | 0/31 [00:00<?, ?it/s]

Handle redundant features 0.92 sec
Returning 686 redundant features and  469 important features
TIME: 1d Features : 0.5244625488917033 min
TIME: 2d scores: 7.947285970052083e-09 min
*** Optimization algorithm 
Non redundant features 11939, orig size 12625, nb imp : 469
adding 50 random population


 10%|▉         | 3/31 [00:09<01:26,  3.09s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.16,-0.04,"[87, 5992, 9336, 12493]",0.01,"[1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, ...",0.16,0.43,4.0,"Counter({1: 126, 0: 47})"


adding 1 random population


 19%|█▉        | 6/31 [00:26<01:49,  4.39s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.16,-0.04,"[87, 5992, 9336, 12493]",0.01,"[1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, ...",0.16,0.43,4.0,"Counter({1: 126, 0: 47})"
1,0.17,0.01,"[831, 4452, 11260]",0.0,"[0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, ...",0.17,0.49,3.0,"Counter({0: 137, 1: 36})"


adding 3 random population


 29%|██▉       | 9/31 [00:42<01:44,  4.77s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.16,-0.04,"[87, 5992, 9336, 12493]",0.01,"[1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, ...",0.16,0.43,4.0,"Counter({1: 126, 0: 47})"
1,0.17,0.01,"[831, 4452, 11260]",0.0,"[0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, ...",0.17,0.49,3.0,"Counter({0: 137, 1: 36})"
2,0.19,0.04,"[3872, 6261, 6271]",0.01,"[1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, ...",0.14,0.46,3.0,"Counter({0: 114, 1: 59})"


adding 1 random population


 39%|███▊      | 12/31 [01:00<01:39,  5.22s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.16,-0.04,"[87, 5992, 9336, 12493]",0.01,"[1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, ...",0.16,0.43,4.0,"Counter({1: 126, 0: 47})"
1,0.17,0.01,"[831, 4452, 11260]",0.0,"[0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, ...",0.17,0.49,3.0,"Counter({0: 137, 1: 36})"
2,0.19,0.04,"[3872, 6261, 6271]",0.01,"[1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, ...",0.14,0.46,3.0,"Counter({0: 114, 1: 59})"
3,0.2,0.03,"[7277, 8864]",0.0,"[1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, ...",0.12,0.53,2.0,"Counter({1: 126, 0: 47})"


adding 1 random population


 48%|████▊     | 15/31 [01:19<01:30,  5.66s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.16,-0.04,"[87, 5992, 9336, 12493]",0.01,"[1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, ...",0.16,0.43,4.0,"Counter({1: 126, 0: 47})"
1,0.17,0.01,"[831, 4452, 11260]",0.0,"[0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, ...",0.17,0.49,3.0,"Counter({0: 137, 1: 36})"
2,0.19,0.04,"[3872, 6261, 6271]",0.01,"[1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, ...",0.14,0.46,3.0,"Counter({0: 114, 1: 59})"
3,0.2,0.03,"[7277, 8864]",0.0,"[1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, ...",0.12,0.53,2.0,"Counter({1: 126, 0: 47})"
4,0.19,-0.04,"[6293, 6908, 8502, 11619]",0.01,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, ...",0.15,0.44,4.0,"Counter({0: 122, 1: 51})"


adding 2 random population


 58%|█████▊    | 18/31 [01:37<01:13,  5.65s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.16,-0.04,"[87, 5992, 9336, 12493]",0.01,"[1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, ...",0.16,0.43,4.0,"Counter({1: 126, 0: 47})"
1,0.17,0.01,"[831, 4452, 11260]",0.0,"[0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, ...",0.17,0.49,3.0,"Counter({0: 137, 1: 36})"
2,0.19,0.04,"[3872, 6261, 6271]",0.01,"[1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, ...",0.14,0.46,3.0,"Counter({0: 114, 1: 59})"
3,0.2,0.03,"[7277, 8864]",0.0,"[1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, ...",0.12,0.53,2.0,"Counter({1: 126, 0: 47})"
4,0.19,-0.04,"[6293, 6908, 8502, 11619]",0.01,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, ...",0.15,0.44,4.0,"Counter({0: 122, 1: 51})"
5,0.2,0.08,"[5324, 8375]",0.03,"[1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, ...",0.13,0.53,2.0,"Counter({0: 117, 1: 56})"


adding 4 random population


 68%|██████▊   | 21/31 [01:56<00:56,  5.65s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.16,-0.04,"[87, 5992, 9336, 12493]",0.01,"[1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, ...",0.16,0.43,4.0,"Counter({1: 126, 0: 47})"
1,0.17,0.01,"[831, 4452, 11260]",0.0,"[0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, ...",0.17,0.49,3.0,"Counter({0: 137, 1: 36})"
2,0.19,0.04,"[3872, 6261, 6271]",0.01,"[1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, ...",0.14,0.46,3.0,"Counter({0: 114, 1: 59})"
3,0.2,0.03,"[7277, 8864]",0.0,"[1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, ...",0.12,0.53,2.0,"Counter({1: 126, 0: 47})"
4,0.19,-0.04,"[6293, 6908, 8502, 11619]",0.01,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, ...",0.15,0.44,4.0,"Counter({0: 122, 1: 51})"
5,0.2,0.08,"[5324, 8375]",0.03,"[1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, ...",0.13,0.53,2.0,"Counter({0: 117, 1: 56})"
6,0.18,-0.02,"[409, 512, 1674, 1675, 7609]",0.01,"[0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, ...",0.15,0.38,5.0,"Counter({0: 105, 1: 68})"


adding 5 random population


 77%|███████▋  | 24/31 [02:17<00:42,  6.07s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.16,-0.04,"[87, 5992, 9336, 12493]",0.01,"[1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, ...",0.16,0.43,4.0,"Counter({1: 126, 0: 47})"
1,0.17,0.01,"[831, 4452, 11260]",0.0,"[0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, ...",0.17,0.49,3.0,"Counter({0: 137, 1: 36})"
2,0.19,0.04,"[3872, 6261, 6271]",0.01,"[1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, ...",0.14,0.46,3.0,"Counter({0: 114, 1: 59})"
3,0.2,0.03,"[7277, 8864]",0.0,"[1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, ...",0.12,0.53,2.0,"Counter({1: 126, 0: 47})"
4,0.19,-0.04,"[6293, 6908, 8502, 11619]",0.01,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, ...",0.15,0.44,4.0,"Counter({0: 122, 1: 51})"
5,0.2,0.08,"[5324, 8375]",0.03,"[1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, ...",0.13,0.53,2.0,"Counter({0: 117, 1: 56})"
6,0.18,-0.02,"[409, 512, 1674, 1675, 7609]",0.01,"[0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, ...",0.15,0.38,5.0,"Counter({0: 105, 1: 68})"
7,0.2,-0.0,"[405, 1744, 2242, 2243, 4990]",0.0,"[1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, ...",0.2,0.46,5.0,"Counter({1: 121, 0: 52})"


adding 4 random population


 87%|████████▋ | 27/31 [02:38<00:24,  6.05s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.16,-0.04,"[87, 5992, 9336, 12493]",0.01,"[1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, ...",0.16,0.43,4.0,"Counter({1: 126, 0: 47})"
1,0.17,0.01,"[831, 4452, 11260]",0.0,"[0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, ...",0.17,0.49,3.0,"Counter({0: 137, 1: 36})"
2,0.19,0.04,"[3872, 6261, 6271]",0.01,"[1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, ...",0.14,0.46,3.0,"Counter({0: 114, 1: 59})"
3,0.2,0.03,"[7277, 8864]",0.0,"[1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, ...",0.12,0.53,2.0,"Counter({1: 126, 0: 47})"
4,0.19,-0.04,"[6293, 6908, 8502, 11619]",0.01,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, ...",0.15,0.44,4.0,"Counter({0: 122, 1: 51})"
5,0.2,0.08,"[5324, 8375]",0.03,"[1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, ...",0.13,0.53,2.0,"Counter({0: 117, 1: 56})"
6,0.18,-0.02,"[409, 512, 1674, 1675, 7609]",0.01,"[0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, ...",0.15,0.38,5.0,"Counter({0: 105, 1: 68})"
7,0.2,-0.0,"[405, 1744, 2242, 2243, 4990]",0.0,"[1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, ...",0.2,0.46,5.0,"Counter({1: 121, 0: 52})"
8,0.19,0.01,"[1027, 1345, 1590, 1727, 1933, 2006, 2162, 397...",0.0,"[1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, ...",0.35,0.34,34.0,"Counter({0: 114, 1: 59})"


adding 20 random population


 97%|█████████▋| 30/31 [03:25<00:09,  9.94s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.16,-0.04,"[87, 5992, 9336, 12493]",0.01,"[1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, ...",0.16,0.43,4.0,"Counter({1: 126, 0: 47})"
1,0.17,0.01,"[831, 4452, 11260]",0.0,"[0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, ...",0.17,0.49,3.0,"Counter({0: 137, 1: 36})"
2,0.19,0.04,"[3872, 6261, 6271]",0.01,"[1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, ...",0.14,0.46,3.0,"Counter({0: 114, 1: 59})"
3,0.2,0.03,"[7277, 8864]",0.0,"[1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, ...",0.12,0.53,2.0,"Counter({1: 126, 0: 47})"
4,0.19,-0.04,"[6293, 6908, 8502, 11619]",0.01,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, ...",0.15,0.44,4.0,"Counter({0: 122, 1: 51})"
5,0.2,0.08,"[5324, 8375]",0.03,"[1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, ...",0.13,0.53,2.0,"Counter({0: 117, 1: 56})"
6,0.18,-0.02,"[409, 512, 1674, 1675, 7609]",0.01,"[0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, ...",0.15,0.38,5.0,"Counter({0: 105, 1: 68})"
7,0.2,-0.0,"[405, 1744, 2242, 2243, 4990]",0.0,"[1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, ...",0.2,0.46,5.0,"Counter({1: 121, 0: 52})"
8,0.19,0.01,"[1027, 1345, 1590, 1727, 1933, 2006, 2162, 397...",0.0,"[1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, ...",0.35,0.34,34.0,"Counter({0: 114, 1: 59})"
9,0.2,0.03,"[851, 3136]",0.02,"[0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, ...",0.12,0.5,2.0,"Counter({0: 95, 1: 78})"


adding 7 random population


100%|██████████| 31/31 [03:36<00:00,  6.97s/it]


TIME: GA: 3.6023046493530275 min
TIME: Total: 4.126767635345459 min


In [21]:
# Reload the report written by the GMM run and show, for every dataset, the
# best ARI reached together with the highest silhouette among the solutions
# that reach that ARI.
results = pd.read_excel("../reports/microarray_gmm_adapted_ratkowsky_lance.xlsx")

# One row per dataset holding its maximum ARI. The string alias "max" is used
# instead of the builtin `max`, which newer pandas versions deprecate in agg().
max_ari = results.groupby("dataset_name").agg({"ari": "max"}).reset_index()

# The inner merge on (dataset_name, ari) keeps only the rows whose ARI equals
# the per-dataset maximum; ties are then collapsed by taking the column-wise max.
(
    results[["dataset_name", "ari", "silhouette"]]
    .merge(max_ari, on=["dataset_name", "ari"])
    .groupby("dataset_name")
    .max()
)

Unnamed: 0_level_0,ari,silhouette
dataset_name,Unnamed: 1_level_1,Unnamed: 2_level_1
alon,0.15,0.44
borovecki,0.54,0.52
chiaretti,0.16,0.16
christensen,1.0,0.63
golub,0.42,0.52
gordon,0.86,0.6
gravier,0.11,0.57
khan,0.65,0.33
singh,0.05,0.66
sorlie,0.46,0.2


In [4]:
# Run the 1D feature ranking followed by the GA search on each microarray
# dataset, with clustering="hdbscan" handed to the GA, and checkpoint the
# accumulated solutions to an Excel report after every dataset.
results = None
filenames = np.array([
    'alon', 'borovecki', 'chiaretti', 'christensen', 'golub', 'gordon',
    'khan', 'sorlie', 'su', 'yeoh', 'west'])
clustering = "hdbscan"
path = '../data/microarray/'
method = "adapted_ratkowsky_lance"
imp_f = np.arange(20)  # not referenced anywhere else in this cell

# GA schedule: identical for every dataset, so it is set once up front.
round_size = 3
epochs = 10 * round_size

for name in filenames:
    t1 = time.time()
    # Each pickle holds the expression matrix plus a "truth" label column.
    data = pd.read_pickle(f"{path}{name}.pkl")
    truth = data["truth"].values
    data = data.drop("truth", axis=1).values

    # The number of clusters is taken from the ground-truth labels.
    n_clusters = len(np.unique(truth))

    # Built from `path` so the data directory is defined in a single place.
    z_file = f"{path}Z_{name}_correlation.npy"
    print(f"\n##########  {name}, {data.shape}")

    # Clustering 1D
    meta_features = feature_ranking.rank_features(data,
                                                  nb_bins=20,
                                                  rank_threshold=90,
                                                  z_file=z_file,
                                                  metric='correlation',
                                                  redundant_threshold=0.4)
    t2 = time.time()
    print(f"TIME: 1d Features : {(t2-t1)/60} min")

    # t3 and t4 are taken back-to-back: no work runs between them in this
    # cell, so the "2d scores" timing (and "t(cnn)" below) is effectively zero.
    t3 = time.time()
    t4 = time.time()
    print(f"TIME: 2d scores: {(t4-t3)/60} min")

    # Rebuilt on every iteration so each dataset gets its own dict.
    # NOTE(review): presumably the share of candidates drawn from each feature
    # pool -- confirm the exact semantics against scripts.ga.
    sampling = {
        "ARCHIVE2D": {"ga": 0, "max": 0},
        "CLOSE": {"ga": 0.35, "max": 0.35},
        "IMP1D": {"ga": 0.35, "max": 0.35},
        "RANDOM": {"ga": 0.3, "max": 0.3},
    }

    params = ga.ga_parameters(
        n_clusters,
        data.shape[1],
        truth,
        meta_features,
        method=method,
        truth_methods=['ari'],
        archive_2d=None,  # previously: population[:data.shape[1] // 4]
        epochs=epochs,
        sampling=sampling,
        round_size=round_size,
        allow_subspace_overlap=True,
        improvement_per_mutation_report=False,
        clustering=clustering
    )
    solutions, archive = ga.run(data, params)
    # Debug helper: display(params["report"].groupby(["op", "improvement"]).count())
    solutions["dataset_name"] = name

    t5 = time.time()
    n_total = t5 - t1
    print(f"TIME: GA: {(t5-t4)/60} min")
    print(f"TIME: Total: {(t5-t1)/60} min")

    # Per-stage wall-clock times, in minutes, rounded to one decimal.
    solutions["total_time"] = round((t5 - t1) / 60, 1)
    solutions["t(feature_sel)"] = round((t2 - t1) / 60, 1)
    solutions["t(cnn)"] = round((t3 - t2) / 60, 1)
    solutions["t(clustering2d)"] = round((t4 - t3) / 60, 1)
    solutions["t(ga)"] = round((t5 - t4) / 60, 1)

    solutions["n_ga"] = archive.shape[0]
    # NOTE(review): n_total is the elapsed time in seconds (t5 - t1), yet it is
    # stored under "n_cnn" -- confirm which value this column should hold.
    solutions["n_cnn"] = n_total
    solutions["input_size"] = data.shape[1]

    # NOTE(review): this counts rows where redundant == 1 but stores the count
    # as "n_non_redundant" -- confirm the flag's meaning in
    # scripts.feature_ranking before relying on this column.
    solutions["n_non_redundant"] = meta_features[(meta_features["redundant"] == 1)].shape[0]
    solutions["n_imp"] = meta_features[(meta_features["relevance"] != 0)].shape[0]
    solutions["n_imp4"] = meta_features[(meta_features["relevance"] == 4)].shape[0]
    solutions["n_imp3"] = meta_features[(meta_features["relevance"] == 3)].shape[0]

    # Append this dataset's solutions and rewrite the report immediately, so
    # the rows gathered so far are on disk if a later dataset fails.
    if results is None:
        results = solutions
    else:
        results = pd.concat([results, solutions], ignore_index=True)
    results.to_excel(f"../reports/microarray_{clustering}_{method}.xlsx")

# Best ARI per dataset; the string alias "max" replaces the builtin `max`,
# which newer pandas versions deprecate as an aggregation function.
results.groupby("dataset_name").agg({"ari": "max"})


##########  west, (49, 6858)
*** Computing 1D feature ranking ...
Dispersion tests took 0.19 sec
Entropy computation 0.87 sec
KNN computation 1.43 sec
Sorting and thresholds 0.11 sec
Loading clustering from file
Hierarchical clustering 0.02 sec


  0%|          | 0/31 [00:00<?, ?it/s]

Handle redundant features 1.13 sec
Returning 1530 redundant features and  198 important features
TIME: 1d Features : 0.0629282792409261 min
TIME: 2d scores: 3.973642985026042e-09 min
*** Optimization algorithm 
Non redundant features 5328, orig size 6858, nb imp : 198
adding 50 random population


 10%|▉         | 3/31 [00:05<00:50,  1.82s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.17,0.06,"[4062, 4761]",0.08,"[-1, -1, 1, -1, 1, 1, 0, 0, 1, 1, 1, 0, -1, 1,...",0.01,0.58,2.0,"Counter({1: 19, -1: 16, 0: 14})"


adding 2 random population


 19%|█▉        | 6/31 [00:13<00:54,  2.20s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.17,0.06,"[4062, 4761]",0.08,"[-1, -1, 1, -1, 1, 1, 0, 0, 1, 1, 1, 0, -1, 1,...",0.01,0.58,2.0,"Counter({1: 19, -1: 16, 0: 14})"
1,0.12,-0.02,"[24, 383, 450, 947, 1115, 1219, 1419, 1443, 19...",0.01,"[0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, -1, 0...",-0.19,0.26,27.0,"Counter({0: 23, 1: 14, -1: 12})"


adding 8 random population


 29%|██▉       | 9/31 [00:27<01:11,  3.25s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.17,0.06,"[4062, 4761]",0.08,"[-1, -1, 1, -1, 1, 1, 0, 0, 1, 1, 1, 0, -1, 1,...",0.01,0.58,2.0,"Counter({1: 19, -1: 16, 0: 14})"
1,0.12,-0.02,"[24, 383, 450, 947, 1115, 1219, 1419, 1443, 19...",0.01,"[0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, -1, 0...",-0.19,0.26,27.0,"Counter({0: 23, 1: 14, -1: 12})"
2,0.16,0.01,"[42, 291, 947, 1119, 1248, 1402, 1419, 2041, 2...",0.04,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.07,0.37,21.0,"Counter({0: 30, 1: 13, -1: 6})"


adding 1 random population


 39%|███▊      | 12/31 [00:40<01:03,  3.35s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.17,0.06,"[4062, 4761]",0.08,"[-1, -1, 1, -1, 1, 1, 0, 0, 1, 1, 1, 0, -1, 1,...",0.01,0.58,2.0,"Counter({1: 19, -1: 16, 0: 14})"
1,0.12,-0.02,"[24, 383, 450, 947, 1115, 1219, 1419, 1443, 19...",0.01,"[0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, -1, 0...",-0.19,0.26,27.0,"Counter({0: 23, 1: 14, -1: 12})"
2,0.16,0.01,"[42, 291, 947, 1119, 1248, 1402, 1419, 2041, 2...",0.04,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.07,0.37,21.0,"Counter({0: 30, 1: 13, -1: 6})"
3,0.09,-0.01,"[126, 553]",0.01,"[-1, 0, 0, 0, 0, 1, 0, 0, 0, 0, -1, 0, 0, -1, ...",0.02,0.45,2.0,"Counter({0: 24, -1: 13, 1: 12})"


adding 1 random population


 48%|████▊     | 15/31 [00:47<00:42,  2.65s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.17,0.06,"[4062, 4761]",0.08,"[-1, -1, 1, -1, 1, 1, 0, 0, 1, 1, 1, 0, -1, 1,...",0.01,0.58,2.0,"Counter({1: 19, -1: 16, 0: 14})"
1,0.12,-0.02,"[24, 383, 450, 947, 1115, 1219, 1419, 1443, 19...",0.01,"[0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, -1, 0...",-0.19,0.26,27.0,"Counter({0: 23, 1: 14, -1: 12})"
2,0.16,0.01,"[42, 291, 947, 1119, 1248, 1402, 1419, 2041, 2...",0.04,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.07,0.37,21.0,"Counter({0: 30, 1: 13, -1: 6})"
3,0.09,-0.01,"[126, 553]",0.01,"[-1, 0, 0, 0, 0, 1, 0, 0, 0, 0, -1, 0, 0, -1, ...",0.02,0.45,2.0,"Counter({0: 24, -1: 13, 1: 12})"
4,0.21,0.01,"[2694, 3401, 3580, 4694, 5536, 6779]",0.03,"[0, 0, 0, 1, 1, 1, -1, -1, 0, 0, -1, 1, 1, 1, ...",0.11,0.61,6.0,"Counter({0: 31, 1: 13, -1: 5})"


 58%|█████▊    | 18/31 [00:56<00:33,  2.59s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.17,0.06,"[4062, 4761]",0.08,"[-1, -1, 1, -1, 1, 1, 0, 0, 1, 1, 1, 0, -1, 1,...",0.01,0.58,2.0,"Counter({1: 19, -1: 16, 0: 14})"
1,0.12,-0.02,"[24, 383, 450, 947, 1115, 1219, 1419, 1443, 19...",0.01,"[0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, -1, 0...",-0.19,0.26,27.0,"Counter({0: 23, 1: 14, -1: 12})"
2,0.16,0.01,"[42, 291, 947, 1119, 1248, 1402, 1419, 2041, 2...",0.04,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.07,0.37,21.0,"Counter({0: 30, 1: 13, -1: 6})"
3,0.09,-0.01,"[126, 553]",0.01,"[-1, 0, 0, 0, 0, 1, 0, 0, 0, 0, -1, 0, 0, -1, ...",0.02,0.45,2.0,"Counter({0: 24, -1: 13, 1: 12})"
4,0.21,0.01,"[2694, 3401, 3580, 4694, 5536, 6779]",0.03,"[0, 0, 0, 1, 1, 1, -1, -1, 0, 0, -1, 1, 1, 1, ...",0.11,0.61,6.0,"Counter({0: 31, 1: 13, -1: 5})"
5,0.11,-0.02,"[1269, 5593, 6515]",0.0,"[0, 0, 0, 0, 0, 0, 0, 1, -1, 0, -1, 0, -1, 1, ...",-0.01,0.36,3.0,"Counter({-1: 23, 0: 15, 1: 11})"


adding 2 random population


 68%|██████▊   | 21/31 [01:05<00:25,  2.56s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.17,0.06,"[4062, 4761]",0.08,"[-1, -1, 1, -1, 1, 1, 0, 0, 1, 1, 1, 0, -1, 1,...",0.01,0.58,2.0,"Counter({1: 19, -1: 16, 0: 14})"
1,0.12,-0.02,"[24, 383, 450, 947, 1115, 1219, 1419, 1443, 19...",0.01,"[0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, -1, 0...",-0.19,0.26,27.0,"Counter({0: 23, 1: 14, -1: 12})"
2,0.16,0.01,"[42, 291, 947, 1119, 1248, 1402, 1419, 2041, 2...",0.04,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.07,0.37,21.0,"Counter({0: 30, 1: 13, -1: 6})"
3,0.09,-0.01,"[126, 553]",0.01,"[-1, 0, 0, 0, 0, 1, 0, 0, 0, 0, -1, 0, 0, -1, ...",0.02,0.45,2.0,"Counter({0: 24, -1: 13, 1: 12})"
4,0.21,0.01,"[2694, 3401, 3580, 4694, 5536, 6779]",0.03,"[0, 0, 0, 1, 1, 1, -1, -1, 0, 0, -1, 1, 1, 1, ...",0.11,0.61,6.0,"Counter({0: 31, 1: 13, -1: 5})"
5,0.11,-0.02,"[1269, 5593, 6515]",0.0,"[0, 0, 0, 0, 0, 0, 0, 1, -1, 0, -1, 0, -1, 1, ...",-0.01,0.36,3.0,"Counter({-1: 23, 0: 15, 1: 11})"
6,0.16,0.01,"[516, 2094, 2529]",0.02,"[0, 0, 0, 0, 0, 0, -1, -1, -1, 1, -1, 0, -1, 0...",0.05,0.58,3.0,"Counter({0: 23, 1: 16, -1: 10})"


adding 5 random population


 77%|███████▋  | 24/31 [01:13<00:17,  2.45s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.17,0.06,"[4062, 4761]",0.08,"[-1, -1, 1, -1, 1, 1, 0, 0, 1, 1, 1, 0, -1, 1,...",0.01,0.58,2.0,"Counter({1: 19, -1: 16, 0: 14})"
1,0.12,-0.02,"[24, 383, 450, 947, 1115, 1219, 1419, 1443, 19...",0.01,"[0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, -1, 0...",-0.19,0.26,27.0,"Counter({0: 23, 1: 14, -1: 12})"
2,0.16,0.01,"[42, 291, 947, 1119, 1248, 1402, 1419, 2041, 2...",0.04,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.07,0.37,21.0,"Counter({0: 30, 1: 13, -1: 6})"
3,0.09,-0.01,"[126, 553]",0.01,"[-1, 0, 0, 0, 0, 1, 0, 0, 0, 0, -1, 0, 0, -1, ...",0.02,0.45,2.0,"Counter({0: 24, -1: 13, 1: 12})"
4,0.21,0.01,"[2694, 3401, 3580, 4694, 5536, 6779]",0.03,"[0, 0, 0, 1, 1, 1, -1, -1, 0, 0, -1, 1, 1, 1, ...",0.11,0.61,6.0,"Counter({0: 31, 1: 13, -1: 5})"
5,0.11,-0.02,"[1269, 5593, 6515]",0.0,"[0, 0, 0, 0, 0, 0, 0, 1, -1, 0, -1, 0, -1, 1, ...",-0.01,0.36,3.0,"Counter({-1: 23, 0: 15, 1: 11})"
6,0.16,0.01,"[516, 2094, 2529]",0.02,"[0, 0, 0, 0, 0, 0, -1, -1, -1, 1, -1, 0, -1, 0...",0.05,0.58,3.0,"Counter({0: 23, 1: 16, -1: 10})"
7,0.15,-0.02,"[3395, 4726, 5179, 5356, 5400, 5401]",0.0,"[-1, -1, 0, 0, 1, 1, -1, -1, 1, 1, 1, 0, 0, 1,...",0.01,0.37,6.0,"Counter({1: 25, 0: 13, -1: 11})"


adding 3 random population


 87%|████████▋ | 27/31 [01:21<00:09,  2.49s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.17,0.06,"[4062, 4761]",0.08,"[-1, -1, 1, -1, 1, 1, 0, 0, 1, 1, 1, 0, -1, 1,...",0.01,0.58,2.0,"Counter({1: 19, -1: 16, 0: 14})"
1,0.12,-0.02,"[24, 383, 450, 947, 1115, 1219, 1419, 1443, 19...",0.01,"[0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, -1, 0...",-0.19,0.26,27.0,"Counter({0: 23, 1: 14, -1: 12})"
2,0.16,0.01,"[42, 291, 947, 1119, 1248, 1402, 1419, 2041, 2...",0.04,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.07,0.37,21.0,"Counter({0: 30, 1: 13, -1: 6})"
3,0.09,-0.01,"[126, 553]",0.01,"[-1, 0, 0, 0, 0, 1, 0, 0, 0, 0, -1, 0, 0, -1, ...",0.02,0.45,2.0,"Counter({0: 24, -1: 13, 1: 12})"
4,0.21,0.01,"[2694, 3401, 3580, 4694, 5536, 6779]",0.03,"[0, 0, 0, 1, 1, 1, -1, -1, 0, 0, -1, 1, 1, 1, ...",0.11,0.61,6.0,"Counter({0: 31, 1: 13, -1: 5})"
5,0.11,-0.02,"[1269, 5593, 6515]",0.0,"[0, 0, 0, 0, 0, 0, 0, 1, -1, 0, -1, 0, -1, 1, ...",-0.01,0.36,3.0,"Counter({-1: 23, 0: 15, 1: 11})"
6,0.16,0.01,"[516, 2094, 2529]",0.02,"[0, 0, 0, 0, 0, 0, -1, -1, -1, 1, -1, 0, -1, 0...",0.05,0.58,3.0,"Counter({0: 23, 1: 16, -1: 10})"
7,0.15,-0.02,"[3395, 4726, 5179, 5356, 5400, 5401]",0.0,"[-1, -1, 0, 0, 1, 1, -1, -1, 1, 1, 1, 0, 0, 1,...",0.01,0.37,6.0,"Counter({1: 25, 0: 13, -1: 11})"
8,0.12,0.03,"[2772, 3462, 3673]",0.04,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...",0.03,0.45,3.0,"Counter({0: 27, 1: 12, -1: 10})"


adding 14 random population


 97%|█████████▋| 30/31 [01:30<00:02,  2.47s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.17,0.06,"[4062, 4761]",0.08,"[-1, -1, 1, -1, 1, 1, 0, 0, 1, 1, 1, 0, -1, 1,...",0.01,0.58,2.0,"Counter({1: 19, -1: 16, 0: 14})"
1,0.12,-0.02,"[24, 383, 450, 947, 1115, 1219, 1419, 1443, 19...",0.01,"[0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, -1, 0...",-0.19,0.26,27.0,"Counter({0: 23, 1: 14, -1: 12})"
2,0.16,0.01,"[42, 291, 947, 1119, 1248, 1402, 1419, 2041, 2...",0.04,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.07,0.37,21.0,"Counter({0: 30, 1: 13, -1: 6})"
3,0.09,-0.01,"[126, 553]",0.01,"[-1, 0, 0, 0, 0, 1, 0, 0, 0, 0, -1, 0, 0, -1, ...",0.02,0.45,2.0,"Counter({0: 24, -1: 13, 1: 12})"
4,0.21,0.01,"[2694, 3401, 3580, 4694, 5536, 6779]",0.03,"[0, 0, 0, 1, 1, 1, -1, -1, 0, 0, -1, 1, 1, 1, ...",0.11,0.61,6.0,"Counter({0: 31, 1: 13, -1: 5})"
5,0.11,-0.02,"[1269, 5593, 6515]",0.0,"[0, 0, 0, 0, 0, 0, 0, 1, -1, 0, -1, 0, -1, 1, ...",-0.01,0.36,3.0,"Counter({-1: 23, 0: 15, 1: 11})"
6,0.16,0.01,"[516, 2094, 2529]",0.02,"[0, 0, 0, 0, 0, 0, -1, -1, -1, 1, -1, 0, -1, 0...",0.05,0.58,3.0,"Counter({0: 23, 1: 16, -1: 10})"
7,0.15,-0.02,"[3395, 4726, 5179, 5356, 5400, 5401]",0.0,"[-1, -1, 0, 0, 1, 1, -1, -1, 1, 1, 1, 0, 0, 1,...",0.01,0.37,6.0,"Counter({1: 25, 0: 13, -1: 11})"
8,0.12,0.03,"[2772, 3462, 3673]",0.04,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...",0.03,0.45,3.0,"Counter({0: 27, 1: 12, -1: 10})"
9,0.16,-0.02,"[745, 6514]",0.0,"[0, 0, 0, 1, 1, 1, 0, 0, 1, -1, -1, 1, 1, 0, -...",0.06,0.56,2.0,"Counter({1: 25, 0: 15, -1: 9})"


adding 6 random population


100%|██████████| 31/31 [01:35<00:00,  3.07s/it]


TIME: GA: 1.5861598571141562 min
TIME: Total: 1.6490885535875957 min


Unnamed: 0_level_0,ari
dataset_name,Unnamed: 1_level_1
alon,0.16
borovecki,0.0
chiaretti,0.16
christensen,0.82
golub,0.27
gordon,0.27
khan,0.34
sorlie,0.13
su,0.37
west,0.06


In [5]:
# Load the previously saved report of the HDBSCAN / adapted_ratkowsky_lance runs.
results = pd.read_excel("../reports/microarray_hdbscan_adapted_ratkowsky_lance.xlsx")

# Best ARI reached on each dataset. The string alias "max" is used rather than
# the Python builtin `max`: the result is the same, but recent pandas versions
# deprecate passing the builtin as an aggregation function.
max_ari = results.groupby("dataset_name").agg({"ari": "max"}).reset_index()

# Keep only the rows that reach that best ARI; when several rows tie on ARI,
# the per-dataset max reports the highest silhouette among them.
pd.merge(results[["dataset_name", "ari", "silhouette"]],
         max_ari, on = ["dataset_name", "ari"]).groupby("dataset_name").max()

Unnamed: 0_level_0,ari,silhouette
dataset_name,Unnamed: 1_level_1,Unnamed: 2_level_1
alon,0.16,0.31
borovecki,0.0,0.0
chiaretti,0.16,0.25
christensen,0.82,0.73
golub,0.27,0.2
gordon,0.27,0.11
khan,0.34,0.41
sorlie,0.13,0.03
su,0.37,0.37
west,0.06,0.58


In [16]:
# Run the 1D feature ranking followed by the GA search (Leiden clustering) on
# every microarray dataset, and accumulate the solutions together with timing
# and feature statistics into one report that is re-written after each dataset.
results = None
filenames = np.array(['alon', 'borovecki', 'chiaretti', 'christensen', 'golub', 'gordon',
       'khan', 'sorlie', 'su', 'yeoh'])
clustering = "leiden"
path = '../data/microarray/'
method = "adapted_ratkowsky_lance"
report_file = "../reports/microarray_leiden.xlsx"
imp_f = np.arange(20)  # not used by this cell; kept in case another cell reads it

# GA schedule: identical for every dataset, so computed once.
round_size = 3
epochs = 10*round_size

for name in filenames:
    t1 = time.time()

    # NOTE(review): read_pickle executes arbitrary code embedded in the file;
    # only load pickles that were produced locally.
    dataset = pd.read_pickle(f'{path}{name}.pkl')
    truth = dataset["truth"].values
    data = dataset.drop("truth", axis = 1).values

    # The number of clusters is taken from the ground-truth labels.
    n_clusters = len(np.unique(truth))

    # Same directory as the dataset itself.
    z_file = f"{path}Z_{name}_correlation.npy"
    print(f"\n##########  {name}, {data.shape}")

    # Clustering 1D
    meta_features = feature_ranking.rank_features(data,
                                              nb_bins=20,
                                              rank_threshold=90,
                                              z_file=z_file,
                                              metric='correlation',
                                              redundant_threshold=0.4)
    t2 = time.time()
    print(f"TIME: 1d Features : {(t2-t1)/60} min")

    # Sampling shares handed to the GA. ARCHIVE2D gets zero weight, and no 2D
    # archive is passed below (archive_2d=None). The dict is rebuilt on every
    # iteration, so each run receives its own object.
    sampling = {
        "ARCHIVE2D": {"ga": 0, "max": 0},
        "CLOSE": {"ga": 0.35, "max": 0.35},
        "IMP1D": {"ga": 0.35, "max": 0.35},
        "RANDOM": {"ga": 0.3, "max": 0.3},
    }

    params = ga.ga_parameters(
        n_clusters,
        data.shape[1],
        truth,
        meta_features,
        method=method,
        truth_methods=['ari'],
        archive_2d=None,
        epochs=epochs,
        sampling = sampling,
        round_size=round_size,
        allow_subspace_overlap = True,
        improvement_per_mutation_report = False,
        clustering = clustering
    )
    solutions, archive = ga.run(data, params)
    solutions["dataset_name"] = name

    t5 = time.time()
    n_total = t5-t1
    # The GA starts right after the 1D ranking: no intermediate stage is run here.
    print(f"TIME: GA: {(t5-t2)/60} min")
    print(f"TIME: Total: {(t5-t1)/60} min")

    # Timings, in minutes.
    solutions["total_time"] = round((t5-t1)/60, 1)
    solutions["t(feature_sel)"] = round((t2-t1)/60, 1)
    # No CNN or 2D-clustering stage is executed in this cell; the columns are
    # kept, with the 0.0 they always held, so the report layout is unchanged.
    solutions["t(cnn)"] = 0.0
    solutions["t(clustering2d)"] = 0.0
    solutions["t(ga)"] = round((t5-t2)/60, 1)

    solutions["n_ga"] = archive.shape[0]
    # NOTE(review): n_cnn receives the total elapsed time in seconds, not a
    # count -- confirm what this column is meant to hold.
    solutions["n_cnn"] = n_total
    solutions["input_size"] = data.shape[1]

    # Feature statistics taken from the 1D ranking.
    # NOTE(review): n_non_redundant counts rows with redundant == 1 -- confirm
    # which value of the flag marks a redundant feature.
    solutions["n_non_redundant"] = int((meta_features["redundant"] == 1).sum())
    solutions["n_imp"] = int((meta_features["relevance"] != 0).sum())
    solutions["n_imp4"] = int((meta_features["relevance"] == 4).sum())
    solutions["n_imp3"] = int((meta_features["relevance"] == 3).sum())

    if results is None:
        results = solutions
    else:
        results = pd.concat([results, solutions], ignore_index = True)
    # Written after every dataset, so finished runs are on disk even if a
    # later dataset fails.
    results.to_excel(report_file)
    


##########  alon, (62, 1991)
*** Computing 1D feature ranking ...
Dispersion tests took 0.01 sec
Entropy computation 0.3 sec
KNN computation 0.13 sec
Sorting and thresholds 0.01 sec
Performing hierarchical clustering...


  0%|          | 0/31 [00:00<?, ?it/s]

Hierarchical clustering 0.1 sec
Handle redundant features 0.15 sec
Returning 1520 redundant features and  121 important features
TIME: 1d Features : 0.011996614933013915 min
TIME: 2d scores: 3.973642985026042e-09 min
*** Optimization algorithm 
Non redundant features 471, orig size 1991, nb imp : 121
adding 50 random population


 10%|▉         | 3/31 [00:24<03:02,  6.50s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.18,-0.01,"[22, 80, 101, 108, 116, 156, 171, 213, 215, 24...",0.0,"[0, 1, 0, 1, 2, 2, 0, 2, 1, 0, 0, 1, 0, 2, 0, ...",0.45,0.21,58.0,"Counter({0: 25, 1: 23, 2: 14})"


adding 17 random population


 19%|█▉        | 6/31 [00:48<02:40,  6.42s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.18,-0.01,"[22, 80, 101, 108, 116, 156, 171, 213, 215, 24...",0.0,"[0, 1, 0, 1, 2, 2, 0, 2, 1, 0, 0, 1, 0, 2, 0, ...",0.45,0.21,58.0,"Counter({0: 25, 1: 23, 2: 14})"
1,0.18,-0.01,"[29, 359, 391, 395, 415, 419, 443, 464, 579, 5...",0.01,"[0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 0, ...",0.39,0.21,37.0,"Counter({0: 25, 1: 19, 2: 18})"


adding 9 random population


 29%|██▉       | 9/31 [01:08<02:07,  5.79s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.18,-0.01,"[22, 80, 101, 108, 116, 156, 171, 213, 215, 24...",0.0,"[0, 1, 0, 1, 2, 2, 0, 2, 1, 0, 0, 1, 0, 2, 0, ...",0.45,0.21,58.0,"Counter({0: 25, 1: 23, 2: 14})"
1,0.18,-0.01,"[29, 359, 391, 395, 415, 419, 443, 464, 579, 5...",0.01,"[0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 0, ...",0.39,0.21,37.0,"Counter({0: 25, 1: 19, 2: 18})"
2,0.17,-0.01,"[13, 84, 96, 111, 115, 121, 171, 194, 202, 205...",0.01,"[0, 1, 2, 2, 0, 2, 0, 0, 0, 1, 1, 0, 0, 2, 0, ...",0.48,0.16,72.0,"Counter({0: 26, 1: 22, 2: 14})"


adding 20 random population


 39%|███▊      | 12/31 [01:34<02:03,  6.51s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.18,-0.01,"[22, 80, 101, 108, 116, 156, 171, 213, 215, 24...",0.0,"[0, 1, 0, 1, 2, 2, 0, 2, 1, 0, 0, 1, 0, 2, 0, ...",0.45,0.21,58.0,"Counter({0: 25, 1: 23, 2: 14})"
1,0.18,-0.01,"[29, 359, 391, 395, 415, 419, 443, 464, 579, 5...",0.01,"[0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 0, ...",0.39,0.21,37.0,"Counter({0: 25, 1: 19, 2: 18})"
2,0.17,-0.01,"[13, 84, 96, 111, 115, 121, 171, 194, 202, 205...",0.01,"[0, 1, 2, 2, 0, 2, 0, 0, 0, 1, 1, 0, 0, 2, 0, ...",0.48,0.16,72.0,"Counter({0: 26, 1: 22, 2: 14})"
3,0.16,0.0,"[6, 13, 20, 33, 38, 40, 62, 66, 81, 100, 108, ...",0.01,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 2, ...",0.76,0.19,162.0,"Counter({0: 26, 1: 19, 2: 17})"


adding 28 random population


 48%|████▊     | 15/31 [02:09<02:08,  8.02s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.18,-0.01,"[22, 80, 101, 108, 116, 156, 171, 213, 215, 24...",0.0,"[0, 1, 0, 1, 2, 2, 0, 2, 1, 0, 0, 1, 0, 2, 0, ...",0.45,0.21,58.0,"Counter({0: 25, 1: 23, 2: 14})"
1,0.18,-0.01,"[29, 359, 391, 395, 415, 419, 443, 464, 579, 5...",0.01,"[0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 0, ...",0.39,0.21,37.0,"Counter({0: 25, 1: 19, 2: 18})"
2,0.17,-0.01,"[13, 84, 96, 111, 115, 121, 171, 194, 202, 205...",0.01,"[0, 1, 2, 2, 0, 2, 0, 0, 0, 1, 1, 0, 0, 2, 0, ...",0.48,0.16,72.0,"Counter({0: 26, 1: 22, 2: 14})"
3,0.16,0.0,"[6, 13, 20, 33, 38, 40, 62, 66, 81, 100, 108, ...",0.01,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 2, ...",0.76,0.19,162.0,"Counter({0: 26, 1: 19, 2: 17})"
4,0.15,-0.01,"[13, 34, 38, 52, 83, 93, 121, 143, 173, 226, 2...",0.01,"[2, 1, 0, 2, 0, 0, 0, 0, 2, 2, 1, 1, 0, 0, 2, ...",0.7,0.17,167.0,"Counter({0: 24, 1: 20, 2: 18})"


adding 38 random population


 58%|█████▊    | 18/31 [02:40<01:45,  8.08s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.18,-0.01,"[22, 80, 101, 108, 116, 156, 171, 213, 215, 24...",0.0,"[0, 1, 0, 1, 2, 2, 0, 2, 1, 0, 0, 1, 0, 2, 0, ...",0.45,0.21,58.0,"Counter({0: 25, 1: 23, 2: 14})"
1,0.18,-0.01,"[29, 359, 391, 395, 415, 419, 443, 464, 579, 5...",0.01,"[0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 0, ...",0.39,0.21,37.0,"Counter({0: 25, 1: 19, 2: 18})"
2,0.17,-0.01,"[13, 84, 96, 111, 115, 121, 171, 194, 202, 205...",0.01,"[0, 1, 2, 2, 0, 2, 0, 0, 0, 1, 1, 0, 0, 2, 0, ...",0.48,0.16,72.0,"Counter({0: 26, 1: 22, 2: 14})"
3,0.16,0.0,"[6, 13, 20, 33, 38, 40, 62, 66, 81, 100, 108, ...",0.01,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 2, ...",0.76,0.19,162.0,"Counter({0: 26, 1: 19, 2: 17})"
4,0.15,-0.01,"[13, 34, 38, 52, 83, 93, 121, 143, 173, 226, 2...",0.01,"[2, 1, 0, 2, 0, 0, 0, 0, 2, 2, 1, 1, 0, 0, 2, ...",0.7,0.17,167.0,"Counter({0: 24, 1: 20, 2: 18})"
5,0.17,0.02,"[18, 34, 159, 168, 206, 241, 260, 355, 365, 48...",0.08,"[1, 0, 1, 1, 0, 1, 1, 1, 2, 0, 2, 0, 0, 1, 0, ...",0.34,0.15,36.0,"Counter({0: 24, 1: 23, 2: 15})"


adding 8 random population


 68%|██████▊   | 21/31 [03:00<01:03,  6.38s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.18,-0.01,"[22, 80, 101, 108, 116, 156, 171, 213, 215, 24...",0.0,"[0, 1, 0, 1, 2, 2, 0, 2, 1, 0, 0, 1, 0, 2, 0, ...",0.45,0.21,58.0,"Counter({0: 25, 1: 23, 2: 14})"
1,0.18,-0.01,"[29, 359, 391, 395, 415, 419, 443, 464, 579, 5...",0.01,"[0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 0, ...",0.39,0.21,37.0,"Counter({0: 25, 1: 19, 2: 18})"
2,0.17,-0.01,"[13, 84, 96, 111, 115, 121, 171, 194, 202, 205...",0.01,"[0, 1, 2, 2, 0, 2, 0, 0, 0, 1, 1, 0, 0, 2, 0, ...",0.48,0.16,72.0,"Counter({0: 26, 1: 22, 2: 14})"
3,0.16,0.0,"[6, 13, 20, 33, 38, 40, 62, 66, 81, 100, 108, ...",0.01,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 2, ...",0.76,0.19,162.0,"Counter({0: 26, 1: 19, 2: 17})"
4,0.15,-0.01,"[13, 34, 38, 52, 83, 93, 121, 143, 173, 226, 2...",0.01,"[2, 1, 0, 2, 0, 0, 0, 0, 2, 2, 1, 1, 0, 0, 2, ...",0.7,0.17,167.0,"Counter({0: 24, 1: 20, 2: 18})"
5,0.17,0.02,"[18, 34, 159, 168, 206, 241, 260, 355, 365, 48...",0.08,"[1, 0, 1, 1, 0, 1, 1, 1, 2, 0, 2, 0, 0, 1, 0, ...",0.34,0.15,36.0,"Counter({0: 24, 1: 23, 2: 15})"
6,0.17,-0.0,"[92, 261, 356, 420, 435, 588, 663, 717, 755, 8...",0.01,"[0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 0, 0, 1, ...",0.44,0.21,38.0,"Counter({0: 27, 1: 25, 2: 10})"


adding 3 random population


 77%|███████▋  | 24/31 [03:20<00:40,  5.79s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.18,-0.01,"[22, 80, 101, 108, 116, 156, 171, 213, 215, 24...",0.0,"[0, 1, 0, 1, 2, 2, 0, 2, 1, 0, 0, 1, 0, 2, 0, ...",0.45,0.21,58.0,"Counter({0: 25, 1: 23, 2: 14})"
1,0.18,-0.01,"[29, 359, 391, 395, 415, 419, 443, 464, 579, 5...",0.01,"[0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 0, ...",0.39,0.21,37.0,"Counter({0: 25, 1: 19, 2: 18})"
2,0.17,-0.01,"[13, 84, 96, 111, 115, 121, 171, 194, 202, 205...",0.01,"[0, 1, 2, 2, 0, 2, 0, 0, 0, 1, 1, 0, 0, 2, 0, ...",0.48,0.16,72.0,"Counter({0: 26, 1: 22, 2: 14})"
3,0.16,0.0,"[6, 13, 20, 33, 38, 40, 62, 66, 81, 100, 108, ...",0.01,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 2, ...",0.76,0.19,162.0,"Counter({0: 26, 1: 19, 2: 17})"
4,0.15,-0.01,"[13, 34, 38, 52, 83, 93, 121, 143, 173, 226, 2...",0.01,"[2, 1, 0, 2, 0, 0, 0, 0, 2, 2, 1, 1, 0, 0, 2, ...",0.7,0.17,167.0,"Counter({0: 24, 1: 20, 2: 18})"
5,0.17,0.02,"[18, 34, 159, 168, 206, 241, 260, 355, 365, 48...",0.08,"[1, 0, 1, 1, 0, 1, 1, 1, 2, 0, 2, 0, 0, 1, 0, ...",0.34,0.15,36.0,"Counter({0: 24, 1: 23, 2: 15})"
6,0.17,-0.0,"[92, 261, 356, 420, 435, 588, 663, 717, 755, 8...",0.01,"[0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 0, 0, 1, ...",0.44,0.21,38.0,"Counter({0: 27, 1: 25, 2: 10})"
7,0.17,0.0,"[21, 85, 147, 198, 252, 322, 363, 492, 499, 69...",0.01,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 1, 1, 1, ...",0.38,0.22,31.0,"Counter({0: 26, 1: 26, 2: 10})"


adding 44 random population


 87%|████████▋ | 27/31 [03:40<00:22,  5.56s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.18,-0.01,"[22, 80, 101, 108, 116, 156, 171, 213, 215, 24...",0.0,"[0, 1, 0, 1, 2, 2, 0, 2, 1, 0, 0, 1, 0, 2, 0, ...",0.45,0.21,58.0,"Counter({0: 25, 1: 23, 2: 14})"
1,0.18,-0.01,"[29, 359, 391, 395, 415, 419, 443, 464, 579, 5...",0.01,"[0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 0, ...",0.39,0.21,37.0,"Counter({0: 25, 1: 19, 2: 18})"
2,0.17,-0.01,"[13, 84, 96, 111, 115, 121, 171, 194, 202, 205...",0.01,"[0, 1, 2, 2, 0, 2, 0, 0, 0, 1, 1, 0, 0, 2, 0, ...",0.48,0.16,72.0,"Counter({0: 26, 1: 22, 2: 14})"
3,0.16,0.0,"[6, 13, 20, 33, 38, 40, 62, 66, 81, 100, 108, ...",0.01,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 2, ...",0.76,0.19,162.0,"Counter({0: 26, 1: 19, 2: 17})"
4,0.15,-0.01,"[13, 34, 38, 52, 83, 93, 121, 143, 173, 226, 2...",0.01,"[2, 1, 0, 2, 0, 0, 0, 0, 2, 2, 1, 1, 0, 0, 2, ...",0.7,0.17,167.0,"Counter({0: 24, 1: 20, 2: 18})"
5,0.17,0.02,"[18, 34, 159, 168, 206, 241, 260, 355, 365, 48...",0.08,"[1, 0, 1, 1, 0, 1, 1, 1, 2, 0, 2, 0, 0, 1, 0, ...",0.34,0.15,36.0,"Counter({0: 24, 1: 23, 2: 15})"
6,0.17,-0.0,"[92, 261, 356, 420, 435, 588, 663, 717, 755, 8...",0.01,"[0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 0, 0, 1, ...",0.44,0.21,38.0,"Counter({0: 27, 1: 25, 2: 10})"
7,0.17,0.0,"[21, 85, 147, 198, 252, 322, 363, 492, 499, 69...",0.01,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 1, 1, 1, ...",0.38,0.22,31.0,"Counter({0: 26, 1: 26, 2: 10})"
8,0.16,-0.01,"[2, 28, 34, 38, 39, 63, 77, 82, 86, 152, 158, ...",0.02,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 0, 0, 1, ...",0.79,0.22,139.0,"Counter({0: 27, 1: 26, 2: 9})"


adding 27 random population


 97%|█████████▋| 30/31 [04:08<00:06,  6.79s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.18,-0.01,"[22, 80, 101, 108, 116, 156, 171, 213, 215, 24...",0.0,"[0, 1, 0, 1, 2, 2, 0, 2, 1, 0, 0, 1, 0, 2, 0, ...",0.45,0.21,58.0,"Counter({0: 25, 1: 23, 2: 14})"
1,0.18,-0.01,"[29, 359, 391, 395, 415, 419, 443, 464, 579, 5...",0.01,"[0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 0, ...",0.39,0.21,37.0,"Counter({0: 25, 1: 19, 2: 18})"
2,0.17,-0.01,"[13, 84, 96, 111, 115, 121, 171, 194, 202, 205...",0.01,"[0, 1, 2, 2, 0, 2, 0, 0, 0, 1, 1, 0, 0, 2, 0, ...",0.48,0.16,72.0,"Counter({0: 26, 1: 22, 2: 14})"
3,0.16,0.0,"[6, 13, 20, 33, 38, 40, 62, 66, 81, 100, 108, ...",0.01,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 2, ...",0.76,0.19,162.0,"Counter({0: 26, 1: 19, 2: 17})"
4,0.15,-0.01,"[13, 34, 38, 52, 83, 93, 121, 143, 173, 226, 2...",0.01,"[2, 1, 0, 2, 0, 0, 0, 0, 2, 2, 1, 1, 0, 0, 2, ...",0.7,0.17,167.0,"Counter({0: 24, 1: 20, 2: 18})"
5,0.17,0.02,"[18, 34, 159, 168, 206, 241, 260, 355, 365, 48...",0.08,"[1, 0, 1, 1, 0, 1, 1, 1, 2, 0, 2, 0, 0, 1, 0, ...",0.34,0.15,36.0,"Counter({0: 24, 1: 23, 2: 15})"
6,0.17,-0.0,"[92, 261, 356, 420, 435, 588, 663, 717, 755, 8...",0.01,"[0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 0, 0, 1, ...",0.44,0.21,38.0,"Counter({0: 27, 1: 25, 2: 10})"
7,0.17,0.0,"[21, 85, 147, 198, 252, 322, 363, 492, 499, 69...",0.01,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 1, 1, 1, ...",0.38,0.22,31.0,"Counter({0: 26, 1: 26, 2: 10})"
8,0.16,-0.01,"[2, 28, 34, 38, 39, 63, 77, 82, 86, 152, 158, ...",0.02,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 0, 0, 1, ...",0.79,0.22,139.0,"Counter({0: 27, 1: 26, 2: 9})"
9,0.16,0.26,"[1, 6, 7, 134, 245, 265, 340, 392, 454, 487, 1...",0.34,"[0, 1, 0, 0, 0, 1, 0, 1, 2, 1, 2, 1, 0, 1, 2, ...",0.36,0.29,30.0,"Counter({0: 26, 1: 19, 2: 17})"


adding 11 random population


100%|██████████| 31/31 [04:21<00:00,  8.44s/it]


TIME: GA: 4.3616046667098995 min
TIME: Total: 4.373601977030436 min

##########  borovecki, (31, 22283)
*** Computing 1D feature ranking ...
Dispersion tests took 1.13 sec
Entropy computation 2.44 sec
KNN computation 9.87 sec
Sorting and thresholds 0.06 sec
Loading clustering from file
Hierarchical clustering 0.07 sec


  0%|          | 0/31 [00:00<?, ?it/s]

Handle redundant features 5.23 sec
Returning 6898 redundant features and  677 important features
TIME: 1d Features : 0.3139472285906474 min
TIME: 2d scores: 3.973642985026042e-09 min
*** Optimization algorithm 
Non redundant features 15385, orig size 22283, nb imp : 677
adding 50 random population


 10%|▉         | 3/31 [00:08<01:13,  2.63s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.27,0.75,"[395, 473, 997, 1159, 1613, 2223, 2455, 2697, ...",0.72,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",1.24,0.42,161.0,"Counter({0: 16, 1: 15})"


adding 3 random population


 19%|█▉        | 6/31 [00:45<03:07,  7.49s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.27,0.75,"[395, 473, 997, 1159, 1613, 2223, 2455, 2697, ...",0.72,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",1.24,0.42,161.0,"Counter({0: 16, 1: 15})"
1,0.22,0.75,"[589, 1001, 1672, 1839, 2223, 2987, 3291, 3395...",0.72,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, ...",0.74,0.34,98.0,"Counter({0: 16, 1: 15})"


adding 7 random population


 29%|██▉       | 9/31 [01:45<04:26, 12.11s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.27,0.75,"[395, 473, 997, 1159, 1613, 2223, 2455, 2697, ...",0.72,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",1.24,0.42,161.0,"Counter({0: 16, 1: 15})"
1,0.22,0.75,"[589, 1001, 1672, 1839, 2223, 2987, 3291, 3395...",0.72,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, ...",0.74,0.34,98.0,"Counter({0: 16, 1: 15})"
2,0.19,0.48,"[152, 285, 365, 440, 584, 724, 960, 978, 1073,...",0.55,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, ...",0.93,0.22,159.0,"Counter({0: 12, 1: 10, 2: 9})"


adding 3 random population


 39%|███▊      | 12/31 [02:16<02:55,  9.24s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.27,0.75,"[395, 473, 997, 1159, 1613, 2223, 2455, 2697, ...",0.72,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",1.24,0.42,161.0,"Counter({0: 16, 1: 15})"
1,0.22,0.75,"[589, 1001, 1672, 1839, 2223, 2987, 3291, 3395...",0.72,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, ...",0.74,0.34,98.0,"Counter({0: 16, 1: 15})"
2,0.19,0.48,"[152, 285, 365, 440, 584, 724, 960, 978, 1073,...",0.55,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, ...",0.93,0.22,159.0,"Counter({0: 12, 1: 10, 2: 9})"
3,0.27,0.75,"[997, 2697, 2847, 2904, 2983, 2988, 3066, 3198...",0.72,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, ...",1.04,0.44,101.0,"Counter({0: 16, 1: 15})"


adding 13 random population


 48%|████▊     | 15/31 [03:07<03:00, 11.29s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.27,0.75,"[395, 473, 997, 1159, 1613, 2223, 2455, 2697, ...",0.72,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",1.24,0.42,161.0,"Counter({0: 16, 1: 15})"
1,0.22,0.75,"[589, 1001, 1672, 1839, 2223, 2987, 3291, 3395...",0.72,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, ...",0.74,0.34,98.0,"Counter({0: 16, 1: 15})"
2,0.19,0.48,"[152, 285, 365, 440, 584, 724, 960, 978, 1073,...",0.55,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, ...",0.93,0.22,159.0,"Counter({0: 12, 1: 10, 2: 9})"
3,0.27,0.75,"[997, 2697, 2847, 2904, 2983, 2988, 3066, 3198...",0.72,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, ...",1.04,0.44,101.0,"Counter({0: 16, 1: 15})"
4,0.21,0.66,"[743, 1001, 2223, 2697, 2842, 2844, 2895, 2904...",0.68,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, ...",1.29,0.27,184.0,"Counter({0: 15, 1: 12, 2: 4})"


adding 16 random population


 58%|█████▊    | 18/31 [03:34<01:49,  8.39s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.27,0.75,"[395, 473, 997, 1159, 1613, 2223, 2455, 2697, ...",0.72,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",1.24,0.42,161.0,"Counter({0: 16, 1: 15})"
1,0.22,0.75,"[589, 1001, 1672, 1839, 2223, 2987, 3291, 3395...",0.72,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, ...",0.74,0.34,98.0,"Counter({0: 16, 1: 15})"
2,0.19,0.48,"[152, 285, 365, 440, 584, 724, 960, 978, 1073,...",0.55,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, ...",0.93,0.22,159.0,"Counter({0: 12, 1: 10, 2: 9})"
3,0.27,0.75,"[997, 2697, 2847, 2904, 2983, 2988, 3066, 3198...",0.72,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, ...",1.04,0.44,101.0,"Counter({0: 16, 1: 15})"
4,0.21,0.66,"[743, 1001, 2223, 2697, 2842, 2844, 2895, 2904...",0.68,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, ...",1.29,0.27,184.0,"Counter({0: 15, 1: 12, 2: 4})"
5,0.21,0.48,"[1003, 1333, 2697, 2791, 2809, 2983, 2989, 308...",0.48,"[0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, ...",1.07,0.22,171.0,"Counter({0: 12, 1: 12, 2: 7})"


adding 11 random population


 68%|██████▊   | 21/31 [04:01<01:14,  7.42s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.27,0.75,"[395, 473, 997, 1159, 1613, 2223, 2455, 2697, ...",0.72,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",1.24,0.42,161.0,"Counter({0: 16, 1: 15})"
1,0.22,0.75,"[589, 1001, 1672, 1839, 2223, 2987, 3291, 3395...",0.72,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, ...",0.74,0.34,98.0,"Counter({0: 16, 1: 15})"
2,0.19,0.48,"[152, 285, 365, 440, 584, 724, 960, 978, 1073,...",0.55,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, ...",0.93,0.22,159.0,"Counter({0: 12, 1: 10, 2: 9})"
3,0.27,0.75,"[997, 2697, 2847, 2904, 2983, 2988, 3066, 3198...",0.72,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, ...",1.04,0.44,101.0,"Counter({0: 16, 1: 15})"
4,0.21,0.66,"[743, 1001, 2223, 2697, 2842, 2844, 2895, 2904...",0.68,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, ...",1.29,0.27,184.0,"Counter({0: 15, 1: 12, 2: 4})"
5,0.21,0.48,"[1003, 1333, 2697, 2791, 2809, 2983, 2989, 308...",0.48,"[0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, ...",1.07,0.22,171.0,"Counter({0: 12, 1: 12, 2: 7})"
6,0.19,0.48,"[356, 370, 584, 589, 724, 1003, 1703, 2938, 29...",0.55,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, ...",1.09,0.33,188.0,"Counter({0: 12, 1: 10, 2: 9})"


adding 5 random population


 77%|███████▋  | 24/31 [04:29<00:49,  7.13s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.27,0.75,"[395, 473, 997, 1159, 1613, 2223, 2455, 2697, ...",0.72,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",1.24,0.42,161.0,"Counter({0: 16, 1: 15})"
1,0.22,0.75,"[589, 1001, 1672, 1839, 2223, 2987, 3291, 3395...",0.72,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, ...",0.74,0.34,98.0,"Counter({0: 16, 1: 15})"
2,0.19,0.48,"[152, 285, 365, 440, 584, 724, 960, 978, 1073,...",0.55,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, ...",0.93,0.22,159.0,"Counter({0: 12, 1: 10, 2: 9})"
3,0.27,0.75,"[997, 2697, 2847, 2904, 2983, 2988, 3066, 3198...",0.72,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, ...",1.04,0.44,101.0,"Counter({0: 16, 1: 15})"
4,0.21,0.66,"[743, 1001, 2223, 2697, 2842, 2844, 2895, 2904...",0.68,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, ...",1.29,0.27,184.0,"Counter({0: 15, 1: 12, 2: 4})"
5,0.21,0.48,"[1003, 1333, 2697, 2791, 2809, 2983, 2989, 308...",0.48,"[0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, ...",1.07,0.22,171.0,"Counter({0: 12, 1: 12, 2: 7})"
6,0.19,0.48,"[356, 370, 584, 589, 724, 1003, 1703, 2938, 29...",0.55,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, ...",1.09,0.33,188.0,"Counter({0: 12, 1: 10, 2: 9})"
7,0.19,-0.02,"[2871, 12799, 17945, 21013]",0.03,"[2, 0, 2, 0, 2, 0, 1, 0, 2, 2, 1, 1, 0, 1, 2, ...",0.22,0.46,4.0,"Counter({0: 12, 1: 11, 2: 8})"


adding 4 random population


 87%|████████▋ | 27/31 [04:43<00:20,  5.08s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.27,0.75,"[395, 473, 997, 1159, 1613, 2223, 2455, 2697, ...",0.72,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",1.24,0.42,161.0,"Counter({0: 16, 1: 15})"
1,0.22,0.75,"[589, 1001, 1672, 1839, 2223, 2987, 3291, 3395...",0.72,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, ...",0.74,0.34,98.0,"Counter({0: 16, 1: 15})"
2,0.19,0.48,"[152, 285, 365, 440, 584, 724, 960, 978, 1073,...",0.55,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, ...",0.93,0.22,159.0,"Counter({0: 12, 1: 10, 2: 9})"
3,0.27,0.75,"[997, 2697, 2847, 2904, 2983, 2988, 3066, 3198...",0.72,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, ...",1.04,0.44,101.0,"Counter({0: 16, 1: 15})"
4,0.21,0.66,"[743, 1001, 2223, 2697, 2842, 2844, 2895, 2904...",0.68,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, ...",1.29,0.27,184.0,"Counter({0: 15, 1: 12, 2: 4})"
5,0.21,0.48,"[1003, 1333, 2697, 2791, 2809, 2983, 2989, 308...",0.48,"[0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, ...",1.07,0.22,171.0,"Counter({0: 12, 1: 12, 2: 7})"
6,0.19,0.48,"[356, 370, 584, 589, 724, 1003, 1703, 2938, 29...",0.55,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, ...",1.09,0.33,188.0,"Counter({0: 12, 1: 10, 2: 9})"
7,0.19,-0.02,"[2871, 12799, 17945, 21013]",0.03,"[2, 0, 2, 0, 2, 0, 1, 0, 2, 2, 1, 1, 0, 1, 2, ...",0.22,0.46,4.0,"Counter({0: 12, 1: 11, 2: 8})"
8,0.18,0.44,"[113, 642, 658, 718, 860, 1005, 1962, 2983, 29...",0.46,"[1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, ...",0.86,0.18,168.0,"Counter({0: 12, 1: 11, 2: 8})"


adding 18 random population


 97%|█████████▋| 30/31 [05:06<00:05,  5.77s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.27,0.75,"[395, 473, 997, 1159, 1613, 2223, 2455, 2697, ...",0.72,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",1.24,0.42,161.0,"Counter({0: 16, 1: 15})"
1,0.22,0.75,"[589, 1001, 1672, 1839, 2223, 2987, 3291, 3395...",0.72,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, ...",0.74,0.34,98.0,"Counter({0: 16, 1: 15})"
2,0.19,0.48,"[152, 285, 365, 440, 584, 724, 960, 978, 1073,...",0.55,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, ...",0.93,0.22,159.0,"Counter({0: 12, 1: 10, 2: 9})"
3,0.27,0.75,"[997, 2697, 2847, 2904, 2983, 2988, 3066, 3198...",0.72,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, ...",1.04,0.44,101.0,"Counter({0: 16, 1: 15})"
4,0.21,0.66,"[743, 1001, 2223, 2697, 2842, 2844, 2895, 2904...",0.68,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, ...",1.29,0.27,184.0,"Counter({0: 15, 1: 12, 2: 4})"
5,0.21,0.48,"[1003, 1333, 2697, 2791, 2809, 2983, 2989, 308...",0.48,"[0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, ...",1.07,0.22,171.0,"Counter({0: 12, 1: 12, 2: 7})"
6,0.19,0.48,"[356, 370, 584, 589, 724, 1003, 1703, 2938, 29...",0.55,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, ...",1.09,0.33,188.0,"Counter({0: 12, 1: 10, 2: 9})"
7,0.19,-0.02,"[2871, 12799, 17945, 21013]",0.03,"[2, 0, 2, 0, 2, 0, 1, 0, 2, 2, 1, 1, 0, 1, 2, ...",0.22,0.46,4.0,"Counter({0: 12, 1: 11, 2: 8})"
8,0.18,0.44,"[113, 642, 658, 718, 860, 1005, 1962, 2983, 29...",0.46,"[1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, ...",0.86,0.18,168.0,"Counter({0: 12, 1: 11, 2: 8})"
9,0.18,-0.01,"[2262, 5772, 14861, 22117]",0.04,"[1, 2, 0, 2, 2, 0, 1, 0, 0, 2, 1, 2, 0, 1, 1, ...",0.24,0.5,4.0,"Counter({0: 14, 1: 9, 2: 8})"


adding 3 random population


100%|██████████| 31/31 [05:15<00:00, 10.19s/it]


TIME: GA: 5.262388865152995 min
TIME: Total: 5.576336793104807 min

##########  chiaretti, (128, 12625)
*** Computing 1D feature ranking ...
Dispersion tests took 0.46 sec
Entropy computation 2.22 sec
KNN computation 13.39 sec
Sorting and thresholds 0.04 sec
Performing hierarchical clustering...
Hierarchical clustering 8.56 sec


  0%|          | 0/31 [00:00<?, ?it/s]

Handle redundant features 2.09 sec
Returning 3395 redundant features and  346 important features
TIME: 1d Features : 0.44651633898417153 min
TIME: 2d scores: 3.973642985026042e-09 min
*** Optimization algorithm 
Non redundant features 9230, orig size 12625, nb imp : 346
adding 50 random population


 10%|▉         | 3/31 [00:20<03:05,  6.63s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.17,-0.0,"[1285, 1704, 2478, 2834, 3071, 3146, 3748, 695...",0.04,"[0, 1, 1, 0, 2, 1, 1, 1, 2, 0, 0, 0, 2, 0, 0, ...",0.19,0.26,9.0,"Counter({0: 48, 1: 47, 2: 33})"


adding 6 random population


 19%|█▉        | 6/31 [00:57<03:55,  9.41s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.17,-0.0,"[1285, 1704, 2478, 2834, 3071, 3146, 3748, 695...",0.04,"[0, 1, 1, 0, 2, 1, 1, 1, 2, 0, 0, 0, 2, 0, 0, ...",0.19,0.26,9.0,"Counter({0: 48, 1: 47, 2: 33})"
1,0.15,0.28,"[41, 764, 765, 1157, 1320, 1439, 1521, 1908, 2...",0.48,"[1, 0, 0, 3, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, ...",0.43,0.21,35.0,"Counter({0: 46, 1: 39, 2: 33, 3: 10})"


adding 8 random population


 29%|██▉       | 9/31 [01:58<05:03, 13.80s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.17,-0.0,"[1285, 1704, 2478, 2834, 3071, 3146, 3748, 695...",0.04,"[0, 1, 1, 0, 2, 1, 1, 1, 2, 0, 0, 0, 2, 0, 0, ...",0.19,0.26,9.0,"Counter({0: 48, 1: 47, 2: 33})"
1,0.15,0.28,"[41, 764, 765, 1157, 1320, 1439, 1521, 1908, 2...",0.48,"[1, 0, 0, 3, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, ...",0.43,0.21,35.0,"Counter({0: 46, 1: 39, 2: 33, 3: 10})"
2,0.17,0.17,"[15, 16, 41, 320, 321, 369, 764, 765, 817, 974...",0.32,"[0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, ...",0.59,0.24,80.0,"Counter({0: 52, 1: 43, 2: 33})"


adding 19 random population


 39%|███▊      | 12/31 [03:38<06:40, 21.06s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.17,-0.0,"[1285, 1704, 2478, 2834, 3071, 3146, 3748, 695...",0.04,"[0, 1, 1, 0, 2, 1, 1, 1, 2, 0, 0, 0, 2, 0, 0, ...",0.19,0.26,9.0,"Counter({0: 48, 1: 47, 2: 33})"
1,0.15,0.28,"[41, 764, 765, 1157, 1320, 1439, 1521, 1908, 2...",0.48,"[1, 0, 0, 3, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, ...",0.43,0.21,35.0,"Counter({0: 46, 1: 39, 2: 33, 3: 10})"
2,0.17,0.17,"[15, 16, 41, 320, 321, 369, 764, 765, 817, 974...",0.32,"[0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, ...",0.59,0.24,80.0,"Counter({0: 52, 1: 43, 2: 33})"
3,0.18,0.01,"[770, 823, 869, 1005, 1262, 1313, 1333, 1614, ...",0.06,"[1, 1, 1, 1, 2, 1, 0, 0, 1, 0, 1, 1, 1, 2, 0, ...",0.25,0.22,18.0,"Counter({0: 50, 1: 42, 2: 36})"


adding 7 random population


 48%|████▊     | 15/31 [04:20<03:55, 14.73s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.17,-0.0,"[1285, 1704, 2478, 2834, 3071, 3146, 3748, 695...",0.04,"[0, 1, 1, 0, 2, 1, 1, 1, 2, 0, 0, 0, 2, 0, 0, ...",0.19,0.26,9.0,"Counter({0: 48, 1: 47, 2: 33})"
1,0.15,0.28,"[41, 764, 765, 1157, 1320, 1439, 1521, 1908, 2...",0.48,"[1, 0, 0, 3, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, ...",0.43,0.21,35.0,"Counter({0: 46, 1: 39, 2: 33, 3: 10})"
2,0.17,0.17,"[15, 16, 41, 320, 321, 369, 764, 765, 817, 974...",0.32,"[0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, ...",0.59,0.24,80.0,"Counter({0: 52, 1: 43, 2: 33})"
3,0.18,0.01,"[770, 823, 869, 1005, 1262, 1313, 1333, 1614, ...",0.06,"[1, 1, 1, 1, 2, 1, 0, 0, 1, 0, 1, 1, 1, 2, 0, ...",0.25,0.22,18.0,"Counter({0: 50, 1: 42, 2: 36})"
4,0.17,-0.0,"[95, 235, 343, 359, 572, 611, 641, 710, 1730, ...",0.04,"[0, 0, 2, 0, 2, 2, 0, 2, 1, 0, 2, 2, 1, 0, 2, ...",0.26,0.23,28.0,"Counter({0: 50, 1: 43, 2: 35})"


adding 1 random population


 58%|█████▊    | 18/31 [05:11<03:01, 13.97s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.17,-0.0,"[1285, 1704, 2478, 2834, 3071, 3146, 3748, 695...",0.04,"[0, 1, 1, 0, 2, 1, 1, 1, 2, 0, 0, 0, 2, 0, 0, ...",0.19,0.26,9.0,"Counter({0: 48, 1: 47, 2: 33})"
1,0.15,0.28,"[41, 764, 765, 1157, 1320, 1439, 1521, 1908, 2...",0.48,"[1, 0, 0, 3, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, ...",0.43,0.21,35.0,"Counter({0: 46, 1: 39, 2: 33, 3: 10})"
2,0.17,0.17,"[15, 16, 41, 320, 321, 369, 764, 765, 817, 974...",0.32,"[0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, ...",0.59,0.24,80.0,"Counter({0: 52, 1: 43, 2: 33})"
3,0.18,0.01,"[770, 823, 869, 1005, 1262, 1313, 1333, 1614, ...",0.06,"[1, 1, 1, 1, 2, 1, 0, 0, 1, 0, 1, 1, 1, 2, 0, ...",0.25,0.22,18.0,"Counter({0: 50, 1: 42, 2: 36})"
4,0.17,-0.0,"[95, 235, 343, 359, 572, 611, 641, 710, 1730, ...",0.04,"[0, 0, 2, 0, 2, 2, 0, 2, 1, 0, 2, 2, 1, 0, 2, ...",0.26,0.23,28.0,"Counter({0: 50, 1: 43, 2: 35})"
5,0.16,-0.01,"[172, 322, 368, 408, 587, 618, 751, 844, 959, ...",0.02,"[0, 2, 0, 2, 0, 1, 2, 0, 0, 0, 0, 0, 0, 1, 2, ...",0.61,0.22,153.0,"Counter({0: 58, 1: 50, 2: 20})"


adding 43 random population


 68%|██████▊   | 21/31 [06:41<03:14, 19.49s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.17,-0.0,"[1285, 1704, 2478, 2834, 3071, 3146, 3748, 695...",0.04,"[0, 1, 1, 0, 2, 1, 1, 1, 2, 0, 0, 0, 2, 0, 0, ...",0.19,0.26,9.0,"Counter({0: 48, 1: 47, 2: 33})"
1,0.15,0.28,"[41, 764, 765, 1157, 1320, 1439, 1521, 1908, 2...",0.48,"[1, 0, 0, 3, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, ...",0.43,0.21,35.0,"Counter({0: 46, 1: 39, 2: 33, 3: 10})"
2,0.17,0.17,"[15, 16, 41, 320, 321, 369, 764, 765, 817, 974...",0.32,"[0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, ...",0.59,0.24,80.0,"Counter({0: 52, 1: 43, 2: 33})"
3,0.18,0.01,"[770, 823, 869, 1005, 1262, 1313, 1333, 1614, ...",0.06,"[1, 1, 1, 1, 2, 1, 0, 0, 1, 0, 1, 1, 1, 2, 0, ...",0.25,0.22,18.0,"Counter({0: 50, 1: 42, 2: 36})"
4,0.17,-0.0,"[95, 235, 343, 359, 572, 611, 641, 710, 1730, ...",0.04,"[0, 0, 2, 0, 2, 2, 0, 2, 1, 0, 2, 2, 1, 0, 2, ...",0.26,0.23,28.0,"Counter({0: 50, 1: 43, 2: 35})"
5,0.16,-0.01,"[172, 322, 368, 408, 587, 618, 751, 844, 959, ...",0.02,"[0, 2, 0, 2, 0, 1, 2, 0, 0, 0, 0, 0, 0, 1, 2, ...",0.61,0.22,153.0,"Counter({0: 58, 1: 50, 2: 20})"
6,0.17,0.02,"[106, 182, 207, 214, 215, 410, 470, 796, 864, ...",0.04,"[1, 1, 2, 1, 2, 1, 1, 2, 1, 0, 1, 2, 2, 1, 1, ...",0.34,0.2,47.0,"Counter({0: 44, 1: 42, 2: 42})"


adding 16 random population


 77%|███████▋  | 24/31 [07:44<02:00, 17.28s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.17,-0.0,"[1285, 1704, 2478, 2834, 3071, 3146, 3748, 695...",0.04,"[0, 1, 1, 0, 2, 1, 1, 1, 2, 0, 0, 0, 2, 0, 0, ...",0.19,0.26,9.0,"Counter({0: 48, 1: 47, 2: 33})"
1,0.15,0.28,"[41, 764, 765, 1157, 1320, 1439, 1521, 1908, 2...",0.48,"[1, 0, 0, 3, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, ...",0.43,0.21,35.0,"Counter({0: 46, 1: 39, 2: 33, 3: 10})"
2,0.17,0.17,"[15, 16, 41, 320, 321, 369, 764, 765, 817, 974...",0.32,"[0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, ...",0.59,0.24,80.0,"Counter({0: 52, 1: 43, 2: 33})"
3,0.18,0.01,"[770, 823, 869, 1005, 1262, 1313, 1333, 1614, ...",0.06,"[1, 1, 1, 1, 2, 1, 0, 0, 1, 0, 1, 1, 1, 2, 0, ...",0.25,0.22,18.0,"Counter({0: 50, 1: 42, 2: 36})"
4,0.17,-0.0,"[95, 235, 343, 359, 572, 611, 641, 710, 1730, ...",0.04,"[0, 0, 2, 0, 2, 2, 0, 2, 1, 0, 2, 2, 1, 0, 2, ...",0.26,0.23,28.0,"Counter({0: 50, 1: 43, 2: 35})"
5,0.16,-0.01,"[172, 322, 368, 408, 587, 618, 751, 844, 959, ...",0.02,"[0, 2, 0, 2, 0, 1, 2, 0, 0, 0, 0, 0, 0, 1, 2, ...",0.61,0.22,153.0,"Counter({0: 58, 1: 50, 2: 20})"
6,0.17,0.02,"[106, 182, 207, 214, 215, 410, 470, 796, 864, ...",0.04,"[1, 1, 2, 1, 2, 1, 1, 2, 1, 0, 1, 2, 2, 1, 1, ...",0.34,0.2,47.0,"Counter({0: 44, 1: 42, 2: 42})"
7,0.17,0.01,"[546, 875, 1037, 1252, 1313, 1430, 1696, 1999,...",0.05,"[0, 0, 0, 0, 2, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, ...",0.39,0.25,44.0,"Counter({0: 49, 1: 46, 2: 33})"


adding 26 random population


 87%|████████▋ | 27/31 [08:30<00:56, 14.01s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.17,-0.0,"[1285, 1704, 2478, 2834, 3071, 3146, 3748, 695...",0.04,"[0, 1, 1, 0, 2, 1, 1, 1, 2, 0, 0, 0, 2, 0, 0, ...",0.19,0.26,9.0,"Counter({0: 48, 1: 47, 2: 33})"
1,0.15,0.28,"[41, 764, 765, 1157, 1320, 1439, 1521, 1908, 2...",0.48,"[1, 0, 0, 3, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, ...",0.43,0.21,35.0,"Counter({0: 46, 1: 39, 2: 33, 3: 10})"
2,0.17,0.17,"[15, 16, 41, 320, 321, 369, 764, 765, 817, 974...",0.32,"[0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, ...",0.59,0.24,80.0,"Counter({0: 52, 1: 43, 2: 33})"
3,0.18,0.01,"[770, 823, 869, 1005, 1262, 1313, 1333, 1614, ...",0.06,"[1, 1, 1, 1, 2, 1, 0, 0, 1, 0, 1, 1, 1, 2, 0, ...",0.25,0.22,18.0,"Counter({0: 50, 1: 42, 2: 36})"
4,0.17,-0.0,"[95, 235, 343, 359, 572, 611, 641, 710, 1730, ...",0.04,"[0, 0, 2, 0, 2, 2, 0, 2, 1, 0, 2, 2, 1, 0, 2, ...",0.26,0.23,28.0,"Counter({0: 50, 1: 43, 2: 35})"
5,0.16,-0.01,"[172, 322, 368, 408, 587, 618, 751, 844, 959, ...",0.02,"[0, 2, 0, 2, 0, 1, 2, 0, 0, 0, 0, 0, 0, 1, 2, ...",0.61,0.22,153.0,"Counter({0: 58, 1: 50, 2: 20})"
6,0.17,0.02,"[106, 182, 207, 214, 215, 410, 470, 796, 864, ...",0.04,"[1, 1, 2, 1, 2, 1, 1, 2, 1, 0, 1, 2, 2, 1, 1, ...",0.34,0.2,47.0,"Counter({0: 44, 1: 42, 2: 42})"
7,0.17,0.01,"[546, 875, 1037, 1252, 1313, 1430, 1696, 1999,...",0.05,"[0, 0, 0, 0, 2, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, ...",0.39,0.25,44.0,"Counter({0: 49, 1: 46, 2: 33})"
8,0.15,0.01,"[3042, 3570, 3824, 5390, 5794, 7809, 9531, 108...",0.05,"[0, 1, 0, 1, 2, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, ...",0.17,0.2,13.0,"Counter({0: 46, 1: 42, 2: 40})"


adding 12 random population


 97%|█████████▋| 30/31 [09:08<00:11, 11.72s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.17,-0.0,"[1285, 1704, 2478, 2834, 3071, 3146, 3748, 695...",0.04,"[0, 1, 1, 0, 2, 1, 1, 1, 2, 0, 0, 0, 2, 0, 0, ...",0.19,0.26,9.0,"Counter({0: 48, 1: 47, 2: 33})"
1,0.15,0.28,"[41, 764, 765, 1157, 1320, 1439, 1521, 1908, 2...",0.48,"[1, 0, 0, 3, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, ...",0.43,0.21,35.0,"Counter({0: 46, 1: 39, 2: 33, 3: 10})"
2,0.17,0.17,"[15, 16, 41, 320, 321, 369, 764, 765, 817, 974...",0.32,"[0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, ...",0.59,0.24,80.0,"Counter({0: 52, 1: 43, 2: 33})"
3,0.18,0.01,"[770, 823, 869, 1005, 1262, 1313, 1333, 1614, ...",0.06,"[1, 1, 1, 1, 2, 1, 0, 0, 1, 0, 1, 1, 1, 2, 0, ...",0.25,0.22,18.0,"Counter({0: 50, 1: 42, 2: 36})"
4,0.17,-0.0,"[95, 235, 343, 359, 572, 611, 641, 710, 1730, ...",0.04,"[0, 0, 2, 0, 2, 2, 0, 2, 1, 0, 2, 2, 1, 0, 2, ...",0.26,0.23,28.0,"Counter({0: 50, 1: 43, 2: 35})"
5,0.16,-0.01,"[172, 322, 368, 408, 587, 618, 751, 844, 959, ...",0.02,"[0, 2, 0, 2, 0, 1, 2, 0, 0, 0, 0, 0, 0, 1, 2, ...",0.61,0.22,153.0,"Counter({0: 58, 1: 50, 2: 20})"
6,0.17,0.02,"[106, 182, 207, 214, 215, 410, 470, 796, 864, ...",0.04,"[1, 1, 2, 1, 2, 1, 1, 2, 1, 0, 1, 2, 2, 1, 1, ...",0.34,0.2,47.0,"Counter({0: 44, 1: 42, 2: 42})"
7,0.17,0.01,"[546, 875, 1037, 1252, 1313, 1430, 1696, 1999,...",0.05,"[0, 0, 0, 0, 2, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, ...",0.39,0.25,44.0,"Counter({0: 49, 1: 46, 2: 33})"
8,0.15,0.01,"[3042, 3570, 3824, 5390, 5794, 7809, 9531, 108...",0.05,"[0, 1, 0, 1, 2, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, ...",0.17,0.2,13.0,"Counter({0: 46, 1: 42, 2: 40})"
9,0.14,0.0,"[99, 403, 780, 1667, 3978, 5286, 5802, 6981, 7...",0.05,"[0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 1, 0, ...",0.16,0.2,14.0,"Counter({0: 49, 1: 40, 2: 39})"


adding 6 random population


100%|██████████| 31/31 [09:33<00:00, 18.49s/it]


TIME: GA: 9.551410106817881 min
TIME: Total: 9.997927363713583 min

##########  christensen, (217, 1413)
*** Computing 1D feature ranking ...
Dispersion tests took 0.03 sec
Entropy computation 0.37 sec
KNN computation 0.22 sec
Sorting and thresholds 0.02 sec
Performing hierarchical clustering...
Hierarchical clustering 0.12 sec


  0%|          | 0/31 [00:00<?, ?it/s]

Handle redundant features 0.19 sec
Returning 494 redundant features and  55 important features
TIME: 1d Features : 0.01574117342631022 min
TIME: 2d scores: 3.973642985026042e-09 min
*** Optimization algorithm 
Non redundant features 919, orig size 1413, nb imp : 55
adding 50 random population


 10%|▉         | 3/31 [00:45<07:01, 15.07s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.19,0.15,"[43, 212, 232, 304, 312, 321, 336, 340, 357, 3...",0.22,"[0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, ...",0.43,0.32,43.0,"Counter({0: 90, 1: 76, 2: 51})"


adding 5 random population


 19%|█▉        | 6/31 [02:52<12:20, 29.63s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.19,0.15,"[43, 212, 232, 304, 312, 321, 336, 340, 357, 3...",0.22,"[0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, ...",0.43,0.32,43.0,"Counter({0: 90, 1: 76, 2: 51})"
1,0.18,0.73,"[0, 16, 21, 25, 28, 31, 33, 54, 73, 74, 78, 11...",0.84,"[1, 1, 2, 2, 2, 1, 2, 2, 0, 0, 0, 0, 0, 1, 1, ...",1.04,0.36,102.0,"Counter({0: 85, 1: 71, 2: 42, 3: 19})"


adding 40 random population


 29%|██▉       | 9/31 [06:42<17:40, 48.21s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.19,0.15,"[43, 212, 232, 304, 312, 321, 336, 340, 357, 3...",0.22,"[0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, ...",0.43,0.32,43.0,"Counter({0: 90, 1: 76, 2: 51})"
1,0.18,0.73,"[0, 16, 21, 25, 28, 31, 33, 54, 73, 74, 78, 11...",0.84,"[1, 1, 2, 2, 2, 1, 2, 2, 0, 0, 0, 0, 0, 1, 1, ...",1.04,0.36,102.0,"Counter({0: 85, 1: 71, 2: 42, 3: 19})"
2,0.19,0.53,"[6, 19, 40, 68, 118, 123, 127, 176, 245, 356, ...",0.57,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, ...",0.57,0.39,53.0,"Counter({0: 92, 1: 74, 2: 51})"


adding 43 random population


 39%|███▊      | 12/31 [09:17<13:34, 42.88s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.19,0.15,"[43, 212, 232, 304, 312, 321, 336, 340, 357, 3...",0.22,"[0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, ...",0.43,0.32,43.0,"Counter({0: 90, 1: 76, 2: 51})"
1,0.18,0.73,"[0, 16, 21, 25, 28, 31, 33, 54, 73, 74, 78, 11...",0.84,"[1, 1, 2, 2, 2, 1, 2, 2, 0, 0, 0, 0, 0, 1, 1, ...",1.04,0.36,102.0,"Counter({0: 85, 1: 71, 2: 42, 3: 19})"
2,0.19,0.53,"[6, 19, 40, 68, 118, 123, 127, 176, 245, 356, ...",0.57,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, ...",0.57,0.39,53.0,"Counter({0: 92, 1: 74, 2: 51})"
3,0.16,0.38,"[19, 22, 41, 43, 46, 68, 89, 94, 96, 118, 122,...",0.5,"[0, 0, 3, 0, 0, 3, 3, 3, 1, 1, 1, 1, 1, 3, 0, ...",0.8,0.35,133.0,"Counter({0: 64, 1: 55, 2: 51, 3: 47})"


adding 36 random population


 48%|████▊     | 15/31 [14:04<16:09, 60.57s/it] 

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.19,0.15,"[43, 212, 232, 304, 312, 321, 336, 340, 357, 3...",0.22,"[0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, ...",0.43,0.32,43.0,"Counter({0: 90, 1: 76, 2: 51})"
1,0.18,0.73,"[0, 16, 21, 25, 28, 31, 33, 54, 73, 74, 78, 11...",0.84,"[1, 1, 2, 2, 2, 1, 2, 2, 0, 0, 0, 0, 0, 1, 1, ...",1.04,0.36,102.0,"Counter({0: 85, 1: 71, 2: 42, 3: 19})"
2,0.19,0.53,"[6, 19, 40, 68, 118, 123, 127, 176, 245, 356, ...",0.57,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, ...",0.57,0.39,53.0,"Counter({0: 92, 1: 74, 2: 51})"
3,0.16,0.38,"[19, 22, 41, 43, 46, 68, 89, 94, 96, 118, 122,...",0.5,"[0, 0, 3, 0, 0, 3, 3, 3, 1, 1, 1, 1, 1, 3, 0, ...",0.8,0.35,133.0,"Counter({0: 64, 1: 55, 2: 51, 3: 47})"
4,0.18,0.84,"[1, 2, 4, 10, 14, 15, 16, 21, 25, 28, 31, 33, ...",0.89,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, ...",1.36,0.47,152.0,"Counter({0: 94, 1: 85, 3: 19, 2: 19})"


adding 43 random population


 58%|█████▊    | 18/31 [17:41<12:05, 55.80s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.19,0.15,"[43, 212, 232, 304, 312, 321, 336, 340, 357, 3...",0.22,"[0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, ...",0.43,0.32,43.0,"Counter({0: 90, 1: 76, 2: 51})"
1,0.18,0.73,"[0, 16, 21, 25, 28, 31, 33, 54, 73, 74, 78, 11...",0.84,"[1, 1, 2, 2, 2, 1, 2, 2, 0, 0, 0, 0, 0, 1, 1, ...",1.04,0.36,102.0,"Counter({0: 85, 1: 71, 2: 42, 3: 19})"
2,0.19,0.53,"[6, 19, 40, 68, 118, 123, 127, 176, 245, 356, ...",0.57,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, ...",0.57,0.39,53.0,"Counter({0: 92, 1: 74, 2: 51})"
3,0.16,0.38,"[19, 22, 41, 43, 46, 68, 89, 94, 96, 118, 122,...",0.5,"[0, 0, 3, 0, 0, 3, 3, 3, 1, 1, 1, 1, 1, 3, 0, ...",0.8,0.35,133.0,"Counter({0: 64, 1: 55, 2: 51, 3: 47})"
4,0.18,0.84,"[1, 2, 4, 10, 14, 15, 16, 21, 25, 28, 31, 33, ...",0.89,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, ...",1.36,0.47,152.0,"Counter({0: 94, 1: 85, 3: 19, 2: 19})"
5,0.14,0.54,"[5, 13, 19, 63, 93, 107, 117, 122, 136, 167, 1...",0.59,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, ...",0.66,0.39,73.0,"Counter({0: 85, 1: 55, 2: 42, 3: 35})"


adding 35 random population


 68%|██████▊   | 21/31 [21:23<09:09, 54.99s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.19,0.15,"[43, 212, 232, 304, 312, 321, 336, 340, 357, 3...",0.22,"[0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, ...",0.43,0.32,43.0,"Counter({0: 90, 1: 76, 2: 51})"
1,0.18,0.73,"[0, 16, 21, 25, 28, 31, 33, 54, 73, 74, 78, 11...",0.84,"[1, 1, 2, 2, 2, 1, 2, 2, 0, 0, 0, 0, 0, 1, 1, ...",1.04,0.36,102.0,"Counter({0: 85, 1: 71, 2: 42, 3: 19})"
2,0.19,0.53,"[6, 19, 40, 68, 118, 123, 127, 176, 245, 356, ...",0.57,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, ...",0.57,0.39,53.0,"Counter({0: 92, 1: 74, 2: 51})"
3,0.16,0.38,"[19, 22, 41, 43, 46, 68, 89, 94, 96, 118, 122,...",0.5,"[0, 0, 3, 0, 0, 3, 3, 3, 1, 1, 1, 1, 1, 3, 0, ...",0.8,0.35,133.0,"Counter({0: 64, 1: 55, 2: 51, 3: 47})"
4,0.18,0.84,"[1, 2, 4, 10, 14, 15, 16, 21, 25, 28, 31, 33, ...",0.89,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, ...",1.36,0.47,152.0,"Counter({0: 94, 1: 85, 3: 19, 2: 19})"
5,0.14,0.54,"[5, 13, 19, 63, 93, 107, 117, 122, 136, 167, 1...",0.59,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, ...",0.66,0.39,73.0,"Counter({0: 85, 1: 55, 2: 42, 3: 35})"
6,0.17,0.58,"[0, 1, 10, 13, 29, 40, 63, 93, 96, 101, 122, 1...",0.67,"[2, 2, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1, 0, 0, ...",1.05,0.38,110.0,"Counter({0: 86, 1: 54, 2: 46, 3: 31})"


adding 36 random population


 77%|███████▋  | 24/31 [25:20<06:37, 56.85s/it] 

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.19,0.15,"[43, 212, 232, 304, 312, 321, 336, 340, 357, 3...",0.22,"[0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, ...",0.43,0.32,43.0,"Counter({0: 90, 1: 76, 2: 51})"
1,0.18,0.73,"[0, 16, 21, 25, 28, 31, 33, 54, 73, 74, 78, 11...",0.84,"[1, 1, 2, 2, 2, 1, 2, 2, 0, 0, 0, 0, 0, 1, 1, ...",1.04,0.36,102.0,"Counter({0: 85, 1: 71, 2: 42, 3: 19})"
2,0.19,0.53,"[6, 19, 40, 68, 118, 123, 127, 176, 245, 356, ...",0.57,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, ...",0.57,0.39,53.0,"Counter({0: 92, 1: 74, 2: 51})"
3,0.16,0.38,"[19, 22, 41, 43, 46, 68, 89, 94, 96, 118, 122,...",0.5,"[0, 0, 3, 0, 0, 3, 3, 3, 1, 1, 1, 1, 1, 3, 0, ...",0.8,0.35,133.0,"Counter({0: 64, 1: 55, 2: 51, 3: 47})"
4,0.18,0.84,"[1, 2, 4, 10, 14, 15, 16, 21, 25, 28, 31, 33, ...",0.89,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, ...",1.36,0.47,152.0,"Counter({0: 94, 1: 85, 3: 19, 2: 19})"
5,0.14,0.54,"[5, 13, 19, 63, 93, 107, 117, 122, 136, 167, 1...",0.59,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, ...",0.66,0.39,73.0,"Counter({0: 85, 1: 55, 2: 42, 3: 35})"
6,0.17,0.58,"[0, 1, 10, 13, 29, 40, 63, 93, 96, 101, 122, 1...",0.67,"[2, 2, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1, 0, 0, ...",1.05,0.38,110.0,"Counter({0: 86, 1: 54, 2: 46, 3: 31})"
7,0.14,0.66,"[3, 7, 13, 68, 136, 217, 229, 279, 303, 452, 4...",0.68,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, ...",0.39,0.41,34.0,"Counter({0: 102, 1: 74, 2: 31, 3: 10})"


adding 29 random population


 87%|████████▋ | 27/31 [28:35<03:27, 51.88s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.19,0.15,"[43, 212, 232, 304, 312, 321, 336, 340, 357, 3...",0.22,"[0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, ...",0.43,0.32,43.0,"Counter({0: 90, 1: 76, 2: 51})"
1,0.18,0.73,"[0, 16, 21, 25, 28, 31, 33, 54, 73, 74, 78, 11...",0.84,"[1, 1, 2, 2, 2, 1, 2, 2, 0, 0, 0, 0, 0, 1, 1, ...",1.04,0.36,102.0,"Counter({0: 85, 1: 71, 2: 42, 3: 19})"
2,0.19,0.53,"[6, 19, 40, 68, 118, 123, 127, 176, 245, 356, ...",0.57,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, ...",0.57,0.39,53.0,"Counter({0: 92, 1: 74, 2: 51})"
3,0.16,0.38,"[19, 22, 41, 43, 46, 68, 89, 94, 96, 118, 122,...",0.5,"[0, 0, 3, 0, 0, 3, 3, 3, 1, 1, 1, 1, 1, 3, 0, ...",0.8,0.35,133.0,"Counter({0: 64, 1: 55, 2: 51, 3: 47})"
4,0.18,0.84,"[1, 2, 4, 10, 14, 15, 16, 21, 25, 28, 31, 33, ...",0.89,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, ...",1.36,0.47,152.0,"Counter({0: 94, 1: 85, 3: 19, 2: 19})"
5,0.14,0.54,"[5, 13, 19, 63, 93, 107, 117, 122, 136, 167, 1...",0.59,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, ...",0.66,0.39,73.0,"Counter({0: 85, 1: 55, 2: 42, 3: 35})"
6,0.17,0.58,"[0, 1, 10, 13, 29, 40, 63, 93, 96, 101, 122, 1...",0.67,"[2, 2, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1, 0, 0, ...",1.05,0.38,110.0,"Counter({0: 86, 1: 54, 2: 46, 3: 31})"
7,0.14,0.66,"[3, 7, 13, 68, 136, 217, 229, 279, 303, 452, 4...",0.68,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, ...",0.39,0.41,34.0,"Counter({0: 102, 1: 74, 2: 31, 3: 10})"
8,0.18,0.88,"[4, 28, 33, 47, 93, 153, 178, 244, 263, 270, 2...",0.85,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, ...",0.57,0.41,49.0,"Counter({0: 102, 1: 86, 2: 29})"


adding 18 random population


 97%|█████████▋| 30/31 [31:43<00:49, 49.24s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.19,0.15,"[43, 212, 232, 304, 312, 321, 336, 340, 357, 3...",0.22,"[0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, ...",0.43,0.32,43.0,"Counter({0: 90, 1: 76, 2: 51})"
1,0.18,0.73,"[0, 16, 21, 25, 28, 31, 33, 54, 73, 74, 78, 11...",0.84,"[1, 1, 2, 2, 2, 1, 2, 2, 0, 0, 0, 0, 0, 1, 1, ...",1.04,0.36,102.0,"Counter({0: 85, 1: 71, 2: 42, 3: 19})"
2,0.19,0.53,"[6, 19, 40, 68, 118, 123, 127, 176, 245, 356, ...",0.57,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, ...",0.57,0.39,53.0,"Counter({0: 92, 1: 74, 2: 51})"
3,0.16,0.38,"[19, 22, 41, 43, 46, 68, 89, 94, 96, 118, 122,...",0.5,"[0, 0, 3, 0, 0, 3, 3, 3, 1, 1, 1, 1, 1, 3, 0, ...",0.8,0.35,133.0,"Counter({0: 64, 1: 55, 2: 51, 3: 47})"
4,0.18,0.84,"[1, 2, 4, 10, 14, 15, 16, 21, 25, 28, 31, 33, ...",0.89,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, ...",1.36,0.47,152.0,"Counter({0: 94, 1: 85, 3: 19, 2: 19})"
5,0.14,0.54,"[5, 13, 19, 63, 93, 107, 117, 122, 136, 167, 1...",0.59,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, ...",0.66,0.39,73.0,"Counter({0: 85, 1: 55, 2: 42, 3: 35})"
6,0.17,0.58,"[0, 1, 10, 13, 29, 40, 63, 93, 96, 101, 122, 1...",0.67,"[2, 2, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1, 0, 0, ...",1.05,0.38,110.0,"Counter({0: 86, 1: 54, 2: 46, 3: 31})"
7,0.14,0.66,"[3, 7, 13, 68, 136, 217, 229, 279, 303, 452, 4...",0.68,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, ...",0.39,0.41,34.0,"Counter({0: 102, 1: 74, 2: 31, 3: 10})"
8,0.18,0.88,"[4, 28, 33, 47, 93, 153, 178, 244, 263, 270, 2...",0.85,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, ...",0.57,0.41,49.0,"Counter({0: 102, 1: 86, 2: 29})"
9,0.15,0.47,"[21, 31, 119, 136, 307, 310, 370, 401, 404, 46...",0.63,"[0, 0, 0, 0, 1, 1, 1, 0, 2, 2, 2, 2, 2, 1, 1, ...",0.4,0.27,24.0,"Counter({0: 67, 1: 64, 2: 55, 3: 31})"


adding 35 random population


100%|██████████| 31/31 [33:32<00:00, 64.93s/it]


TIME: GA: 33.54899654388428 min
TIME: Total: 33.564738070964815 min

##########  golub, (72, 7129)
*** Computing 1D feature ranking ...
Dispersion tests took 0.18 sec
Entropy computation 1.02 sec
KNN computation 1.85 sec
Sorting and thresholds 0.03 sec
Performing hierarchical clustering...
Hierarchical clustering 1.66 sec


  0%|          | 0/31 [00:00<?, ?it/s]

Handle redundant features 0.94 sec
Returning 1178 redundant features and  184 important features
TIME: 1d Features : 0.09500240882237752 min
TIME: 2d scores: 3.973642985026042e-09 min
*** Optimization algorithm 
Non redundant features 5951, orig size 7129, nb imp : 184
adding 50 random population


 10%|▉         | 3/31 [00:11<01:44,  3.72s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.16,0.05,"[19, 108, 116, 132, 149, 166, 207, 260, 294, 2...",0.05,"[1, 1, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.65,0.17,154.0,"Counter({0: 29, 1: 23, 2: 20})"


adding 22 random population


 19%|█▉        | 6/31 [00:55<03:47,  9.11s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.16,0.05,"[19, 108, 116, 132, 149, 166, 207, 260, 294, 2...",0.05,"[1, 1, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.65,0.17,154.0,"Counter({0: 29, 1: 23, 2: 20})"
1,0.14,0.09,"[19, 28, 91, 131, 149, 166, 207, 441, 611, 622...",0.18,"[0, 0, 0, 2, 2, 1, 2, 2, 2, 2, 2, 2, 0, 0, 2, ...",0.37,0.16,62.0,"Counter({0: 27, 1: 24, 2: 21})"


adding 5 random population


 29%|██▉       | 9/31 [01:33<03:28,  9.47s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.16,0.05,"[19, 108, 116, 132, 149, 166, 207, 260, 294, 2...",0.05,"[1, 1, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.65,0.17,154.0,"Counter({0: 29, 1: 23, 2: 20})"
1,0.14,0.09,"[19, 28, 91, 131, 149, 166, 207, 441, 611, 622...",0.18,"[0, 0, 0, 2, 2, 1, 2, 2, 2, 2, 2, 2, 0, 0, 2, ...",0.37,0.16,62.0,"Counter({0: 27, 1: 24, 2: 21})"
2,0.14,0.08,"[37, 113, 116, 149, 230, 236, 409, 480, 616, 6...",0.17,"[0, 1, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, ...",0.61,0.24,154.0,"Counter({0: 33, 1: 24, 2: 15})"


adding 13 random population


 39%|███▊      | 12/31 [02:23<03:37, 11.44s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.16,0.05,"[19, 108, 116, 132, 149, 166, 207, 260, 294, 2...",0.05,"[1, 1, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.65,0.17,154.0,"Counter({0: 29, 1: 23, 2: 20})"
1,0.14,0.09,"[19, 28, 91, 131, 149, 166, 207, 441, 611, 622...",0.18,"[0, 0, 0, 2, 2, 1, 2, 2, 2, 2, 2, 2, 0, 0, 2, ...",0.37,0.16,62.0,"Counter({0: 27, 1: 24, 2: 21})"
2,0.14,0.08,"[37, 113, 116, 149, 230, 236, 409, 480, 616, 6...",0.17,"[0, 1, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, ...",0.61,0.24,154.0,"Counter({0: 33, 1: 24, 2: 15})"
3,0.15,0.07,"[57, 91, 113, 128, 131, 166, 210, 290, 294, 29...",0.16,"[0, 0, 1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, ...",0.65,0.27,161.0,"Counter({0: 31, 1: 27, 2: 14})"


adding 18 random population


 48%|████▊     | 15/31 [03:41<04:19, 16.21s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.16,0.05,"[19, 108, 116, 132, 149, 166, 207, 260, 294, 2...",0.05,"[1, 1, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.65,0.17,154.0,"Counter({0: 29, 1: 23, 2: 20})"
1,0.14,0.09,"[19, 28, 91, 131, 149, 166, 207, 441, 611, 622...",0.18,"[0, 0, 0, 2, 2, 1, 2, 2, 2, 2, 2, 2, 0, 0, 2, ...",0.37,0.16,62.0,"Counter({0: 27, 1: 24, 2: 21})"
2,0.14,0.08,"[37, 113, 116, 149, 230, 236, 409, 480, 616, 6...",0.17,"[0, 1, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, ...",0.61,0.24,154.0,"Counter({0: 33, 1: 24, 2: 15})"
3,0.15,0.07,"[57, 91, 113, 128, 131, 166, 210, 290, 294, 29...",0.16,"[0, 0, 1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, ...",0.65,0.27,161.0,"Counter({0: 31, 1: 27, 2: 14})"
4,0.15,0.12,"[0, 19, 34, 99, 142, 149, 166, 210, 230, 247, ...",0.2,"[0, 0, 1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, ...",0.62,0.26,152.0,"Counter({0: 28, 1: 28, 2: 16})"


adding 21 random population


 58%|█████▊    | 18/31 [04:39<03:14, 14.95s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.16,0.05,"[19, 108, 116, 132, 149, 166, 207, 260, 294, 2...",0.05,"[1, 1, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.65,0.17,154.0,"Counter({0: 29, 1: 23, 2: 20})"
1,0.14,0.09,"[19, 28, 91, 131, 149, 166, 207, 441, 611, 622...",0.18,"[0, 0, 0, 2, 2, 1, 2, 2, 2, 2, 2, 2, 0, 0, 2, ...",0.37,0.16,62.0,"Counter({0: 27, 1: 24, 2: 21})"
2,0.14,0.08,"[37, 113, 116, 149, 230, 236, 409, 480, 616, 6...",0.17,"[0, 1, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, ...",0.61,0.24,154.0,"Counter({0: 33, 1: 24, 2: 15})"
3,0.15,0.07,"[57, 91, 113, 128, 131, 166, 210, 290, 294, 29...",0.16,"[0, 0, 1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, ...",0.65,0.27,161.0,"Counter({0: 31, 1: 27, 2: 14})"
4,0.15,0.12,"[0, 19, 34, 99, 142, 149, 166, 210, 230, 247, ...",0.2,"[0, 0, 1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, ...",0.62,0.26,152.0,"Counter({0: 28, 1: 28, 2: 16})"
5,0.14,0.12,"[19, 93, 116, 121, 122, 128, 137, 142, 145, 14...",0.16,"[1, 1, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, ...",0.63,0.25,173.0,"Counter({0: 31, 1: 28, 2: 13})"


adding 17 random population


 68%|██████▊   | 21/31 [06:24<03:33, 21.31s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.16,0.05,"[19, 108, 116, 132, 149, 166, 207, 260, 294, 2...",0.05,"[1, 1, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.65,0.17,154.0,"Counter({0: 29, 1: 23, 2: 20})"
1,0.14,0.09,"[19, 28, 91, 131, 149, 166, 207, 441, 611, 622...",0.18,"[0, 0, 0, 2, 2, 1, 2, 2, 2, 2, 2, 2, 0, 0, 2, ...",0.37,0.16,62.0,"Counter({0: 27, 1: 24, 2: 21})"
2,0.14,0.08,"[37, 113, 116, 149, 230, 236, 409, 480, 616, 6...",0.17,"[0, 1, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, ...",0.61,0.24,154.0,"Counter({0: 33, 1: 24, 2: 15})"
3,0.15,0.07,"[57, 91, 113, 128, 131, 166, 210, 290, 294, 29...",0.16,"[0, 0, 1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, ...",0.65,0.27,161.0,"Counter({0: 31, 1: 27, 2: 14})"
4,0.15,0.12,"[0, 19, 34, 99, 142, 149, 166, 210, 230, 247, ...",0.2,"[0, 0, 1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, ...",0.62,0.26,152.0,"Counter({0: 28, 1: 28, 2: 16})"
5,0.14,0.12,"[19, 93, 116, 121, 122, 128, 137, 142, 145, 14...",0.16,"[1, 1, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, ...",0.63,0.25,173.0,"Counter({0: 31, 1: 28, 2: 13})"
6,0.14,0.29,"[1, 3, 31, 113, 114, 236, 247, 513, 674, 756, ...",0.37,"[0, 0, 0, 0, 2, 0, 2, 0, 2, 0, 0, 0, 1, 1, 2, ...",0.37,0.2,57.0,"Counter({0: 30, 1: 22, 2: 20})"


adding 13 random population


 77%|███████▋  | 24/31 [07:03<01:36, 13.78s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.16,0.05,"[19, 108, 116, 132, 149, 166, 207, 260, 294, 2...",0.05,"[1, 1, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.65,0.17,154.0,"Counter({0: 29, 1: 23, 2: 20})"
1,0.14,0.09,"[19, 28, 91, 131, 149, 166, 207, 441, 611, 622...",0.18,"[0, 0, 0, 2, 2, 1, 2, 2, 2, 2, 2, 2, 0, 0, 2, ...",0.37,0.16,62.0,"Counter({0: 27, 1: 24, 2: 21})"
2,0.14,0.08,"[37, 113, 116, 149, 230, 236, 409, 480, 616, 6...",0.17,"[0, 1, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, ...",0.61,0.24,154.0,"Counter({0: 33, 1: 24, 2: 15})"
3,0.15,0.07,"[57, 91, 113, 128, 131, 166, 210, 290, 294, 29...",0.16,"[0, 0, 1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, ...",0.65,0.27,161.0,"Counter({0: 31, 1: 27, 2: 14})"
4,0.15,0.12,"[0, 19, 34, 99, 142, 149, 166, 210, 230, 247, ...",0.2,"[0, 0, 1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, ...",0.62,0.26,152.0,"Counter({0: 28, 1: 28, 2: 16})"
5,0.14,0.12,"[19, 93, 116, 121, 122, 128, 137, 142, 145, 14...",0.16,"[1, 1, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, ...",0.63,0.25,173.0,"Counter({0: 31, 1: 28, 2: 13})"
6,0.14,0.29,"[1, 3, 31, 113, 114, 236, 247, 513, 674, 756, ...",0.37,"[0, 0, 0, 0, 2, 0, 2, 0, 2, 0, 0, 0, 1, 1, 2, ...",0.37,0.2,57.0,"Counter({0: 30, 1: 22, 2: 20})"
7,0.16,0.28,"[554, 885, 1355, 2048, 2874, 3270, 3754, 4031,...",0.31,"[0, 1, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, ...",0.25,0.34,12.0,"Counter({0: 30, 1: 27, 2: 15})"


adding 23 random population


 87%|████████▋ | 27/31 [07:24<00:34,  8.62s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.16,0.05,"[19, 108, 116, 132, 149, 166, 207, 260, 294, 2...",0.05,"[1, 1, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.65,0.17,154.0,"Counter({0: 29, 1: 23, 2: 20})"
1,0.14,0.09,"[19, 28, 91, 131, 149, 166, 207, 441, 611, 622...",0.18,"[0, 0, 0, 2, 2, 1, 2, 2, 2, 2, 2, 2, 0, 0, 2, ...",0.37,0.16,62.0,"Counter({0: 27, 1: 24, 2: 21})"
2,0.14,0.08,"[37, 113, 116, 149, 230, 236, 409, 480, 616, 6...",0.17,"[0, 1, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, ...",0.61,0.24,154.0,"Counter({0: 33, 1: 24, 2: 15})"
3,0.15,0.07,"[57, 91, 113, 128, 131, 166, 210, 290, 294, 29...",0.16,"[0, 0, 1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, ...",0.65,0.27,161.0,"Counter({0: 31, 1: 27, 2: 14})"
4,0.15,0.12,"[0, 19, 34, 99, 142, 149, 166, 210, 230, 247, ...",0.2,"[0, 0, 1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, ...",0.62,0.26,152.0,"Counter({0: 28, 1: 28, 2: 16})"
5,0.14,0.12,"[19, 93, 116, 121, 122, 128, 137, 142, 145, 14...",0.16,"[1, 1, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, ...",0.63,0.25,173.0,"Counter({0: 31, 1: 28, 2: 13})"
6,0.14,0.29,"[1, 3, 31, 113, 114, 236, 247, 513, 674, 756, ...",0.37,"[0, 0, 0, 0, 2, 0, 2, 0, 2, 0, 0, 0, 1, 1, 2, ...",0.37,0.2,57.0,"Counter({0: 30, 1: 22, 2: 20})"
7,0.16,0.28,"[554, 885, 1355, 2048, 2874, 3270, 3754, 4031,...",0.31,"[0, 1, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, ...",0.25,0.34,12.0,"Counter({0: 30, 1: 27, 2: 15})"
8,0.15,0.11,"[3064, 5708, 6782, 6831]",0.15,"[2, 0, 1, 2, 1, 2, 1, 1, 0, 0, 0, 0, 1, 1, 1, ...",0.15,0.36,4.0,"Counter({0: 27, 1: 24, 2: 21})"


adding 1 random population


 97%|█████████▋| 30/31 [07:44<00:06,  6.67s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.16,0.05,"[19, 108, 116, 132, 149, 166, 207, 260, 294, 2...",0.05,"[1, 1, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.65,0.17,154.0,"Counter({0: 29, 1: 23, 2: 20})"
1,0.14,0.09,"[19, 28, 91, 131, 149, 166, 207, 441, 611, 622...",0.18,"[0, 0, 0, 2, 2, 1, 2, 2, 2, 2, 2, 2, 0, 0, 2, ...",0.37,0.16,62.0,"Counter({0: 27, 1: 24, 2: 21})"
2,0.14,0.08,"[37, 113, 116, 149, 230, 236, 409, 480, 616, 6...",0.17,"[0, 1, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, ...",0.61,0.24,154.0,"Counter({0: 33, 1: 24, 2: 15})"
3,0.15,0.07,"[57, 91, 113, 128, 131, 166, 210, 290, 294, 29...",0.16,"[0, 0, 1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, ...",0.65,0.27,161.0,"Counter({0: 31, 1: 27, 2: 14})"
4,0.15,0.12,"[0, 19, 34, 99, 142, 149, 166, 210, 230, 247, ...",0.2,"[0, 0, 1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, ...",0.62,0.26,152.0,"Counter({0: 28, 1: 28, 2: 16})"
5,0.14,0.12,"[19, 93, 116, 121, 122, 128, 137, 142, 145, 14...",0.16,"[1, 1, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, ...",0.63,0.25,173.0,"Counter({0: 31, 1: 28, 2: 13})"
6,0.14,0.29,"[1, 3, 31, 113, 114, 236, 247, 513, 674, 756, ...",0.37,"[0, 0, 0, 0, 2, 0, 2, 0, 2, 0, 0, 0, 1, 1, 2, ...",0.37,0.2,57.0,"Counter({0: 30, 1: 22, 2: 20})"
7,0.16,0.28,"[554, 885, 1355, 2048, 2874, 3270, 3754, 4031,...",0.31,"[0, 1, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, ...",0.25,0.34,12.0,"Counter({0: 30, 1: 27, 2: 15})"
8,0.15,0.11,"[3064, 5708, 6782, 6831]",0.15,"[2, 0, 1, 2, 1, 2, 1, 1, 0, 0, 0, 0, 1, 1, 1, ...",0.15,0.36,4.0,"Counter({0: 27, 1: 24, 2: 21})"
9,0.16,0.09,"[19, 116, 303, 355, 626, 889, 1375, 1809, 2299...",0.1,"[2, 0, 1, 2, 0, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, ...",0.27,0.28,20.0,"Counter({0: 28, 1: 27, 2: 17})"


adding 3 random population


100%|██████████| 31/31 [07:58<00:00, 15.44s/it]


TIME: GA: 7.975339512030284 min
TIME: Total: 8.070342334111531 min

##########  gordon, (181, 12533)
*** Computing 1D feature ranking ...
Dispersion tests took 0.62 sec
Entropy computation 2.53 sec
KNN computation 17.04 sec
Sorting and thresholds 0.04 sec
Loading clustering from file
Hierarchical clustering 0.04 sec


  0%|          | 0/31 [00:00<?, ?it/s]

Handle redundant features 1.12 sec
Returning 733 redundant features and  351 important features
TIME: 1d Features : 0.3569863160451253 min
TIME: 2d scores: 3.973642985026042e-09 min
*** Optimization algorithm 
Non redundant features 11800, orig size 12533, nb imp : 351
adding 50 random population


 10%|▉         | 3/31 [00:32<04:49, 10.34s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.13,0.0,"[282, 718, 2205, 5906, 6888, 8392, 10470]",0.12,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, ...",0.14,0.28,7.0,"Counter({0: 62, 1: 43, 2: 42, 3: 34})"


adding 2 random population


 19%|█▉        | 6/31 [01:31<06:15, 15.03s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.13,0.0,"[282, 718, 2205, 5906, 6888, 8392, 10470]",0.12,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, ...",0.14,0.28,7.0,"Counter({0: 62, 1: 43, 2: 42, 3: 34})"
1,0.11,0.07,"[2881, 3175, 5391, 12247]",0.17,"[2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 0, 0, 2, ...",0.11,0.3,4.0,"Counter({0: 66, 1: 56, 2: 38, 3: 21})"


adding 5 random population


 29%|██▉       | 9/31 [02:29<05:50, 15.92s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.13,0.0,"[282, 718, 2205, 5906, 6888, 8392, 10470]",0.12,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, ...",0.14,0.28,7.0,"Counter({0: 62, 1: 43, 2: 42, 3: 34})"
1,0.11,0.07,"[2881, 3175, 5391, 12247]",0.17,"[2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 0, 0, 2, ...",0.11,0.3,4.0,"Counter({0: 66, 1: 56, 2: 38, 3: 21})"
2,0.11,0.01,"[446, 2775, 6516, 7578]",0.01,"[2, 1, 3, 1, 0, 3, 3, 0, 2, 1, 2, 1, 3, 1, 2, ...",0.09,0.28,4.0,"Counter({0: 57, 1: 46, 2: 45, 3: 33})"


adding 2 random population


 39%|███▊      | 12/31 [03:29<05:14, 16.53s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.13,0.0,"[282, 718, 2205, 5906, 6888, 8392, 10470]",0.12,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, ...",0.14,0.28,7.0,"Counter({0: 62, 1: 43, 2: 42, 3: 34})"
1,0.11,0.07,"[2881, 3175, 5391, 12247]",0.17,"[2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 0, 0, 2, ...",0.11,0.3,4.0,"Counter({0: 66, 1: 56, 2: 38, 3: 21})"
2,0.11,0.01,"[446, 2775, 6516, 7578]",0.01,"[2, 1, 3, 1, 0, 3, 3, 0, 2, 1, 2, 1, 3, 1, 2, ...",0.09,0.28,4.0,"Counter({0: 57, 1: 46, 2: 45, 3: 33})"
3,0.11,0.01,"[339, 616, 2556, 3465, 4045, 4600, 5631, 11792]",0.04,"[1, 2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 0, 0, 0, 0, ...",0.1,0.17,8.0,"Counter({1: 50, 0: 50, 2: 42, 3: 39})"


adding 7 random population


 48%|████▊     | 15/31 [04:38<04:49, 18.09s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.13,0.0,"[282, 718, 2205, 5906, 6888, 8392, 10470]",0.12,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, ...",0.14,0.28,7.0,"Counter({0: 62, 1: 43, 2: 42, 3: 34})"
1,0.11,0.07,"[2881, 3175, 5391, 12247]",0.17,"[2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 0, 0, 2, ...",0.11,0.3,4.0,"Counter({0: 66, 1: 56, 2: 38, 3: 21})"
2,0.11,0.01,"[446, 2775, 6516, 7578]",0.01,"[2, 1, 3, 1, 0, 3, 3, 0, 2, 1, 2, 1, 3, 1, 2, ...",0.09,0.28,4.0,"Counter({0: 57, 1: 46, 2: 45, 3: 33})"
3,0.11,0.01,"[339, 616, 2556, 3465, 4045, 4600, 5631, 11792]",0.04,"[1, 2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 0, 0, 0, 0, ...",0.1,0.17,8.0,"Counter({1: 50, 0: 50, 2: 42, 3: 39})"
4,0.11,-0.01,"[78, 409, 753, 1796, 2300, 2647, 2977, 4145, 4...",0.05,"[1, 3, 0, 3, 0, 0, 1, 3, 3, 1, 3, 0, 0, 0, 0, ...",0.17,0.18,15.0,"Counter({0: 68, 1: 52, 2: 32, 3: 29})"


adding 7 random population


 58%|█████▊    | 18/31 [05:48<04:05, 18.86s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.13,0.0,"[282, 718, 2205, 5906, 6888, 8392, 10470]",0.12,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, ...",0.14,0.28,7.0,"Counter({0: 62, 1: 43, 2: 42, 3: 34})"
1,0.11,0.07,"[2881, 3175, 5391, 12247]",0.17,"[2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 0, 0, 2, ...",0.11,0.3,4.0,"Counter({0: 66, 1: 56, 2: 38, 3: 21})"
2,0.11,0.01,"[446, 2775, 6516, 7578]",0.01,"[2, 1, 3, 1, 0, 3, 3, 0, 2, 1, 2, 1, 3, 1, 2, ...",0.09,0.28,4.0,"Counter({0: 57, 1: 46, 2: 45, 3: 33})"
3,0.11,0.01,"[339, 616, 2556, 3465, 4045, 4600, 5631, 11792]",0.04,"[1, 2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 0, 0, 0, 0, ...",0.1,0.17,8.0,"Counter({1: 50, 0: 50, 2: 42, 3: 39})"
4,0.11,-0.01,"[78, 409, 753, 1796, 2300, 2647, 2977, 4145, 4...",0.05,"[1, 3, 0, 3, 0, 0, 1, 3, 3, 1, 3, 0, 0, 0, 0, ...",0.17,0.18,15.0,"Counter({0: 68, 1: 52, 2: 32, 3: 29})"
5,0.11,0.1,"[328, 547, 715, 2750, 3193, 3659, 3893, 4073, ...",0.19,"[1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 1, 1, 1, 1, 1, ...",0.19,0.17,29.0,"Counter({0: 70, 1: 46, 2: 41, 3: 24})"


adding 10 random population


 68%|██████▊   | 21/31 [07:15<03:35, 21.57s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.13,0.0,"[282, 718, 2205, 5906, 6888, 8392, 10470]",0.12,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, ...",0.14,0.28,7.0,"Counter({0: 62, 1: 43, 2: 42, 3: 34})"
1,0.11,0.07,"[2881, 3175, 5391, 12247]",0.17,"[2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 0, 0, 2, ...",0.11,0.3,4.0,"Counter({0: 66, 1: 56, 2: 38, 3: 21})"
2,0.11,0.01,"[446, 2775, 6516, 7578]",0.01,"[2, 1, 3, 1, 0, 3, 3, 0, 2, 1, 2, 1, 3, 1, 2, ...",0.09,0.28,4.0,"Counter({0: 57, 1: 46, 2: 45, 3: 33})"
3,0.11,0.01,"[339, 616, 2556, 3465, 4045, 4600, 5631, 11792]",0.04,"[1, 2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 0, 0, 0, 0, ...",0.1,0.17,8.0,"Counter({1: 50, 0: 50, 2: 42, 3: 39})"
4,0.11,-0.01,"[78, 409, 753, 1796, 2300, 2647, 2977, 4145, 4...",0.05,"[1, 3, 0, 3, 0, 0, 1, 3, 3, 1, 3, 0, 0, 0, 0, ...",0.17,0.18,15.0,"Counter({0: 68, 1: 52, 2: 32, 3: 29})"
5,0.11,0.1,"[328, 547, 715, 2750, 3193, 3659, 3893, 4073, ...",0.19,"[1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 1, 1, 1, 1, 1, ...",0.19,0.17,29.0,"Counter({0: 70, 1: 46, 2: 41, 3: 24})"
6,0.12,0.02,"[718, 2754, 3932, 3997, 4601, 4729, 4779, 4931...",0.06,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 3, 1, ...",0.17,0.12,25.0,"Counter({0: 54, 1: 54, 2: 39, 3: 34})"


adding 13 random population


 77%|███████▋  | 24/31 [09:14<03:10, 27.25s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.13,0.0,"[282, 718, 2205, 5906, 6888, 8392, 10470]",0.12,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, ...",0.14,0.28,7.0,"Counter({0: 62, 1: 43, 2: 42, 3: 34})"
1,0.11,0.07,"[2881, 3175, 5391, 12247]",0.17,"[2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 0, 0, 2, ...",0.11,0.3,4.0,"Counter({0: 66, 1: 56, 2: 38, 3: 21})"
2,0.11,0.01,"[446, 2775, 6516, 7578]",0.01,"[2, 1, 3, 1, 0, 3, 3, 0, 2, 1, 2, 1, 3, 1, 2, ...",0.09,0.28,4.0,"Counter({0: 57, 1: 46, 2: 45, 3: 33})"
3,0.11,0.01,"[339, 616, 2556, 3465, 4045, 4600, 5631, 11792]",0.04,"[1, 2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 0, 0, 0, 0, ...",0.1,0.17,8.0,"Counter({1: 50, 0: 50, 2: 42, 3: 39})"
4,0.11,-0.01,"[78, 409, 753, 1796, 2300, 2647, 2977, 4145, 4...",0.05,"[1, 3, 0, 3, 0, 0, 1, 3, 3, 1, 3, 0, 0, 0, 0, ...",0.17,0.18,15.0,"Counter({0: 68, 1: 52, 2: 32, 3: 29})"
5,0.11,0.1,"[328, 547, 715, 2750, 3193, 3659, 3893, 4073, ...",0.19,"[1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 1, 1, 1, 1, 1, ...",0.19,0.17,29.0,"Counter({0: 70, 1: 46, 2: 41, 3: 24})"
6,0.12,0.02,"[718, 2754, 3932, 3997, 4601, 4729, 4779, 4931...",0.06,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 3, 1, ...",0.17,0.12,25.0,"Counter({0: 54, 1: 54, 2: 39, 3: 34})"
7,0.11,0.18,"[0, 1, 2, 3, 33, 182, 330, 497, 533, 534, 1259...",0.34,"[3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 3, ...",0.33,0.17,73.0,"Counter({0: 63, 1: 62, 2: 33, 3: 23})"


adding 17 random population


 87%|████████▋ | 27/31 [10:58<01:47, 26.80s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.13,0.0,"[282, 718, 2205, 5906, 6888, 8392, 10470]",0.12,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, ...",0.14,0.28,7.0,"Counter({0: 62, 1: 43, 2: 42, 3: 34})"
1,0.11,0.07,"[2881, 3175, 5391, 12247]",0.17,"[2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 0, 0, 2, ...",0.11,0.3,4.0,"Counter({0: 66, 1: 56, 2: 38, 3: 21})"
2,0.11,0.01,"[446, 2775, 6516, 7578]",0.01,"[2, 1, 3, 1, 0, 3, 3, 0, 2, 1, 2, 1, 3, 1, 2, ...",0.09,0.28,4.0,"Counter({0: 57, 1: 46, 2: 45, 3: 33})"
3,0.11,0.01,"[339, 616, 2556, 3465, 4045, 4600, 5631, 11792]",0.04,"[1, 2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 0, 0, 0, 0, ...",0.1,0.17,8.0,"Counter({1: 50, 0: 50, 2: 42, 3: 39})"
4,0.11,-0.01,"[78, 409, 753, 1796, 2300, 2647, 2977, 4145, 4...",0.05,"[1, 3, 0, 3, 0, 0, 1, 3, 3, 1, 3, 0, 0, 0, 0, ...",0.17,0.18,15.0,"Counter({0: 68, 1: 52, 2: 32, 3: 29})"
5,0.11,0.1,"[328, 547, 715, 2750, 3193, 3659, 3893, 4073, ...",0.19,"[1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 1, 1, 1, 1, 1, ...",0.19,0.17,29.0,"Counter({0: 70, 1: 46, 2: 41, 3: 24})"
6,0.12,0.02,"[718, 2754, 3932, 3997, 4601, 4729, 4779, 4931...",0.06,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 3, 1, ...",0.17,0.12,25.0,"Counter({0: 54, 1: 54, 2: 39, 3: 34})"
7,0.11,0.18,"[0, 1, 2, 3, 33, 182, 330, 497, 533, 534, 1259...",0.34,"[3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 3, ...",0.33,0.17,73.0,"Counter({0: 63, 1: 62, 2: 33, 3: 23})"
8,0.11,0.22,"[0, 1, 2, 33, 182, 339, 409, 419, 534, 625, 63...",0.42,"[3, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...",0.29,0.14,62.0,"Counter({0: 65, 1: 56, 2: 33, 3: 27})"


adding 5 random population


 97%|█████████▋| 30/31 [13:47<00:36, 36.39s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.13,0.0,"[282, 718, 2205, 5906, 6888, 8392, 10470]",0.12,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, ...",0.14,0.28,7.0,"Counter({0: 62, 1: 43, 2: 42, 3: 34})"
1,0.11,0.07,"[2881, 3175, 5391, 12247]",0.17,"[2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 0, 0, 2, ...",0.11,0.3,4.0,"Counter({0: 66, 1: 56, 2: 38, 3: 21})"
2,0.11,0.01,"[446, 2775, 6516, 7578]",0.01,"[2, 1, 3, 1, 0, 3, 3, 0, 2, 1, 2, 1, 3, 1, 2, ...",0.09,0.28,4.0,"Counter({0: 57, 1: 46, 2: 45, 3: 33})"
3,0.11,0.01,"[339, 616, 2556, 3465, 4045, 4600, 5631, 11792]",0.04,"[1, 2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 0, 0, 0, 0, ...",0.1,0.17,8.0,"Counter({1: 50, 0: 50, 2: 42, 3: 39})"
4,0.11,-0.01,"[78, 409, 753, 1796, 2300, 2647, 2977, 4145, 4...",0.05,"[1, 3, 0, 3, 0, 0, 1, 3, 3, 1, 3, 0, 0, 0, 0, ...",0.17,0.18,15.0,"Counter({0: 68, 1: 52, 2: 32, 3: 29})"
5,0.11,0.1,"[328, 547, 715, 2750, 3193, 3659, 3893, 4073, ...",0.19,"[1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 1, 1, 1, 1, 1, ...",0.19,0.17,29.0,"Counter({0: 70, 1: 46, 2: 41, 3: 24})"
6,0.12,0.02,"[718, 2754, 3932, 3997, 4601, 4729, 4779, 4931...",0.06,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 3, 1, ...",0.17,0.12,25.0,"Counter({0: 54, 1: 54, 2: 39, 3: 34})"
7,0.11,0.18,"[0, 1, 2, 3, 33, 182, 330, 497, 533, 534, 1259...",0.34,"[3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 3, ...",0.33,0.17,73.0,"Counter({0: 63, 1: 62, 2: 33, 3: 23})"
8,0.11,0.22,"[0, 1, 2, 33, 182, 339, 409, 419, 534, 625, 63...",0.42,"[3, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...",0.29,0.14,62.0,"Counter({0: 65, 1: 56, 2: 33, 3: 27})"
9,0.13,0.08,"[12, 472, 748, 1104, 1663, 2139, 2208, 2249, 2...",0.07,"[0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 2, 2, 2, ...",0.35,0.17,75.0,"Counter({0: 75, 1: 72, 2: 34})"


adding 6 random population


100%|██████████| 31/31 [15:31<00:00, 30.04s/it]


TIME: GA: 15.520104229450226 min
TIME: Total: 15.877092826366425 min

##########  khan, (63, 2308)
*** Computing 1D feature ranking ...
Dispersion tests took 0.04 sec
Entropy computation 0.35 sec
KNN computation 0.19 sec
Sorting and thresholds 0.02 sec
Loading clustering from file
Hierarchical clustering 0.01 sec


  0%|          | 0/31 [00:00<?, ?it/s]

Handle redundant features 0.47 sec
Returning 381 redundant features and  83 important features
TIME: 1d Features : 0.01825172503789266 min
TIME: 2d scores: 3.973642985026042e-09 min
*** Optimization algorithm 
Non redundant features 1927, orig size 2308, nb imp : 83
adding 50 random population


 10%|▉         | 3/31 [00:10<01:35,  3.41s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.2,0.03,"[15, 35, 37, 60, 66, 97, 112, 148, 159, 208, 2...",0.12,"[1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, ...",0.98,0.38,125.0,"Counter({0: 25, 1: 21, 2: 17})"


adding 18 random population


 19%|█▉        | 6/31 [01:49<07:32, 18.09s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.2,0.03,"[15, 35, 37, 60, 66, 97, 112, 148, 159, 208, 2...",0.12,"[1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, ...",0.98,0.38,125.0,"Counter({0: 25, 1: 21, 2: 17})"
1,0.17,0.48,"[1, 7, 8, 230, 277, 494, 541, 761, 981, 1045, ...",0.5,"[1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, ...",0.29,0.32,17.0,"Counter({0: 22, 1: 21, 2: 20})"


adding 11 random population


 29%|██▉       | 9/31 [02:09<03:30,  9.56s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.2,0.03,"[15, 35, 37, 60, 66, 97, 112, 148, 159, 208, 2...",0.12,"[1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, ...",0.98,0.38,125.0,"Counter({0: 25, 1: 21, 2: 17})"
1,0.17,0.48,"[1, 7, 8, 230, 277, 494, 541, 761, 981, 1045, ...",0.5,"[1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, ...",0.29,0.32,17.0,"Counter({0: 22, 1: 21, 2: 20})"
2,0.2,0.81,"[1, 3, 5, 7, 8, 17, 22, 208, 230, 277, 390, 54...",0.82,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",0.42,0.44,20.0,"Counter({1: 23, 0: 23, 2: 17})"


adding 9 random population


 39%|███▊      | 12/31 [02:30<02:11,  6.91s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.2,0.03,"[15, 35, 37, 60, 66, 97, 112, 148, 159, 208, 2...",0.12,"[1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, ...",0.98,0.38,125.0,"Counter({0: 25, 1: 21, 2: 17})"
1,0.17,0.48,"[1, 7, 8, 230, 277, 494, 541, 761, 981, 1045, ...",0.5,"[1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, ...",0.29,0.32,17.0,"Counter({0: 22, 1: 21, 2: 20})"
2,0.2,0.81,"[1, 3, 5, 7, 8, 17, 22, 208, 230, 277, 390, 54...",0.82,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",0.42,0.44,20.0,"Counter({1: 23, 0: 23, 2: 17})"
3,0.17,0.14,"[214, 233, 254, 315, 343, 443, 602, 696, 950, ...",0.26,"[0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, ...",0.27,0.26,17.0,"Counter({0: 27, 1: 19, 2: 17})"


adding 17 random population


 48%|████▊     | 15/31 [02:50<01:36,  6.02s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.2,0.03,"[15, 35, 37, 60, 66, 97, 112, 148, 159, 208, 2...",0.12,"[1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, ...",0.98,0.38,125.0,"Counter({0: 25, 1: 21, 2: 17})"
1,0.17,0.48,"[1, 7, 8, 230, 277, 494, 541, 761, 981, 1045, ...",0.5,"[1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, ...",0.29,0.32,17.0,"Counter({0: 22, 1: 21, 2: 20})"
2,0.2,0.81,"[1, 3, 5, 7, 8, 17, 22, 208, 230, 277, 390, 54...",0.82,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",0.42,0.44,20.0,"Counter({1: 23, 0: 23, 2: 17})"
3,0.17,0.14,"[214, 233, 254, 315, 343, 443, 602, 696, 950, ...",0.26,"[0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, ...",0.27,0.26,17.0,"Counter({0: 27, 1: 19, 2: 17})"
4,0.17,0.05,"[635, 2017, 2021, 2049, 2056, 2147, 2299]",0.11,"[1, 1, 1, 0, 1, 2, 1, 1, 1, 1, 1, 2, 2, 1, 0, ...",0.17,0.25,7.0,"Counter({0: 27, 1: 21, 2: 15})"


adding 6 random population


 58%|█████▊    | 18/31 [03:07<01:09,  5.34s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.2,0.03,"[15, 35, 37, 60, 66, 97, 112, 148, 159, 208, 2...",0.12,"[1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, ...",0.98,0.38,125.0,"Counter({0: 25, 1: 21, 2: 17})"
1,0.17,0.48,"[1, 7, 8, 230, 277, 494, 541, 761, 981, 1045, ...",0.5,"[1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, ...",0.29,0.32,17.0,"Counter({0: 22, 1: 21, 2: 20})"
2,0.2,0.81,"[1, 3, 5, 7, 8, 17, 22, 208, 230, 277, 390, 54...",0.82,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",0.42,0.44,20.0,"Counter({1: 23, 0: 23, 2: 17})"
3,0.17,0.14,"[214, 233, 254, 315, 343, 443, 602, 696, 950, ...",0.26,"[0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, ...",0.27,0.26,17.0,"Counter({0: 27, 1: 19, 2: 17})"
4,0.17,0.05,"[635, 2017, 2021, 2049, 2056, 2147, 2299]",0.11,"[1, 1, 1, 0, 1, 2, 1, 1, 1, 1, 1, 2, 2, 1, 0, ...",0.17,0.25,7.0,"Counter({0: 27, 1: 21, 2: 15})"
5,0.21,0.02,"[34, 35, 37, 60, 112, 290, 306, 402, 406, 478,...",0.12,"[1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, ...",0.86,0.37,83.0,"Counter({0: 25, 1: 22, 2: 16})"


adding 1 random population


 68%|██████▊   | 21/31 [03:40<01:13,  7.37s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.2,0.03,"[15, 35, 37, 60, 66, 97, 112, 148, 159, 208, 2...",0.12,"[1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, ...",0.98,0.38,125.0,"Counter({0: 25, 1: 21, 2: 17})"
1,0.17,0.48,"[1, 7, 8, 230, 277, 494, 541, 761, 981, 1045, ...",0.5,"[1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, ...",0.29,0.32,17.0,"Counter({0: 22, 1: 21, 2: 20})"
2,0.2,0.81,"[1, 3, 5, 7, 8, 17, 22, 208, 230, 277, 390, 54...",0.82,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",0.42,0.44,20.0,"Counter({1: 23, 0: 23, 2: 17})"
3,0.17,0.14,"[214, 233, 254, 315, 343, 443, 602, 696, 950, ...",0.26,"[0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, ...",0.27,0.26,17.0,"Counter({0: 27, 1: 19, 2: 17})"
4,0.17,0.05,"[635, 2017, 2021, 2049, 2056, 2147, 2299]",0.11,"[1, 1, 1, 0, 1, 2, 1, 1, 1, 1, 1, 2, 2, 1, 0, ...",0.17,0.25,7.0,"Counter({0: 27, 1: 21, 2: 15})"
5,0.21,0.02,"[34, 35, 37, 60, 112, 290, 306, 402, 406, 478,...",0.12,"[1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, ...",0.86,0.37,83.0,"Counter({0: 25, 1: 22, 2: 16})"
6,0.16,0.25,"[186, 208, 213, 228, 390, 556, 558, 650, 662, ...",0.37,"[1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 0, 2, ...",0.27,0.24,18.0,"Counter({0: 23, 1: 20, 2: 20})"


adding 12 random population


 77%|███████▋  | 24/31 [04:00<00:42,  6.07s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.2,0.03,"[15, 35, 37, 60, 66, 97, 112, 148, 159, 208, 2...",0.12,"[1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, ...",0.98,0.38,125.0,"Counter({0: 25, 1: 21, 2: 17})"
1,0.17,0.48,"[1, 7, 8, 230, 277, 494, 541, 761, 981, 1045, ...",0.5,"[1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, ...",0.29,0.32,17.0,"Counter({0: 22, 1: 21, 2: 20})"
2,0.2,0.81,"[1, 3, 5, 7, 8, 17, 22, 208, 230, 277, 390, 54...",0.82,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",0.42,0.44,20.0,"Counter({1: 23, 0: 23, 2: 17})"
3,0.17,0.14,"[214, 233, 254, 315, 343, 443, 602, 696, 950, ...",0.26,"[0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, ...",0.27,0.26,17.0,"Counter({0: 27, 1: 19, 2: 17})"
4,0.17,0.05,"[635, 2017, 2021, 2049, 2056, 2147, 2299]",0.11,"[1, 1, 1, 0, 1, 2, 1, 1, 1, 1, 1, 2, 2, 1, 0, ...",0.17,0.25,7.0,"Counter({0: 27, 1: 21, 2: 15})"
5,0.21,0.02,"[34, 35, 37, 60, 112, 290, 306, 402, 406, 478,...",0.12,"[1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, ...",0.86,0.37,83.0,"Counter({0: 25, 1: 22, 2: 16})"
6,0.16,0.25,"[186, 208, 213, 228, 390, 556, 558, 650, 662, ...",0.37,"[1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 0, 2, ...",0.27,0.24,18.0,"Counter({0: 23, 1: 20, 2: 20})"
7,0.18,0.15,"[32, 34, 37, 60, 112, 148, 306, 335, 342, 402,...",0.23,"[1, 1, 1, 1, 0, 2, 0, 0, 0, 0, 1, 1, 1, 1, 0, ...",0.8,0.29,130.0,"Counter({0: 24, 1: 21, 2: 18})"


adding 5 random population


 87%|████████▋ | 27/31 [05:02<00:48, 12.02s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.2,0.03,"[15, 35, 37, 60, 66, 97, 112, 148, 159, 208, 2...",0.12,"[1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, ...",0.98,0.38,125.0,"Counter({0: 25, 1: 21, 2: 17})"
1,0.17,0.48,"[1, 7, 8, 230, 277, 494, 541, 761, 981, 1045, ...",0.5,"[1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, ...",0.29,0.32,17.0,"Counter({0: 22, 1: 21, 2: 20})"
2,0.2,0.81,"[1, 3, 5, 7, 8, 17, 22, 208, 230, 277, 390, 54...",0.82,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",0.42,0.44,20.0,"Counter({1: 23, 0: 23, 2: 17})"
3,0.17,0.14,"[214, 233, 254, 315, 343, 443, 602, 696, 950, ...",0.26,"[0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, ...",0.27,0.26,17.0,"Counter({0: 27, 1: 19, 2: 17})"
4,0.17,0.05,"[635, 2017, 2021, 2049, 2056, 2147, 2299]",0.11,"[1, 1, 1, 0, 1, 2, 1, 1, 1, 1, 1, 2, 2, 1, 0, ...",0.17,0.25,7.0,"Counter({0: 27, 1: 21, 2: 15})"
5,0.21,0.02,"[34, 35, 37, 60, 112, 290, 306, 402, 406, 478,...",0.12,"[1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, ...",0.86,0.37,83.0,"Counter({0: 25, 1: 22, 2: 16})"
6,0.16,0.25,"[186, 208, 213, 228, 390, 556, 558, 650, 662, ...",0.37,"[1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 0, 2, ...",0.27,0.24,18.0,"Counter({0: 23, 1: 20, 2: 20})"
7,0.18,0.15,"[32, 34, 37, 60, 112, 148, 306, 335, 342, 402,...",0.23,"[1, 1, 1, 1, 0, 2, 0, 0, 0, 0, 1, 1, 1, 1, 0, ...",0.8,0.29,130.0,"Counter({0: 24, 1: 21, 2: 18})"
8,0.16,0.08,"[22, 32, 39, 60, 66, 91, 97, 99, 118, 120, 148...",0.18,"[2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 2, 2, 1, 1, 1, ...",0.7,0.31,123.0,"Counter({0: 27, 1: 22, 2: 14})"


adding 40 random population


 97%|█████████▋| 30/31 [06:05<00:13, 13.99s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.2,0.03,"[15, 35, 37, 60, 66, 97, 112, 148, 159, 208, 2...",0.12,"[1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, ...",0.98,0.38,125.0,"Counter({0: 25, 1: 21, 2: 17})"
1,0.17,0.48,"[1, 7, 8, 230, 277, 494, 541, 761, 981, 1045, ...",0.5,"[1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, ...",0.29,0.32,17.0,"Counter({0: 22, 1: 21, 2: 20})"
2,0.2,0.81,"[1, 3, 5, 7, 8, 17, 22, 208, 230, 277, 390, 54...",0.82,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",0.42,0.44,20.0,"Counter({1: 23, 0: 23, 2: 17})"
3,0.17,0.14,"[214, 233, 254, 315, 343, 443, 602, 696, 950, ...",0.26,"[0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, ...",0.27,0.26,17.0,"Counter({0: 27, 1: 19, 2: 17})"
4,0.17,0.05,"[635, 2017, 2021, 2049, 2056, 2147, 2299]",0.11,"[1, 1, 1, 0, 1, 2, 1, 1, 1, 1, 1, 2, 2, 1, 0, ...",0.17,0.25,7.0,"Counter({0: 27, 1: 21, 2: 15})"
5,0.21,0.02,"[34, 35, 37, 60, 112, 290, 306, 402, 406, 478,...",0.12,"[1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, ...",0.86,0.37,83.0,"Counter({0: 25, 1: 22, 2: 16})"
6,0.16,0.25,"[186, 208, 213, 228, 390, 556, 558, 650, 662, ...",0.37,"[1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 0, 2, ...",0.27,0.24,18.0,"Counter({0: 23, 1: 20, 2: 20})"
7,0.18,0.15,"[32, 34, 37, 60, 112, 148, 306, 335, 342, 402,...",0.23,"[1, 1, 1, 1, 0, 2, 0, 0, 0, 0, 1, 1, 1, 1, 0, ...",0.8,0.29,130.0,"Counter({0: 24, 1: 21, 2: 18})"
8,0.16,0.08,"[22, 32, 39, 60, 66, 91, 97, 99, 118, 120, 148...",0.18,"[2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 2, 2, 1, 1, 1, ...",0.7,0.31,123.0,"Counter({0: 27, 1: 22, 2: 14})"
9,0.17,0.21,"[39, 155, 171, 234, 235, 450, 500, 520, 758, 1...",0.35,"[0, 1, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 1, ...",0.3,0.27,23.0,"Counter({0: 23, 1: 21, 2: 19})"


adding 8 random population


100%|██████████| 31/31 [06:28<00:00, 12.54s/it]


TIME: GA: 6.4802716533343 min
TIME: Total: 6.498523759841919 min

##########  sorlie, (85, 456)
*** Computing 1D feature ranking ...
Dispersion tests took 0.01 sec


  0%|          | 0/31 [00:00<?, ?it/s]

Entropy computation 0.11 sec
KNN computation 0.01 sec
Sorting and thresholds 0.01 sec
Performing hierarchical clustering...
Hierarchical clustering 0.01 sec
Handle redundant features 0.05 sec
Returning 26 redundant features and  13 important features
TIME: 1d Features : 0.0033613125483194987 min
TIME: 2d scores: 3.973642985026042e-09 min
*** Optimization algorithm 
Non redundant features 430, orig size 456, nb imp : 13
adding 50 random population


 10%|▉         | 3/31 [00:12<01:55,  4.14s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.14,0.09,"[255, 256, 357, 381, 399, 403]",0.19,"[1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 2, ...",0.14,0.29,6.0,"Counter({0: 33, 1: 30, 2: 22})"


adding 16 random population


 19%|█▉        | 6/31 [00:31<02:10,  5.21s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.14,0.09,"[255, 256, 357, 381, 399, 403]",0.19,"[1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 2, ...",0.14,0.29,6.0,"Counter({0: 33, 1: 30, 2: 22})"
1,0.15,0.24,"[5, 55, 192]",0.25,"[1, 0, 1, 1, 1, 2, 0, 2, 1, 1, 1, 0, 2, 1, 1, ...",0.13,0.4,3.0,"Counter({0: 31, 1: 29, 2: 25})"


adding 12 random population


 29%|██▉       | 9/31 [00:47<01:48,  4.93s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.14,0.09,"[255, 256, 357, 381, 399, 403]",0.19,"[1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 2, ...",0.14,0.29,6.0,"Counter({0: 33, 1: 30, 2: 22})"
1,0.15,0.24,"[5, 55, 192]",0.25,"[1, 0, 1, 1, 1, 2, 0, 2, 1, 1, 1, 0, 2, 1, 1, ...",0.13,0.4,3.0,"Counter({0: 31, 1: 29, 2: 25})"
2,0.15,0.17,"[7, 25, 391, 451, 452, 454]",0.26,"[0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, ...",0.15,0.26,6.0,"Counter({0: 34, 1: 28, 2: 23})"


adding 7 random population


 39%|███▊      | 12/31 [01:03<01:32,  4.86s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.14,0.09,"[255, 256, 357, 381, 399, 403]",0.19,"[1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 2, ...",0.14,0.29,6.0,"Counter({0: 33, 1: 30, 2: 22})"
1,0.15,0.24,"[5, 55, 192]",0.25,"[1, 0, 1, 1, 1, 2, 0, 2, 1, 1, 1, 0, 2, 1, 1, ...",0.13,0.4,3.0,"Counter({0: 31, 1: 29, 2: 25})"
2,0.15,0.17,"[7, 25, 391, 451, 452, 454]",0.26,"[0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, ...",0.15,0.26,6.0,"Counter({0: 34, 1: 28, 2: 23})"
3,0.14,0.15,"[9, 20, 367, 369, 370, 371]",0.27,"[2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 3, 0, 1, ...",0.15,0.24,6.0,"Counter({0: 24, 2: 23, 1: 23, 3: 15})"


adding 8 random population


 48%|████▊     | 15/31 [01:19<01:16,  4.80s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.14,0.09,"[255, 256, 357, 381, 399, 403]",0.19,"[1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 2, ...",0.14,0.29,6.0,"Counter({0: 33, 1: 30, 2: 22})"
1,0.15,0.24,"[5, 55, 192]",0.25,"[1, 0, 1, 1, 1, 2, 0, 2, 1, 1, 1, 0, 2, 1, 1, ...",0.13,0.4,3.0,"Counter({0: 31, 1: 29, 2: 25})"
2,0.15,0.17,"[7, 25, 391, 451, 452, 454]",0.26,"[0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, ...",0.15,0.26,6.0,"Counter({0: 34, 1: 28, 2: 23})"
3,0.14,0.15,"[9, 20, 367, 369, 370, 371]",0.27,"[2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 3, 0, 1, ...",0.15,0.24,6.0,"Counter({0: 24, 2: 23, 1: 23, 3: 15})"
4,0.14,0.19,"[206, 217, 218]",0.24,"[0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, ...",0.12,0.36,3.0,"Counter({0: 35, 1: 32, 2: 18})"


adding 5 random population


 58%|█████▊    | 18/31 [01:34<01:01,  4.71s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.14,0.09,"[255, 256, 357, 381, 399, 403]",0.19,"[1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 2, ...",0.14,0.29,6.0,"Counter({0: 33, 1: 30, 2: 22})"
1,0.15,0.24,"[5, 55, 192]",0.25,"[1, 0, 1, 1, 1, 2, 0, 2, 1, 1, 1, 0, 2, 1, 1, ...",0.13,0.4,3.0,"Counter({0: 31, 1: 29, 2: 25})"
2,0.15,0.17,"[7, 25, 391, 451, 452, 454]",0.26,"[0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, ...",0.15,0.26,6.0,"Counter({0: 34, 1: 28, 2: 23})"
3,0.14,0.15,"[9, 20, 367, 369, 370, 371]",0.27,"[2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 3, 0, 1, ...",0.15,0.24,6.0,"Counter({0: 24, 2: 23, 1: 23, 3: 15})"
4,0.14,0.19,"[206, 217, 218]",0.24,"[0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, ...",0.12,0.36,3.0,"Counter({0: 35, 1: 32, 2: 18})"
5,0.14,0.21,"[17, 27, 37, 90, 94, 107, 305]",0.32,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, ...",0.19,0.31,7.0,"Counter({0: 29, 1: 26, 2: 17, 3: 13})"


adding 28 random population


 68%|██████▊   | 21/31 [01:50<00:47,  4.76s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.14,0.09,"[255, 256, 357, 381, 399, 403]",0.19,"[1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 2, ...",0.14,0.29,6.0,"Counter({0: 33, 1: 30, 2: 22})"
1,0.15,0.24,"[5, 55, 192]",0.25,"[1, 0, 1, 1, 1, 2, 0, 2, 1, 1, 1, 0, 2, 1, 1, ...",0.13,0.4,3.0,"Counter({0: 31, 1: 29, 2: 25})"
2,0.15,0.17,"[7, 25, 391, 451, 452, 454]",0.26,"[0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, ...",0.15,0.26,6.0,"Counter({0: 34, 1: 28, 2: 23})"
3,0.14,0.15,"[9, 20, 367, 369, 370, 371]",0.27,"[2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 3, 0, 1, ...",0.15,0.24,6.0,"Counter({0: 24, 2: 23, 1: 23, 3: 15})"
4,0.14,0.19,"[206, 217, 218]",0.24,"[0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, ...",0.12,0.36,3.0,"Counter({0: 35, 1: 32, 2: 18})"
5,0.14,0.21,"[17, 27, 37, 90, 94, 107, 305]",0.32,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, ...",0.19,0.31,7.0,"Counter({0: 29, 1: 26, 2: 17, 3: 13})"
6,0.13,0.13,"[30, 49, 289, 375, 394, 409, 412]",0.21,"[0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, ...",0.12,0.27,7.0,"Counter({0: 32, 1: 27, 2: 26})"


adding 12 random population


 77%|███████▋  | 24/31 [02:08<00:34,  4.95s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.14,0.09,"[255, 256, 357, 381, 399, 403]",0.19,"[1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 2, ...",0.14,0.29,6.0,"Counter({0: 33, 1: 30, 2: 22})"
1,0.15,0.24,"[5, 55, 192]",0.25,"[1, 0, 1, 1, 1, 2, 0, 2, 1, 1, 1, 0, 2, 1, 1, ...",0.13,0.4,3.0,"Counter({0: 31, 1: 29, 2: 25})"
2,0.15,0.17,"[7, 25, 391, 451, 452, 454]",0.26,"[0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, ...",0.15,0.26,6.0,"Counter({0: 34, 1: 28, 2: 23})"
3,0.14,0.15,"[9, 20, 367, 369, 370, 371]",0.27,"[2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 3, 0, 1, ...",0.15,0.24,6.0,"Counter({0: 24, 2: 23, 1: 23, 3: 15})"
4,0.14,0.19,"[206, 217, 218]",0.24,"[0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, ...",0.12,0.36,3.0,"Counter({0: 35, 1: 32, 2: 18})"
5,0.14,0.21,"[17, 27, 37, 90, 94, 107, 305]",0.32,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, ...",0.19,0.31,7.0,"Counter({0: 29, 1: 26, 2: 17, 3: 13})"
6,0.13,0.13,"[30, 49, 289, 375, 394, 409, 412]",0.21,"[0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, ...",0.12,0.27,7.0,"Counter({0: 32, 1: 27, 2: 26})"
7,0.14,0.04,"[26, 419, 434]",0.12,"[0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 1, 0, 1, 2, 2, ...",0.13,0.33,3.0,"Counter({0: 29, 1: 28, 2: 28})"


adding 4 random population


 87%|████████▋ | 27/31 [02:24<00:19,  4.90s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.14,0.09,"[255, 256, 357, 381, 399, 403]",0.19,"[1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 2, ...",0.14,0.29,6.0,"Counter({0: 33, 1: 30, 2: 22})"
1,0.15,0.24,"[5, 55, 192]",0.25,"[1, 0, 1, 1, 1, 2, 0, 2, 1, 1, 1, 0, 2, 1, 1, ...",0.13,0.4,3.0,"Counter({0: 31, 1: 29, 2: 25})"
2,0.15,0.17,"[7, 25, 391, 451, 452, 454]",0.26,"[0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, ...",0.15,0.26,6.0,"Counter({0: 34, 1: 28, 2: 23})"
3,0.14,0.15,"[9, 20, 367, 369, 370, 371]",0.27,"[2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 3, 0, 1, ...",0.15,0.24,6.0,"Counter({0: 24, 2: 23, 1: 23, 3: 15})"
4,0.14,0.19,"[206, 217, 218]",0.24,"[0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, ...",0.12,0.36,3.0,"Counter({0: 35, 1: 32, 2: 18})"
5,0.14,0.21,"[17, 27, 37, 90, 94, 107, 305]",0.32,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, ...",0.19,0.31,7.0,"Counter({0: 29, 1: 26, 2: 17, 3: 13})"
6,0.13,0.13,"[30, 49, 289, 375, 394, 409, 412]",0.21,"[0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, ...",0.12,0.27,7.0,"Counter({0: 32, 1: 27, 2: 26})"
7,0.14,0.04,"[26, 419, 434]",0.12,"[0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 1, 0, 1, 2, 2, ...",0.13,0.33,3.0,"Counter({0: 29, 1: 28, 2: 28})"
8,0.13,0.08,"[23, 33, 83, 132, 149, 160, 162, 335, 382]",0.16,"[0, 2, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, ...",0.13,0.26,9.0,"Counter({0: 38, 1: 24, 2: 23})"


adding 13 random population


 97%|█████████▋| 30/31 [02:42<00:05,  5.09s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.14,0.09,"[255, 256, 357, 381, 399, 403]",0.19,"[1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 2, ...",0.14,0.29,6.0,"Counter({0: 33, 1: 30, 2: 22})"
1,0.15,0.24,"[5, 55, 192]",0.25,"[1, 0, 1, 1, 1, 2, 0, 2, 1, 1, 1, 0, 2, 1, 1, ...",0.13,0.4,3.0,"Counter({0: 31, 1: 29, 2: 25})"
2,0.15,0.17,"[7, 25, 391, 451, 452, 454]",0.26,"[0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, ...",0.15,0.26,6.0,"Counter({0: 34, 1: 28, 2: 23})"
3,0.14,0.15,"[9, 20, 367, 369, 370, 371]",0.27,"[2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 3, 0, 1, ...",0.15,0.24,6.0,"Counter({0: 24, 2: 23, 1: 23, 3: 15})"
4,0.14,0.19,"[206, 217, 218]",0.24,"[0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, ...",0.12,0.36,3.0,"Counter({0: 35, 1: 32, 2: 18})"
5,0.14,0.21,"[17, 27, 37, 90, 94, 107, 305]",0.32,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, ...",0.19,0.31,7.0,"Counter({0: 29, 1: 26, 2: 17, 3: 13})"
6,0.13,0.13,"[30, 49, 289, 375, 394, 409, 412]",0.21,"[0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, ...",0.12,0.27,7.0,"Counter({0: 32, 1: 27, 2: 26})"
7,0.14,0.04,"[26, 419, 434]",0.12,"[0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 1, 0, 1, 2, 2, ...",0.13,0.33,3.0,"Counter({0: 29, 1: 28, 2: 28})"
8,0.13,0.08,"[23, 33, 83, 132, 149, 160, 162, 335, 382]",0.16,"[0, 2, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, ...",0.13,0.26,9.0,"Counter({0: 38, 1: 24, 2: 23})"
9,0.13,0.15,"[10, 20, 241, 243, 244, 363, 369, 370]",0.28,"[0, 0, 2, 0, 0, 2, 2, 2, 2, 0, 2, 2, 3, 0, 3, ...",0.18,0.24,8.0,"Counter({0: 28, 1: 20, 2: 19, 3: 18})"


adding 11 random population


100%|██████████| 31/31 [02:52<00:00,  5.55s/it]


TIME: GA: 2.868309013048808 min
TIME: Total: 2.871670639514923 min

##########  su, (102, 5563)
*** Computing 1D feature ranking ...
Dispersion tests took 0.11 sec
Entropy computation 0.89 sec
KNN computation 1.5 sec
Sorting and thresholds 0.02 sec
Performing hierarchical clustering...
Hierarchical clustering 1.18 sec


  0%|          | 0/31 [00:00<?, ?it/s]

Handle redundant features 0.59 sec
Returning 603 redundant features and  185 important features
TIME: 1d Features : 0.07191680669784546 min
TIME: 2d scores: 7.947285970052083e-09 min
*** Optimization algorithm 
Non redundant features 4960, orig size 5563, nb imp : 185
adding 50 random population


 10%|▉         | 3/31 [00:14<02:15,  4.85s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.16,0.5,"[5, 87, 131, 592, 695, 1442, 1447, 1835, 2120,...",0.62,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, ...",0.37,0.36,19.0,"Counter({0: 47, 1: 33, 2: 22})"


adding 9 random population


 19%|█▉        | 6/31 [00:49<03:22,  8.08s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.16,0.5,"[5, 87, 131, 592, 695, 1442, 1447, 1835, 2120,...",0.62,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, ...",0.37,0.36,19.0,"Counter({0: 47, 1: 33, 2: 22})"
1,0.21,0.69,"[0, 1, 225, 242, 283, 284, 333, 348, 402, 574,...",0.85,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1.08,0.36,94.0,"Counter({0: 53, 1: 26, 2: 23})"


adding 37 random population


 29%|██▉       | 9/31 [02:03<05:30, 15.01s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.16,0.5,"[5, 87, 131, 592, 695, 1442, 1447, 1835, 2120,...",0.62,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, ...",0.37,0.36,19.0,"Counter({0: 47, 1: 33, 2: 22})"
1,0.21,0.69,"[0, 1, 225, 242, 283, 284, 333, 348, 402, 574,...",0.85,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1.08,0.36,94.0,"Counter({0: 53, 1: 26, 2: 23})"
2,0.16,0.34,"[23, 44, 169, 171, 201, 279, 284, 293, 333, 34...",0.47,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, ...",0.67,0.22,103.0,"Counter({0: 42, 1: 34, 2: 26})"


adding 17 random population


 39%|███▊      | 12/31 [03:28<05:55, 18.70s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.16,0.5,"[5, 87, 131, 592, 695, 1442, 1447, 1835, 2120,...",0.62,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, ...",0.37,0.36,19.0,"Counter({0: 47, 1: 33, 2: 22})"
1,0.21,0.69,"[0, 1, 225, 242, 283, 284, 333, 348, 402, 574,...",0.85,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1.08,0.36,94.0,"Counter({0: 53, 1: 26, 2: 23})"
2,0.16,0.34,"[23, 44, 169, 171, 201, 279, 284, 293, 333, 34...",0.47,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, ...",0.67,0.22,103.0,"Counter({0: 42, 1: 34, 2: 26})"
3,0.18,0.48,"[0, 38, 47, 60, 171, 183, 201, 225, 242, 268, ...",0.59,"[1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, ...",1.07,0.24,186.0,"Counter({0: 40, 1: 36, 2: 26})"


adding 8 random population


 48%|████▊     | 15/31 [04:41<04:50, 18.14s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.16,0.5,"[5, 87, 131, 592, 695, 1442, 1447, 1835, 2120,...",0.62,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, ...",0.37,0.36,19.0,"Counter({0: 47, 1: 33, 2: 22})"
1,0.21,0.69,"[0, 1, 225, 242, 283, 284, 333, 348, 402, 574,...",0.85,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1.08,0.36,94.0,"Counter({0: 53, 1: 26, 2: 23})"
2,0.16,0.34,"[23, 44, 169, 171, 201, 279, 284, 293, 333, 34...",0.47,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, ...",0.67,0.22,103.0,"Counter({0: 42, 1: 34, 2: 26})"
3,0.18,0.48,"[0, 38, 47, 60, 171, 183, 201, 225, 242, 268, ...",0.59,"[1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, ...",1.07,0.24,186.0,"Counter({0: 40, 1: 36, 2: 26})"
4,0.18,0.62,"[4, 15, 353, 384, 514, 697, 713, 791, 839, 907...",0.74,"[1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, ...",0.59,0.34,51.0,"Counter({0: 40, 1: 39, 2: 23})"


adding 19 random population


 58%|█████▊    | 18/31 [05:26<03:01, 13.93s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.16,0.5,"[5, 87, 131, 592, 695, 1442, 1447, 1835, 2120,...",0.62,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, ...",0.37,0.36,19.0,"Counter({0: 47, 1: 33, 2: 22})"
1,0.21,0.69,"[0, 1, 225, 242, 283, 284, 333, 348, 402, 574,...",0.85,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1.08,0.36,94.0,"Counter({0: 53, 1: 26, 2: 23})"
2,0.16,0.34,"[23, 44, 169, 171, 201, 279, 284, 293, 333, 34...",0.47,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, ...",0.67,0.22,103.0,"Counter({0: 42, 1: 34, 2: 26})"
3,0.18,0.48,"[0, 38, 47, 60, 171, 183, 201, 225, 242, 268, ...",0.59,"[1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, ...",1.07,0.24,186.0,"Counter({0: 40, 1: 36, 2: 26})"
4,0.18,0.62,"[4, 15, 353, 384, 514, 697, 713, 791, 839, 907...",0.74,"[1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, ...",0.59,0.34,51.0,"Counter({0: 40, 1: 39, 2: 23})"
5,0.17,0.45,"[0, 107, 186, 201, 225, 229, 230, 242, 279, 28...",0.57,"[1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, ...",0.95,0.21,159.0,"Counter({0: 42, 1: 34, 2: 26})"


adding 10 random population


 68%|██████▊   | 21/31 [06:37<02:41, 16.11s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.16,0.5,"[5, 87, 131, 592, 695, 1442, 1447, 1835, 2120,...",0.62,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, ...",0.37,0.36,19.0,"Counter({0: 47, 1: 33, 2: 22})"
1,0.21,0.69,"[0, 1, 225, 242, 283, 284, 333, 348, 402, 574,...",0.85,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1.08,0.36,94.0,"Counter({0: 53, 1: 26, 2: 23})"
2,0.16,0.34,"[23, 44, 169, 171, 201, 279, 284, 293, 333, 34...",0.47,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, ...",0.67,0.22,103.0,"Counter({0: 42, 1: 34, 2: 26})"
3,0.18,0.48,"[0, 38, 47, 60, 171, 183, 201, 225, 242, 268, ...",0.59,"[1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, ...",1.07,0.24,186.0,"Counter({0: 40, 1: 36, 2: 26})"
4,0.18,0.62,"[4, 15, 353, 384, 514, 697, 713, 791, 839, 907...",0.74,"[1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, ...",0.59,0.34,51.0,"Counter({0: 40, 1: 39, 2: 23})"
5,0.17,0.45,"[0, 107, 186, 201, 225, 229, 230, 242, 279, 28...",0.57,"[1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, ...",0.95,0.21,159.0,"Counter({0: 42, 1: 34, 2: 26})"
6,0.16,0.59,"[5, 60, 170, 171, 209, 279, 348, 353, 384, 605...",0.65,"[1, 1, 0, 0, 0, 0, 2, 0, 1, 0, 1, 0, 1, 2, 2, ...",0.62,0.25,90.0,"Counter({0: 39, 1: 32, 2: 31})"


adding 14 random population


 77%|███████▋  | 24/31 [07:52<02:03, 17.68s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.16,0.5,"[5, 87, 131, 592, 695, 1442, 1447, 1835, 2120,...",0.62,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, ...",0.37,0.36,19.0,"Counter({0: 47, 1: 33, 2: 22})"
1,0.21,0.69,"[0, 1, 225, 242, 283, 284, 333, 348, 402, 574,...",0.85,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1.08,0.36,94.0,"Counter({0: 53, 1: 26, 2: 23})"
2,0.16,0.34,"[23, 44, 169, 171, 201, 279, 284, 293, 333, 34...",0.47,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, ...",0.67,0.22,103.0,"Counter({0: 42, 1: 34, 2: 26})"
3,0.18,0.48,"[0, 38, 47, 60, 171, 183, 201, 225, 242, 268, ...",0.59,"[1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, ...",1.07,0.24,186.0,"Counter({0: 40, 1: 36, 2: 26})"
4,0.18,0.62,"[4, 15, 353, 384, 514, 697, 713, 791, 839, 907...",0.74,"[1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, ...",0.59,0.34,51.0,"Counter({0: 40, 1: 39, 2: 23})"
5,0.17,0.45,"[0, 107, 186, 201, 225, 229, 230, 242, 279, 28...",0.57,"[1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, ...",0.95,0.21,159.0,"Counter({0: 42, 1: 34, 2: 26})"
6,0.16,0.59,"[5, 60, 170, 171, 209, 279, 348, 353, 384, 605...",0.65,"[1, 1, 0, 0, 0, 0, 2, 0, 1, 0, 1, 0, 1, 2, 2, ...",0.62,0.25,90.0,"Counter({0: 39, 1: 32, 2: 31})"
7,0.16,-0.0,"[24, 4180]",0.02,"[1, 2, 1, 1, 1, 1, 2, 0, 1, 0, 0, 1, 0, 1, 0, ...",0.24,0.64,2.0,"Counter({0: 51, 1: 40, 2: 11})"


adding 2 random population


 87%|████████▋ | 27/31 [08:16<00:42, 10.57s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.16,0.5,"[5, 87, 131, 592, 695, 1442, 1447, 1835, 2120,...",0.62,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, ...",0.37,0.36,19.0,"Counter({0: 47, 1: 33, 2: 22})"
1,0.21,0.69,"[0, 1, 225, 242, 283, 284, 333, 348, 402, 574,...",0.85,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1.08,0.36,94.0,"Counter({0: 53, 1: 26, 2: 23})"
2,0.16,0.34,"[23, 44, 169, 171, 201, 279, 284, 293, 333, 34...",0.47,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, ...",0.67,0.22,103.0,"Counter({0: 42, 1: 34, 2: 26})"
3,0.18,0.48,"[0, 38, 47, 60, 171, 183, 201, 225, 242, 268, ...",0.59,"[1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, ...",1.07,0.24,186.0,"Counter({0: 40, 1: 36, 2: 26})"
4,0.18,0.62,"[4, 15, 353, 384, 514, 697, 713, 791, 839, 907...",0.74,"[1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, ...",0.59,0.34,51.0,"Counter({0: 40, 1: 39, 2: 23})"
5,0.17,0.45,"[0, 107, 186, 201, 225, 229, 230, 242, 279, 28...",0.57,"[1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, ...",0.95,0.21,159.0,"Counter({0: 42, 1: 34, 2: 26})"
6,0.16,0.59,"[5, 60, 170, 171, 209, 279, 348, 353, 384, 605...",0.65,"[1, 1, 0, 0, 0, 0, 2, 0, 1, 0, 1, 0, 1, 2, 2, ...",0.62,0.25,90.0,"Counter({0: 39, 1: 32, 2: 31})"
7,0.16,-0.0,"[24, 4180]",0.02,"[1, 2, 1, 1, 1, 1, 2, 0, 1, 0, 0, 1, 0, 1, 0, ...",0.24,0.64,2.0,"Counter({0: 51, 1: 40, 2: 11})"
8,0.18,0.47,"[0, 38, 230, 283, 333, 574, 765, 866, 983, 988...",0.6,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.71,0.24,54.0,"Counter({0: 48, 1: 28, 2: 26})"


adding 17 random population


 97%|█████████▋| 30/31 [09:02<00:11, 11.44s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.16,0.5,"[5, 87, 131, 592, 695, 1442, 1447, 1835, 2120,...",0.62,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, ...",0.37,0.36,19.0,"Counter({0: 47, 1: 33, 2: 22})"
1,0.21,0.69,"[0, 1, 225, 242, 283, 284, 333, 348, 402, 574,...",0.85,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1.08,0.36,94.0,"Counter({0: 53, 1: 26, 2: 23})"
2,0.16,0.34,"[23, 44, 169, 171, 201, 279, 284, 293, 333, 34...",0.47,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, ...",0.67,0.22,103.0,"Counter({0: 42, 1: 34, 2: 26})"
3,0.18,0.48,"[0, 38, 47, 60, 171, 183, 201, 225, 242, 268, ...",0.59,"[1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, ...",1.07,0.24,186.0,"Counter({0: 40, 1: 36, 2: 26})"
4,0.18,0.62,"[4, 15, 353, 384, 514, 697, 713, 791, 839, 907...",0.74,"[1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, ...",0.59,0.34,51.0,"Counter({0: 40, 1: 39, 2: 23})"
5,0.17,0.45,"[0, 107, 186, 201, 225, 229, 230, 242, 279, 28...",0.57,"[1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, ...",0.95,0.21,159.0,"Counter({0: 42, 1: 34, 2: 26})"
6,0.16,0.59,"[5, 60, 170, 171, 209, 279, 348, 353, 384, 605...",0.65,"[1, 1, 0, 0, 0, 0, 2, 0, 1, 0, 1, 0, 1, 2, 2, ...",0.62,0.25,90.0,"Counter({0: 39, 1: 32, 2: 31})"
7,0.16,-0.0,"[24, 4180]",0.02,"[1, 2, 1, 1, 1, 1, 2, 0, 1, 0, 0, 1, 0, 1, 0, ...",0.24,0.64,2.0,"Counter({0: 51, 1: 40, 2: 11})"
8,0.18,0.47,"[0, 38, 230, 283, 333, 574, 765, 866, 983, 988...",0.6,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.71,0.24,54.0,"Counter({0: 48, 1: 28, 2: 26})"
9,0.18,0.45,"[0, 74, 135, 169, 183, 199, 230, 242, 282, 331...",0.55,"[1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, ...",0.95,0.24,156.0,"Counter({0: 44, 1: 32, 2: 26})"


adding 16 random population


100%|██████████| 31/31 [09:58<00:00, 19.30s/it]


TIME: GA: 9.972534092267354 min
TIME: Total: 10.044451292355856 min

##########  yeoh, (248, 12625)
*** Computing 1D feature ranking ...
Dispersion tests took 0.85 sec
Entropy computation 3.17 sec
KNN computation 24.23 sec
Sorting and thresholds 0.04 sec
Performing hierarchical clustering...
Hierarchical clustering 14.22 sec


  0%|          | 0/31 [00:00<?, ?it/s]

Handle redundant features 0.87 sec
Returning 1357 redundant features and  330 important features
TIME: 1d Features : 0.7235448996225993 min
TIME: 2d scores: 3.973642985026042e-09 min
*** Optimization algorithm 
Non redundant features 11268, orig size 12625, nb imp : 330
adding 50 random population


 10%|▉         | 3/31 [01:02<09:39, 20.71s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.15,-0.0,"[1436, 3733, 4371, 4460, 4675, 5771, 7539, 868...",0.01,"[0, 0, 0, 2, 2, 1, 1, 1, 2, 2, 1, 0, 2, 1, 2, ...",0.13,0.18,13.0,"Counter({0: 87, 1: 86, 2: 75})"


adding 1 random population


 19%|█▉        | 6/31 [03:24<14:16, 34.26s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.15,-0.0,"[1436, 3733, 4371, 4460, 4675, 5771, 7539, 868...",0.01,"[0, 0, 0, 2, 2, 1, 1, 1, 2, 2, 1, 0, 2, 1, 2, ...",0.13,0.18,13.0,"Counter({0: 87, 1: 86, 2: 75})"
1,0.17,0.54,"[0, 2, 18, 26, 28, 46, 55, 71, 564, 1009, 1046...",0.6,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.3,0.29,27.0,"Counter({0: 87, 1: 83, 2: 78})"


adding 25 random population


 29%|██▉       | 9/31 [06:46<17:11, 46.90s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.15,-0.0,"[1436, 3733, 4371, 4460, 4675, 5771, 7539, 868...",0.01,"[0, 0, 0, 2, 2, 1, 1, 1, 2, 2, 1, 0, 2, 1, 2, ...",0.13,0.18,13.0,"Counter({0: 87, 1: 86, 2: 75})"
1,0.17,0.54,"[0, 2, 18, 26, 28, 46, 55, 71, 564, 1009, 1046...",0.6,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.3,0.29,27.0,"Counter({0: 87, 1: 83, 2: 78})"
2,0.13,0.22,"[3, 4, 15, 19, 23, 120, 158, 306, 465, 519, 53...",0.37,"[1, 1, 2, 0, 2, 2, 0, 0, 0, 0, 2, 2, 0, 2, 2, ...",0.34,0.16,44.0,"Counter({0: 83, 1: 62, 2: 60, 3: 43})"


adding 11 random population


 39%|███▊      | 12/31 [09:48<15:10, 47.94s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.15,-0.0,"[1436, 3733, 4371, 4460, 4675, 5771, 7539, 868...",0.01,"[0, 0, 0, 2, 2, 1, 1, 1, 2, 2, 1, 0, 2, 1, 2, ...",0.13,0.18,13.0,"Counter({0: 87, 1: 86, 2: 75})"
1,0.17,0.54,"[0, 2, 18, 26, 28, 46, 55, 71, 564, 1009, 1046...",0.6,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.3,0.29,27.0,"Counter({0: 87, 1: 83, 2: 78})"
2,0.13,0.22,"[3, 4, 15, 19, 23, 120, 158, 306, 465, 519, 53...",0.37,"[1, 1, 2, 0, 2, 2, 0, 0, 0, 0, 2, 2, 0, 2, 2, ...",0.34,0.16,44.0,"Counter({0: 83, 1: 62, 2: 60, 3: 43})"
3,0.12,0.33,"[1, 14, 197, 358, 752, 1366, 1694, 1755, 1871,...",0.48,"[2, 1, 0, 2, 2, 2, 2, 2, 2, 1, 0, 0, 2, 2, 2, ...",0.24,0.25,31.0,"Counter({0: 92, 1: 66, 2: 63, 3: 27})"


adding 38 random population


 48%|████▊     | 15/31 [12:20<11:39, 43.73s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.15,-0.0,"[1436, 3733, 4371, 4460, 4675, 5771, 7539, 868...",0.01,"[0, 0, 0, 2, 2, 1, 1, 1, 2, 2, 1, 0, 2, 1, 2, ...",0.13,0.18,13.0,"Counter({0: 87, 1: 86, 2: 75})"
1,0.17,0.54,"[0, 2, 18, 26, 28, 46, 55, 71, 564, 1009, 1046...",0.6,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.3,0.29,27.0,"Counter({0: 87, 1: 83, 2: 78})"
2,0.13,0.22,"[3, 4, 15, 19, 23, 120, 158, 306, 465, 519, 53...",0.37,"[1, 1, 2, 0, 2, 2, 0, 0, 0, 0, 2, 2, 0, 2, 2, ...",0.34,0.16,44.0,"Counter({0: 83, 1: 62, 2: 60, 3: 43})"
3,0.12,0.33,"[1, 14, 197, 358, 752, 1366, 1694, 1755, 1871,...",0.48,"[2, 1, 0, 2, 2, 2, 2, 2, 2, 1, 0, 0, 2, 2, 2, ...",0.24,0.25,31.0,"Counter({0: 92, 1: 66, 2: 63, 3: 27})"
4,0.12,-0.0,"[1198, 3272, 4463, 4491, 5716, 6420, 7566, 822...",0.01,"[0, 2, 0, 1, 1, 1, 1, 0, 0, 2, 0, 0, 2, 1, 2, ...",0.1,0.17,9.0,"Counter({0: 90, 1: 81, 2: 77})"


adding 20 random population


 58%|█████▊    | 18/31 [14:47<09:03, 41.83s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.15,-0.0,"[1436, 3733, 4371, 4460, 4675, 5771, 7539, 868...",0.01,"[0, 0, 0, 2, 2, 1, 1, 1, 2, 2, 1, 0, 2, 1, 2, ...",0.13,0.18,13.0,"Counter({0: 87, 1: 86, 2: 75})"
1,0.17,0.54,"[0, 2, 18, 26, 28, 46, 55, 71, 564, 1009, 1046...",0.6,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.3,0.29,27.0,"Counter({0: 87, 1: 83, 2: 78})"
2,0.13,0.22,"[3, 4, 15, 19, 23, 120, 158, 306, 465, 519, 53...",0.37,"[1, 1, 2, 0, 2, 2, 0, 0, 0, 0, 2, 2, 0, 2, 2, ...",0.34,0.16,44.0,"Counter({0: 83, 1: 62, 2: 60, 3: 43})"
3,0.12,0.33,"[1, 14, 197, 358, 752, 1366, 1694, 1755, 1871,...",0.48,"[2, 1, 0, 2, 2, 2, 2, 2, 2, 1, 0, 0, 2, 2, 2, ...",0.24,0.25,31.0,"Counter({0: 92, 1: 66, 2: 63, 3: 27})"
4,0.12,-0.0,"[1198, 3272, 4463, 4491, 5716, 6420, 7566, 822...",0.01,"[0, 2, 0, 1, 1, 1, 1, 0, 0, 2, 0, 0, 2, 1, 2, ...",0.1,0.17,9.0,"Counter({0: 90, 1: 81, 2: 77})"
5,0.13,0.18,"[46, 203, 311, 378, 379, 386, 430, 1392, 1906,...",0.3,"[3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 3, ...",0.31,0.18,55.0,"Counter({0: 82, 1: 78, 2: 49, 3: 39})"


adding 9 random population


 68%|██████▊   | 21/31 [17:56<07:51, 47.10s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.15,-0.0,"[1436, 3733, 4371, 4460, 4675, 5771, 7539, 868...",0.01,"[0, 0, 0, 2, 2, 1, 1, 1, 2, 2, 1, 0, 2, 1, 2, ...",0.13,0.18,13.0,"Counter({0: 87, 1: 86, 2: 75})"
1,0.17,0.54,"[0, 2, 18, 26, 28, 46, 55, 71, 564, 1009, 1046...",0.6,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.3,0.29,27.0,"Counter({0: 87, 1: 83, 2: 78})"
2,0.13,0.22,"[3, 4, 15, 19, 23, 120, 158, 306, 465, 519, 53...",0.37,"[1, 1, 2, 0, 2, 2, 0, 0, 0, 0, 2, 2, 0, 2, 2, ...",0.34,0.16,44.0,"Counter({0: 83, 1: 62, 2: 60, 3: 43})"
3,0.12,0.33,"[1, 14, 197, 358, 752, 1366, 1694, 1755, 1871,...",0.48,"[2, 1, 0, 2, 2, 2, 2, 2, 2, 1, 0, 0, 2, 2, 2, ...",0.24,0.25,31.0,"Counter({0: 92, 1: 66, 2: 63, 3: 27})"
4,0.12,-0.0,"[1198, 3272, 4463, 4491, 5716, 6420, 7566, 822...",0.01,"[0, 2, 0, 1, 1, 1, 1, 0, 0, 2, 0, 0, 2, 1, 2, ...",0.1,0.17,9.0,"Counter({0: 90, 1: 81, 2: 77})"
5,0.13,0.18,"[46, 203, 311, 378, 379, 386, 430, 1392, 1906,...",0.3,"[3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 3, ...",0.31,0.18,55.0,"Counter({0: 82, 1: 78, 2: 49, 3: 39})"
6,0.15,0.64,"[7, 9, 26, 31, 34, 50, 51, 158, 261, 453, 818,...",0.66,"[1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, ...",0.31,0.3,37.0,"Counter({0: 87, 1: 84, 2: 77})"


adding 39 random population


 77%|███████▋  | 24/31 [23:17<07:56, 68.04s/it] 

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.15,-0.0,"[1436, 3733, 4371, 4460, 4675, 5771, 7539, 868...",0.01,"[0, 0, 0, 2, 2, 1, 1, 1, 2, 2, 1, 0, 2, 1, 2, ...",0.13,0.18,13.0,"Counter({0: 87, 1: 86, 2: 75})"
1,0.17,0.54,"[0, 2, 18, 26, 28, 46, 55, 71, 564, 1009, 1046...",0.6,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.3,0.29,27.0,"Counter({0: 87, 1: 83, 2: 78})"
2,0.13,0.22,"[3, 4, 15, 19, 23, 120, 158, 306, 465, 519, 53...",0.37,"[1, 1, 2, 0, 2, 2, 0, 0, 0, 0, 2, 2, 0, 2, 2, ...",0.34,0.16,44.0,"Counter({0: 83, 1: 62, 2: 60, 3: 43})"
3,0.12,0.33,"[1, 14, 197, 358, 752, 1366, 1694, 1755, 1871,...",0.48,"[2, 1, 0, 2, 2, 2, 2, 2, 2, 1, 0, 0, 2, 2, 2, ...",0.24,0.25,31.0,"Counter({0: 92, 1: 66, 2: 63, 3: 27})"
4,0.12,-0.0,"[1198, 3272, 4463, 4491, 5716, 6420, 7566, 822...",0.01,"[0, 2, 0, 1, 1, 1, 1, 0, 0, 2, 0, 0, 2, 1, 2, ...",0.1,0.17,9.0,"Counter({0: 90, 1: 81, 2: 77})"
5,0.13,0.18,"[46, 203, 311, 378, 379, 386, 430, 1392, 1906,...",0.3,"[3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 3, ...",0.31,0.18,55.0,"Counter({0: 82, 1: 78, 2: 49, 3: 39})"
6,0.15,0.64,"[7, 9, 26, 31, 34, 50, 51, 158, 261, 453, 818,...",0.66,"[1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, ...",0.31,0.3,37.0,"Counter({0: 87, 1: 84, 2: 77})"
7,0.12,0.23,"[2914, 4432, 5242, 7266, 8093, 10562, 12088]",0.26,"[2, 0, 1, 2, 2, 2, 1, 2, 1, 0, 2, 3, 1, 2, 1, ...",0.13,0.24,7.0,"Counter({0: 80, 2: 70, 1: 70, 3: 28})"


adding 13 random population


 87%|████████▋ | 27/31 [25:38<03:16, 49.13s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.15,-0.0,"[1436, 3733, 4371, 4460, 4675, 5771, 7539, 868...",0.01,"[0, 0, 0, 2, 2, 1, 1, 1, 2, 2, 1, 0, 2, 1, 2, ...",0.13,0.18,13.0,"Counter({0: 87, 1: 86, 2: 75})"
1,0.17,0.54,"[0, 2, 18, 26, 28, 46, 55, 71, 564, 1009, 1046...",0.6,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.3,0.29,27.0,"Counter({0: 87, 1: 83, 2: 78})"
2,0.13,0.22,"[3, 4, 15, 19, 23, 120, 158, 306, 465, 519, 53...",0.37,"[1, 1, 2, 0, 2, 2, 0, 0, 0, 0, 2, 2, 0, 2, 2, ...",0.34,0.16,44.0,"Counter({0: 83, 1: 62, 2: 60, 3: 43})"
3,0.12,0.33,"[1, 14, 197, 358, 752, 1366, 1694, 1755, 1871,...",0.48,"[2, 1, 0, 2, 2, 2, 2, 2, 2, 1, 0, 0, 2, 2, 2, ...",0.24,0.25,31.0,"Counter({0: 92, 1: 66, 2: 63, 3: 27})"
4,0.12,-0.0,"[1198, 3272, 4463, 4491, 5716, 6420, 7566, 822...",0.01,"[0, 2, 0, 1, 1, 1, 1, 0, 0, 2, 0, 0, 2, 1, 2, ...",0.1,0.17,9.0,"Counter({0: 90, 1: 81, 2: 77})"
5,0.13,0.18,"[46, 203, 311, 378, 379, 386, 430, 1392, 1906,...",0.3,"[3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 3, ...",0.31,0.18,55.0,"Counter({0: 82, 1: 78, 2: 49, 3: 39})"
6,0.15,0.64,"[7, 9, 26, 31, 34, 50, 51, 158, 261, 453, 818,...",0.66,"[1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, ...",0.31,0.3,37.0,"Counter({0: 87, 1: 84, 2: 77})"
7,0.12,0.23,"[2914, 4432, 5242, 7266, 8093, 10562, 12088]",0.26,"[2, 0, 1, 2, 2, 2, 1, 2, 1, 0, 2, 3, 1, 2, 1, ...",0.13,0.24,7.0,"Counter({0: 80, 2: 70, 1: 70, 3: 28})"
8,0.12,0.0,"[113, 132, 358, 432, 588, 597, 598, 762, 904, ...",0.02,"[0, 2, 1, 0, 0, 0, 0, 1, 1, 2, 0, 1, 0, 0, 0, ...",0.23,0.12,63.0,"Counter({0: 91, 1: 87, 2: 70})"


adding 13 random population


 97%|█████████▋| 30/31 [29:33<00:56, 56.54s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,nmi,partition,point_biserial,silhouette,size,structure
0,0.15,-0.0,"[1436, 3733, 4371, 4460, 4675, 5771, 7539, 868...",0.01,"[0, 0, 0, 2, 2, 1, 1, 1, 2, 2, 1, 0, 2, 1, 2, ...",0.13,0.18,13.0,"Counter({0: 87, 1: 86, 2: 75})"
1,0.17,0.54,"[0, 2, 18, 26, 28, 46, 55, 71, 564, 1009, 1046...",0.6,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.3,0.29,27.0,"Counter({0: 87, 1: 83, 2: 78})"
2,0.13,0.22,"[3, 4, 15, 19, 23, 120, 158, 306, 465, 519, 53...",0.37,"[1, 1, 2, 0, 2, 2, 0, 0, 0, 0, 2, 2, 0, 2, 2, ...",0.34,0.16,44.0,"Counter({0: 83, 1: 62, 2: 60, 3: 43})"
3,0.12,0.33,"[1, 14, 197, 358, 752, 1366, 1694, 1755, 1871,...",0.48,"[2, 1, 0, 2, 2, 2, 2, 2, 2, 1, 0, 0, 2, 2, 2, ...",0.24,0.25,31.0,"Counter({0: 92, 1: 66, 2: 63, 3: 27})"
4,0.12,-0.0,"[1198, 3272, 4463, 4491, 5716, 6420, 7566, 822...",0.01,"[0, 2, 0, 1, 1, 1, 1, 0, 0, 2, 0, 0, 2, 1, 2, ...",0.1,0.17,9.0,"Counter({0: 90, 1: 81, 2: 77})"
5,0.13,0.18,"[46, 203, 311, 378, 379, 386, 430, 1392, 1906,...",0.3,"[3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 3, ...",0.31,0.18,55.0,"Counter({0: 82, 1: 78, 2: 49, 3: 39})"
6,0.15,0.64,"[7, 9, 26, 31, 34, 50, 51, 158, 261, 453, 818,...",0.66,"[1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, ...",0.31,0.3,37.0,"Counter({0: 87, 1: 84, 2: 77})"
7,0.12,0.23,"[2914, 4432, 5242, 7266, 8093, 10562, 12088]",0.26,"[2, 0, 1, 2, 2, 2, 1, 2, 1, 0, 2, 3, 1, 2, 1, ...",0.13,0.24,7.0,"Counter({0: 80, 2: 70, 1: 70, 3: 28})"
8,0.12,0.0,"[113, 132, 358, 432, 588, 597, 598, 762, 904, ...",0.02,"[0, 2, 1, 0, 0, 0, 0, 1, 1, 2, 0, 1, 0, 0, 0, ...",0.23,0.12,63.0,"Counter({0: 91, 1: 87, 2: 70})"
9,0.16,0.1,"[15, 511, 534, 645, 770, 786, 817, 827, 845, 8...",0.11,"[0, 1, 2, 0, 0, 0, 0, 2, 0, 0, 2, 0, 0, 2, 2, ...",0.49,0.19,152.0,"Counter({0: 94, 1: 90, 2: 64})"


adding 3 random population


100%|██████████| 31/31 [33:38<00:00, 65.11s/it] 

TIME: GA: 33.64013553063075 min
TIME: Total: 34.36368084351222 min





In [17]:
# Maximum ARI per dataset over all rows of `results`.
# The string alias "max" selects the groupby max reduction directly; passing the
# builtin `max` gives the same table but triggers a FutureWarning on recent pandas.
results.groupby("dataset_name").agg({"ari": "max"})

Unnamed: 0_level_0,ari
dataset_name,Unnamed: 1_level_1
alon,0.26
borovecki,0.75
chiaretti,0.28
christensen,0.88
golub,0.29
gordon,0.22
khan,0.81
sorlie,0.24
su,0.69
yeoh,0.64


# Other methods

In [6]:
from sklearn import mixture
from sklearn.cluster import AffinityPropagation
from sklearn.cluster import SpectralClustering
from sklearn.cluster import KMeans
import hdbscan

## Silhouette analysis

In [23]:
# Silhouette score of several off-the-shelf clustering baselines on each
# microarray dataset, both on the raw features and on a 2-component PCA
# projection. One row per dataset is written to `results` and checkpointed to
# disk after every dataset.

MAX_FEATURES = 8000  # only the first MAX_FEATURES columns are fed to the baselines
SILHOUETTE_PICKLE = "../data/microarray_others_silhouette.pkl"

filenames = np.array([
    'alon', 'borovecki', 'chiaretti', 'christensen', 'golub', 'gordon',
       'khan', 'sorlie', 'su', 'yeoh', 'west'
])
path = '../data/microarray/'


def _safe_silhouette(X, labels):
    """Return silhouette_score(X, labels), or NaN for a degenerate partition.

    silhouette_score raises ValueError unless the number of distinct labels is
    between 2 and n_samples - 1. Returning NaN for such a partition keeps one
    failed baseline from aborting the whole sweep.
    """
    n_labels = len(np.unique(labels))
    if n_labels < 2 or n_labels >= len(labels):
        return float("nan")
    return silhouette_score(X, labels)


def _fit_affinity(X, n_clusters):
    """AffinityPropagation labels (n_clusters is unused: the method picks its own)."""
    return AffinityPropagation(random_state=5).fit(X).labels_


def _fit_spectral(X, n_clusters):
    """SpectralClustering labels with the 'discretize' assignment strategy."""
    return SpectralClustering(n_clusters=n_clusters, assign_labels='discretize',
                              random_state=5).fit(X).labels_


def _fit_kmeans(X, n_clusters):
    """KMeans labels."""
    return KMeans(n_clusters=n_clusters, random_state=5).fit(X).labels_


def _fit_gmm(X, n_clusters):
    """Full-covariance Gaussian mixture hard assignments."""
    return mixture.GaussianMixture(n_components=n_clusters,
                                   covariance_type="full",
                                   random_state=0).fit_predict(X)


def _fit_hdbscan(X, n_clusters):
    """HDBSCAN labels (n_clusters is unused).

    NOTE: HDBSCAN marks noise points with the label -1; they are passed to the
    silhouette computation as if they formed one more cluster.
    """
    return hdbscan.HDBSCAN(min_cluster_size =2).fit(X).labels_


# (result column, name printed in the log, fitting function, run on PCA projection?)
# The order matters: it is the order in which the baselines are fitted and logged.
_SILHOUETTE_BASELINES = [
    ("AffinityPropagation", "Affinity", _fit_affinity, False),
    ("Spectral", "Spectral", _fit_spectral, False),
    ("KMeans", "KMeans", _fit_kmeans, False),
    ("GMM", "GMM", _fit_gmm, False),
    ("HDBSCAN", "HDBSCAN", _fit_hdbscan, False),
    ("PCA_KMeans", "PCA KMeans", _fit_kmeans, True),
    ("PCA_GMM", "PCA GMM", _fit_gmm, True),
    ("PCA_HDBSCAN", "PCAHDBSCAN", _fit_hdbscan, True),
]

results = pd.DataFrame()
records = []  # one dict per dataset; avoids growing a DataFrame row by row

for name in filenames:
    print(name)
    frame = pd.read_pickle(f'{path}' + name + '.pkl')
    truth = frame["truth"].values
    # Slicing is a no-op when the dataset has at most MAX_FEATURES columns.
    data = frame.drop("truth", axis = 1).values[:, :MAX_FEATURES]
    # The ground truth is used only to choose the number of clusters.
    n_clusters = len(np.unique(truth))

    row = {"dataset": name}
    pca_data = None
    for column, label, fit, on_pca in _SILHOUETTE_BASELINES:
        if on_pca and pca_data is None:
            # Computed lazily so it still runs after all raw-feature baselines.
            pca_data = PCA(2).fit_transform(data)
        X = pca_data if on_pca else data
        score = _safe_silhouette(X, fit(X, n_clusters))
        print(f"{label} {score}")
        row[column] = score

    records.append(row)
    # Rebuild and checkpoint after every dataset so partial results survive an
    # interrupted run. Columns follow the insertion order of `row`.
    results = pd.DataFrame(records)
    results.to_pickle(SILHOUETTE_PICKLE)

alon
Affinity 0.062115676466571706
Spectral 0.18971104380145723
KMeans 0.33751924055238436
GMM 0.39038081738376745
HDBSCAN 0.327951038200559
PCA KMeans 0.5386360867267125
PCA GMM 0.5948486954752348
PCAHDBSCAN 0.5404045400966923
borovecki
Affinity 0.05732736809638543
Spectral 0.00020361049546127082




KMeans 0.1834516193513604
GMM 0.16428416398565662
HDBSCAN -0.06528039250291505
PCA KMeans 0.5278758557385943
PCA GMM 0.4183195014864457
PCAHDBSCAN 0.2938380365229793
chiaretti
Affinity 0.05829586917319028
Spectral -0.06849106206603138
KMeans 0.08400771636709349
GMM 0.05976823400715967
HDBSCAN -0.0761508051875368
PCA KMeans 0.3891652629573138
PCA GMM 0.3702614925457201
PCAHDBSCAN 0.09342965822113936
christensen
Affinity 0.14378879241920922
Spectral 0.2505581978871958
KMeans 0.28698602009245067
GMM 0.28698602009245067
HDBSCAN 0.24138629251455723
PCA KMeans 0.635316384955393
PCA GMM 0.6323162377395329
PCAHDBSCAN 0.4499280716469588
golub
Affinity 0.046579455330318376




Spectral 0.0246193242871702
KMeans 0.10296167977036548
GMM 0.10895234009293128
HDBSCAN 0.08005591489837682
PCA KMeans 0.41240509079013205
PCA GMM 0.3530838176490845
PCAHDBSCAN 0.2739315849058706
gordon
Affinity 0.016147481335841395




Spectral 0.001220701781178454
KMeans 0.3276695862574315
GMM 0.08769326199312157
HDBSCAN -0.06911562830740205
PCA KMeans 0.6610834192367447
PCA GMM 0.5437945033279933
PCAHDBSCAN 0.15441864020098203
khan
Affinity 0.11536604899800153
Spectral 0.0373203269924958
KMeans 0.11498315455746094
GMM 0.11283131411052932
HDBSCAN 0.0872752723621908
PCA KMeans 0.5941763302797652
PCA GMM 0.5950128121511599
PCAHDBSCAN 0.42798402218891385
sorlie
Affinity 0.05754429031630421
Spectral 0.06661410352474696
KMeans 0.07195986313084812
GMM 0.054763539099721176
HDBSCAN 0.014795226629159946
PCA KMeans 0.4313279230991947
PCA GMM 0.358767713621992
PCAHDBSCAN 0.236830287626471
su
Affinity 0.11763943313320828
Spectral -0.00257658517967443
KMeans 0.14317786635918256
GMM 0.13088794750380037
HDBSCAN 0.07990960846641203
PCA KMeans 0.4989525314345633
PCA GMM 0.4423240646495514
PCAHDBSCAN 0.33114332565224175
yeoh
Affinity 0.033839258201076174
Spectral -0.07516042328023195
KMeans 0.03612338401004898
GMM 0.02984745557222693

# ARI analysis

In [8]:
def _record_ari(row, truth, labels, column, tag):
    """Score one clustering against the ground truth and log it.

    Computes the adjusted Rand index between `truth` and `labels`, prints it
    prefixed with `tag`, and stores it in `row[column]`.
    """
    ari = adjusted_rand_score(truth, labels)
    print(f"{tag} {ari}")
    row[column] = ari


# Baseline comparison: ARI of several off-the-shelf clustering algorithms on each
# dataset, first on the (possibly truncated) feature matrix, then on its
# 2-component PCA projection.
filenames = np.array([
    'west', 'alon', 'borovecki', 'chiaretti', 'christensen', 'golub', 'gordon',
    'khan', 'sorlie', 'su', 'yeoh'])
path = '../data/microarray/'
# NOTE(review): not read anywhere in this cell; kept because other cells may rely on it.
method = "adapted_ratkowsky_lance"

# Rows are accumulated in a list and the frame is rebuilt from it, because
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0.
rows = []
results = pd.DataFrame()
for name in filenames:
    print(name)
    data = pd.read_pickle(f'{path}' + name + '.pkl')
    truth = data["truth"].values
    data = data.drop("truth", axis = 1).values
    # Only the first 8000 columns are used for wide datasets
    # (presumably to bound run time / memory -- TODO confirm).
    if data.shape[1] > 8000:
        data = data[:, :8000]
    n_clusters = len(np.unique(truth))
    row = {"dataset": name}

    # --- original feature space ---
    labels = AffinityPropagation(random_state=5).fit(data).labels_
    _record_ari(row, truth, labels, "AffinityPropagation", "Affinity")

    labels = SpectralClustering(n_clusters=n_clusters, assign_labels='discretize',
                                random_state=5).fit(data).labels_
    _record_ari(row, truth, labels, "Spectral", "Spectral")

    labels = KMeans(n_clusters=n_clusters, random_state=5).fit(data).labels_
    _record_ari(row, truth, labels, "KMeans", "KMeans")

    gmm = mixture.GaussianMixture(n_components=n_clusters,
                                  covariance_type="full", random_state=0)
    _record_ari(row, truth, gmm.fit_predict(data), "GMM", "GMM")

    labels = hdbscan.HDBSCAN(min_cluster_size=2).fit(data).labels_
    _record_ari(row, truth, labels, "HDBSCAN", "HDBSCAN")

    # --- 2-component PCA projection ---
    pca_data = PCA(2).fit_transform(data)

    labels = KMeans(n_clusters=n_clusters, random_state=5).fit(pca_data).labels_
    _record_ari(row, truth, labels, "PCA_KMeans", "PCA KMeans")

    gmm = mixture.GaussianMixture(n_components=n_clusters,
                                  covariance_type="full", random_state=0)
    _record_ari(row, truth, gmm.fit_predict(pca_data), "PCA_GMM", "PCA GMM")

    labels = hdbscan.HDBSCAN(min_cluster_size=2).fit(pca_data).labels_
    _record_ari(row, truth, labels, "PCA_HDBSCAN", "PCAHDBSCAN")

    # Rebuild and save after every dataset so partial results survive an
    # interrupted run.
    rows.append(row)
    results = pd.DataFrame(rows)
    results.to_pickle("../data/microarray_others.pkl")

west
Affinity 0.010971235670581616
Spectral -0.015456057665650124
KMeans -0.0041809458533241944




GMM 0.014631440260730879
HDBSCAN -0.0013206366858969903
PCA KMeans 0.0039014373716632442
PCA GMM -0.008853201564751905
PCAHDBSCAN 0.028903153390157494
alon
Affinity 0.06114066313750697
Spectral -0.00965753681291918
KMeans -0.01832753338552532
GMM -0.041705878302384834
HDBSCAN -0.07204023223578718
PCA KMeans -0.01832753338552532
PCA GMM -0.04814177497890322
PCAHDBSCAN -0.06776579509080834
borovecki
Affinity 0.3498443354896441
Spectral -0.03178565485105675
KMeans 0.15717685829366532




GMM 0.21518987341772153
HDBSCAN 0.15651723244363824
PCA KMeans 0.10807255987285581
PCA GMM 0.27686753679533466
PCAHDBSCAN 0.18848167539267016
chiaretti
Affinity 0.002067838978669255
Spectral -0.014478552554577664
KMeans -0.01183474044815515
GMM 0.011690399378572447
HDBSCAN -0.021070423525464545
PCA KMeans -0.013216039768117636
PCA GMM -0.019860864107804122
PCAHDBSCAN -0.01985584386721365
christensen
Affinity 0.28079936738945627
Spectral 0.20488269028642028
KMeans 0.9828429301705416
GMM 0.9828429301705416
HDBSCAN 0.44599263739995926
PCA KMeans 0.9492317112192555
PCA GMM 0.966345822674811
PCAHDBSCAN 0.48591682416507553
golub
Affinity 0.09794745851353995




Spectral 0.02437970235508023
KMeans 0.16207751501386602
GMM 0.2111546312131075
HDBSCAN 0.052535013845996056
PCA KMeans 0.2111546312131075
PCA GMM -0.05815801223119611
PCAHDBSCAN 0.03254154258020062
gordon
Affinity 0.06443032737658316




Spectral 0.0034126343792930504
KMeans 0.4094357339241926
GMM 0.019886908184780526
HDBSCAN -0.05999462161864832
PCA KMeans 0.3913274977531081
PCA GMM 0.8122761573399956
PCAHDBSCAN 0.03551395193364228
khan
Affinity 0.2192505201591399
Spectral 0.23527599677099087
KMeans 0.06595712786688653
GMM 0.04565391299550357
HDBSCAN 0.19650287185934123
PCA KMeans 0.05299278573331886
PCA GMM 0.06584227303680164
PCAHDBSCAN 0.10001565054081313
sorlie
Affinity 0.3588011382534462
Spectral 0.20883997372679067
KMeans 0.4472746039398899
GMM 0.3752708372881578
HDBSCAN -0.0013327267128984968
PCA KMeans 0.43496208761528765
PCA GMM 0.5961412258826617
PCAHDBSCAN 0.2090726581521507
su
Affinity 0.5717241331174441
Spectral 0.08790084186243431
KMeans 0.8965943968314738
GMM 0.6391711442977579
HDBSCAN 0.7117296249855959
PCA KMeans 0.4162409917380749
PCA GMM 0.3320029787900224
PCAHDBSCAN 0.21515948349438987
yeoh
Affinity 0.18333811544096054
Spectral 0.03656635514018691
KMeans 0.0990670103519137
GMM 0.02909912125417917
H

In [5]:
# Show the scores table with every numeric value rounded to two decimals.
results.round(decimals=2)

Unnamed: 0,AffinityPropagation,GMM,HDBSCAN,KMeans,PCA_GMM,PCA_HDBSCAN,PCA_KMeans,Spectral,dataset
0,0.06,-0.04,-0.07,-0.02,-0.05,-0.07,-0.02,-0.01,alon
1,0.35,0.22,0.16,0.16,0.28,0.19,0.11,-0.03,borovecki
2,0.0,0.01,-0.02,-0.01,-0.02,-0.02,-0.01,-0.01,chiaretti
3,0.28,0.98,0.45,0.98,0.97,0.49,0.95,0.2,christensen
4,0.1,0.21,0.05,0.16,-0.06,0.03,0.21,0.02,golub
5,0.06,0.02,-0.06,0.41,0.81,0.03,0.39,0.0,gordon
6,0.22,0.05,0.2,0.07,0.07,0.1,0.05,0.24,khan
7,0.36,0.38,-0.0,0.45,0.6,0.21,0.43,0.21,sorlie
8,0.57,0.64,0.71,0.9,0.33,0.22,0.42,0.09,su
9,0.18,0.03,-0.0,0.1,0.02,-0.0,0.02,0.04,yeoh


# Supervised analysis of datasets

In [7]:
import scripts.ga_evaluation as ga_evaluation

# Per-dataset summary table: dimensions, class sizes, KMeans ARI on three views
# of the data, and the scores returned by ga_evaluation.random_sampling.
filenames = np.array(['alon', 'borovecki', 'chiaretti', 'christensen', 'golub', 'gordon',
       'khan', 'sorlie', 'su', 'yeoh'])
path = '../data/microarray/'
# NOTE(review): not read anywhere in this cell; kept because other cells may rely on it.
imp_f = np.arange(20)

# Rows are accumulated in a list and the frame is rebuilt from it, because
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0.
result_rows = []
result_df = pd.DataFrame()
for name in filenames:
    data = pd.read_pickle(f'{path}' + name + '.pkl')
    truth = data["truth"].values
    data = data.drop("truth", axis = 1).values

    n_clusters = len(np.unique(truth))

    result = {"Dataset" : name,
             "Original Dimensions": f"{data.shape[0]} x {data.shape[1]}",
             "Cluster sizes" : ", ".join(str(size) for size in Counter(truth).values())}

    # KMeans on every feature.
    predK = KMeans(n_clusters= n_clusters, random_state = 2).fit(data).labels_
    ari_all = adjusted_rand_score(truth, predK)

    # NOTE(review): this score is reported as "ARI top 10 features" but is
    # computed on the first 5 columns only -- confirm whether the slice or the
    # label is the intended one before changing either.
    predK = KMeans(n_clusters= n_clusters, random_state = 2).fit(data[:, :5]).labels_
    ari_top10 = adjusted_rand_score(truth, predK)

    # KMeans on the 2-component PCA projection.
    pca = PCA(2)
    pca_data = pca.fit_transform(data)

    predK = KMeans(n_clusters= n_clusters, random_state = 2).fit(pca_data).labels_
    ari_pca = adjusted_rand_score(truth, predK)

    r1 = ga_evaluation.random_sampling(data, truth, n_clusters, algo = "gmm")
    r2 = ga_evaluation.random_sampling(data, truth, n_clusters, algo = "hdbscan")
    result["ARI all dataset"] = round(ari_all,2)
    result["ARI PCA dataset"] = round(ari_pca,2)
    result["ARI top 10 features"] = round(ari_top10,2)
    result["Random GMM"] = round(r1,2)
    result["Random HDBSCAN"] = round(r2,2)

    # Rebuilt every iteration so result_df holds the partial table if a later
    # dataset fails.
    result_rows.append(result)
    result_df = pd.DataFrame(result_rows)

result_df

100%|██████████| 100/100 [00:01<00:00, 69.46it/s]
100%|██████████| 100/100 [00:00<00:00, 194.23it/s]
100%|██████████| 100/100 [00:01<00:00, 58.98it/s]
100%|██████████| 100/100 [00:00<00:00, 203.61it/s]
100%|██████████| 100/100 [00:06<00:00, 14.40it/s]
100%|██████████| 100/100 [00:00<00:00, 155.62it/s]
100%|██████████| 100/100 [00:03<00:00, 28.13it/s]
100%|██████████| 100/100 [00:01<00:00, 98.18it/s]
100%|██████████| 100/100 [00:03<00:00, 29.34it/s]
100%|██████████| 100/100 [00:00<00:00, 216.55it/s]
100%|██████████| 100/100 [00:03<00:00, 25.93it/s]
100%|██████████| 100/100 [00:00<00:00, 119.02it/s]
100%|██████████| 100/100 [00:03<00:00, 31.65it/s]
100%|██████████| 100/100 [00:00<00:00, 251.22it/s]
100%|██████████| 100/100 [00:04<00:00, 24.76it/s]
100%|██████████| 100/100 [00:00<00:00, 204.72it/s]
100%|██████████| 100/100 [00:04<00:00, 22.32it/s]
100%|██████████| 100/100 [00:00<00:00, 154.53it/s]
100%|██████████| 100/100 [00:11<00:00,  8.82it/s]
100%|██████████| 100/100 [00:01<00:00, 85.

Unnamed: 0,ARI PCA dataset,ARI all dataset,ARI top 10 features,Cluster sizes,Dataset,Original Dimensions,Random GMM,Random HDBSCAN
0,-0.02,-0.02,0.01,"40, 22",alon,62 x 1991,-0.03,0.0
1,0.35,0.54,0.75,"17, 14",borovecki,31 x 22283,0.1,0.0
2,-0.01,0.03,0.28,"37, 74, 10, 5, 1, 1",chiaretti,128 x 12625,0.01,0.0
3,0.95,0.98,0.61,"113, 85, 19",christensen,217 x 1413,0.55,0.16
4,0.21,0.24,0.79,"47, 25",golub,72 x 7129,0.03,0.0
5,0.39,0.61,0.92,"31, 150",gordon,181 x 12533,0.22,0.0
6,0.07,0.08,1.0,"23, 20, 12, 8",khan,63 x 2308,0.1,0.0
7,0.43,0.55,0.4,"14, 11, 13, 15, 32",sorlie,85 x 456,0.13,0.0
8,0.42,0.9,0.83,"25, 26, 28, 23",su,102 x 5563,0.24,0.0
9,0.01,0.09,0.63,"15, 27, 64, 20, 43, 79",yeoh,248 x 12625,0.04,0.0


# Run time analysis

In [None]:
# Load the run-time report; the first spreadsheet column becomes the index.
results = pd.read_excel(io="reports/r_cnn.xlsx", index_col=0)

In [None]:
# Text label per row of the form "<dataset_name> (<input_size> features) ".
results["label"] = (
    results["dataset_name"]
    + " ("
    + results["input_size"].astype(str)
    + " features) "
)

In [None]:
# Per input size, take the minimum of every column, then keep the three timing columns.
perf = (
    results
    .groupby("input_size")
    .min()
    .loc[:, ['t(feature_sel)', 't(cnn)', 't(ga)']]
)

In [None]:
# Replace the raw timing column names with the captions used in the plot legend.
perf = perf.rename(
    {
        't(feature_sel)': '1D Feature ranking',
        't(cnn)': '2D Feature ranking with NN',
        't(ga)': 'Optimization algorithm for top 10 subspaces',
    },
    axis="columns",
)

In [None]:
# Stacked bar chart of the timing columns in `perf`, one bar per input size,
# saved as a PDF. Uses the explicit fig/ax interface instead of pyplot state.
fig, ax = plt.subplots(figsize=(10, 3))
perf.plot(kind='bar', stacked=True, ax=ax, rot=0)  # rot=0 keeps tick labels horizontal
ax.set_ylabel("time (min)")
ax.set_xlabel("Number of dimensions in the input dataset")
# Dataset name spelled as in the data files ("borovecki").
ax.set_title("Run times on West, Khan, Gordon and Borovecki datasets")
sns.despine()
fig.savefig("images/run_times.pdf", bbox_inches='tight')

# Best scores using supervised feature selection

In [8]:
import scripts.ga_evaluation as ga_evaluation
from sklearn import mixture
import hdbscan
# Base names of the dataset pickles processed by the following cells, and the
# directory prefix they are loaded from.
filenames = np.array([
    'alon',
    'borovecki',
    'chiaretti',
    'christensen',
    'golub',
    'gordon',
    'khan',
    'sorlie',
    'su',
    'yeoh',
    'west',
])
path = '../data/microarray/'

In [9]:
# For every dataset, cluster on the first k columns (k = 2..49) with GMM and
# HDBSCAN and print the best ARI each algorithm reaches over all k.
# NOTE(review): presumably the columns are already ordered by relevance, given
# the section title -- nothing in this cell enforces it; confirm upstream.
for name in filenames:
    t1 = time.time()
    frame = pd.read_pickle(f'{path}' + name + '.pkl')
    truth = frame["truth"].values
    data = frame.drop("truth", axis = 1).values
    print(Counter(truth))
    n_clusters = len(np.unique(truth))
    gmm_scores, hdbscan_scores = [], []
    for n_features in range(2, 50):
        input_data = data[:, :n_features]

        gmm = mixture.GaussianMixture(
            n_components=n_clusters, covariance_type="full", random_state=0
        )
        gmm_labels = gmm.fit_predict(input_data)
        gmm_scores.append(adjusted_rand_score(truth, gmm_labels))

        hdbscan_labels = hdbscan.HDBSCAN(min_cluster_size=2).fit(input_data).labels_
        hdbscan_scores.append(adjusted_rand_score(truth, hdbscan_labels))

    print(f"\n\n\n{name} GMM ari = {max(gmm_scores)}, ")
    print(f"{name} HDBSCAN ari = {max(hdbscan_scores)}, ")
    

Counter({1: 40, 0: 22})



alon GMM ari = 0.5408956029134071, 
alon HDBSCAN ari = 0.2614104988950006, 
Counter({1: 17, 0: 14})



borovecki GMM ari = 0.7506518000776613, 
borovecki HDBSCAN ari = 0.442523768366465, 
Counter({3: 74, 1: 37, 0: 10, 2: 5, 5: 1, 4: 1})



chiaretti GMM ari = 0.5027786102396937, 
chiaretti HDBSCAN ari = 0.38195418120210867, 
Counter({0: 113, 1: 85, 2: 19})



christensen GMM ari = 1.0, 
christensen HDBSCAN ari = 0.9662417166002536, 
Counter({0: 47, 1: 25})



golub GMM ari = 1.0, 
golub HDBSCAN ari = 0.6696342951791474, 
Counter({0: 150, 1: 31})



gordon GMM ari = 1.0, 
gordon HDBSCAN ari = 0.8919758343184441, 
Counter({1: 23, 3: 20, 2: 12, 0: 8})



khan GMM ari = 1.0, 
khan HDBSCAN ari = 0.9857833454105082, 
Counter({4: 32, 3: 15, 0: 14, 2: 13, 1: 11})



sorlie GMM ari = 0.7633359805059493, 
sorlie HDBSCAN ari = 0.29016918700404254, 
Counter({2: 28, 1: 26, 0: 25, 3: 23})



su GMM ari = 0.9752497571716532, 
su HDBSCAN ari = 0.8788950801065628, 
Counter({5

# Mutual Information

In [10]:
from sklearn.feature_selection import chi2,  mutual_info_classif, SelectKBest

In [11]:
def _seeded_mutual_info(X, y):
    """Mutual-information score function with a fixed seed.

    mutual_info_classif draws random numbers unless `random_state` is given;
    fixing it makes the selected feature set repeatable regardless of which
    cells ran before this one.
    """
    return mutual_info_classif(X, y, random_state=0)


# For every dataset, keep the 50 features with the highest mutual information
# w.r.t. the true labels, then cluster on the first i of them (i = 2..49) with
# GMM and HDBSCAN and print the best ARI of each algorithm.
for name in filenames:
    data = pd.read_pickle(f'{path}' + name + '.pkl')
    truth = data["truth"].values
    data = data.drop("truth", axis = 1).values
    print(Counter(truth))
    n_clusters = len(np.unique(truth))
    gmm_scores = []
    hdbscan_scores = []
    # k is capped by the number of available columns so narrow matrices do not
    # make SelectKBest raise.
    selector = SelectKBest(_seeded_mutual_info, k=min(50, data.shape[1]))
    sel = selector.fit_transform(data, truth)
    # NOTE(review): SelectKBest returns the kept columns in their original
    # order, not ranked by score, so sel[:, :i] is "the first i of the selected
    # features", not "the i best" -- confirm this is the intended sweep.
    for i in range(2, 50):
        input_data = sel[:, :i]
        gmm = mixture.GaussianMixture(n_components=n_clusters,
                          covariance_type="full", random_state=0)
        pred = gmm.fit_predict(input_data)
        ari = adjusted_rand_score(truth, pred)
        gmm_scores.append(ari)

        pred = hdbscan.HDBSCAN(min_cluster_size =2).fit(input_data).labels_
        ari = adjusted_rand_score(truth, pred)
        hdbscan_scores.append(ari)

    print(f"\n\n\n{name} GMM ari = {max(gmm_scores)}, ")
    print(f"{name} HDBSCAN ari = {max(hdbscan_scores)}, ")

Counter({1: 40, 0: 22})



alon GMM ari = 0.4473743368402122, 
alon HDBSCAN ari = 0.2614104988950006, 
Counter({1: 17, 0: 14})



borovecki GMM ari = 1.0, 
borovecki HDBSCAN ari = 0.775027912169706, 
Counter({3: 74, 1: 37, 0: 10, 2: 5, 5: 1, 4: 1})



chiaretti GMM ari = 0.3627043165289317, 
chiaretti HDBSCAN ari = 0.24742717368780276, 
Counter({0: 113, 1: 85, 2: 19})



christensen GMM ari = 0.9773124968542636, 
christensen HDBSCAN ari = 0.817866868130873, 
Counter({0: 47, 1: 25})



golub GMM ari = 0.9439994667673883, 
golub HDBSCAN ari = 0.6696342951791474, 
Counter({0: 150, 1: 31})



gordon GMM ari = 1.0, 
gordon HDBSCAN ari = 0.9060143786845584, 
Counter({1: 23, 3: 20, 2: 12, 0: 8})



khan GMM ari = 1.0, 
khan HDBSCAN ari = 0.9753291540426958, 
Counter({4: 32, 3: 15, 0: 14, 2: 13, 1: 11})



sorlie GMM ari = 0.7753005995131064, 
sorlie HDBSCAN ari = 0.22462124648462306, 
Counter({2: 28, 1: 26, 0: 25, 3: 23})



su GMM ari = 1.0, 
su HDBSCAN ari = 0.3695008593957911, 
Counter({5:

In [None]:
import scripts.ga_evaluation as ga_evaluation

# GMM and HDBSCAN ARI on each dataset using (at most) the first 5000 features.
filenames = np.array(['alon', 'borovecki', 'chiaretti', 'christensen', 'golub', 'gordon',
       'khan', 'sorlie', 'su', 'yeoh', 'west'])
path = '../data/microarray/'

for name in filenames:
    data = pd.read_pickle(f'{path}' + name + '.pkl')
    truth = data["truth"].values
    input_data = data.drop("truth", axis = 1).values
    # The cap applies to the number of features (columns). The previous check
    # used len(input_data), i.e. the number of samples, so wide datasets were
    # never truncated.
    if input_data.shape[1] > 5000:
        input_data = input_data[:, :5000]

    n_clusters = len(np.unique(truth))
    gmm = mixture.GaussianMixture(n_components=n_clusters,
                      covariance_type="full", random_state=0)
    pred = gmm.fit_predict(input_data)
    ari = adjusted_rand_score(truth, pred)
    print(f"{name} GMM ari = {ari}")

    pred = hdbscan.HDBSCAN(min_cluster_size =2).fit(input_data).labels_
    ari = adjusted_rand_score(truth, pred)
    print(f"{name} HDBSCAN ari = {ari}")

alon GMM ari = -0.041705878302384834
alon HDBSCAN ari = -0.07204023223578718


In [None]:
import scripts.ga_evaluation as ga_evaluation

# HDBSCAN and GMM ARI on the borovecki dataset, restricted to its first 10000
# feature columns.
filenames = np.array(['borovecki'])
path = '../data/microarray/'

for name in filenames:
    t1 = time.time()
    data = pd.read_pickle(f'{path}' + name + '.pkl')
    truth = data["truth"].values
    input_data = data.drop("truth", axis = 1).values[:, :10000]

    n_clusters = len(np.unique(truth))

    # HDBSCAN first, then GMM (same order as the printed output).
    pred = hdbscan.HDBSCAN(min_cluster_size=2).fit(input_data).labels_
    ari = adjusted_rand_score(truth, pred)
    print(f"{name} HDBSCAN ari = {ari}")

    gmm = mixture.GaussianMixture(
        n_components=n_clusters, covariance_type="full", random_state=0
    )
    pred = gmm.fit_predict(input_data)
    ari = adjusted_rand_score(truth, pred)
    print(f"{name} GMM ari = {ari}")




# Execution time analysis


In [None]:
# NOTE(review): this cell repeats the steps of the "Run time analysis" cells
# earlier in the notebook (same input file, same output figure).

# Load the run-time report; the first spreadsheet column becomes the index.
results = pd.read_excel("reports/r_cnn.xlsx", index_col=0)

# Text label per row of the form "<dataset_name> (<input_size> features) ".
results["label"] = results["dataset_name"] + " (" +results["input_size"].astype(str) + " features) "

# Per input size, minimum of the three timing columns.
perf = results.groupby("input_size").min()[['t(feature_sel)', 't(cnn)','t(ga)']]

# Replace the raw column names with the captions shown in the plot legend.
perf = perf.rename(columns = {
    't(feature_sel)' : '1D Feature ranking',
    't(cnn)' : '2D Feature ranking with NN',
    't(ga)': 'Optimization algorithm for top 10 subspaces'
})

# Stacked bar chart, one bar per input size, saved as a PDF. Uses the explicit
# fig/ax interface instead of pyplot state.
fig, ax = plt.subplots(figsize=(10, 3))
perf.plot(kind='bar', stacked=True, ax=ax, rot=0)  # rot=0 keeps tick labels horizontal
ax.set_ylabel("time (min)")
ax.set_xlabel("Number of dimensions in the input dataset")
# Dataset name spelled as in the data files ("borovecki").
ax.set_title("Run times on West, Khan, Gordon and Borovecki datasets")
sns.despine()
fig.savefig("images/run_times.pdf", bbox_inches='tight')