In [1]:
import random
import sys
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.metrics.cluster import adjusted_rand_score
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scripts.data_generator as data_generator
import scripts.feature_ranking as feature_ranking
import scripts.features_2d as features_2d
import scripts.ga as ga
import scripts.preprocess as preprocess
import scripts.ga_evaluation as ga_evaluation
import scripts.bio_analysis as bio_analysis
import tensorflow as tf
from IPython import get_ipython
from keras.backend.tensorflow_backend import set_session
from tqdm import tqdm
from collections import Counter
import seaborn as sns
import time

config = tf.ConfigProto()
config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
config.log_device_placement = True  # to log device placement (on which device the operation ran)
sess = tf.Session(config=config)
set_session(sess)  # set this TensorFlow session as the default

plt.ion()
plt.show()
sys.path.append("..")

random_state=1
random.seed( random_state )
np.random.seed(random_state)


Using TensorFlow backend.


Device mapping:
/job:localhost/replica:0/task:0/device:XLA_CPU:0 -> device: XLA_CPU device
/job:localhost/replica:0/task:0/device:XLA_GPU:0 -> device: XLA_GPU device
/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: GeForce RTX 2060, pci bus id: 0000:01:00.0, compute capability: 7.5



In [2]:
%load_ext autoreload
%autoreload 2

# Steps
- Clustering 1d to select best discriminant features

- Clustering 2d to select redundant, close and outlier features

In [3]:
random_state=0
random.seed( random_state )
np.random.seed(random_state)
results = None

In [5]:
filenames = np.array(["khan","west",  "gordon",'borovecki'])
clustering = "gmm"
path = 'data/microarray/'
method = "adapted_ratkowsky_lance"
imp_f = np.arange(20)
for name in filenames:
    t1 = time.time()
    data = pd.read_pickle(f'{path}' + name + '.pkl')
    truth = data["truth"].values
    data = data.drop("truth", axis = 1).values


    n_clusters = len(np.unique(truth))

    z_file= f"data/microarray/Z_{name}_correlation.npy"
    print(f"\n##########  {name}, {data.shape}")

    # Clustering 1D
    meta_features = feature_ranking.rank_features(data,
                                              nb_bins=20,
                                              rank_threshold=90,
                                              z_file=z_file,
                                              metric='correlation',
                                              redundant_threshold=0.4)
    t2 = time.time()
    print(f"TIME: 1d Features : {(t2-t1)/60} min")

    model_file = f'models/gmm_arl.h5' 

    population, n = features_2d.run(data,
                                n_clusters,
                                meta_features,
                                model_file=model_file,
                                add_close_population=False)
    t3 = time.time()
    t4 = time.time()
    print(f"TIME: 2d scores: {(t4-t3)/60} min")
    round_size = 3
    epochs = 10*round_size

    sampling = {
        "ARCHIVE2D": { 
            "ga": 0.25,
            "max": 0.25 },
        "CLOSE": { 
            "ga": 0.4,
            "max": 0.4 },
        "IMP1D": { 
            "ga": 0.25,
            "max": 0.25 },
        "RANDOM": { 
            "ga": 0.1,
            "max": 0.1},
        }
    params = ga.ga_parameters(
        n_clusters,
        data.shape[1],
        truth,
        meta_features,
        method=method,
        truth_methods=['ari'],
        archive_2d=population[:data.shape[1] // 4],
        epochs=epochs,
        sampling = sampling,
        round_size=round_size,
        allow_subspace_overlap = True,
        improvement_per_mutation_report = False,
        clustering = clustering
        
    )
    solutions, archive= ga.run(data, params)
#     display(params["report"].groupby(["op", "improvement"]).count())
    solutions["dataset_name"] = name
    
    t5 = time.time()
    n_total = t5-t1
    print(f"TIME: GA: {(t5-t4)/60} min")
    print(f"TIME: Total: {(t5-t1)/60} min")
    solutions["total_time"] = round((t5-t1)/60, 1)
    solutions["t(feature_sel)"] = round((t2-t1)/60, 1)
    solutions["t(cnn)"] = round((t3-t2)/60, 1)
    solutions["t(clustering2d)"] = round((t4-t3)/60, 1)
    solutions["t(ga)"] = round((t5-t4)/60, 1)
    
    solutions["n_ga"] = archive.shape[0]
    solutions["n_cnn"] = n_total
    solutions["input_size"] = data.shape[1]
    
    
    solutions["n_non_redundant"] = meta_features[(meta_features["redundant"] ==1) ].shape[0]
    solutions["n_imp"] = meta_features[(meta_features["relevance"] !=0)].shape[0]
    solutions["n_imp4"] = meta_features[(meta_features["relevance"] ==4)].shape[0]
    solutions["n_imp3"] = meta_features[(meta_features["relevance"] ==3)].shape[0]
    
    
    if results is None: 
        results = solutions
    else:
        results = pd.concat([results, solutions], ignore_index = True)
    results.to_excel("reports/microarray_gmm.xlsx")
    


##########  khan, (63, 2308)
*** Computing 1D feature ranking ...
Dispersion tests took 0.02 sec
Entropy computation 0.35 sec
KNN computation 0.18 sec
Sorting and thresholds 0.02 sec
Loading clustering from file
Hierarchical clustering 0.01 sec
Handle redundant features 0.63 sec
Returning 381 redundant features and  83 important features
TIME: 1d Features : 0.020270029703776043 min
*** Exploring 2D feature space with NN ...
trimming 1665 to 963
handle_close_important (963, 3), total 1678, 0.16005289554595947
relevant_features 606 => computing 3586 
trimming 3123 to 381
handle_important_features (1344, 3),  total 3586, 0.16005289554595947
irrelevant_features 1321 => computing 3953
trimming 3042 to 963
handle_not_important_features (2307, 3), total 3953, 0.16005289554595947
handle_all_features 1927 => computing 5772


  0%|          | 0/31 [00:00<?, ?it/s]

trimming 4666 to 1927
handle_all_features (4234, 3),  total 5772, 0.16005289554595947
Returning (4234, 3), explored a total of 14989 feature pairs
TIME: 2d scores: 1.1920928955078126e-08 min
*** Optimization algorithm 
Non redundant features 1927, orig size 2308, nb imp : 83
Selecting (50, 4) from archive


 10%|▉         | 3/31 [00:08<01:21,  2.93s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.161918,0.06,"[15, 35, 60, 66, 91, 112, 118, 148, 159, 258, ...","[3, 3, 3, 0, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 2, ...",95.0,"Counter({1: 27, 2: 14, 3: 13, 0: 9})"


Selecting (34, 4) from archive


 19%|█▉        | 6/31 [00:52<03:21,  8.06s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.161918,0.06,"[15, 35, 60, 66, 91, 112, 118, 148, 159, 258, ...","[3, 3, 3, 0, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 2, ...",95.0,"Counter({1: 27, 2: 14, 3: 13, 0: 9})"
1,0.155023,0.9,"[1, 3, 5, 7, 8, 17, 208, 228, 230, 277, 349, 3...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",23.0,"Counter({1: 23, 3: 19, 2: 11, 0: 10})"


Selecting (46, 4) from archive


 29%|██▉       | 9/31 [01:07<02:03,  5.60s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.161918,0.06,"[15, 35, 60, 66, 91, 112, 118, 148, 159, 258, ...","[3, 3, 3, 0, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 2, ...",95.0,"Counter({1: 27, 2: 14, 3: 13, 0: 9})"
1,0.155023,0.9,"[1, 3, 5, 7, 8, 17, 208, 228, 230, 277, 349, 3...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",23.0,"Counter({1: 23, 3: 19, 2: 11, 0: 10})"
2,0.14553,0.16,"[66, 207, 297, 315, 566, 644, 778, 892, 950, 9...","[3, 3, 3, 2, 3, 0, 3, 3, 3, 3, 2, 2, 2, 1, 2, ...",23.0,"Counter({3: 22, 2: 18, 0: 15, 1: 8})"


Selecting (13, 4) from archive


 39%|███▊      | 12/31 [01:22<01:28,  4.66s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.161918,0.06,"[15, 35, 60, 66, 91, 112, 118, 148, 159, 258, ...","[3, 3, 3, 0, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 2, ...",95.0,"Counter({1: 27, 2: 14, 3: 13, 0: 9})"
1,0.155023,0.9,"[1, 3, 5, 7, 8, 17, 208, 228, 230, 277, 349, 3...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",23.0,"Counter({1: 23, 3: 19, 2: 11, 0: 10})"
2,0.14553,0.16,"[66, 207, 297, 315, 566, 644, 778, 892, 950, 9...","[3, 3, 3, 2, 3, 0, 3, 3, 3, 3, 2, 2, 2, 1, 2, ...",23.0,"Counter({3: 22, 2: 18, 0: 15, 1: 8})"
3,0.151451,0.72,"[1, 3, 5, 7, 8, 9, 22, 99, 186, 228, 230, 277,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",29.0,"Counter({1: 23, 0: 20, 3: 12, 2: 8})"


Selecting (17, 4) from archive


 48%|████▊     | 15/31 [01:42<01:22,  5.18s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.161918,0.06,"[15, 35, 60, 66, 91, 112, 118, 148, 159, 258, ...","[3, 3, 3, 0, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 2, ...",95.0,"Counter({1: 27, 2: 14, 3: 13, 0: 9})"
1,0.155023,0.9,"[1, 3, 5, 7, 8, 17, 208, 228, 230, 277, 349, 3...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",23.0,"Counter({1: 23, 3: 19, 2: 11, 0: 10})"
2,0.14553,0.16,"[66, 207, 297, 315, 566, 644, 778, 892, 950, 9...","[3, 3, 3, 2, 3, 0, 3, 3, 3, 3, 2, 2, 2, 1, 2, ...",23.0,"Counter({3: 22, 2: 18, 0: 15, 1: 8})"
3,0.151451,0.72,"[1, 3, 5, 7, 8, 9, 22, 99, 186, 228, 230, 277,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",29.0,"Counter({1: 23, 0: 20, 3: 12, 2: 8})"
4,0.145732,0.07,"[15, 32, 35, 39, 66, 69, 97, 112, 120, 148, 15...","[0, 0, 0, 0, 1, 2, 1, 1, 1, 1, 0, 0, 0, 0, 3, ...",155.0,"Counter({0: 21, 2: 19, 3: 16, 1: 7})"


Selecting (30, 4) from archive


 58%|█████▊    | 18/31 [03:23<03:44, 17.29s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.161918,0.06,"[15, 35, 60, 66, 91, 112, 118, 148, 159, 258, ...","[3, 3, 3, 0, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 2, ...",95.0,"Counter({1: 27, 2: 14, 3: 13, 0: 9})"
1,0.155023,0.9,"[1, 3, 5, 7, 8, 17, 208, 228, 230, 277, 349, 3...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",23.0,"Counter({1: 23, 3: 19, 2: 11, 0: 10})"
2,0.14553,0.16,"[66, 207, 297, 315, 566, 644, 778, 892, 950, 9...","[3, 3, 3, 2, 3, 0, 3, 3, 3, 3, 2, 2, 2, 1, 2, ...",23.0,"Counter({3: 22, 2: 18, 0: 15, 1: 8})"
3,0.151451,0.72,"[1, 3, 5, 7, 8, 9, 22, 99, 186, 228, 230, 277,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",29.0,"Counter({1: 23, 0: 20, 3: 12, 2: 8})"
4,0.145732,0.07,"[15, 32, 35, 39, 66, 69, 97, 112, 120, 148, 15...","[0, 0, 0, 0, 1, 2, 1, 1, 1, 1, 0, 0, 0, 0, 3, ...",155.0,"Counter({0: 21, 2: 19, 3: 16, 1: 7})"
5,0.13852,0.51,"[3, 7, 8, 9, 81, 131, 230, 277, 607, 728, 993,...","[1, 1, 1, 1, 1, 1, 2, 1, 2, 2, 1, 1, 1, 1, 1, ...",19.0,"Counter({0: 23, 1: 20, 3: 15, 2: 5})"


Selecting (6, 4) from archive


 68%|██████▊   | 21/31 [03:45<01:39,  9.92s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.161918,0.06,"[15, 35, 60, 66, 91, 112, 118, 148, 159, 258, ...","[3, 3, 3, 0, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 2, ...",95.0,"Counter({1: 27, 2: 14, 3: 13, 0: 9})"
1,0.155023,0.9,"[1, 3, 5, 7, 8, 17, 208, 228, 230, 277, 349, 3...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",23.0,"Counter({1: 23, 3: 19, 2: 11, 0: 10})"
2,0.14553,0.16,"[66, 207, 297, 315, 566, 644, 778, 892, 950, 9...","[3, 3, 3, 2, 3, 0, 3, 3, 3, 3, 2, 2, 2, 1, 2, ...",23.0,"Counter({3: 22, 2: 18, 0: 15, 1: 8})"
3,0.151451,0.72,"[1, 3, 5, 7, 8, 9, 22, 99, 186, 228, 230, 277,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",29.0,"Counter({1: 23, 0: 20, 3: 12, 2: 8})"
4,0.145732,0.07,"[15, 32, 35, 39, 66, 69, 97, 112, 120, 148, 15...","[0, 0, 0, 0, 1, 2, 1, 1, 1, 1, 0, 0, 0, 0, 3, ...",155.0,"Counter({0: 21, 2: 19, 3: 16, 1: 7})"
5,0.13852,0.51,"[3, 7, 8, 9, 81, 131, 230, 277, 607, 728, 993,...","[1, 1, 1, 1, 1, 1, 2, 1, 2, 2, 1, 1, 1, 1, 1, ...",19.0,"Counter({0: 23, 1: 20, 3: 15, 2: 5})"
6,0.13741,0.68,"[2, 3, 7, 8, 17, 20, 22, 66, 208, 214, 224, 22...","[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, ...",50.0,"Counter({3: 21, 2: 19, 1: 14, 0: 9})"


Selecting (28, 4) from archive


 77%|███████▋  | 24/31 [04:18<01:02,  8.94s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.161918,0.06,"[15, 35, 60, 66, 91, 112, 118, 148, 159, 258, ...","[3, 3, 3, 0, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 2, ...",95.0,"Counter({1: 27, 2: 14, 3: 13, 0: 9})"
1,0.155023,0.9,"[1, 3, 5, 7, 8, 17, 208, 228, 230, 277, 349, 3...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",23.0,"Counter({1: 23, 3: 19, 2: 11, 0: 10})"
2,0.14553,0.16,"[66, 207, 297, 315, 566, 644, 778, 892, 950, 9...","[3, 3, 3, 2, 3, 0, 3, 3, 3, 3, 2, 2, 2, 1, 2, ...",23.0,"Counter({3: 22, 2: 18, 0: 15, 1: 8})"
3,0.151451,0.72,"[1, 3, 5, 7, 8, 9, 22, 99, 186, 228, 230, 277,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",29.0,"Counter({1: 23, 0: 20, 3: 12, 2: 8})"
4,0.145732,0.07,"[15, 32, 35, 39, 66, 69, 97, 112, 120, 148, 15...","[0, 0, 0, 0, 1, 2, 1, 1, 1, 1, 0, 0, 0, 0, 3, ...",155.0,"Counter({0: 21, 2: 19, 3: 16, 1: 7})"
5,0.13852,0.51,"[3, 7, 8, 9, 81, 131, 230, 277, 607, 728, 993,...","[1, 1, 1, 1, 1, 1, 2, 1, 2, 2, 1, 1, 1, 1, 1, ...",19.0,"Counter({0: 23, 1: 20, 3: 15, 2: 5})"
6,0.13741,0.68,"[2, 3, 7, 8, 17, 20, 22, 66, 208, 214, 224, 22...","[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, ...",50.0,"Counter({3: 21, 2: 19, 1: 14, 0: 9})"
7,0.135438,0.84,"[1, 3, 6, 7, 9, 11, 14, 22, 23, 99, 107, 186, ...","[2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",53.0,"Counter({3: 21, 1: 19, 0: 12, 2: 11})"


Selecting (11, 4) from archive


 87%|████████▋ | 27/31 [05:08<00:44, 11.11s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.161918,0.06,"[15, 35, 60, 66, 91, 112, 118, 148, 159, 258, ...","[3, 3, 3, 0, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 2, ...",95.0,"Counter({1: 27, 2: 14, 3: 13, 0: 9})"
1,0.155023,0.9,"[1, 3, 5, 7, 8, 17, 208, 228, 230, 277, 349, 3...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",23.0,"Counter({1: 23, 3: 19, 2: 11, 0: 10})"
2,0.14553,0.16,"[66, 207, 297, 315, 566, 644, 778, 892, 950, 9...","[3, 3, 3, 2, 3, 0, 3, 3, 3, 3, 2, 2, 2, 1, 2, ...",23.0,"Counter({3: 22, 2: 18, 0: 15, 1: 8})"
3,0.151451,0.72,"[1, 3, 5, 7, 8, 9, 22, 99, 186, 228, 230, 277,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",29.0,"Counter({1: 23, 0: 20, 3: 12, 2: 8})"
4,0.145732,0.07,"[15, 32, 35, 39, 66, 69, 97, 112, 120, 148, 15...","[0, 0, 0, 0, 1, 2, 1, 1, 1, 1, 0, 0, 0, 0, 3, ...",155.0,"Counter({0: 21, 2: 19, 3: 16, 1: 7})"
5,0.13852,0.51,"[3, 7, 8, 9, 81, 131, 230, 277, 607, 728, 993,...","[1, 1, 1, 1, 1, 1, 2, 1, 2, 2, 1, 1, 1, 1, 1, ...",19.0,"Counter({0: 23, 1: 20, 3: 15, 2: 5})"
6,0.13741,0.68,"[2, 3, 7, 8, 17, 20, 22, 66, 208, 214, 224, 22...","[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, ...",50.0,"Counter({3: 21, 2: 19, 1: 14, 0: 9})"
7,0.135438,0.84,"[1, 3, 6, 7, 9, 11, 14, 22, 23, 99, 107, 186, ...","[2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",53.0,"Counter({3: 21, 1: 19, 0: 12, 2: 11})"
8,0.143132,0.04,"[34, 35, 39, 60, 66, 79, 112, 144, 151, 155, 2...","[0, 0, 0, 3, 1, 1, 1, 1, 1, 1, 3, 3, 3, 0, 2, ...",82.0,"Counter({1: 25, 2: 16, 0: 13, 3: 9})"


Selecting (12, 4) from archive


 97%|█████████▋| 30/31 [06:14<00:14, 14.34s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.161918,0.06,"[15, 35, 60, 66, 91, 112, 118, 148, 159, 258, ...","[3, 3, 3, 0, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 2, ...",95.0,"Counter({1: 27, 2: 14, 3: 13, 0: 9})"
1,0.155023,0.9,"[1, 3, 5, 7, 8, 17, 208, 228, 230, 277, 349, 3...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",23.0,"Counter({1: 23, 3: 19, 2: 11, 0: 10})"
2,0.14553,0.16,"[66, 207, 297, 315, 566, 644, 778, 892, 950, 9...","[3, 3, 3, 2, 3, 0, 3, 3, 3, 3, 2, 2, 2, 1, 2, ...",23.0,"Counter({3: 22, 2: 18, 0: 15, 1: 8})"
3,0.151451,0.72,"[1, 3, 5, 7, 8, 9, 22, 99, 186, 228, 230, 277,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",29.0,"Counter({1: 23, 0: 20, 3: 12, 2: 8})"
4,0.145732,0.07,"[15, 32, 35, 39, 66, 69, 97, 112, 120, 148, 15...","[0, 0, 0, 0, 1, 2, 1, 1, 1, 1, 0, 0, 0, 0, 3, ...",155.0,"Counter({0: 21, 2: 19, 3: 16, 1: 7})"
5,0.13852,0.51,"[3, 7, 8, 9, 81, 131, 230, 277, 607, 728, 993,...","[1, 1, 1, 1, 1, 1, 2, 1, 2, 2, 1, 1, 1, 1, 1, ...",19.0,"Counter({0: 23, 1: 20, 3: 15, 2: 5})"
6,0.13741,0.68,"[2, 3, 7, 8, 17, 20, 22, 66, 208, 214, 224, 22...","[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, ...",50.0,"Counter({3: 21, 2: 19, 1: 14, 0: 9})"
7,0.135438,0.84,"[1, 3, 6, 7, 9, 11, 14, 22, 23, 99, 107, 186, ...","[2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",53.0,"Counter({3: 21, 1: 19, 0: 12, 2: 11})"
8,0.143132,0.04,"[34, 35, 39, 60, 66, 79, 112, 144, 151, 155, 2...","[0, 0, 0, 3, 1, 1, 1, 1, 1, 1, 3, 3, 3, 0, 2, ...",82.0,"Counter({1: 25, 2: 16, 0: 13, 3: 9})"
9,0.134957,0.1,"[66, 125, 142, 290, 297, 375, 618, 655, 877, 8...","[2, 2, 2, 1, 2, 0, 0, 2, 0, 2, 1, 1, 1, 2, 1, ...",18.0,"Counter({2: 22, 0: 22, 1: 14, 3: 5})"


Selecting (12, 4) from archive


100%|██████████| 31/31 [06:28<00:00, 12.52s/it]


TIME: GA: 6.469065574804942 min
TIME: Total: 6.583194863796234 min

##########  west, (49, 6858)
*** Computing 1D feature ranking ...
Dispersion tests took 0.14 sec
Entropy computation 1.02 sec
KNN computation 1.63 sec
Sorting and thresholds 0.04 sec
Loading clustering from file
Hierarchical clustering 0.03 sec
Handle redundant features 1.68 sec
Returning 1530 redundant features and  198 important features
TIME: 1d Features : 0.07581042051315308 min
*** Exploring 2D feature space with NN ...
trimming 5128 to 2664
handle_close_important (2664, 3), total 5185, 0.2692382335662842
relevant_features 1842 => computing 11007 
trimming 10647 to 1530
handle_important_features (4194, 3),  total 11007, 0.2692382335662842
irrelevant_features 3486 => computing 10455
trimming 9720 to 2664
handle_not_important_features (6858, 3), total 10455, 0.2692382335662842
handle_all_features 5328 => computing 15974


  0%|          | 0/31 [00:00<?, ?it/s]

trimming 15038 to 5328
handle_all_features (12186, 3),  total 15974, 0.2692382335662842
Returning (12186, 3), explored a total of 42621 feature pairs
TIME: 2d scores: 7.947285970052083e-09 min
*** Optimization algorithm 
Non redundant features 5328, orig size 6858, nb imp : 198
Selecting (50, 4) from archive


 10%|▉         | 3/31 [00:08<01:21,  2.92s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.295179,-0.01,"[450, 947, 1419, 1443, 1523, 2605, 2794, 2833,...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, ...",16.0,"Counter({0: 25, 1: 24})"


Selecting (15, 4) from archive


 19%|█▉        | 6/31 [00:21<01:25,  3.40s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.295179,-0.01,"[450, 947, 1419, 1443, 1523, 2605, 2794, 2833,...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, ...",16.0,"Counter({0: 25, 1: 24})"
1,0.293279,-0.0,"[83, 719, 1100, 1429, 4179, 4966, 5058, 6291]","[0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, ...",8.0,"Counter({0: 44, 1: 5})"


Selecting (5, 4) from archive


 29%|██▉       | 9/31 [00:33<01:18,  3.57s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.295179,-0.01,"[450, 947, 1419, 1443, 1523, 2605, 2794, 2833,...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, ...",16.0,"Counter({0: 25, 1: 24})"
1,0.293279,-0.0,"[83, 719, 1100, 1429, 4179, 4966, 5058, 6291]","[0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, ...",8.0,"Counter({0: 44, 1: 5})"
2,0.275204,0.01,"[799, 2865, 2955, 3004, 3112, 4516, 6834]","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",7.0,"Counter({1: 40, 0: 9})"


Selecting (10, 4) from archive


 39%|███▊      | 12/31 [00:46<01:07,  3.55s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.295179,-0.01,"[450, 947, 1419, 1443, 1523, 2605, 2794, 2833,...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, ...",16.0,"Counter({0: 25, 1: 24})"
1,0.293279,-0.0,"[83, 719, 1100, 1429, 4179, 4966, 5058, 6291]","[0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, ...",8.0,"Counter({0: 44, 1: 5})"
2,0.275204,0.01,"[799, 2865, 2955, 3004, 3112, 4516, 6834]","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",7.0,"Counter({1: 40, 0: 9})"
3,0.298993,-0.01,"[31, 537, 605, 735, 1100, 1627, 2442, 4090, 43...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, ...",15.0,"Counter({0: 45, 1: 4})"


Selecting (9, 4) from archive


 48%|████▊     | 15/31 [01:06<01:24,  5.27s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.295179,-0.01,"[450, 947, 1419, 1443, 1523, 2605, 2794, 2833,...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, ...",16.0,"Counter({0: 25, 1: 24})"
1,0.293279,-0.0,"[83, 719, 1100, 1429, 4179, 4966, 5058, 6291]","[0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, ...",8.0,"Counter({0: 44, 1: 5})"
2,0.275204,0.01,"[799, 2865, 2955, 3004, 3112, 4516, 6834]","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",7.0,"Counter({1: 40, 0: 9})"
3,0.298993,-0.01,"[31, 537, 605, 735, 1100, 1627, 2442, 4090, 43...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, ...",15.0,"Counter({0: 45, 1: 4})"
4,0.268718,0.03,"[300, 1356, 4565, 4571, 4593, 5840, 6719]","[0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, ...",7.0,"Counter({0: 45, 1: 4})"


Selecting (4, 4) from archive


 58%|█████▊    | 18/31 [01:29<01:21,  6.26s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.295179,-0.01,"[450, 947, 1419, 1443, 1523, 2605, 2794, 2833,...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, ...",16.0,"Counter({0: 25, 1: 24})"
1,0.293279,-0.0,"[83, 719, 1100, 1429, 4179, 4966, 5058, 6291]","[0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, ...",8.0,"Counter({0: 44, 1: 5})"
2,0.275204,0.01,"[799, 2865, 2955, 3004, 3112, 4516, 6834]","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",7.0,"Counter({1: 40, 0: 9})"
3,0.298993,-0.01,"[31, 537, 605, 735, 1100, 1627, 2442, 4090, 43...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, ...",15.0,"Counter({0: 45, 1: 4})"
4,0.268718,0.03,"[300, 1356, 4565, 4571, 4593, 5840, 6719]","[0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, ...",7.0,"Counter({0: 45, 1: 4})"
5,0.254098,0.03,"[344, 599, 726, 1141, 1405, 1576, 2585, 3240]","[0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, ...",8.0,"Counter({0: 45, 1: 4})"


Selecting (19, 4) from archive


 68%|██████▊   | 21/31 [01:43<00:47,  4.76s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.295179,-0.01,"[450, 947, 1419, 1443, 1523, 2605, 2794, 2833,...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, ...",16.0,"Counter({0: 25, 1: 24})"
1,0.293279,-0.0,"[83, 719, 1100, 1429, 4179, 4966, 5058, 6291]","[0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, ...",8.0,"Counter({0: 44, 1: 5})"
2,0.275204,0.01,"[799, 2865, 2955, 3004, 3112, 4516, 6834]","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",7.0,"Counter({1: 40, 0: 9})"
3,0.298993,-0.01,"[31, 537, 605, 735, 1100, 1627, 2442, 4090, 43...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, ...",15.0,"Counter({0: 45, 1: 4})"
4,0.268718,0.03,"[300, 1356, 4565, 4571, 4593, 5840, 6719]","[0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, ...",7.0,"Counter({0: 45, 1: 4})"
5,0.254098,0.03,"[344, 599, 726, 1141, 1405, 1576, 2585, 3240]","[0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, ...",8.0,"Counter({0: 45, 1: 4})"
6,0.278622,-0.02,"[3395, 3889, 5179, 5188, 5400, 5401]","[0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, ...",6.0,"Counter({0: 29, 1: 20})"


Selecting (8, 4) from archive


 77%|███████▋  | 24/31 [01:56<00:27,  3.98s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.295179,-0.01,"[450, 947, 1419, 1443, 1523, 2605, 2794, 2833,...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, ...",16.0,"Counter({0: 25, 1: 24})"
1,0.293279,-0.0,"[83, 719, 1100, 1429, 4179, 4966, 5058, 6291]","[0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, ...",8.0,"Counter({0: 44, 1: 5})"
2,0.275204,0.01,"[799, 2865, 2955, 3004, 3112, 4516, 6834]","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",7.0,"Counter({1: 40, 0: 9})"
3,0.298993,-0.01,"[31, 537, 605, 735, 1100, 1627, 2442, 4090, 43...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, ...",15.0,"Counter({0: 45, 1: 4})"
4,0.268718,0.03,"[300, 1356, 4565, 4571, 4593, 5840, 6719]","[0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, ...",7.0,"Counter({0: 45, 1: 4})"
5,0.254098,0.03,"[344, 599, 726, 1141, 1405, 1576, 2585, 3240]","[0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, ...",8.0,"Counter({0: 45, 1: 4})"
6,0.278622,-0.02,"[3395, 3889, 5179, 5188, 5400, 5401]","[0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, ...",6.0,"Counter({0: 29, 1: 20})"
7,0.264018,0.0,"[5, 144, 187, 208, 384, 421, 516, 538, 591, 66...","[0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, ...",156.0,"Counter({0: 45, 1: 4})"


Selecting (19, 4) from archive


 87%|████████▋ | 27/31 [02:36<00:31,  7.79s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.295179,-0.01,"[450, 947, 1419, 1443, 1523, 2605, 2794, 2833,...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, ...",16.0,"Counter({0: 25, 1: 24})"
1,0.293279,-0.0,"[83, 719, 1100, 1429, 4179, 4966, 5058, 6291]","[0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, ...",8.0,"Counter({0: 44, 1: 5})"
2,0.275204,0.01,"[799, 2865, 2955, 3004, 3112, 4516, 6834]","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",7.0,"Counter({1: 40, 0: 9})"
3,0.298993,-0.01,"[31, 537, 605, 735, 1100, 1627, 2442, 4090, 43...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, ...",15.0,"Counter({0: 45, 1: 4})"
4,0.268718,0.03,"[300, 1356, 4565, 4571, 4593, 5840, 6719]","[0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, ...",7.0,"Counter({0: 45, 1: 4})"
5,0.254098,0.03,"[344, 599, 726, 1141, 1405, 1576, 2585, 3240]","[0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, ...",8.0,"Counter({0: 45, 1: 4})"
6,0.278622,-0.02,"[3395, 3889, 5179, 5188, 5400, 5401]","[0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, ...",6.0,"Counter({0: 29, 1: 20})"
7,0.264018,0.0,"[5, 144, 187, 208, 384, 421, 516, 538, 591, 66...","[0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, ...",156.0,"Counter({0: 45, 1: 4})"
8,0.256672,0.0,"[646, 679, 1573, 4709]","[0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, ...",4.0,"Counter({0: 45, 1: 4})"


Selecting (6, 4) from archive


 97%|█████████▋| 30/31 [02:46<00:04,  4.76s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.295179,-0.01,"[450, 947, 1419, 1443, 1523, 2605, 2794, 2833,...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, ...",16.0,"Counter({0: 25, 1: 24})"
1,0.293279,-0.0,"[83, 719, 1100, 1429, 4179, 4966, 5058, 6291]","[0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, ...",8.0,"Counter({0: 44, 1: 5})"
2,0.275204,0.01,"[799, 2865, 2955, 3004, 3112, 4516, 6834]","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",7.0,"Counter({1: 40, 0: 9})"
3,0.298993,-0.01,"[31, 537, 605, 735, 1100, 1627, 2442, 4090, 43...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, ...",15.0,"Counter({0: 45, 1: 4})"
4,0.268718,0.03,"[300, 1356, 4565, 4571, 4593, 5840, 6719]","[0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, ...",7.0,"Counter({0: 45, 1: 4})"
5,0.254098,0.03,"[344, 599, 726, 1141, 1405, 1576, 2585, 3240]","[0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, ...",8.0,"Counter({0: 45, 1: 4})"
6,0.278622,-0.02,"[3395, 3889, 5179, 5188, 5400, 5401]","[0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, ...",6.0,"Counter({0: 29, 1: 20})"
7,0.264018,0.0,"[5, 144, 187, 208, 384, 421, 516, 538, 591, 66...","[0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, ...",156.0,"Counter({0: 45, 1: 4})"
8,0.256672,0.0,"[646, 679, 1573, 4709]","[0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, ...",4.0,"Counter({0: 45, 1: 4})"
9,0.243788,-0.01,"[508, 1564, 2313, 3761]","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, ...",4.0,"Counter({1: 45, 0: 4})"


Selecting (8, 4) from archive


100%|██████████| 31/31 [02:54<00:00,  5.62s/it]


TIME: GA: 2.9015302737553914 min
TIME: Total: 3.280222713947296 min

##########  gordon, (181, 12533)
*** Computing 1D feature ranking ...
Dispersion tests took 0.96 sec
Entropy computation 2.97 sec
KNN computation 20.62 sec
Sorting and thresholds 0.06 sec
Loading clustering from file
Hierarchical clustering 0.05 sec
Handle redundant features 1.68 sec
Returning 733 redundant features and  351 important features
TIME: 1d Features : 0.4396177927652995 min
*** Exploring 2D feature space with NN ...
trimming 8433 to 5900
handle_close_important (5900, 3), total 10261, 0.24181434512138367
relevant_features 3626 => computing 21713 
trimming 10997 to 733
handle_important_features (6633, 3),  total 21713, 0.24181434512138367
irrelevant_features 8174 => computing 24509
handle_not_important_features (11664, 3), total 24509, 0.24181434512138367
handle_all_features 11800 => computing 35392


  0%|          | 0/31 [00:00<?, ?it/s]

trimming 10812 to 10000
handle_all_features (21664, 3),  total 35392, 0.24181434512138367
Returning (21664, 3), explored a total of 91875 feature pairs
TIME: 2d scores: 1.5894571940104166e-08 min
*** Optimization algorithm 
Non redundant features 11800, orig size 12533, nb imp : 351
Selecting (50, 4) from archive


 10%|▉         | 3/31 [00:19<03:12,  6.88s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.270827,0.86,"[0, 1, 3, 6, 33, 308, 534, 1350, 2195, 2587, 2...","[1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",36.0,"Counter({0: 155, 1: 26})"


Selecting (23, 4) from archive


 19%|█▉        | 6/31 [00:46<02:53,  6.92s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.270827,0.86,"[0, 1, 3, 6, 33, 308, 534, 1350, 2195, 2587, 2...","[1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",36.0,"Counter({0: 155, 1: 26})"
1,0.265767,0.41,"[86, 137, 229, 281, 792, 941, 1654, 1774, 1871...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, ...",42.0,"Counter({0: 170, 1: 11})"


Selecting (6, 4) from archive


 29%|██▉       | 9/31 [01:38<03:55, 10.68s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.270827,0.86,"[0, 1, 3, 6, 33, 308, 534, 1350, 2195, 2587, 2...","[1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",36.0,"Counter({0: 155, 1: 26})"
1,0.265767,0.41,"[86, 137, 229, 281, 792, 941, 1654, 1774, 1871...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, ...",42.0,"Counter({0: 170, 1: 11})"
2,0.227398,-0.04,"[625, 1628, 1955, 2961, 4499, 6459, 11364]","[1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, ...",7.0,"Counter({1: 119, 0: 62})"


Selecting (1, 4) from archive


 39%|███▊      | 12/31 [01:53<02:04,  6.55s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.270827,0.86,"[0, 1, 3, 6, 33, 308, 534, 1350, 2195, 2587, 2...","[1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",36.0,"Counter({0: 155, 1: 26})"
1,0.265767,0.41,"[86, 137, 229, 281, 792, 941, 1654, 1774, 1871...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, ...",42.0,"Counter({0: 170, 1: 11})"
2,0.227398,-0.04,"[625, 1628, 1955, 2961, 4499, 6459, 11364]","[1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, ...",7.0,"Counter({1: 119, 0: 62})"
3,0.214624,-0.11,"[4812, 4813, 5229, 6798, 8199]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",5.0,"Counter({0: 146, 1: 35})"


Selecting (3, 4) from archive


 48%|████▊     | 15/31 [02:09<01:22,  5.14s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.270827,0.86,"[0, 1, 3, 6, 33, 308, 534, 1350, 2195, 2587, 2...","[1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",36.0,"Counter({0: 155, 1: 26})"
1,0.265767,0.41,"[86, 137, 229, 281, 792, 941, 1654, 1774, 1871...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, ...",42.0,"Counter({0: 170, 1: 11})"
2,0.227398,-0.04,"[625, 1628, 1955, 2961, 4499, 6459, 11364]","[1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, ...",7.0,"Counter({1: 119, 0: 62})"
3,0.214624,-0.11,"[4812, 4813, 5229, 6798, 8199]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",5.0,"Counter({0: 146, 1: 35})"
4,0.220603,0.01,"[165, 9353]","[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",2.0,"Counter({0: 106, 1: 75})"


Selecting (4, 4) from archive


 58%|█████▊    | 18/31 [02:23<00:58,  4.53s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.270827,0.86,"[0, 1, 3, 6, 33, 308, 534, 1350, 2195, 2587, 2...","[1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",36.0,"Counter({0: 155, 1: 26})"
1,0.265767,0.41,"[86, 137, 229, 281, 792, 941, 1654, 1774, 1871...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, ...",42.0,"Counter({0: 170, 1: 11})"
2,0.227398,-0.04,"[625, 1628, 1955, 2961, 4499, 6459, 11364]","[1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, ...",7.0,"Counter({1: 119, 0: 62})"
3,0.214624,-0.11,"[4812, 4813, 5229, 6798, 8199]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",5.0,"Counter({0: 146, 1: 35})"
4,0.220603,0.01,"[165, 9353]","[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",2.0,"Counter({0: 106, 1: 75})"
5,0.213329,0.02,"[2984, 8147]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",2.0,"Counter({0: 151, 1: 30})"


Selecting (1, 4) from archive


 68%|██████▊   | 21/31 [02:36<00:41,  4.14s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.270827,0.86,"[0, 1, 3, 6, 33, 308, 534, 1350, 2195, 2587, 2...","[1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",36.0,"Counter({0: 155, 1: 26})"
1,0.265767,0.41,"[86, 137, 229, 281, 792, 941, 1654, 1774, 1871...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, ...",42.0,"Counter({0: 170, 1: 11})"
2,0.227398,-0.04,"[625, 1628, 1955, 2961, 4499, 6459, 11364]","[1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, ...",7.0,"Counter({1: 119, 0: 62})"
3,0.214624,-0.11,"[4812, 4813, 5229, 6798, 8199]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",5.0,"Counter({0: 146, 1: 35})"
4,0.220603,0.01,"[165, 9353]","[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",2.0,"Counter({0: 106, 1: 75})"
5,0.213329,0.02,"[2984, 8147]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",2.0,"Counter({0: 151, 1: 30})"
6,0.22197,-0.04,"[1242, 2485, 3123, 4435, 5802, 7926, 8706, 106...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",9.0,"Counter({0: 175, 1: 6})"


Selecting (5, 4) from archive


 77%|███████▋  | 24/31 [02:52<00:30,  4.34s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.270827,0.86,"[0, 1, 3, 6, 33, 308, 534, 1350, 2195, 2587, 2...","[1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",36.0,"Counter({0: 155, 1: 26})"
1,0.265767,0.41,"[86, 137, 229, 281, 792, 941, 1654, 1774, 1871...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, ...",42.0,"Counter({0: 170, 1: 11})"
2,0.227398,-0.04,"[625, 1628, 1955, 2961, 4499, 6459, 11364]","[1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, ...",7.0,"Counter({1: 119, 0: 62})"
3,0.214624,-0.11,"[4812, 4813, 5229, 6798, 8199]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",5.0,"Counter({0: 146, 1: 35})"
4,0.220603,0.01,"[165, 9353]","[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",2.0,"Counter({0: 106, 1: 75})"
5,0.213329,0.02,"[2984, 8147]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",2.0,"Counter({0: 151, 1: 30})"
6,0.22197,-0.04,"[1242, 2485, 3123, 4435, 5802, 7926, 8706, 106...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",9.0,"Counter({0: 175, 1: 6})"
7,0.211467,0.0,"[616, 4600]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",2.0,"Counter({0: 130, 1: 51})"


Selecting (1, 4) from archive


 87%|████████▋ | 27/31 [03:05<00:16,  4.05s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.270827,0.86,"[0, 1, 3, 6, 33, 308, 534, 1350, 2195, 2587, 2...","[1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",36.0,"Counter({0: 155, 1: 26})"
1,0.265767,0.41,"[86, 137, 229, 281, 792, 941, 1654, 1774, 1871...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, ...",42.0,"Counter({0: 170, 1: 11})"
2,0.227398,-0.04,"[625, 1628, 1955, 2961, 4499, 6459, 11364]","[1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, ...",7.0,"Counter({1: 119, 0: 62})"
3,0.214624,-0.11,"[4812, 4813, 5229, 6798, 8199]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",5.0,"Counter({0: 146, 1: 35})"
4,0.220603,0.01,"[165, 9353]","[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",2.0,"Counter({0: 106, 1: 75})"
5,0.213329,0.02,"[2984, 8147]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",2.0,"Counter({0: 151, 1: 30})"
6,0.22197,-0.04,"[1242, 2485, 3123, 4435, 5802, 7926, 8706, 106...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",9.0,"Counter({0: 175, 1: 6})"
7,0.211467,0.0,"[616, 4600]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",2.0,"Counter({0: 130, 1: 51})"
8,0.215367,-0.11,"[5, 370, 371, 1483, 1484, 1518, 2461, 4073, 41...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",11.0,"Counter({0: 147, 1: 34})"


Selecting (26, 4) from archive


 97%|█████████▋| 30/31 [03:21<00:04,  4.37s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.270827,0.86,"[0, 1, 3, 6, 33, 308, 534, 1350, 2195, 2587, 2...","[1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",36.0,"Counter({0: 155, 1: 26})"
1,0.265767,0.41,"[86, 137, 229, 281, 792, 941, 1654, 1774, 1871...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, ...",42.0,"Counter({0: 170, 1: 11})"
2,0.227398,-0.04,"[625, 1628, 1955, 2961, 4499, 6459, 11364]","[1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, ...",7.0,"Counter({1: 119, 0: 62})"
3,0.214624,-0.11,"[4812, 4813, 5229, 6798, 8199]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",5.0,"Counter({0: 146, 1: 35})"
4,0.220603,0.01,"[165, 9353]","[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",2.0,"Counter({0: 106, 1: 75})"
5,0.213329,0.02,"[2984, 8147]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",2.0,"Counter({0: 151, 1: 30})"
6,0.22197,-0.04,"[1242, 2485, 3123, 4435, 5802, 7926, 8706, 106...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",9.0,"Counter({0: 175, 1: 6})"
7,0.211467,0.0,"[616, 4600]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",2.0,"Counter({0: 130, 1: 51})"
8,0.215367,-0.11,"[5, 370, 371, 1483, 1484, 1518, 2461, 4073, 41...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",11.0,"Counter({0: 147, 1: 34})"
9,0.227763,-0.04,"[320, 380, 1034, 1429, 1518, 1521, 1756, 2343,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",19.0,"Counter({0: 175, 1: 6})"


Selecting (31, 4) from archive


100%|██████████| 31/31 [03:32<00:00,  6.84s/it]


TIME: GA: 3.5338881929715473 min
TIME: Total: 5.266117990016937 min

##########  borovecki, (31, 22283)
*** Computing 1D feature ranking ...
Dispersion tests took 1.17 sec
Entropy computation 2.76 sec
KNN computation 12.2 sec
Sorting and thresholds 0.09 sec
Loading clustering from file
Hierarchical clustering 0.08 sec
Handle redundant features 8.17 sec
Returning 6898 redundant features and  677 important features
TIME: 1d Features : 0.40841541687647503 min
*** Exploring 2D feature space with NN ...
trimming 17727 to 7692
handle_close_important (7692, 3), total 17757, 0.3080889582633972
relevant_features 6261 => computing 37522 
trimming 37111 to 6898
handle_important_features (14590, 3),  total 37522, 0.3080889582633972
irrelevant_features 9124 => computing 27357
trimming 26674 to 7692
handle_not_important_features (22282, 3), total 27357, 0.3080889582633972
handle_all_features 15385 => computing 46145


  0%|          | 0/31 [00:00<?, ?it/s]

trimming 45306 to 10000
handle_all_features (32282, 3),  total 46145, 0.3080889582633972
Returning (32282, 3), explored a total of 128781 feature pairs
TIME: 2d scores: 7.947285970052083e-09 min
*** Optimization algorithm 
Non redundant features 15385, orig size 22283, nb imp : 677
Selecting (50, 4) from archive


 10%|▉         | 3/31 [00:07<01:17,  2.77s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.338797,-0.03,"[584, 724, 1036, 1073, 2960, 3060, 3175, 3325,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, ...",80.0,"Counter({1: 22, 0: 9})"


Selecting (16, 4) from archive


 19%|█▉        | 6/31 [01:02<04:07,  9.92s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.338797,-0.03,"[584, 724, 1036, 1073, 2960, 3060, 3175, 3325,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, ...",80.0,"Counter({1: 22, 0: 9})"
1,0.331147,0.54,"[2223, 2697, 2840, 2844, 2938, 2965, 2983, 298...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ...",163.0,"Counter({1: 18, 0: 13})"


Selecting (11, 4) from archive


 29%|██▉       | 9/31 [02:06<05:06, 13.92s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.338797,-0.03,"[584, 724, 1036, 1073, 2960, 3060, 3175, 3325,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, ...",80.0,"Counter({1: 22, 0: 9})"
1,0.331147,0.54,"[2223, 2697, 2840, 2844, 2938, 2965, 2983, 298...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ...",163.0,"Counter({1: 18, 0: 13})"
2,0.302408,-0.01,"[4297, 5424, 8814, 9834, 12350, 13852, 13874, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ...",11.0,"Counter({0: 27, 1: 4})"


Selecting (3, 4) from archive


 39%|███▊      | 12/31 [02:33<03:04,  9.72s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.338797,-0.03,"[584, 724, 1036, 1073, 2960, 3060, 3175, 3325,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, ...",80.0,"Counter({1: 22, 0: 9})"
1,0.331147,0.54,"[2223, 2697, 2840, 2844, 2938, 2965, 2983, 298...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ...",163.0,"Counter({1: 18, 0: 13})"
2,0.302408,-0.01,"[4297, 5424, 8814, 9834, 12350, 13852, 13874, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ...",11.0,"Counter({0: 27, 1: 4})"
3,0.30613,0.03,"[106, 12273, 12759, 13366, 16318, 19716]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",6.0,"Counter({0: 27, 1: 4})"


Selecting (1, 4) from archive


 48%|████▊     | 15/31 [02:46<01:30,  5.68s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.338797,-0.03,"[584, 724, 1036, 1073, 2960, 3060, 3175, 3325,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, ...",80.0,"Counter({1: 22, 0: 9})"
1,0.331147,0.54,"[2223, 2697, 2840, 2844, 2938, 2965, 2983, 298...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ...",163.0,"Counter({1: 18, 0: 13})"
2,0.302408,-0.01,"[4297, 5424, 8814, 9834, 12350, 13852, 13874, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ...",11.0,"Counter({0: 27, 1: 4})"
3,0.30613,0.03,"[106, 12273, 12759, 13366, 16318, 19716]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",6.0,"Counter({0: 27, 1: 4})"
4,0.293582,0.03,"[1140, 1451, 1875, 2064, 3432, 5760, 7451, 826...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",19.0,"Counter({0: 27, 1: 4})"


Selecting (15, 4) from archive


 58%|█████▊    | 18/31 [02:57<00:53,  4.13s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.338797,-0.03,"[584, 724, 1036, 1073, 2960, 3060, 3175, 3325,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, ...",80.0,"Counter({1: 22, 0: 9})"
1,0.331147,0.54,"[2223, 2697, 2840, 2844, 2938, 2965, 2983, 298...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ...",163.0,"Counter({1: 18, 0: 13})"
2,0.302408,-0.01,"[4297, 5424, 8814, 9834, 12350, 13852, 13874, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ...",11.0,"Counter({0: 27, 1: 4})"
3,0.30613,0.03,"[106, 12273, 12759, 13366, 16318, 19716]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",6.0,"Counter({0: 27, 1: 4})"
4,0.293582,0.03,"[1140, 1451, 1875, 2064, 3432, 5760, 7451, 826...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",19.0,"Counter({0: 27, 1: 4})"
5,0.316202,0.03,"[465, 1396, 2149, 2286, 3232, 4794, 5484, 9286...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",15.0,"Counter({0: 27, 1: 4})"


Selecting (5, 4) from archive


 68%|██████▊   | 21/31 [03:09<00:35,  3.57s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.338797,-0.03,"[584, 724, 1036, 1073, 2960, 3060, 3175, 3325,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, ...",80.0,"Counter({1: 22, 0: 9})"
1,0.331147,0.54,"[2223, 2697, 2840, 2844, 2938, 2965, 2983, 298...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ...",163.0,"Counter({1: 18, 0: 13})"
2,0.302408,-0.01,"[4297, 5424, 8814, 9834, 12350, 13852, 13874, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ...",11.0,"Counter({0: 27, 1: 4})"
3,0.30613,0.03,"[106, 12273, 12759, 13366, 16318, 19716]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",6.0,"Counter({0: 27, 1: 4})"
4,0.293582,0.03,"[1140, 1451, 1875, 2064, 3432, 5760, 7451, 826...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",19.0,"Counter({0: 27, 1: 4})"
5,0.316202,0.03,"[465, 1396, 2149, 2286, 3232, 4794, 5484, 9286...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",15.0,"Counter({0: 27, 1: 4})"
6,0.314743,0.1,"[1748, 3721, 4597, 7009, 7430, 7431, 7434, 744...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, ...",32.0,"Counter({1: 17, 0: 14})"


Selecting (1, 4) from archive


 77%|███████▋  | 24/31 [03:32<00:37,  5.32s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.338797,-0.03,"[584, 724, 1036, 1073, 2960, 3060, 3175, 3325,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, ...",80.0,"Counter({1: 22, 0: 9})"
1,0.331147,0.54,"[2223, 2697, 2840, 2844, 2938, 2965, 2983, 298...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ...",163.0,"Counter({1: 18, 0: 13})"
2,0.302408,-0.01,"[4297, 5424, 8814, 9834, 12350, 13852, 13874, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ...",11.0,"Counter({0: 27, 1: 4})"
3,0.30613,0.03,"[106, 12273, 12759, 13366, 16318, 19716]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",6.0,"Counter({0: 27, 1: 4})"
4,0.293582,0.03,"[1140, 1451, 1875, 2064, 3432, 5760, 7451, 826...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",19.0,"Counter({0: 27, 1: 4})"
5,0.316202,0.03,"[465, 1396, 2149, 2286, 3232, 4794, 5484, 9286...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",15.0,"Counter({0: 27, 1: 4})"
6,0.314743,0.1,"[1748, 3721, 4597, 7009, 7430, 7431, 7434, 744...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, ...",32.0,"Counter({1: 17, 0: 14})"
7,0.281488,-0.03,"[370, 1136, 1703, 1804]","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, ...",4.0,"Counter({1: 23, 0: 8})"


Selecting (8, 4) from archive


 87%|████████▋ | 27/31 [03:49<00:20,  5.04s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.338797,-0.03,"[584, 724, 1036, 1073, 2960, 3060, 3175, 3325,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, ...",80.0,"Counter({1: 22, 0: 9})"
1,0.331147,0.54,"[2223, 2697, 2840, 2844, 2938, 2965, 2983, 298...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ...",163.0,"Counter({1: 18, 0: 13})"
2,0.302408,-0.01,"[4297, 5424, 8814, 9834, 12350, 13852, 13874, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ...",11.0,"Counter({0: 27, 1: 4})"
3,0.30613,0.03,"[106, 12273, 12759, 13366, 16318, 19716]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",6.0,"Counter({0: 27, 1: 4})"
4,0.293582,0.03,"[1140, 1451, 1875, 2064, 3432, 5760, 7451, 826...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",19.0,"Counter({0: 27, 1: 4})"
5,0.316202,0.03,"[465, 1396, 2149, 2286, 3232, 4794, 5484, 9286...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",15.0,"Counter({0: 27, 1: 4})"
6,0.314743,0.1,"[1748, 3721, 4597, 7009, 7430, 7431, 7434, 744...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, ...",32.0,"Counter({1: 17, 0: 14})"
7,0.281488,-0.03,"[370, 1136, 1703, 1804]","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, ...",4.0,"Counter({1: 23, 0: 8})"
8,0.331061,0.54,"[812, 1001, 2223, 2844, 2985, 3028, 3126, 3129...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ...",163.0,"Counter({1: 18, 0: 13})"


Selecting (1, 4) from archive


 97%|█████████▋| 30/31 [04:27<00:08,  8.05s/it]

Unnamed: 0,adapted_ratkowsky_lance,ari,features,partition,size,structure
0,0.338797,-0.03,"[584, 724, 1036, 1073, 2960, 3060, 3175, 3325,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, ...",80.0,"Counter({1: 22, 0: 9})"
1,0.331147,0.54,"[2223, 2697, 2840, 2844, 2938, 2965, 2983, 298...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ...",163.0,"Counter({1: 18, 0: 13})"
2,0.302408,-0.01,"[4297, 5424, 8814, 9834, 12350, 13852, 13874, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ...",11.0,"Counter({0: 27, 1: 4})"
3,0.30613,0.03,"[106, 12273, 12759, 13366, 16318, 19716]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",6.0,"Counter({0: 27, 1: 4})"
4,0.293582,0.03,"[1140, 1451, 1875, 2064, 3432, 5760, 7451, 826...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",19.0,"Counter({0: 27, 1: 4})"
5,0.316202,0.03,"[465, 1396, 2149, 2286, 3232, 4794, 5484, 9286...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",15.0,"Counter({0: 27, 1: 4})"
6,0.314743,0.1,"[1748, 3721, 4597, 7009, 7430, 7431, 7434, 744...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, ...",32.0,"Counter({1: 17, 0: 14})"
7,0.281488,-0.03,"[370, 1136, 1703, 1804]","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, ...",4.0,"Counter({1: 23, 0: 8})"
8,0.331061,0.54,"[812, 1001, 2223, 2844, 2985, 3028, 3126, 3129...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ...",163.0,"Counter({1: 18, 0: 13})"
9,0.277822,0.15,"[62, 954, 1553, 7611, 8811, 13258, 15107, 1957...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...",11.0,"Counter({0: 24, 1: 7})"


Selecting (4, 4) from archive


100%|██████████| 31/31 [04:40<00:00,  9.05s/it]


TIME: GA: 4.675550969441732 min
TIME: Total: 7.306553395589193 min


In [6]:
results.groupby("dataset_name").agg({"ari": max})

Unnamed: 0_level_0,ari
dataset_name,Unnamed: 1_level_1
borovecki,0.54
gordon,0.86
khan,0.9
west,0.03


# Supervised analysis of datasets

In [None]:
import scripts.ga_evaluation as ga_evaluation
filenames = np.array(["khan","west",  "gordon",'borovecki'])
path = 'data/microarray/'
imp_f = np.arange(20)
result_df = pd.DataFrame()
for name in filenames:
    t1 = time.time()
    data = pd.read_pickle(f'{path}' + name + '.pkl')
    truth = data["truth"].values
    data = data.drop("truth", axis = 1).values

    n_clusters = len(np.unique(truth))

    result = {"Dataset" : name,
             "Original Dimensions": f"{data.shape[0]} x {data.shape[1]}",
             "Cluster sizes" : ", ".join(np.array(list(Counter(truth).values())).astype(str))}
    
    predK = KMeans(n_clusters= n_clusters, random_state = 2).fit(data).labels_
    ari_all = adjusted_rand_score(truth, predK)
    
    predK = KMeans(n_clusters= n_clusters, random_state = 2).fit(data[:, :5]).labels_
    ari_top10 = adjusted_rand_score(truth, predK)
    
    pca = PCA(2)
    pca_data = pca.fit_transform(data)

    predK = KMeans(n_clusters= n_clusters, random_state = 2).fit(pca_data).labels_
    ari_pca = adjusted_rand_score(truth, predK)
    
    r1 = ga_evaluation.random_sampling(data, truth, n_clusters, algo = "gmm")
    r2 = ga_evaluation.random_sampling(data, truth, n_clusters, algo = "hdbscan")
    result["ARI all dataset"] = round(ari_all,2)
    result["ARI PCA dataset"] = round(ari_pca,2)
    result["ARI top 10 features"] = round(ari_top10,2)
    result["Random GMM"] = round(r1,2)
    result["Random HDBSCAN"] = round(r2,2)
    result_df = result_df.append(result, ignore_index = True)

result_df

# Run time analysis

In [None]:
results = pd.read_excel("reports/r_cnn.xlsx", index_col=0)

In [None]:
results["label"] = results["dataset_name"] + " (" +results["input_size"].astype(str) + " features) " 

In [None]:
perf = results.groupby("input_size").min()[['t(feature_sel)', 't(cnn)','t(ga)']]

In [None]:
perf = perf.rename(columns = {
    't(feature_sel)' : '1D Feature ranking', 
    't(cnn)' : '2D Feature ranking with NN',
    't(ga)': 'Optimization algorithm for top 10 subspaces'
})

In [None]:
plt.figure(figsize = (10,3))
ax = plt.gca()
perf.plot(kind='bar', stacked=True, ax = ax)
plt.ylabel("time (min)")
plt.xticks(rotation = 0)
sns.despine()
plt.title("Run times on West, Khan, Gordon and Boroveki datasets")
plt.xlabel("Number of dimensions in the input dataset")
plt.savefig(f"images/run_times.pdf", bbox_inches='tight')

In [7]:
from sklearn import mixture
import hdbscan

# Best scores using supervised feature selection

In [9]:
import scripts.ga_evaluation as ga_evaluation
filenames = np.array(["khan","west",  "gordon",'borovecki'])
path = 'data/microarray/'

In [None]:
for name in filenames:
    t1 = time.time()
    data = pd.read_pickle(f'{path}' + name + '.pkl')
    truth = data["truth"].values
    data = data.drop("truth", axis = 1).values
    print(Counter(truth))
    n_clusters = len(np.unique(truth))
    gmm_scores = []
    hdbscan_scores = []
    for i in range(2, 50):
        input_data = data[:, :i]
        gmm = mixture.GaussianMixture(n_components=n_clusters,
                          covariance_type="full", random_state=0)
        pred = gmm.fit_predict(input_data)
        ari = adjusted_rand_score(truth, pred)
        gmm_scores.append(ari)

        pred = hdbscan.HDBSCAN(min_cluster_size =2).fit(input_data).labels_
        ari = adjusted_rand_score(truth, pred)
        hdbscan_scores.append(ari)

        
    print(f"\n\n\n{name} GMM ari = {max(gmm_scores)}, ")
    print(f"{name} HDBSCAN ari = {max(hdbscan_scores)}, ")
    

# Mutual Information

In [12]:
from sklearn.feature_selection import chi2,  mutual_info_classif, SelectKBest

In [13]:
for name in filenames:
    t1 = time.time()
    data = pd.read_pickle(f'{path}' + name + '.pkl')
    truth = data["truth"].values
    data = data.drop("truth", axis = 1).values
    print(Counter(truth))
    n_clusters = len(np.unique(truth))
    gmm_scores = []
    hdbscan_scores = []
    sel = SelectKBest(mutual_info_classif, k=50).fit_transform(data, truth)
    for i in range(2, 50):
        input_data = sel[:, :i]
        gmm = mixture.GaussianMixture(n_components=n_clusters,
                          covariance_type="full", random_state=0)
        pred = gmm.fit_predict(input_data)
        ari = adjusted_rand_score(truth, pred)
        gmm_scores.append(ari)

        pred = hdbscan.HDBSCAN(min_cluster_size =2).fit(input_data).labels_
        ari = adjusted_rand_score(truth, pred)
        hdbscan_scores.append(ari)

        
    print(f"\n\n\n{name} GMM ari = {max(gmm_scores)}, ")
    print(f"{name} HDBSCAN ari = {max(hdbscan_scores)}, ")

Counter({1: 23, 3: 20, 2: 12, 0: 8})



khan GMM ari = 1.0, 
khan HDBSCAN ari = 0.9753291540426958, 
Counter({0: 25, 1: 24})



west GMM ari = 0.7652246371779737, 
west HDBSCAN ari = 0.3423504867872045, 
Counter({0: 150, 1: 31})



gordon GMM ari = 0.9731180281933411, 
gordon HDBSCAN ari = 0.9060143786845584, 
Counter({1: 17, 0: 14})



borovecki GMM ari = 1.0, 
borovecki HDBSCAN ari = 0.775027912169706, 


In [None]:
import scripts.ga_evaluation as ga_evaluation
filenames = np.array(["khan","west",  "gordon",'borovecki'])
path = 'data/microarray/'

for name in filenames:
    t1 = time.time()
    data = pd.read_pickle(f'{path}' + name + '.pkl')
    truth = data["truth"].values
    input_data = data.drop("truth", axis = 1).values

    n_clusters = len(np.unique(truth))
    gmm = mixture.GaussianMixture(n_components=n_clusters,
                      covariance_type="full", random_state=0)
    pred = gmm.fit_predict(input_data)
    ari = adjusted_rand_score(truth, pred)
    print(f"{name} GMM ari = {ari}")

    pred = hdbscan.HDBSCAN(min_cluster_size =2).fit(input_data).labels_
    ari = adjusted_rand_score(truth, pred)
    print(f"{name} HDBSCAN ari = {ari}")

In [None]:
import scripts.ga_evaluation as ga_evaluation
filenames = np.array(['borovecki'])
path = 'data/microarray/'

for name in filenames:
    t1 = time.time()
    data = pd.read_pickle(f'{path}' + name + '.pkl')
    truth = data["truth"].values
    input_data = data.drop("truth", axis = 1).values
    input_data = input_data[:, :10000]
    

    n_clusters = len(np.unique(truth))
    
    pred = hdbscan.HDBSCAN(min_cluster_size =2).fit(input_data).labels_
    ari = adjusted_rand_score(truth, pred)
    print(f"{name} HDBSCAN ari = {ari}")
    
    gmm = mixture.GaussianMixture(n_components=n_clusters,
                      covariance_type="full", random_state=0)
    pred = gmm.fit_predict(input_data)
    ari = adjusted_rand_score(truth, pred)
    print(f"{name} GMM ari = {ari}")




# Execution time analysis


In [None]:
results = pd.read_excel("reports/r_cnn.xlsx", index_col=0)

results["label"] = results["dataset_name"] + " (" +results["input_size"].astype(str) + " features) " 

perf = results.groupby("input_size").min()[['t(feature_sel)', 't(cnn)','t(ga)']]

perf = perf.rename(columns = {
    't(feature_sel)' : '1D Feature ranking', 
    't(cnn)' : '2D Feature ranking with NN',
    't(ga)': 'Optimization algorithm for top 10 subspaces'
})



plt.figure(figsize = (10,3))
ax = plt.gca()
perf.plot(kind='bar', stacked=True, ax = ax)
plt.ylabel("time (min)")
plt.xticks(rotation = 0)
sns.despine()
plt.title("Run times on West, Khan, Gordon and Boroveki datasets")
plt.xlabel("Number of dimensions in the input dataset")
plt.savefig(f"images/run_times.pdf", bbox_inches='tight')