# Hybrid-NEAT w/ UMAP & HDBSCAN Example - Usage and Hyperparameters Study:

### Import required modules:

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
import umap
import pickle
import numba
import time
import hdbscan
import sys
from collections import defaultdict
from os import path
import os
import neat
import warnings

%matplotlib inline
sns.set(style='white', rc={'figure.figsize':(12,8)})

### Import data (cell execution output is denoted with "# " at the start of each line):

In [None]:
# Seed NumPy's global random number generator.
np.random.seed(42)

# Load the pickled pixel set from the working directory. To load it from
# another location, open "<Your Path Here>/all_pixel_overall_set.pkl" instead.
# The context manager guarantees the handle is closed even if unpickling raises.
# NOTE(security): pickle.load can execute arbitrary code -- only load files
# that come from a trusted source.
with open("all_pixel_overall_set.pkl", "rb") as file:
    all_pixel_overall_set = pickle.load(file)

# Convert to an integer array and divide by 255.
data = np.asarray(all_pixel_overall_set).astype(int) / 255

print(data)
print(len(all_pixel_overall_set))
print(type(data))
print(data.shape)

n_row, n_col = data.shape
print(n_row, n_col)

# Append one extra column filled with ones to every row (the recorded output
# shows 3 columns becoming 4). Presumably this acts as an opaque alpha channel
# when the rows are later used as scatter-plot colors -- TODO confirm.
b = np.ones((n_row, n_col + 1))
b[:, :-1] = data
print(b)
print(b.shape)
data = b

# [[0.38431373 0.38431373 0.38431373]
#  [0.96078431 0.78431373 0.80784314]
#  [1.         0.01960784 0.02352941]
#  ...
#  [1.         1.         1.        ]
#  [0.         0.         0.        ]
#  [0.75294118 0.75294118 0.75294118]]
# 2943
# <class 'numpy.ndarray'>
# (2943, 3)
# 2943 3
# [[0.38431373 0.38431373 0.38431373 1.        ]
#  [0.96078431 0.78431373 0.80784314 1.        ]
#  [1.         0.01960784 0.02352941 1.        ]
#  ...
#  [1.         1.         1.         1.        ]
#  [0.         0.         0.         1.        ]
#  [0.75294118 0.75294118 0.75294118 1.        ]]
# (2943, 4)

### Invoke UMAP:

In [None]:
# Fit UMAP with its default hyperparameters and embed ``data``. The IPython
# ``%time`` magic reports how long the fit/transform call takes.
fit = umap.UMAP()
%time u = fit.fit_transform(data)
# Show the shape of the embedding next to the shape of the input.
print(u.shape)
print(data.shape)

### Using UMAP, create a 2-dimensional representation of the data using ``matplotlib`` to draw a scatter plot. The color of each point of the scatter plot is controlled by the associated 4-dimensional color from the source data.

In [None]:
# Scatter the first two embedding columns; each point is colored by its own
# row of ``data``.
plt.scatter(x=u[:, 0], y=u[:, 1], c=data)

## UMAP Hyperparameter Selection:

UMAP has several hyperparameters that control the resulting embedding. The four major ones are listed below:

 - ``n_neighbors``
 - ``min_dist``
 - ``n_components``
 - ``metric``

### Define function for UMAP only operation on data:

In [None]:
def draw_umap(n_neighbors=15, min_dist=0.1, n_components=2, metric='euclidean', title=''):
    """Embed the module-level ``data`` with UMAP and scatter-plot the result.

    Parameters
    ----------
    n_neighbors, min_dist, n_components, metric
        Forwarded unchanged to ``umap.UMAP``.
    title : str
        Title placed on the figure.

    Notes
    -----
    Only 1-, 2- and 3-component embeddings are drawn. A 1-component embedding
    is plotted against the point index. For any other ``n_components`` the
    embedding is still computed but no points are drawn. Every point is
    colored by its row of ``data``.
    """
    reducer = umap.UMAP(
        n_neighbors=n_neighbors,
        min_dist=min_dist,
        n_components=n_components,
        metric=metric,
    )
    embedding = reducer.fit_transform(data)

    figure = plt.figure()
    if n_components == 1:
        # No second coordinate exists, so use the point index as the y value.
        axes = figure.add_subplot(111)
        axes.scatter(embedding[:, 0], range(len(embedding)), c=data)
    elif n_components == 2:
        axes = figure.add_subplot(111)
        axes.scatter(embedding[:, 0], embedding[:, 1], c=data)
    elif n_components == 3:
        axes = figure.add_subplot(111, projection='3d')
        axes.scatter(embedding[:, 0], embedding[:, 1], embedding[:, 2], c=data)

    plt.title(title, fontsize=18)

### Using UMAP, create a 3-dimensional representation of the data using ``matplotlib`` to draw a scatter plot. The color of each point of the scatter plot is controlled by the associated 4-dimensional color from the source data.

In [None]:
# Draw a 3-component UMAP embedding; all other hyperparameters keep their defaults.
draw_umap( n_components=3 )

### Define function for combined UMAP & HDBSCAN operation on data:

In [None]:
def draw_umap_hdbscan(n_neighbors=15, min_dist=0.1, n_components=2, metric='euclidean', min_samples=10, min_cluster_size=500, cluster_selection_epsilon=0.0, q_plot = 0):
    """Embed the module-level ``data`` with UMAP, cluster it with HDBSCAN, and score the result.

    Parameters
    ----------
    n_neighbors, min_dist, n_components, metric
        Forwarded unchanged to ``umap.UMAP``.
    min_samples, min_cluster_size, cluster_selection_epsilon
        Forwarded to ``hdbscan.HDBSCAN`` under the same names.
    q_plot : int
        When equal to 1, scatter-plot the embedding (only for 1, 2 or 3
        components), coloring each point by its row of ``data``.

    Returns
    -------
    tuple
        ``(num_clusters_found, ratio_clustered, probabilities_mean,
        probabilities_SE_mean, outlier_scores_mean, outlier_scores_SE_mean,
        combined_SE_mean)`` where

        * ``num_clusters_found`` is the largest cluster label plus one,
        * ``ratio_clustered`` is the fraction of points with a label >= 0,
        * ``probabilities_mean`` / ``outlier_scores_mean`` are the per-point
          means of ``probabilities_`` / ``outlier_scores_``,
        * ``probabilities_SE_mean`` is the mean of ``(100 - 100 * p) ** 2``,
        * ``outlier_scores_SE_mean`` is the mean of ``(100 * s) ** 2``,
        * ``combined_SE_mean`` is the sum of the two squared-error means.
    """
    fit = umap.UMAP(
        n_neighbors=n_neighbors,
        min_dist=min_dist,
        n_components=n_components,
        metric=metric
    )
    u_data = fit.fit_transform(data)
    n_points = u_data.shape[0]

    # Pass the HDBSCAN settings by keyword. The constructor's positional order
    # is (min_cluster_size, min_samples, cluster_selection_epsilon, ...), so
    # the previous positional call silently swapped min_samples and
    # min_cluster_size.
    # Fit once and reuse labels_ rather than clustering the same embedding
    # twice (fit_predict followed by a second fit).
    clusterer = hdbscan.HDBSCAN(
        min_cluster_size=min_cluster_size,
        min_samples=min_samples,
        cluster_selection_epsilon=cluster_selection_epsilon,
    ).fit( u_data )
    hdbscan_labels = clusterer.labels_

    num_clusters_found = hdbscan_labels.max() + 1

    # Outlier scores: per-point mean, and mean squared distance from 0 after
    # scaling the scores by 100.
    clusterer_outlier_scores_sum = clusterer.outlier_scores_.sum() / n_points
    clusterer_outlier_scores_sum_SE = np.square( ( 100.0 * clusterer.outlier_scores_ ) - 0.0 ).sum() / n_points

    # Membership probabilities: per-point mean, and mean squared distance from
    # 100 after scaling the probabilities by 100.
    clusterer_probabilities_sum = clusterer.probabilities_.sum() / n_points
    clusterer_probabilities_sum_SE = np.square( 100.0 - ( 100.0 * clusterer.probabilities_ ) ).sum() / n_points

    # Points with a label >= 0 are counted as clustered.
    clustered = (hdbscan_labels >= 0)
    ratio_clustered = np.sum(clustered) / n_points

    if q_plot == 1:
        fig = plt.figure()
        if n_components == 1:
            # No second coordinate exists, so use the point index as the y value.
            ax = fig.add_subplot(111)
            ax.scatter(u_data[:,0], range(len(u_data)), c=data)
        elif n_components == 2:
            ax = fig.add_subplot(111)
            ax.scatter(u_data[:,0], u_data[:,1], c=data)
        elif n_components == 3:
            ax = fig.add_subplot(111, projection='3d')
            ax.scatter(u_data[:,0], u_data[:,1], u_data[:,2], c=data)

    return ( num_clusters_found,
            ratio_clustered,
            clusterer_probabilities_sum,
            clusterer_probabilities_sum_SE,
            clusterer_outlier_scores_sum,
            clusterer_outlier_scores_sum_SE,
            clusterer_probabilities_sum_SE + clusterer_outlier_scores_sum_SE )

### 1) Simple Repeatability Test - Same Inputs Run (100) times: (cell execution output denoted w/ "# " at start of line)

In [None]:
# Hyperparameters held constant for the repeatability study: every one of the
# 100 runs below is called with exactly these inputs.
metric = 'euclidean'
n_neighbors = 15
min_dist = 0.1
n_components = 3
min_samples = 5
min_cluster_size = 20
cluster_selection_epsilon = 0.0
q_plot = 0

for Run_Number in range(100):

    # One full UMAP + HDBSCAN pass; the function returns a 7-tuple of measures.
    (num_clusters_found, ratio_clustered,
     clustered_probabilities_mean, clustered_probabilities_SE_mean,
     clustered_outlier_scores_mean, clustered_outlier_scores_SE_mean,
     clustered_COMB_SE_mean) = draw_umap_hdbscan(
        n_neighbors=n_neighbors,
        min_dist=min_dist,
        n_components=n_components,
        metric=metric,
        min_samples=min_samples,
        min_cluster_size=min_cluster_size,
        cluster_selection_epsilon=cluster_selection_epsilon,
        q_plot=q_plot)

    # Fitness: a fixed penalty score when no cluster is found; otherwise the
    # inverse of the combined squared error, minus 1000 for every cluster the
    # count is away from the target of 7.
    if num_clusters_found == 0:
        fitness = -99999.0
    else:
        fitness = 10000.0 / abs(clustered_COMB_SE_mean + 1)
        if num_clusters_found != 7:
            fitness -= abs(num_clusters_found - 7) * 1000.0

    print("IN: ", Run_Number, "       OUT: ",
          "{a:5d},{b:10.5f},{c:10.5f},{d:10.2f},{e:10.5f},{f:10.2f},{g:15.2f},{h:15.2f}".format(
              a=num_clusters_found,
              b=ratio_clustered,
              c=clustered_probabilities_mean,
              d=clustered_probabilities_SE_mean,
              e=clustered_outlier_scores_mean,
              f=clustered_outlier_scores_SE_mean,
              g=clustered_COMB_SE_mean,
              h=fitness))

# IN:  0        OUT:     23,   0.86069,   0.81982,   1497.20,   0.08978,    175.89,        1673.08,      -15994.03
# IN:  1        OUT:     34,   0.76521,   0.71789,   2460.73,   0.08768,    163.18,        2623.91,      -26996.19
# IN:  2        OUT:      2,   1.00000,   0.99674,      5.64,   0.09784,    193.87,         199.52,       -4950.13
# IN:  3        OUT:     31,   0.80802,   0.76885,   2005.53,   0.07791,    141.46,        2146.99,      -23995.34
# IN:  4        OUT:     24,   0.86374,   0.82087,   1469.97,   0.09031,    169.61,        1639.58,      -16993.90
# IN:  5        OUT:     31,   0.83384,   0.78995,   1778.34,   0.09240,    191.87,        1970.21,      -23994.93
# IN:  6        OUT:     36,   0.78152,   0.73245,   2306.56,   0.09156,    186.54,        2493.10,      -28995.99
# IN:  7        OUT:      2,   1.00000,   0.99885,      1.80,   0.08886,    166.72,         168.53,       -4941.01
# IN:  8        OUT:     35,   0.80530,   0.75441,   2074.43,   0.09962,    210.50,        2284.94,      -27995.63
# IN:  9        OUT:     24,   0.86103,   0.82869,   1467.90,   0.08644,    162.00,        1629.90,      -16993.87
# IN:  10       OUT:      2,   1.00000,   0.99536,     11.19,   0.10340,    216.79,         227.99,       -4956.33
# IN:  11       OUT:      2,   1.00000,   0.99775,      3.95,   0.08840,    168.05,         172.00,       -4942.20
# IN:  12       OUT:     39,   0.70778,   0.66478,   3012.39,   0.07944,    144.40,        3156.79,      -31996.83
# IN:  13       OUT:     35,   0.76792,   0.71629,   2453.18,   0.09088,    189.91,        2643.10,      -27996.22
# IN:  14       OUT:      2,   0.99660,   0.99337,     40.38,   0.09010,    184.15,         224.53,       -4955.66
# IN:  15       OUT:     33,   0.80156,   0.76134,   2073.97,   0.08458,    159.14,        2233.11,      -25995.52
# IN:  16       OUT:      2,   1.00000,   0.99846,      2.20,   0.08402,    157.04,         159.24,       -4937.59
# IN:  17       OUT:      2,   1.00000,   0.99757,      5.55,   0.08771,    171.80,         177.35,       -4943.93
# IN:  18       OUT:      2,   1.00000,   0.99814,      2.80,   0.10479,    249.41,         252.20,       -4960.51
# IN:  19       OUT:      2,   1.00000,   0.99617,      5.54,   0.08607,    162.50,         168.04,       -4940.84
# IN:  20       OUT:     30,   0.81787,   0.77477,   1915.29,   0.08859,    167.57,        2082.86,      -22995.20
# IN:  21       OUT:     29,   0.82807,   0.78659,   1805.61,   0.08887,    156.46,        1962.07,      -21994.91
# IN:  22       OUT:     29,   0.83316,   0.78723,   1780.54,   0.09030,    184.30,        1964.84,      -21994.91
# IN:  23       OUT:     36,   0.80224,   0.75677,   2082.60,   0.08750,    168.45,        2251.05,      -28995.56
# IN:  24       OUT:     28,   0.83146,   0.79294,   1770.10,   0.09003,    160.52,        1930.61,      -20994.82
# IN:  25       OUT:     32,   0.80122,   0.76107,   2079.88,   0.08602,    158.99,        2238.88,      -24995.54
# IN:  26       OUT:     27,   0.79884,   0.75683,   2099.60,   0.08053,    138.28,        2237.87,      -19995.53
# IN:  27       OUT:      2,   1.00000,   0.99538,     11.23,   0.08503,    155.28,         166.51,       -4940.30
# IN:  28       OUT:      2,   1.00000,   0.99829,      2.99,   0.08980,    200.75,         203.74,       -4951.16
# IN:  29       OUT:      2,   1.00000,   0.99741,      3.65,   0.09847,    199.08,         202.73,       -4950.92
# IN:  30       OUT:      2,   1.00000,   0.99566,      8.47,   0.08415,    153.43,         161.90,       -4938.61
# IN:  31       OUT:      2,   1.00000,   0.99812,      3.02,   0.08750,    169.86,         172.87,       -4942.49
# IN:  32       OUT:      3,   1.00000,   0.99394,     11.25,   0.08913,    169.59,         180.84,       -3945.01
# IN:  33       OUT:      2,   1.00000,   0.99747,      3.50,   0.09678,    200.05,         203.56,       -4951.11
# IN:  34       OUT:     34,   0.77166,   0.72278,   2396.09,   0.09312,    189.74,        2585.83,      -26996.13
# IN:  35       OUT:      2,   1.00000,   0.99642,      7.45,   0.08415,    150.42,         157.87,       -4937.06
# IN:  36       OUT:      3,   1.00000,   0.98530,     39.54,   0.09336,    176.56,         216.10,       -3953.94
# IN:  37       OUT:      2,   1.00000,   0.99733,      5.06,   0.09045,    170.74,         175.79,       -4943.44
# IN:  38       OUT:     33,   0.78118,   0.74327,   2275.23,   0.08280,    153.85,        2429.08,      -25995.88
# IN:  39       OUT:     26,   0.85695,   0.82320,   1511.88,   0.09022,    171.62,        1683.50,      -18994.06
# IN:  40       OUT:     25,   0.90010,   0.84271,   1216.46,   0.10979,    302.23,        1518.69,      -17993.42
# IN:  41       OUT:     34,   0.82195,   0.77230,   1897.81,   0.09535,    192.45,        2090.26,      -26995.22
# IN:  42       OUT:      2,   1.00000,   0.99818,      2.14,   0.09696,    201.24,         203.37,       -4951.07
# IN:  43       OUT:     32,   0.80836,   0.76198,   2027.42,   0.08804,    167.76,        2195.18,      -24995.45
# IN:  44       OUT:      2,   1.00000,   0.99766,      4.36,   0.07800,    130.58,         134.93,       -4926.44
# IN:  45       OUT:      2,   1.00000,   0.99759,      4.29,   0.08934,    174.90,         179.19,       -4944.50
# IN:  46       OUT:      2,   1.00000,   0.99578,      9.24,   0.08908,    171.06,         180.30,       -4944.84
# IN:  47       OUT:      2,   1.00000,   0.99636,      5.75,   0.08538,    152.87,         158.62,       -4937.35
# IN:  48       OUT:      4,   0.99966,   0.99342,     20.23,   0.09601,    193.07,         213.30,       -2953.34
# IN:  49       OUT:     32,   0.82467,   0.78686,   1838.67,   0.07776,    145.21,        1983.89,      -24994.96
# IN:  50       OUT:     36,   0.78186,   0.73503,   2293.38,   0.08727,    168.06,        2461.44,      -28995.94
# IN:  51       OUT:      3,   1.00000,   0.99271,     16.18,   0.08918,    171.10,         187.28,       -3946.89
# IN:  52       OUT:     36,   0.75909,   0.71867,   2498.76,   0.08642,    161.78,        2660.55,      -28996.24
# IN:  53       OUT:     33,   0.80904,   0.76370,   2018.02,   0.08706,    176.46,        2194.48,      -25995.45
# IN:  54       OUT:      2,   1.00000,   0.99749,      6.27,   0.09047,    183.84,         190.11,       -4947.67
# IN:  55       OUT:      3,   1.00000,   0.99472,      9.80,   0.08988,    173.72,         183.52,       -3945.80
# IN:  56       OUT:      3,   1.00000,   0.99646,      5.91,   0.10512,    227.50,         233.41,       -3957.34
# IN:  57       OUT:     34,   0.76894,   0.72324,   2422.25,   0.08988,    176.74,        2598.99,      -26996.15
# IN:  58       OUT:      2,   1.00000,   0.99754,      3.41,   0.08419,    153.65,         157.05,       -4936.73
# IN:  59       OUT:     32,   0.80904,   0.76509,   2004.80,   0.09094,    168.02,        2172.83,      -24995.40
# IN:  60       OUT:     37,   0.78016,   0.73572,   2302.06,   0.07752,    138.21,        2440.27,      -29995.90
# IN:  61       OUT:      2,   1.00000,   0.99705,      6.47,   0.08632,    167.02,         173.49,       -4942.69
# IN:  62       OUT:     28,   0.80632,   0.76919,   2026.61,   0.09223,    177.66,        2204.26,      -20995.47
# IN:  63       OUT:     32,   0.79409,   0.75865,   2135.75,   0.08059,    146.63,        2282.37,      -24995.62
# IN:  64       OUT:      2,   1.00000,   0.99582,      7.32,   0.08815,    161.00,         168.32,       -4940.94
# IN:  65       OUT:     29,   0.81380,   0.77379,   1949.01,   0.08218,    144.81,        2093.82,      -21995.23
# IN:  66       OUT:      2,   1.00000,   0.99521,      9.70,   0.08576,    165.68,         175.37,       -4943.30
# IN:  67       OUT:      2,   1.00000,   0.99765,      3.53,   0.08227,    142.91,         146.44,       -4932.17
# IN:  68       OUT:      2,   1.00000,   0.99669,      6.12,   0.08134,    153.09,         159.21,       -4937.58
# IN:  69       OUT:      2,   1.00000,   0.99599,      7.83,   0.08906,    170.05,         177.88,       -4944.10
# IN:  70       OUT:     29,   0.82093,   0.78747,   1866.60,   0.08391,    153.04,        2019.64,      -21995.05
# IN:  71       OUT:      3,   1.00000,   0.99682,      6.20,   0.08963,    175.61,         181.80,       -3945.30
# IN:  72       OUT:     35,   0.77404,   0.72852,   2365.91,   0.08582,    162.73,        2528.64,      -27996.05
# IN:  73       OUT:      2,   1.00000,   0.99581,      7.32,   0.08724,    159.01,         166.33,       -4940.24
# IN:  74       OUT:     31,   0.80870,   0.76674,   2005.92,   0.08798,    167.74,        2173.66,      -23995.40
# IN:  75       OUT:     26,   0.83962,   0.80739,   1683.72,   0.08596,    162.95,        1846.66,      -18994.59
# IN:  76       OUT:     40,   0.77540,   0.72382,   2362.40,   0.09041,    175.39,        2537.80,      -32996.06
# IN:  77       OUT:     29,   0.81583,   0.77999,   1917.22,   0.08566,    160.01,        2077.22,      -21995.19
# IN:  78       OUT:      2,   1.00000,   0.99785,      3.24,   0.08814,    168.22,         171.46,       -4942.02
# IN:  79       OUT:      2,   1.00000,   0.99796,      4.00,   0.08481,    155.32,         159.33,       -4937.63
# IN:  80       OUT:      2,   1.00000,   0.99637,      9.56,   0.08549,    163.52,         173.08,       -4942.55
# IN:  81       OUT:     29,   0.83962,   0.80528,   1678.05,   0.09211,    176.80,        1854.85,      -21994.61
# IN:  82       OUT:      2,   1.00000,   0.99850,      1.85,   0.08750,    166.05,         167.89,       -4940.79
# IN:  83       OUT:      2,   1.00000,   0.99240,     19.67,   0.09308,    187.94,         207.61,       -4952.06
# IN:  84       OUT:     27,   0.84200,   0.78868,   1712.12,   0.09408,    197.58,        1909.70,      -19994.77
# IN:  85       OUT:     38,   0.75943,   0.71863,   2490.06,   0.08933,    167.39,        2657.45,      -30996.24
# IN:  86       OUT:      2,   0.99830,   0.99484,     25.76,   0.09235,    180.25,         206.02,       -4951.69
# IN:  87       OUT:     32,   0.79613,   0.76596,   2100.15,   0.07893,    136.63,        2236.78,      -24995.53
# IN:  88       OUT:     37,   0.79137,   0.74345,   2188.58,   0.09146,    173.02,        2361.61,      -29995.77
# IN:  89       OUT:     27,   0.81855,   0.78191,   1899.29,   0.08300,    150.94,        2050.23,      -19995.12
# IN:  90       OUT:      2,   1.00000,   0.99627,      8.04,   0.09363,    181.62,         189.66,       -4947.55
# IN:  91       OUT:     34,   0.79307,   0.74288,   2184.08,   0.08986,    173.28,        2357.36,      -26995.76
# IN:  92       OUT:      2,   1.00000,   0.99833,      2.30,   0.08752,    159.86,         162.16,       -4938.71
# IN:  93       OUT:     39,   0.76487,   0.70727,   2489.94,   0.09892,    207.20,        2697.14,      -31996.29
# IN:  94       OUT:      2,   1.00000,   0.99744,      4.05,   0.08593,    157.07,         161.12,       -4938.32
# IN:  95       OUT:      2,   1.00000,   0.99800,      3.26,   0.08783,    160.39,         163.65,       -4939.26
# IN:  96       OUT:      2,   1.00000,   0.99713,      5.44,   0.08682,    155.52,         160.96,       -4938.26
# IN:  97       OUT:     38,   0.77098,   0.71662,   2421.55,   0.09736,    197.30,        2618.86,      -30996.18
# IN:  98       OUT:      2,   1.00000,   0.99674,      5.61,   0.09168,    165.81,         171.42,       -4942.00
# IN:  99       OUT:     27,   0.82127,   0.77980,   1882.08,   0.08782,    160.39,        2042.47,      -19995.11


### 2) UMAP ``metric`` Parameter Study: (cell execution output denoted w/ "# " at start of line)


In [None]:
# ``metric`` is the parameter swept by this study; everything else is fixed.
n_neighbors = 15
min_dist = 0.1
n_components = 3
min_samples = 5
min_cluster_size = 20
cluster_selection_epsilon = 0.0
q_plot = 0

for metric in [
    'euclidean', "manhattan", "chebyshev", "minkowski", "canberra",
    "braycurtis", "mahalanobis", "wminkowski", "seuclidean", "cosine",
    "correlation", "hamming", "jaccard", "dice", "kulsinski",
    "ll_dirichlet", "hellinger", "rogerstanimoto", "sokalmichener",
    "sokalsneath", "yule",
]:

    # One full UMAP + HDBSCAN pass; the function returns a 7-tuple of measures.
    (num_clusters_found, ratio_clustered,
     clustered_probabilities_mean, clustered_probabilities_SE_mean,
     clustered_outlier_scores_mean, clustered_outlier_scores_SE_mean,
     clustered_COMB_SE_mean) = draw_umap_hdbscan(
        n_neighbors=n_neighbors,
        min_dist=min_dist,
        n_components=n_components,
        metric=metric,
        min_samples=min_samples,
        min_cluster_size=min_cluster_size,
        cluster_selection_epsilon=cluster_selection_epsilon,
        q_plot=q_plot)

    # Fitness: a fixed penalty score when no cluster is found; otherwise the
    # inverse of the combined squared error, minus 1000 for every cluster the
    # count is away from the target of 7.
    if num_clusters_found == 0:
        fitness = -99999.0
    else:
        fitness = 10000.0 / abs(clustered_COMB_SE_mean + 1)
        if num_clusters_found != 7:
            fitness -= abs(num_clusters_found - 7) * 1000.0

    print("IN: ", metric, "       OUT: ",
          "{a:5d},{b:10.5f},{c:10.5f},{d:10.2f},{e:10.5f},{f:10.2f},{g:15.2f},{h:15.2f}".format(
              a=num_clusters_found,
              b=ratio_clustered,
              c=clustered_probabilities_mean,
              d=clustered_probabilities_SE_mean,
              e=clustered_outlier_scores_mean,
              f=clustered_outlier_scores_SE_mean,
              g=clustered_COMB_SE_mean,
              h=fitness))

# IN:  euclidean        OUT:     41,   0.76385,   0.70539,   2506.27,   0.09167,    184.81,        2691.08,      -33996.29
# IN:  manhattan        OUT:     27,   0.84438,   0.80458,   1659.62,   0.09308,    195.14,        1854.76,      -19994.61
# IN:  chebyshev        OUT:     33,   0.76317,   0.69527,   2564.15,   0.10867,    261.19,        2825.35,      -25996.46
# IN:  minkowski        OUT:      2,   1.00000,   0.99806,      3.49,   0.08796,    169.14,         172.62,       -4942.40
# IN:  canberra         OUT:     25,   0.87496,   0.82184,   1398.03,   0.11150,    247.82,        1645.84,      -17993.93
# IN:  braycurtis       OUT:      3,   1.00000,   0.99677,      6.16,   0.10826,    270.18,         276.34,       -3963.94
# IN:  mahalanobis      OUT:      4,   0.99864,   0.98903,     36.62,   0.08519,    151.95,         188.56,       -2947.25
# IN:  wminkowski       OUT:     24,   0.94665,   0.92364,    661.69,       nan,       nan,            nan,            nan
# IN:  seuclidean       OUT:     24,   0.93612,   0.89040,   1008.34,       nan,       nan,            nan,            nan
# IN:  cosine           OUT:     41,   0.73191,   0.67550,   2808.29,   0.08915,    169.89,        2978.18,      -33996.64
# IN:  correlation      OUT:     23,   0.99966,   0.90725,    375.65,   0.15333,    504.77,         880.42,      -15988.65
# IN:  hamming          OUT:      2,   0.90078,   0.86432,   1045.19,   0.08291,    159.94,        1205.14,       -4991.71
# IN:  jaccard          OUT:      2,   1.00000,   0.96875,     65.75,   0.06383,     96.43,         162.18,       -4938.72
# IN:  dice             OUT:      2,   1.00000,   0.97412,     58.14,   0.05379,     82.78,         140.92,       -4929.54
# IN:  kulsinski        OUT:      2,   1.00000,   0.92273,    166.26,   0.09086,    177.63,         343.89,       -4971.01
# IN:  ll_dirichlet     OUT:     29,   0.93680,   0.75138,   1375.61,   0.23742,    891.18,        2266.79,      -21995.59
# IN:  hellinger        OUT:      2,   1.00000,   0.99795,      3.64,   0.08471,    171.11,         174.75,       -4943.10
# IN:  rogerstanimoto   OUT:      2,   1.00000,   0.93088,    162.67,   0.08301,    172.62,         335.29,       -4970.26
# IN:  sokalmichener    OUT:      2,   1.00000,   0.89486,    261.73,   0.11306,    266.03,         527.76,       -4981.09
# IN:  sokalsneath      OUT:      2,   1.00000,   0.97743,     52.80,   0.05896,     92.18,         144.98,       -4931.50
# IN:  yule             OUT:      8,   0.44309,   0.43844,   5571.15,   0.05871,     95.19,        5666.34,        -998.24


### 3) UMAP ``n_neighbors`` Parameter Study: (cell execution output denoted w/ "# " at start of line)


In [None]:
# ``n_neighbors`` is the parameter swept by this study; everything else is fixed.
metric = 'euclidean'
min_dist = 0.1
n_components = 3
min_samples = 5
min_cluster_size = 20
cluster_selection_epsilon = 0.0
q_plot = 0

# Sweep every integer neighborhood size from 2 through 25 inclusive.
for n_neighbors in range(2, 26):

    # One full UMAP + HDBSCAN pass; the function returns a 7-tuple of measures.
    (num_clusters_found, ratio_clustered,
     clustered_probabilities_mean, clustered_probabilities_SE_mean,
     clustered_outlier_scores_mean, clustered_outlier_scores_SE_mean,
     clustered_COMB_SE_mean) = draw_umap_hdbscan(
        n_neighbors=n_neighbors,
        min_dist=min_dist,
        n_components=n_components,
        metric=metric,
        min_samples=min_samples,
        min_cluster_size=min_cluster_size,
        cluster_selection_epsilon=cluster_selection_epsilon,
        q_plot=q_plot)

    # Fitness: a fixed penalty score when no cluster is found; otherwise the
    # inverse of the combined squared error, minus 1000 for every cluster the
    # count is away from the target of 7.
    if num_clusters_found == 0:
        fitness = -99999.0
    else:
        fitness = 10000.0 / abs(clustered_COMB_SE_mean + 1)
        if num_clusters_found != 7:
            fitness -= abs(num_clusters_found - 7) * 1000.0

    print("IN: {aa:5d}".format(aa=n_neighbors), "       OUT: ",
          "{a:5d},{b:10.5f},{c:10.5f},{d:10.2f},{e:10.5f},{f:10.2f},{g:15.2f},{h:15.2f}".format(
              a=num_clusters_found,
              b=ratio_clustered,
              c=clustered_probabilities_mean,
              d=clustered_probabilities_SE_mean,
              e=clustered_outlier_scores_mean,
              f=clustered_outlier_scores_SE_mean,
              g=clustered_COMB_SE_mean,
              h=fitness))
    
# IN:     2        OUT:      3,   0.68298,   0.67975,   3171.29,   0.04267,     65.92,        3237.22,       -3996.91
# IN:     3        OUT:     24,   0.93136,   0.87094,    949.95,   0.11706,    370.27,        1320.22,      -16992.43
# IN:     4        OUT:      8,   0.97655,   0.94979,    335.19,   0.10717,    253.17,         588.36,        -983.03
# IN:     5        OUT:     41,   0.73972,   0.68170,   2749.41,   0.09159,    216.34,        2965.75,      -33996.63
# IN:     6        OUT:      2,   1.00000,   0.99795,      6.76,   0.08990,    185.28,         192.04,       -4948.20
# IN:     7        OUT:     38,   0.74244,   0.69165,   2698.45,   0.08502,    172.03,        2870.48,      -30996.52
# IN:     8        OUT:     38,   0.75909,   0.71388,   2508.81,   0.08556,    165.93,        2674.74,      -30996.26
# IN:     9        OUT:     35,   0.72137,   0.68284,   2866.87,   0.08164,    150.22,        3017.09,      -27996.69
# IN:    10        OUT:     36,   0.78050,   0.73536,   2302.14,   0.08831,    178.14,        2480.28,      -28995.97
# IN:    11        OUT:     31,   0.78661,   0.73668,   2266.97,   0.10315,    224.94,        2491.90,      -23995.99
# IN:    12        OUT:     37,   0.76249,   0.71171,   2495.99,   0.09266,    186.62,        2682.61,      -29996.27
# IN:    13        OUT:      2,   1.00000,   0.99601,     10.16,   0.09438,    186.11,         196.27,       -4949.31
# IN:    14        OUT:     36,   0.77064,   0.72459,   2390.23,   0.08193,    144.16,        2534.39,      -28996.06
# IN:    15        OUT:      2,   1.00000,   0.99580,      8.66,   0.09813,    223.79,         232.44,       -4957.16
# IN:    16        OUT:      2,   1.00000,   0.99695,      6.70,   0.08003,    140.48,         147.18,       -4932.52
# IN:    17        OUT:      2,   1.00000,   0.99662,      6.74,   0.08452,    158.78,         165.52,       -4939.95
# IN:    18        OUT:     28,   0.84336,   0.80284,   1656.20,   0.08778,    162.08,        1818.28,      -20994.50
# IN:    19        OUT:     26,   0.84608,   0.81447,   1598.40,   0.09090,    168.68,        1767.08,      -18994.34
# IN:    20        OUT:     31,   0.82297,   0.77515,   1893.68,   0.09042,    183.49,        2077.18,      -23995.19
# IN:    21        OUT:     31,   0.80904,   0.76393,   2017.06,   0.08984,    174.09,        2191.14,      -23995.44
# IN:    22        OUT:     39,   0.76555,   0.71147,   2482.40,   0.09629,    198.38,        2680.78,      -31996.27
# IN:    23        OUT:     32,   0.78491,   0.74089,   2253.96,   0.09087,    175.86,        2429.82,      -24995.89
# IN:    24        OUT:     39,   0.76758,   0.71604,   2444.18,   0.08839,    173.37,        2617.55,      -31996.18
# IN:    25        OUT:     31,   0.81957,   0.77177,   1925.47,   0.09980,    210.03,        2135.50,      -23995.32


### 4) UMAP ``min_dist`` Parameter Study: (cell execution output denoted w/ "# " at start of line)



In [None]:
# ``min_dist`` is the parameter swept by this study; everything else is fixed.
metric = 'euclidean'
n_neighbors = 15
n_components = 3
min_samples = 5
min_cluster_size = 20
cluster_selection_epsilon = 0.0
q_plot = 0

for min_dist in [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99]:

    # One full UMAP + HDBSCAN pass; the function returns a 7-tuple of measures.
    (num_clusters_found, ratio_clustered,
     clustered_probabilities_mean, clustered_probabilities_SE_mean,
     clustered_outlier_scores_mean, clustered_outlier_scores_SE_mean,
     clustered_COMB_SE_mean) = draw_umap_hdbscan(
        n_neighbors=n_neighbors,
        min_dist=min_dist,
        n_components=n_components,
        metric=metric,
        min_samples=min_samples,
        min_cluster_size=min_cluster_size,
        cluster_selection_epsilon=cluster_selection_epsilon,
        q_plot=q_plot)

    # Fitness: a fixed penalty score when no cluster is found; otherwise the
    # inverse of the combined squared error, minus 1000 for every cluster the
    # count is away from the target of 7.
    if num_clusters_found == 0:
        fitness = -99999.0
    else:
        fitness = 10000.0 / abs(clustered_COMB_SE_mean + 1)
        if num_clusters_found != 7:
            fitness -= abs(num_clusters_found - 7) * 1000.0

    print("IN: {aa:5.2f}".format(aa=min_dist), "       OUT: ",
          "{a:5d},{b:10.5f},{c:10.5f},{d:10.2f},{e:10.5f},{f:10.2f},{g:15.2f},{h:15.2f}".format(
              a=num_clusters_found,
              b=ratio_clustered,
              c=clustered_probabilities_mean,
              d=clustered_probabilities_SE_mean,
              e=clustered_outlier_scores_mean,
              f=clustered_outlier_scores_SE_mean,
              g=clustered_COMB_SE_mean,
              h=fitness))

# IN:  0.00        OUT:     31,   0.85015,   0.79388,   1679.85,   0.11314,    279.11,        1958.96,      -23994.90
# IN:  0.10        OUT:      4,   1.00000,   0.99065,     20.70,   0.08741,    164.54,         185.24,       -2946.30
# IN:  0.20        OUT:     31,   0.77506,   0.73861,   2318.79,   0.07548,    123.02,        2441.80,      -23995.91
# IN:  0.30        OUT:     23,   0.81719,   0.79866,   1851.55,   0.06559,     93.86,        1945.41,      -15994.86
# IN:  0.40        OUT:     21,   0.84438,   0.81414,   1613.39,   0.06813,    102.72,        1716.11,      -13994.18
# IN:  0.50        OUT:     27,   0.78118,   0.75267,   2229.21,   0.05962,     74.86,        2304.07,      -19995.66
# IN:  0.60        OUT:     29,   0.73428,   0.70274,   2702.52,   0.05794,     72.69,        2775.21,      -21996.40
# IN:  0.70        OUT:      2,   0.98233,   0.97403,    187.91,   0.05697,     73.70,         261.61,       -4961.92
# IN:  0.80        OUT:      2,   0.95753,   0.95251,    428.99,   0.04625,     45.94,         474.93,       -4978.99
# IN:  0.90        OUT:     32,   0.66361,   0.62941,   3419.34,   0.05796,     80.33,        3499.68,      -24997.14
# IN:  0.99        OUT:     24,   0.65341,   0.63157,   3495.69,   0.04622,     53.54,        3549.22,      -16997.18


### 5) UMAP ``n_components`` Parameter Study: (cell execution output denoted w/ "# " at start of line)



In [None]:
# ``n_components`` is the parameter swept by this study; everything else is fixed.
metric = 'euclidean'
n_neighbors = 15
min_dist = 0.1
min_samples = 5
min_cluster_size = 20
cluster_selection_epsilon = 0.0
q_plot = 0

for n_components in [
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
    12, 14, 16, 18, 20,
    25, 30, 35, 40, 45, 50,
    60, 70, 80, 90, 100,
    200, 400, 600, 800, 1000,
]:

    # One full UMAP + HDBSCAN pass; the function returns a 7-tuple of measures.
    (num_clusters_found, ratio_clustered,
     clustered_probabilities_mean, clustered_probabilities_SE_mean,
     clustered_outlier_scores_mean, clustered_outlier_scores_SE_mean,
     clustered_COMB_SE_mean) = draw_umap_hdbscan(
        n_neighbors=n_neighbors,
        min_dist=min_dist,
        n_components=n_components,
        metric=metric,
        min_samples=min_samples,
        min_cluster_size=min_cluster_size,
        cluster_selection_epsilon=cluster_selection_epsilon,
        q_plot=q_plot)

    # Fitness: a fixed penalty score when no cluster is found; otherwise the
    # inverse of the combined squared error, minus 1000 for every cluster the
    # count is away from the target of 7.
    if num_clusters_found == 0:
        fitness = -99999.0
    else:
        fitness = 10000.0 / abs(clustered_COMB_SE_mean + 1)
        if num_clusters_found != 7:
            fitness -= abs(num_clusters_found - 7) * 1000.0

    print("IN: {aa:5d}".format(aa=n_components), "       OUT: ",
          "{a:5d},{b:10.5f},{c:10.5f},{d:10.2f},{e:10.5f},{f:10.2f},{g:15.2f},{h:15.2f}".format(
              a=num_clusters_found,
              b=ratio_clustered,
              c=clustered_probabilities_mean,
              d=clustered_probabilities_SE_mean,
              e=clustered_outlier_scores_mean,
              f=clustered_outlier_scores_SE_mean,
              g=clustered_COMB_SE_mean,
              h=fitness))

# IN:     1        OUT:     38,   0.86782,   0.80548,   1514.04,   0.10604,    277.08,        1791.12,      -30994.42
# IN:     2        OUT:      2,   1.00000,   0.99826,      2.38,   0.07791,    134.73,         137.12,       -4927.60
# IN:     3        OUT:      5,   0.99932,   0.98857,     24.94,   0.08393,    140.53,         165.47,       -1939.93
# IN:     4        OUT:     33,   0.75637,   0.71892,   2520.70,   0.08377,    155.82,        2676.51,      -25996.27
# IN:     5        OUT:     35,   0.76928,   0.72123,   2405.21,   0.08420,    148.66,        2553.87,      -27996.09
# IN:     6        OUT:     16,   0.91437,   0.89097,    901.07,   0.08585,    148.32,        1049.39,       -8990.48
# IN:     7        OUT:     32,   0.80394,   0.76208,   2053.87,   0.08795,    163.68,        2217.55,      -24995.49
# IN:     8        OUT:     33,   0.78423,   0.74806,   2236.50,   0.08418,    154.50,        2390.99,      -25995.82
# IN:     9        OUT:     27,   0.85559,   0.80993,   1557.41,   0.08856,    181.11,        1738.53,      -19994.25
# IN:    10        OUT:     33,   0.81448,   0.77232,   1947.35,   0.08761,    162.39,        2109.74,      -25995.26
# IN:    12        OUT:     35,   0.79681,   0.74592,   2154.60,   0.08818,    174.43,        2329.02,      -27995.71
# IN:    14        OUT:     31,   0.77880,   0.73363,   2321.10,   0.08651,    167.96,        2489.05,      -23995.98
# IN:    16        OUT:     31,   0.81346,   0.77697,   1953.02,   0.08532,    164.31,        2117.34,      -23995.28
# IN:    18        OUT:     25,   0.81855,   0.78496,   1878.80,   0.08958,    164.30,        2043.10,      -17995.11
# IN:    20        OUT:     38,   0.74788,   0.70437,   2610.82,   0.08190,    143.63,        2754.44,      -30996.37
# IN:    25        OUT:     26,   0.83724,   0.80324,   1700.46,   0.08517,    158.88,        1859.34,      -18994.62
# IN:    30        OUT:      2,   1.00000,   0.98879,     41.62,   0.09132,    197.80,         239.42,       -4958.41
# IN:    35        OUT:     36,   0.79001,   0.73336,   2240.91,   0.08938,    177.31,        2418.22,      -28995.87
# IN:    40        OUT:      4,   0.99422,   0.98356,     86.97,   0.08759,    164.13,         251.10,       -2960.33
# IN:    45        OUT:     28,   0.83078,   0.78960,   1774.77,   0.08661,    151.74,        1926.51,      -20994.81
# IN:    50        OUT:     30,   0.79341,   0.74507,   2176.72,   0.09511,    188.71,        2365.43,      -22995.77
# IN:    60        OUT:     30,   0.82093,   0.76782,   1917.21,   0.09581,    191.37,        2108.57,      -22995.26
# IN:    70        OUT:      2,   1.00000,   0.99902,      1.19,   0.09459,    196.02,         197.21,       -4949.55
# IN:    80        OUT:      2,   0.99524,   0.99386,     49.94,   0.08850,    181.62,         231.56,       -4957.00
# IN:    90        OUT:     35,   0.79443,   0.74730,   2173.94,   0.09290,    214.25,        2388.19,      -27995.81
# IN:   100        OUT:     34,   0.79205,   0.75053,   2181.83,   0.09199,    209.00,        2390.83,      -26995.82
# IN:   200        OUT:      2,   0.99524,   0.99419,     49.28,   0.09945,    223.63,         272.91,       -4963.49
# IN:   400        OUT:      3,   0.99524,   0.98445,     88.27,   0.09223,    187.68,         275.95,       -3963.89
# IN:   600        OUT:      3,   1.00000,   0.99157,     41.98,   0.09225,    196.88,         238.86,       -3958.31
# IN:   800        OUT:      3,   0.99524,   0.99179,     53.78,   0.09446,    178.01,         231.79,       -3957.04
# IN:  1000        OUT:      3,   0.99524,   0.99063,     57.55,   0.09131,    177.80,         235.35,       -3957.69


## HDBSCAN Hyperparameter Selection

HDBSCAN has several hyperparameters that control the resulting clustering. The three major ones are listed below:

 - ``min_samples``
 - ``min_cluster_size``
 - ``cluster_selection_epsilon``

### 6) HDBSCAN ``min_samples`` Parameter Study: (cell execution output denoted w/ "# " at start of line)

In [None]:
# min_samples sweep: every other UMAP / HDBSCAN setting is pinned to its
# baseline value while HDBSCAN's min_samples is varied.
metric = 'euclidean'
n_neighbors = 15
min_dist = 0.1
n_components = 3
# min_samples is the swept variable here (baseline value: 5)
min_cluster_size = 20
cluster_selection_epsilon = 0.0
q_plot = 0  # presumably switches plotting off -- confirm against draw_umap_hdbscan

for min_samples in [
        2, 3, 4, 5, 6, 7, 8, 9, 10,
        12, 14, 16, 18, 20,
        25, 30, 35, 40, 45, 50,
        60, 70, 80, 90, 100]:

    (num_clusters_found,
     ratio_clustered,
     clustered_probabilities_mean,
     clustered_probabilities_SE_mean,
     clustered_outlier_scores_mean,
     clustered_outlier_scores_SE_mean,
     clustered_COMB_SE_mean) = draw_umap_hdbscan(
        n_neighbors, min_dist, n_components, metric,
        min_samples, min_cluster_size, cluster_selection_epsilon, q_plot)

    # Base fitness is inversely proportional to the combined SE value.
    # Zero clusters gets a fixed sentinel; any cluster count other than 7
    # is penalised by 1000 per cluster of deviation.
    fitness = 10000.0 / abs(clustered_COMB_SE_mean + 1)
    if num_clusters_found == 0:
        fitness = -99999.0
    elif num_clusters_found != 7:
        fitness -= abs(num_clusters_found - 7) * 1000.0

    # One result row per swept value: the input, then the eight output columns.
    print("IN: {aa:5d}".format(aa=min_samples),
          "       OUT: ",
          "{a:5d},{b:10.5f},{c:10.5f},{d:10.2f},{e:10.5f},{f:10.2f},{g:15.2f},{h:15.2f}".format(
              a=num_clusters_found,
              b=ratio_clustered,
              c=clustered_probabilities_mean,
              d=clustered_probabilities_SE_mean,
              e=clustered_outlier_scores_mean,
              f=clustered_outlier_scores_SE_mean,
              g=clustered_COMB_SE_mean,
              h=fitness))

# IN:     2        OUT:      2,   1.00000,   0.99646,      7.86,   0.10358,    211.81,         219.67,       -4954.68
# IN:     3        OUT:     38,   0.76181,   0.71878,   2472.75,   0.09178,    169.18,        2641.92,      -30996.22
# IN:     4        OUT:      2,   1.00000,   0.99678,      6.19,   0.08666,    159.03,         165.22,       -4939.84
# IN:     5        OUT:     32,   0.75841,   0.71149,   2524.19,   0.09755,    193.65,        2717.84,      -24996.32
# IN:     6        OUT:      2,   1.00000,   0.99650,      6.22,   0.08794,    159.79,         166.01,       -4940.12
# IN:     7        OUT:     25,   0.84676,   0.80851,   1615.68,   0.08122,    151.27,        1766.95,      -17994.34
# IN:     8        OUT:      2,   1.00000,   0.99871,      1.76,   0.08326,    151.08,         152.83,       -4934.99
# IN:     9        OUT:      2,   1.00000,   0.99851,      2.29,   0.08001,    129.72,         132.01,       -4924.82
# IN:    10        OUT:     33,   0.77234,   0.72696,   2369.71,   0.08292,    148.48,        2518.18,      -25996.03
# IN:    12        OUT:      2,   1.00000,   0.99643,      6.32,   0.07819,    146.36,         152.67,       -4934.93
# IN:    14        OUT:      2,   1.00000,   0.99759,      4.11,   0.10572,    254.50,         258.61,       -4961.48
# IN:    16        OUT:     22,   0.84404,   0.81508,   1622.99,   0.08072,    165.60,        1788.59,      -14994.41
# IN:    18        OUT:      2,   1.00000,   0.99674,      6.79,   0.08694,    179.54,         186.33,       -4946.62
# IN:    20        OUT:      2,   1.00000,   0.99719,      5.36,   0.09317,    214.48,         219.83,       -4954.72
# IN:    25        OUT:      2,   1.00000,   0.99621,      6.90,   0.07124,    130.60,         137.51,       -4927.80
# IN:    30        OUT:      2,   1.00000,   0.99771,      2.49,   0.11451,    303.93,         306.42,       -4967.47
# IN:    35        OUT:     14,   0.89059,   0.82817,   1253.37,   0.08846,    209.14,        1462.50,       -6993.17
# IN:    40        OUT:      3,   1.00000,   0.99428,     11.86,   0.08330,    159.22,         171.08,       -3941.89
# IN:    45        OUT:      2,   1.00000,   0.99524,      8.94,   0.10240,    273.37,         282.30,       -4964.70
# IN:    50        OUT:     13,   0.90112,   0.80963,   1273.99,   0.11705,    329.98,        1603.97,       -5993.77
# IN:    60        OUT:      2,   1.00000,   0.99019,     22.33,   0.08880,    225.01,         247.34,       -4959.73
# IN:    70        OUT:     12,   0.86306,   0.82269,   1456.62,   0.05960,    113.00,        1569.63,       -4993.63
# IN:    80        OUT:      2,   1.00000,   0.99516,      9.16,   0.10916,    313.10,         322.26,       -4969.07
# IN:    90        OUT:     12,   0.88481,   0.83380,   1282.69,   0.06406,    146.34,        1429.03,       -4993.01
# IN:   100        OUT:      2,   1.00000,   0.99583,      9.62,   0.10695,    284.47,         294.09,       -4966.11
        

### 7) HDBSCAN ``min_cluster_size`` Parameter Study: (cell execution output denoted w/ "# " at start of line)


In [None]:
# min_cluster_size sweep: every other UMAP / HDBSCAN setting is pinned to its
# baseline value while HDBSCAN's min_cluster_size is varied.
metric = 'euclidean'
n_neighbors = 15
min_dist = 0.1
n_components = 3
min_samples = 5
# min_cluster_size is the swept variable here (baseline value: 20)
cluster_selection_epsilon = 0.0
q_plot = 0  # presumably switches plotting off -- confirm against draw_umap_hdbscan

for min_cluster_size in [
        1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
        12, 14, 16, 18, 20,
        25, 30, 35, 40, 45, 50,
        60, 70, 80, 90, 100]:

    (num_clusters_found,
     ratio_clustered,
     clustered_probabilities_mean,
     clustered_probabilities_SE_mean,
     clustered_outlier_scores_mean,
     clustered_outlier_scores_SE_mean,
     clustered_COMB_SE_mean) = draw_umap_hdbscan(
        n_neighbors, min_dist, n_components, metric,
        min_samples, min_cluster_size, cluster_selection_epsilon, q_plot)

    # Base fitness is inversely proportional to the combined SE value.
    # Zero clusters gets a fixed sentinel; any cluster count other than 7
    # is penalised by 1000 per cluster of deviation.
    fitness = 10000.0 / abs(clustered_COMB_SE_mean + 1)
    if num_clusters_found == 0:
        fitness = -99999.0
    elif num_clusters_found != 7:
        fitness -= abs(num_clusters_found - 7) * 1000.0

    # One result row per swept value: the input, then the eight output columns.
    print("IN: {aa:5d}".format(aa=min_cluster_size),
          "       OUT: ",
          "{a:5d},{b:10.5f},{c:10.5f},{d:10.2f},{e:10.5f},{f:10.2f},{g:15.2f},{h:15.2f}".format(
              a=num_clusters_found,
              b=ratio_clustered,
              c=clustered_probabilities_mean,
              d=clustered_probabilities_SE_mean,
              e=clustered_outlier_scores_mean,
              f=clustered_outlier_scores_SE_mean,
              g=clustered_COMB_SE_mean,
              h=fitness))

# IN:     1        OUT:    284,   0.92253,   0.82269,   1220.24,   0.11342,    478.63,        1698.86,     -276994.12
# IN:     2        OUT:    225,   0.91845,   0.82678,   1222.10,   0.11382,    464.49,        1686.59,     -217994.07
# IN:     3        OUT:    203,   0.87903,   0.77022,   1707.91,   0.13056,    546.55,        2254.45,     -195995.57
# IN:     4        OUT:    190,   0.82501,   0.70604,   2294.26,   0.13798,    578.65,        2872.90,     -182996.52
# IN:     5        OUT:    162,   0.81583,   0.69854,   2326.61,   0.13584,    516.10,        2842.71,     -154996.48
# IN:     6        OUT:    133,   0.80836,   0.69739,   2370.40,   0.13545,    499.29,        2869.69,     -125996.52
# IN:     7        OUT:    116,   0.81006,   0.70844,   2274.49,   0.12572,    415.59,        2690.08,     -108996.28
# IN:     8        OUT:    102,   0.80598,   0.71908,   2258.54,   0.11343,    359.83,        2618.37,      -94996.18
# IN:     9        OUT:     92,   0.75637,   0.66971,   2752.99,   0.11729,    379.18,        3132.16,      -84996.81
# IN:    10        OUT:     92,   0.73089,   0.63748,   3024.13,   0.11890,    384.30,        3408.43,      -84997.07
# IN:    12        OUT:      3,   1.00000,   0.99785,      5.17,   0.10712,    287.48,         292.65,       -3965.95
# IN:    14        OUT:     60,   0.71594,   0.65722,   3033.83,   0.09572,    260.14,        3293.97,      -52996.97
# IN:    16        OUT:      3,   1.00000,   0.99655,      6.33,   0.09756,    209.10,         215.43,       -3953.80
# IN:    18        OUT:      2,   1.00000,   0.99816,      2.63,   0.08475,    162.47,         165.11,       -4939.80
# IN:    20        OUT:     22,   0.87360,   0.84957,   1311.92,   0.07622,    127.30,        1439.22,      -14993.06
# IN:    25        OUT:      2,   1.00000,   0.99507,     14.94,   0.09951,    194.56,         209.50,       -4952.49
# IN:    30        OUT:      5,   0.96092,   0.94858,    414.57,   0.08421,    152.46,         567.03,       -1982.40
# IN:    35        OUT:     16,   0.79613,   0.72731,   2211.30,   0.10857,    230.72,        2442.02,       -8995.91
# IN:    40        OUT:      2,   1.00000,   0.99339,     10.33,   0.12231,    275.26,         285.59,       -4965.11
# IN:    45        OUT:      2,   1.00000,   0.99072,     19.79,   0.10719,    223.30,         243.09,       -4959.03
# IN:    50        OUT:      2,   1.00000,   0.98997,     24.32,   0.12004,    267.84,         292.16,       -4965.89
# IN:    60        OUT:     12,   0.72851,   0.64811,   2941.08,   0.11898,    272.02,        3213.10,       -4996.89
# IN:    70        OUT:      2,   1.00000,   0.98201,     64.82,   0.13701,    366.22,         431.04,       -4976.85
# IN:    80        OUT:      2,   1.00000,   0.98673,     28.62,   0.12568,    292.51,         321.13,       -4968.96
# IN:    90        OUT:     11,   0.68603,   0.58804,   3450.37,   0.13517,    362.45,        3812.82,       -3997.38
# IN:   100        OUT:     10,   0.65749,   0.56095,   3729.03,   0.14011,    374.69,        4103.72,       -2997.56
        

### 8) HDBSCAN ``cluster_selection_epsilon`` Parameter Study: (cell execution output denoted w/ "# " at start of line)

In [None]:
# cluster_selection_epsilon sweep: every other UMAP / HDBSCAN setting is
# pinned to its baseline value while HDBSCAN's cluster_selection_epsilon is
# varied.
metric = 'euclidean'
n_neighbors = 15
min_dist = 0.1
n_components = 3
min_samples = 5
min_cluster_size = 20
# cluster_selection_epsilon is the swept variable here (baseline value: 0.0)
q_plot = 0  # presumably switches plotting off -- confirm against draw_umap_hdbscan

for cluster_selection_epsilon in [
        0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
        1.0, 1.2, 1.4, 1.6, 1.8,
        2.0, 3.0, 4.0]:

    (num_clusters_found,
     ratio_clustered,
     clustered_probabilities_mean,
     clustered_probabilities_SE_mean,
     clustered_outlier_scores_mean,
     clustered_outlier_scores_SE_mean,
     clustered_COMB_SE_mean) = draw_umap_hdbscan(
        n_neighbors, min_dist, n_components, metric,
        min_samples, min_cluster_size, cluster_selection_epsilon, q_plot)

    # Base fitness is inversely proportional to the combined SE value.
    # Zero clusters gets a fixed sentinel; any cluster count other than 7
    # is penalised by 1000 per cluster of deviation.
    fitness = 10000.0 / abs(clustered_COMB_SE_mean + 1)
    if num_clusters_found == 0:
        fitness = -99999.0
    elif num_clusters_found != 7:
        fitness -= abs(num_clusters_found - 7) * 1000.0

    # One result row per swept value: the input, then the eight output columns.
    print("IN: {aa:5.2f}".format(aa=cluster_selection_epsilon),
          "       OUT: ",
          "{a:5d},{b:10.5f},{c:10.5f},{d:10.2f},{e:10.5f},{f:10.2f},{g:15.2f},{h:15.2f}".format(
              a=num_clusters_found,
              b=ratio_clustered,
              c=clustered_probabilities_mean,
              d=clustered_probabilities_SE_mean,
              e=clustered_outlier_scores_mean,
              f=clustered_outlier_scores_SE_mean,
              g=clustered_COMB_SE_mean,
              h=fitness))

# IN:  0.00        OUT:      2,   1.00000,   0.99648,      6.43,   0.07781,    132.44,         138.87,       -4928.51
# IN:  0.10        OUT:      2,   1.00000,   0.99406,     13.98,   0.09376,    185.49,         199.47,       -4950.12
# IN:  0.20        OUT:      2,   1.00000,   0.99836,      2.42,   0.09241,    181.93,         184.35,       -4946.05
# IN:  0.30        OUT:     30,   0.82603,   0.77838,   1845.28,   0.09291,    177.02,        2022.30,      -22995.06
# IN:  0.40        OUT:     16,   0.91777,   0.89606,    870.55,   0.08301,    144.69,        1015.24,       -8990.16
# IN:  0.50        OUT:      2,   1.00000,   0.99709,      6.20,   0.08720,    168.61,         174.81,       -4943.12
# IN:  0.60        OUT:     24,   0.83350,   0.80123,   1729.26,   0.09202,    185.33,        1914.59,      -16994.78
# IN:  0.70        OUT:      2,   1.00000,   0.99912,      0.97,   0.08788,    163.32,         164.29,       -4939.50
# IN:  0.80        OUT:      2,   1.00000,   0.99852,      2.08,   0.08671,    152.77,         154.85,       -4935.83
# IN:  0.90        OUT:      4,   0.97248,   0.96980,    280.24,   0.08984,    187.42,         467.67,       -2978.66
# IN:  1.00        OUT:      2,   1.00000,   0.99837,      2.77,   0.09134,    181.63,         184.41,       -4946.06
# IN:  1.20        OUT:      2,   1.00000,   0.99514,     10.44,   0.09098,    181.05,         191.49,       -4948.05
# IN:  1.40        OUT:      2,   1.00000,   0.99931,      0.85,   0.09268,    176.01,         176.86,       -4943.77
# IN:  1.60        OUT:      2,   0.99490,   0.99439,     51.65,   0.08989,    164.02,         215.67,       -4953.85
# IN:  1.80        OUT:      2,   0.98369,   0.98363,    163.12,   0.08231,    156.84,         319.96,       -4968.84
# IN:  2.00        OUT:      2,   0.98063,   0.97851,    201.76,   0.09626,    193.44,         395.20,       -4974.76
# IN:  3.00        OUT:      2,   1.00000,   0.99614,      8.12,   0.08323,    151.53,         159.66,       -4937.76
# IN:  4.00        OUT:      2,   1.00000,   0.99817,      2.68,   0.08121,    145.75,         148.43,       -4933.08


### Define function for HDBSCAN calculation.

In [None]:
def fff( cluster_selection_epsilon, min_samples, min_cluster_size, u_data ):
    """Cluster ``u_data`` with HDBSCAN and return a score to be minimised.

    The score is the sum of the two mean squared-error terms (outlier scores
    and membership probabilities, both scaled to 0..100) plus a penalty of
    1000 for every cluster the result is away from 7.

    Besides returning the score, the per-run statistics are published through
    module-level globals (``num_clusters_found``, ``ratio_clustered``,
    ``clusterer_probabilities_sum``, ``clusterer_probabilities_sum_SE``,
    ``clusterer_outlier_scores_sum``, ``clusterer_outlier_scores_sum_SE`` and
    ``fit_HDBSCAN``) so that the caller can read them after the call.

    Parameters
    ----------
    cluster_selection_epsilon : value convertible to float
        Passed to HDBSCAN as ``cluster_selection_epsilon``.
    min_samples : value convertible to int
        Passed to HDBSCAN as ``min_samples``.
    min_cluster_size : value convertible to int
        Passed to HDBSCAN as ``min_cluster_size``.
    u_data : array with a ``shape`` attribute
        Points to cluster; ``u_data.shape[0]`` is used as the point count.
        Presumably the UMAP embedding -- confirm against the caller.

    Returns
    -------
    The combined score (lower means fewer errors and a cluster count closer
    to 7).
    """

    global num_clusters_found
    global ratio_clustered
    global clusterer_probabilities_sum
    global clusterer_probabilities_sum_SE
    global clusterer_outlier_scores_sum
    global clusterer_outlier_scores_sum_SE
    global fit_HDBSCAN

    cluster_selection_epsilon = float( cluster_selection_epsilon )
    min_samples = int( min_samples )
    min_cluster_size = int( min_cluster_size )

    # Keyword arguments are required here: HDBSCAN's positional order is
    # (min_cluster_size, min_samples, ...), so passing
    # (min_samples, min_cluster_size, ...) positionally swaps the two values.
    # The model is fitted once; labels_ holds what fit_predict would return.
    clusterer = hdbscan.HDBSCAN(
        min_cluster_size=min_cluster_size,
        min_samples=min_samples,
        cluster_selection_epsilon=cluster_selection_epsilon ).fit( u_data )

    fit_HDBSCAN = clusterer
    hdbscan_labels = clusterer.labels_

    n_points = u_data.shape[0]

    # Noise points are labelled -1, so max() + 1 is the cluster count
    # (0 when every point is noise).
    num_clusters_found = hdbscan_labels.max() + 1

    # Mean outlier score, and mean squared outlier score on a 0..100 scale
    # (the ideal value being 0).
    clusterer_outlier_scores_sum = clusterer.outlier_scores_.sum() / n_points
    clusterer_outlier_scores_sum_SE = np.square( ( 100.0 * clusterer.outlier_scores_ ) - 0.0 ).sum() / n_points

    # Mean membership probability, and mean squared shortfall from 100
    # on a 0..100 scale.
    clusterer_probabilities_sum = clusterer.probabilities_.sum() / n_points
    clusterer_probabilities_sum_SE = np.square( 100.0 - ( 100.0 * clusterer.probabilities_ ) ).sum() / n_points

    # Fraction of points assigned to any cluster (label >= 0).
    clustered = (hdbscan_labels >= 0)
    ratio_clustered = np.sum(clustered) / n_points

    cse_score_tmp_minimize = ( clusterer_outlier_scores_sum_SE + clusterer_probabilities_sum_SE ) + ( 1000.0 * abs(num_clusters_found - 7))
    print()
    print("cse_tmp = ", cluster_selection_epsilon, "cse_score_tmp_minimize = ", cse_score_tmp_minimize)

    print ( "{a:5d},{b:10.5f},{c:10.5f},{d:10.2f},{e:10.5f},{f:10.2f},{g:10.2f}".format(
            a = num_clusters_found,
            b = ratio_clustered,
            c = clusterer_probabilities_sum,
            d = clusterer_probabilities_sum_SE,
            e = clusterer_outlier_scores_sum,
            f = clusterer_outlier_scores_sum_SE,
            g = clusterer_probabilities_sum_SE + clusterer_outlier_scores_sum_SE ))

    return cse_score_tmp_minimize


### Define function for UMAP & HDBSCAN calculation from Hybrid-NEAT.

In [None]:
# Cache bootstrap for the UMAP / HDBSCAN / NEAT result dictionaries.
# Each cache file name embeds the current Unix timestamp (whole seconds), so
# an existing file is only picked up if one with exactly that name is already
# on disk; otherwise every cache starts out as an empty defaultdict.
s_unix_timestamp = str( int( time.time() ) )


def _load_cache_dict( cache_path, opened_message ):
    """Return the object pickled at ``cache_path``, or an empty defaultdict if no such file exists."""
    if not path.isfile( cache_path ):
        return defaultdict()
    # NOTE(review): pickle.load can execute arbitrary code -- only load cache
    # files that this notebook wrote itself.
    with open( cache_path , 'rb') as cache_file:
        cached = pickle.load( cache_file )
    print( opened_message, cache_path )
    return cached


filename_UMAP = 'UMAP_dict_' + s_unix_timestamp + '.pkl'
UMAP_dict = _load_cache_dict( filename_UMAP, "*** Opened existing UMAP_dict file: " )

filename_HDBSCAN = 'HDBSCAN_dict_' + s_unix_timestamp + '.pkl'
HDBSCAN_dict = _load_cache_dict( filename_HDBSCAN, "*** Opened existing HDBSCAN_dict file: " )

filename_NEAT = 'NEAT_dict_' + s_unix_timestamp + '.pkl'
NEAT_dict = _load_cache_dict( filename_NEAT, "*** Opened existing NEAT_dict file: " )

def draw_umap_hdbscan_fff(n_neighbors=15, min_dist=0.1, n_components=2, metric='euclidean', min_samples=10, min_cluster_size=500, cluster_selection_epsilon = 0.0, q_plot = 1):

    start_time_umap = time.time()
    
    i_UMAP_flag = 0
    if ( n_neighbors, min_dist, n_components, metric ) not in UMAP_dict:
    
        fit_UMAP = umap.UMAP(
            n_neighbors=n_neighbors,
            min_dist=min_dist,
            n_components=n_components,
            metric=metric )
        u_data = fit_UMAP.fit_transform(data);
        
        UMAP_dict[ ( n_neighbors, min_dist, n_components, metric ) ] = ( u_data, fit_UMAP )
        # write the latest UMAP dict
        with open( filename_UMAP , 'wb') as f1:
            pickle.dump( UMAP_dict , f1 )
        print( "*** Flushed contents to UMAP_dict file: ", filename_UMAP )
        
    else:
        i_UMAP_flag = 1
        u_data , fit_UMAP = UMAP_dict[ ( n_neighbors, min_dist, n_components, metric ) ]

    end_time_umap = time.time()
    print()
    print("UMAP: --- %s seconds ---" % (end_time_umap - start_time_umap))
    print()
    
    start_time_hdbscan = time.time()

    minimum_min = 987654321.0
    minimum_min_score = 987654321.0
    
    i_HDBSCAN_flag = 0
    if ( n_neighbors, min_dist, n_components, metric, min_dist, min_samples, min_cluster_size ) not in HDBSCAN_dict:
        
        cse_div10 = min_dist / 10.0
        cse_HIGH = ( min( 100.0, cse_div10*100.0 ) - min_dist ) / 10.0
        xl = min_dist
        xh = min_dist + cse_HIGH*10.0
        print( "xl = ", xl, "  xh = ", xh )

        xl_score = fff( xl, min_samples, min_cluster_size, u_data )
        
        cluster_selection_epsilon_xl       = xl
        num_clusters_found_xl              = num_clusters_found
        ratio_clustered_xl                 = ratio_clustered
        clusterer_probabilities_sum_xl     = clusterer_probabilities_sum
        clusterer_probabilities_sum_SE_xl  = clusterer_probabilities_sum_SE
        clusterer_outlier_scores_sum_xl    = clusterer_outlier_scores_sum
        clusterer_outlier_scores_sum_SE_xl = clusterer_outlier_scores_sum_SE
        fit_HDBSCAN_xl                     = fit_HDBSCAN
        if num_clusters_found <= 0:
            end_time_hdbscan = time.time()
            print()
            print("HDBSCAN: --- %s seconds ---" % (end_time_hdbscan - start_time_hdbscan))
            print()
        
            HDBSCAN_dict[ ( n_neighbors, min_dist, n_components, metric, min_dist, min_samples, min_cluster_size ) ] = ( 
                cluster_selection_epsilon_xl,
                num_clusters_found_xl,
                ratio_clustered_xl,
                clusterer_probabilities_sum_xl,
                clusterer_probabilities_sum_SE_xl,
                clusterer_outlier_scores_sum_xl,
                clusterer_outlier_scores_sum_SE_xl,
                fit_HDBSCAN_xl )

            # write the latest HDBSCAN dict
            with open( filename_HDBSCAN , 'wb') as f1a:
                pickle.dump( HDBSCAN_dict , f1a )
            print( "*** Flushed contents to HDBSCAN_dict file: ", filename_HDBSCAN )
        
            return ( num_clusters_found_xl,
                    ratio_clustered_xl,
                    clusterer_probabilities_sum_xl,
                    clusterer_probabilities_sum_SE_xl,
                    clusterer_outlier_scores_sum_xl,
                    clusterer_outlier_scores_sum_SE_xl,
                    clusterer_probabilities_sum_SE_xl + clusterer_outlier_scores_sum_SE_xl,
                    cluster_selection_epsilon_xl,
                    fit_UMAP,
                    fit_HDBSCAN_xl )

        xh_score = fff( xh, min_samples, min_cluster_size, u_data )

        cluster_selection_epsilon_xh       = xh
        num_clusters_found_xh              = num_clusters_found
        ratio_clustered_xh                 = ratio_clustered
        clusterer_probabilities_sum_xh     = clusterer_probabilities_sum
        clusterer_probabilities_sum_SE_xh  = clusterer_probabilities_sum_SE
        clusterer_outlier_scores_sum_xh    = clusterer_outlier_scores_sum
        clusterer_outlier_scores_sum_SE_xh = clusterer_outlier_scores_sum_SE
        fit_HDBSCAN_xh                     = fit_HDBSCAN
        if ( num_clusters_found <= 0 ) or ( abs( xl_score - xh_score ) < 0.1 ):
            end_time_hdbscan = time.time()
            print()
            print("HDBSCAN: --- %s seconds ---" % (end_time_hdbscan - start_time_hdbscan))
            print()
        
            HDBSCAN_dict[ ( n_neighbors, min_dist, n_components, metric, min_dist, min_samples, min_cluster_size ) ] = ( 
                cluster_selection_epsilon_xh, 
                num_clusters_found_xh,
                ratio_clustered_xh,
                clusterer_probabilities_sum_xh,
                clusterer_probabilities_sum_SE_xh,
                clusterer_outlier_scores_sum_xh,
                clusterer_outlier_scores_sum_SE_xh,
                fit_HDBSCAN_xh )

            # write the latest HDBSCAN dict
            with open( filename_HDBSCAN , 'wb') as f1a:
                pickle.dump( HDBSCAN_dict , f1a )
            print( "*** Flushed contents to HDBSCAN_dict file: ", filename_HDBSCAN )
        
            return ( num_clusters_found_xh,
                    ratio_clustered_xh,
                    clusterer_probabilities_sum_xh,
                    clusterer_probabilities_sum_SE_xh,
                    clusterer_outlier_scores_sum_xh,
                    clusterer_outlier_scores_sum_SE_xh,
                    clusterer_probabilities_sum_SE_xh + clusterer_outlier_scores_sum_SE_xh,
                    cluster_selection_epsilon_xh,
                    fit_UMAP,
                    fit_HDBSCAN_xh )

        # set low & high by their corresponding relative score
        if xl_score < xh_score:
            low = xl
            low_score = xl_score
            high = xh
            high_score = xh_score
            
            cluster_selection_epsilon_low       = cluster_selection_epsilon_xl
            num_clusters_found_low              = num_clusters_found_xl
            ratio_clustered_low                 = ratio_clustered_xl
            clusterer_probabilities_sum_low     = clusterer_probabilities_sum_xl
            clusterer_probabilities_sum_SE_low  = clusterer_probabilities_sum_SE_xl
            clusterer_outlier_scores_sum_low    = clusterer_outlier_scores_sum_xl
            clusterer_outlier_scores_sum_SE_low = clusterer_outlier_scores_sum_SE_xl
            fit_HDBSCAN_low                     = fit_HDBSCAN_xl
            
            cluster_selection_epsilon_high       = cluster_selection_epsilon_xh
            num_clusters_found_high              = num_clusters_found_xh
            ratio_clustered_high                 = ratio_clustered_xh
            clusterer_probabilities_sum_high     = clusterer_probabilities_sum_xh
            clusterer_probabilities_sum_SE_high  = clusterer_probabilities_sum_SE_xh
            clusterer_outlier_scores_sum_high    = clusterer_outlier_scores_sum_xh
            clusterer_outlier_scores_sum_SE_high = clusterer_outlier_scores_sum_SE_xh
            fit_HDBSCAN_high                     = fit_HDBSCAN_xh
            
        else:
            high = xl
            high_score = xl_score
            low = xh
            low_score = xh_score
            
            cluster_selection_epsilon_low       = cluster_selection_epsilon_xh
            num_clusters_found_low              = num_clusters_found_xh
            ratio_clustered_low                 = ratio_clustered_xh
            clusterer_probabilities_sum_low     = clusterer_probabilities_sum_xh
            clusterer_probabilities_sum_SE_low  = clusterer_probabilities_sum_SE_xh
            clusterer_outlier_scores_sum_low    = clusterer_outlier_scores_sum_xh
            clusterer_outlier_scores_sum_SE_low = clusterer_outlier_scores_sum_SE_xh
            fit_HDBSCAN_low                     = fit_HDBSCAN_xh
            
            cluster_selection_epsilon_high       = cluster_selection_epsilon_xl
            num_clusters_found_high              = num_clusters_found_xl
            ratio_clustered_high                 = ratio_clustered_xl
            clusterer_probabilities_sum_high     = clusterer_probabilities_sum_xl
            clusterer_probabilities_sum_SE_high  = clusterer_probabilities_sum_SE_xl
            clusterer_outlier_scores_sum_high    = clusterer_outlier_scores_sum_xl
            clusterer_outlier_scores_sum_SE_high = clusterer_outlier_scores_sum_SE_xl
            fit_HDBSCAN_high                     = fit_HDBSCAN_xl

        # store original low and associated score
        low_orig = low
        low_score_orig = low_score
        print()
        print("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
        print("low_orig = ", low_orig, "  low_score_orig = ", low_score_orig)

        # store original high and associated score
        high_orig = high
        high_score_orig = high_score
        print("high_orig = ", high_orig, "  high_score_orig = ", high_score_orig)
        print("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
        print()

        i_accum_clock = 0
        i_accum_clock_limit = 8

        if ( low_score < minimum_min_score ):
            minimum_min = low_orig
            minimum_min_score = low_score_orig
            print()
            print()
            print("==================================================================")
            print("---Best solution so far: minimum_min_score = ", minimum_min_score, minimum_min )

            num_clusters_found_MIN              = num_clusters_found_low
            ratio_clustered_MIN                 = ratio_clustered_low
            clusterer_probabilities_sum_MIN     = clusterer_probabilities_sum_low
            clusterer_probabilities_sum_SE_MIN  = clusterer_probabilities_sum_SE_low
            clusterer_outlier_scores_sum_MIN    = clusterer_outlier_scores_sum_low
            clusterer_outlier_scores_sum_SE_MIN = clusterer_outlier_scores_sum_SE_low
            cluster_selection_epsilon_MIN       = minimum_min
            fit_UMAP_MIN                        = fit_UMAP
            fit_HDBSCAN_MIN                     = fit_HDBSCAN_low

            print()
            print("OUT: num_clusters_found_MIN              = ", num_clusters_found_MIN )
            print("OUT: ratio_clustered_MIN                 = ", ratio_clustered_MIN )
            print("OUT: clusterer_probabilities_sum_MIN     = ", clusterer_probabilities_sum_MIN )
            print("OUT: clusterer_probabilities_sum_SE_MIN  = ", clusterer_probabilities_sum_SE_MIN )
            print("OUT: clusterer_outlier_scores_sum_MIN    = ", clusterer_outlier_scores_sum_MIN )
            print("OUT: clusterer_outlier_scores_sum_SE_MIN = ", clusterer_outlier_scores_sum_SE_MIN )
            print("OUT: clustered_COMB_sum_SE_MIN           = ", clusterer_probabilities_sum_SE_MIN + clusterer_outlier_scores_sum_SE_MIN )
            print("OUT: cluster_selection_epsilon_MIN       = ", cluster_selection_epsilon_MIN )
            print("==================================================================")
            print()
        
        # start bisection to find minimum over interval between low_orig & high_orig
        for i in range(20):
            
            midpoint = (low + high) / 2.0
            
            midpoint_score = fff( midpoint , min_samples, min_cluster_size, u_data )
            
            cluster_selection_epsilon_mid       = midpoint
            num_clusters_found_mid              = num_clusters_found
            ratio_clustered_mid                 = ratio_clustered
            clusterer_probabilities_sum_mid     = clusterer_probabilities_sum
            clusterer_probabilities_sum_SE_mid  = clusterer_probabilities_sum_SE
            clusterer_outlier_scores_sum_mid    = clusterer_outlier_scores_sum
            clusterer_outlier_scores_sum_SE_mid = clusterer_outlier_scores_sum_SE
            fit_HDBSCAN_mid                     = fit_HDBSCAN

            # Check for better solution w/ low_score
            if ( low_score < minimum_min_score ) and ( low_score < midpoint_score ):
                minimum_min = low
                minimum_min_score = low_score
                print()
                print()
                print("==================================================================")
                print("---Best solution so far: minimum_min_score = ", minimum_min_score, minimum_min )

                num_clusters_found_MIN              = num_clusters_found_low
                ratio_clustered_MIN                 = ratio_clustered_low
                clusterer_probabilities_sum_MIN     = clusterer_probabilities_sum_low
                clusterer_probabilities_sum_SE_MIN  = clusterer_probabilities_sum_SE_low
                clusterer_outlier_scores_sum_MIN    = clusterer_outlier_scores_sum_low
                clusterer_outlier_scores_sum_SE_MIN = clusterer_outlier_scores_sum_SE_low
                cluster_selection_epsilon_MIN       = minimum_min
                fit_UMAP_MIN                        = fit_UMAP
                fit_HDBSCAN_MIN                     = fit_HDBSCAN_low
                
                print()
                print("OUT: num_clusters_found_MIN              = ", num_clusters_found_MIN )
                print("OUT: ratio_clustered_MIN                 = ", ratio_clustered_MIN )
                print("OUT: clusterer_probabilities_sum_MIN     = ", clusterer_probabilities_sum_MIN )
                print("OUT: clusterer_probabilities_sum_SE_MIN  = ", clusterer_probabilities_sum_SE_MIN )
                print("OUT: clusterer_outlier_scores_sum_MIN    = ", clusterer_outlier_scores_sum_MIN )
                print("OUT: clusterer_outlier_scores_sum_SE_MIN = ", clusterer_outlier_scores_sum_SE_MIN )
                print("OUT: clustered_COMB_sum_SE_MIN           = ", clusterer_probabilities_sum_SE_MIN + clusterer_outlier_scores_sum_SE_MIN )
                print("OUT: cluster_selection_epsilon_MIN       = ", cluster_selection_epsilon_MIN )
                print("==================================================================")
                print()

                i_accum_clock = 0

            # Check for better solution w/ midpoint_score
            elif ( midpoint_score < minimum_min_score ) and ( midpoint_score < low_score ):
                minimum_min = midpoint
                minimum_min_score = midpoint_score
                print()
                print()
                print("==================================================================")
                print("---Best solution so far: minimum_min_score = ", minimum_min_score, minimum_min )

                num_clusters_found_MIN              = num_clusters_found_mid
                ratio_clustered_MIN                 = ratio_clustered_mid
                clusterer_probabilities_sum_MIN     = clusterer_probabilities_sum_mid
                clusterer_probabilities_sum_SE_MIN  = clusterer_probabilities_sum_SE_mid
                clusterer_outlier_scores_sum_MIN    = clusterer_outlier_scores_sum_mid
                clusterer_outlier_scores_sum_SE_MIN = clusterer_outlier_scores_sum_SE_mid
                cluster_selection_epsilon_MIN       = minimum_min
                fit_UMAP_MIN                        = fit_UMAP
                fit_HDBSCAN_MIN                     = fit_HDBSCAN_mid
                
                print()
                print("OUT: num_clusters_found_MIN              = ", num_clusters_found_MIN )
                print("OUT: ratio_clustered_MIN                 = ", ratio_clustered_MIN )
                print("OUT: clusterer_probabilities_sum_MIN     = ", clusterer_probabilities_sum_MIN )
                print("OUT: clusterer_probabilities_sum_SE_MIN  = ", clusterer_probabilities_sum_SE_MIN )
                print("OUT: clusterer_outlier_scores_sum_MIN    = ", clusterer_outlier_scores_sum_MIN )
                print("OUT: clusterer_outlier_scores_sum_SE_MIN = ", clusterer_outlier_scores_sum_SE_MIN )
                print("OUT: clustered_COMB_sum_SE_MIN           = ", clusterer_probabilities_sum_SE_MIN + clusterer_outlier_scores_sum_SE_MIN )
                print("OUT: cluster_selection_epsilon_MIN       = ", cluster_selection_epsilon_MIN )
                print("==================================================================")
                print()

                i_accum_clock = 0

            # Check for best solution time-out
            else:
                i_accum_clock += 1
                if i_accum_clock >= i_accum_clock_limit:
                    print()
                    print("*** Exceeded Bisection Update Limit ***")
                    print()
                    break

            print()
            print("-------------------------------------------------------------------------")
            print("i = ", i, "  low = ", low, "  low_score = ", low_score)
            print("i = ", i, "  midpoint = ", midpoint, "  midpoint_score = ", midpoint_score)
            print("i = ", i, "  high = ", high, "  high_score = ", high_score)
            print("-------------------------------------------------------------------------")
            print()

            # Check for case "A": midpoint_score > low_score & midpoint_score > high_score
            if ( midpoint_score > low_score ) and ( midpoint_score > high_score ):
                #print("Found case 'A': midpoint_score > low_score & midpoint_score > high_score")

                # set new high equal to midpoint
                high = midpoint
                high_score = midpoint_score

                num_clusters_found_high              = num_clusters_found_mid
                ratio_clustered_high                 = ratio_clustered_mid
                clusterer_probabilities_sum_high     = clusterer_probabilities_sum_mid
                clusterer_probabilities_sum_SE_high  = clusterer_probabilities_sum_SE_mid
                clusterer_outlier_scores_sum_high    = clusterer_outlier_scores_sum_mid
                clusterer_outlier_scores_sum_SE_high = clusterer_outlier_scores_sum_SE_mid
                cluster_selection_epsilon_high       = cluster_selection_epsilon_mid
                fit_UMAP_high                        = fit_UMAP
                fit_HDBSCAN_high                     = fit_HDBSCAN_mid

            # Check for case "B": midpoint_score > low_score & midpoint_score < high_score
            elif ( midpoint_score > low_score ) and ( midpoint_score < high_score ):
                #print("Found case 'B': midpoint_score > low_score & midpoint_score < high_score")

                # set new high equal to midpoint
                high = midpoint
                high_score = midpoint_score

                num_clusters_found_high              = num_clusters_found_mid
                ratio_clustered_high                 = ratio_clustered_mid
                clusterer_probabilities_sum_high     = clusterer_probabilities_sum_mid
                clusterer_probabilities_sum_SE_high  = clusterer_probabilities_sum_SE_mid
                clusterer_outlier_scores_sum_high    = clusterer_outlier_scores_sum_mid
                clusterer_outlier_scores_sum_SE_high = clusterer_outlier_scores_sum_SE_mid
                cluster_selection_epsilon_high       = cluster_selection_epsilon_mid
                fit_UMAP_high                        = fit_UMAP
                fit_HDBSCAN_high                     = fit_HDBSCAN_mid

            # Check for case "C": midpoint_score < low_score & midpoint_score < high_score
            elif ( midpoint_score < low_score ) and ( midpoint_score < high_score ):
                #print("Found case 'C': midpoint_score < low_score & midpoint_score < high_score")

                if ( high_score > low_score ):
                    #print("C_0a: Found high_score > low_score")

                    high = low
                    high_score = low_score

                    num_clusters_found_high              = num_clusters_found_low
                    ratio_clustered_high                 = ratio_clustered_low
                    clusterer_probabilities_sum_high     = clusterer_probabilities_sum_low
                    clusterer_probabilities_sum_SE_high  = clusterer_probabilities_sum_SE_low
                    clusterer_outlier_scores_sum_high    = clusterer_outlier_scores_sum_low
                    clusterer_outlier_scores_sum_SE_high = clusterer_outlier_scores_sum_SE_low
                    cluster_selection_epsilon_high       = cluster_selection_epsilon_low
                    fit_UMAP_high                        = fit_UMAP
                    fit_HDBSCAN_high                     = fit_HDBSCAN_low

                if ( midpoint_score < low_score ):
                    #print("C_0b: Found midpoint_score < low_score")

                    low = midpoint
                    low_score = midpoint_score

                    num_clusters_found_low              = num_clusters_found_mid
                    ratio_clustered_low                 = ratio_clustered_mid
                    clusterer_probabilities_sum_low     = clusterer_probabilities_sum_mid
                    clusterer_probabilities_sum_SE_low  = clusterer_probabilities_sum_SE_mid
                    clusterer_outlier_scores_sum_low    = clusterer_outlier_scores_sum_mid
                    clusterer_outlier_scores_sum_SE_low = clusterer_outlier_scores_sum_SE_mid
                    cluster_selection_epsilon_low       = cluster_selection_epsilon_mid
                    fit_UMAP_low                        = fit_UMAP
                    fit_HDBSCAN_low                     = fit_HDBSCAN_mid

            # Check for case "D": midpoint_score = low_score & midpoint_score < high_score
            elif ( abs( midpoint_score - low_score ) < 0.01 ) and ( midpoint_score < high_score ):
                #print("Found case 'D': midpoint_score = low_score & midpoint_score < high_score")

                # set new low equal to midpoint
                low = midpoint
                low_score = midpoint_score

                num_clusters_found_low              = num_clusters_found_mid
                ratio_clustered_low                 = ratio_clustered_mid
                clusterer_probabilities_sum_low     = clusterer_probabilities_sum_mid
                clusterer_probabilities_sum_SE_low  = clusterer_probabilities_sum_SE_mid
                clusterer_outlier_scores_sum_low    = clusterer_outlier_scores_sum_mid
                clusterer_outlier_scores_sum_SE_low = clusterer_outlier_scores_sum_SE_mid
                cluster_selection_epsilon_low       = cluster_selection_epsilon_mid
                fit_UMAP_low                        = fit_UMAP
                fit_HDBSCAN_low                     = fit_HDBSCAN_mid

            # Check for case "E": midpoint_score > low_score & midpoint_score = high_score
            elif ( midpoint_score > low_score ) and ( abs( midpoint_score - high_score ) < 0.01 ):
                #print("Found case 'E': midpoint_score > low_score & midpoint_score = high_score")

                # set new high equal to midpoint
                high = midpoint
                high_score = midpoint_score

                num_clusters_found_high              = num_clusters_found_mid
                ratio_clustered_high                 = ratio_clustered_mid
                clusterer_probabilities_sum_high     = clusterer_probabilities_sum_mid
                clusterer_probabilities_sum_SE_high  = clusterer_probabilities_sum_SE_mid
                clusterer_outlier_scores_sum_high    = clusterer_outlier_scores_sum_mid
                clusterer_outlier_scores_sum_SE_high = clusterer_outlier_scores_sum_SE_mid
                cluster_selection_epsilon_high       = cluster_selection_epsilon_mid
                fit_UMAP_high                        = fit_UMAP
                fit_HDBSCAN_high                     = fit_HDBSCAN_mid

            # Check for case "F": midpoint_score = low_score & midpoint_score = high_score
            elif ( abs( midpoint_score - low_score ) < 0.01 ) and ( abs( midpoint_score - high_score ) < 0.01 ):
                #print("Found case 'F': midpoint_score = low_score & midpoint_score = high_score")
                break
        
    else:
        i_HDBSCAN_flag = 1

        ( cluster_selection_epsilon_MIN,
        num_clusters_found_MIN,
        ratio_clustered_MIN,
        clusterer_probabilities_sum_MIN,
        clusterer_probabilities_sum_SE_MIN,
        clusterer_outlier_scores_sum_MIN,
        clusterer_outlier_scores_sum_SE_MIN,
        fit_HDBSCAN_MIN ) = HDBSCAN_dict[ ( n_neighbors, min_dist, n_components, metric, min_dist, min_samples, min_cluster_size ) ]
        
        fit_UMAP_MIN = fit_UMAP
        
    cluster_selection_epsilon_MIN = minimum_min

    print("minimum = ", minimum_min )
    
    end_time_hdbscan = time.time()
    print()
    print("HDBSCAN: --- %s seconds ---" % (end_time_hdbscan - start_time_hdbscan))
    print()
            
    if i_HDBSCAN_flag != 1:
        HDBSCAN_dict[ ( n_neighbors, min_dist, n_components, metric, min_dist, min_samples, min_cluster_size ) ] = ( 
            cluster_selection_epsilon_MIN,
            num_clusters_found_MIN,
            ratio_clustered_MIN,
            clusterer_probabilities_sum_MIN,
            clusterer_probabilities_sum_SE_MIN,
            clusterer_outlier_scores_sum_MIN,
            clusterer_outlier_scores_sum_SE_MIN,
            fit_HDBSCAN_MIN )

        # write the latest HDBSCAN dict
        with open( filename_HDBSCAN , 'wb') as f1a:
            pickle.dump( HDBSCAN_dict , f1a )
        print( "*** Flushed contents to HDBSCAN_dict file: ", filename_HDBSCAN )
        print()
        
    return ( num_clusters_found_MIN,
            ratio_clustered_MIN,
            clusterer_probabilities_sum_MIN,
            clusterer_probabilities_sum_SE_MIN,
            clusterer_outlier_scores_sum_MIN,
            clusterer_outlier_scores_sum_SE_MIN,
            clusterer_probabilities_sum_SE_MIN + clusterer_outlier_scores_sum_SE_MIN,
            cluster_selection_epsilon_MIN,
            fit_UMAP_MIN,
            fit_HDBSCAN_MIN )


## Hybrid-NEAT: (cell execution output denoted w/ "# " at start of line)

In [None]:
# --- Hybrid-NEAT search configuration ---------------------------------------
# Every name assigned below is a module-level variable that eval_genome()
# reads when it runs.

# Ignore every warning raised from here on.
warnings.filterwarnings('ignore')

# Seed NumPy's global random number generator.
np.random.seed(333)

# Plot flag handed unchanged to draw_umap_hdbscan_fff() by eval_genome().
q_plot = 0

# Hyperparameter bounds.  eval_genome() converts a network output `o` into a
# hyperparameter value with:  o * (max - min) + min
#
# NOTE(review): for n_neighbors the "max" is 2 and the "min" is 100, so the
# two names look swapped.  With the formula above the mapping then runs from
# 100 (o = 0) down to 2 (o = 1) -- confirm this is intended before changing
# it, since swapping the values reverses the mapping.
#
# Discrete grids used previously (kept for reference):
#   n_neighbors      : 2 .. 25 (every integer)
#   min_dist         : 0.0, 0.1, ..., 0.9, 0.99
#   n_components     : 1..10, 12, 14, 16, 18, 20, 25, 30, 35, 40, 45, 50, 60, 70, 80, 90, 100
#   min_samples      : 2..10, 12, 14, 16, 18, 20, 25, 30, 35, 40, 45, 50, 60, 70, 80, 90, 100
#   min_cluster_size : 1..10, 12, 14, 16, 18, 20, 25, 30, 35, 40, 45, 50, 60, 70, 80, 90, 100
n_neighbors_max, n_neighbors_min = 2, 100
min_dist_max, min_dist_min = 0.99, 0.0
n_components_max, n_components_min = 1000, 1
min_samples_max, min_samples_min = 1000, 2
min_cluster_size_max, min_cluster_size_min = 2, 2

# Candidate distance metrics (currently disabled; eval_genome() fixes the
# metric to "jaccard"):
#metric_list = [ 'euclidean', "manhattan", "chebyshev", "minkowski", "canberra", "braycurtis", "mahalanobis", "wminkowski", "seuclidean", "cosine", "correlation", "hamming", "jaccard", "dice", "kulsinski", "ll_dirichlet", "hellinger", "rogerstanimoto", "sokalmichener", "sokalsneath", "yule" ]
#metric_list = [ "jaccard", "dice", "kulsinski", "rogerstanimoto", "sokalmichener" ]

# Constant input vector fed to every genome's network.
neat_inputs = [1.0]

# Best fitness seen so far; eval_genome() raises it whenever a genome scores
# strictly higher.
best_fitness_so_far = -99999.0

def eval_genome( genome_id , genome , config , winner_flag ):
    """Evaluate one NEAT genome and store the result in ``genome.fitness``.

    The genome is expanded into a feed-forward network, activated on the
    module-level ``neat_inputs``, and its first four outputs are rescaled
    with ``output * (max - min) + min`` into the hyperparameters
    ``n_neighbors``, ``min_dist``, ``n_components`` and ``min_samples``.
    ``metric`` is fixed to "jaccard" and ``min_cluster_size`` to 2.  These
    values are passed to ``draw_umap_hdbscan_fff`` and the statistics it
    returns are converted into a fitness:

    * exactly 7 clusters:  ``10000 / |COMB_SE + 1|``
    * 0 clusters:          ``-99999.0``
    * anything else:       ``10000 / |COMB_SE + 1| - 1000 * |n_clusters - 7|``

    When the fitness is strictly greater than ``best_fitness_so_far`` the
    genome is pickled to ``best_genome_<rounded fitness>`` and
    ``best_genome_current``, and the inputs/outputs of the evaluation are
    stored in ``NEAT_dict`` which is then pickled to ``filename_NEAT``.

    Parameters:
        genome_id:   identifier of the genome; only used in log output.
        genome:      NEAT genome; its ``fitness`` attribute is assigned here.
        config:      NEAT configuration used to build the network.
        winner_flag: accepted for interface compatibility; not used.

    Returns:
        None.  Any ``Exception`` raised during the evaluation is reported and
        the genome receives a fitness of ``-99999.9``.
    """
    # Only this module-level name is rebound here; the bounds, neat_inputs,
    # q_plot, NEAT_dict and filename_NEAT are merely read (or mutated in place).
    global best_fitness_so_far

    # Number of clusters that receives no fitness penalty.
    target_num_clusters = 7

    net = neat.nn.FeedForwardNetwork.create(genome, config)
    output = net.activate( neat_inputs )

    # The metric is fixed; selecting it from network outputs via metric_list
    # is currently disabled.
    metric = "jaccard"

    # Rescale the network outputs into the hyperparameter ranges.
    n_neighbors = int(round( ( output[ 0 ] * ( n_neighbors_max - n_neighbors_min ) + n_neighbors_min ),0))
    min_dist = output[ 1 ] * ( min_dist_max - min_dist_min ) + min_dist_min
    n_components = int(round( ( output[ 2 ] * ( n_components_max - n_components_min ) + n_components_min ),0))
    min_samples = int(round( ( output[ 3 ] * ( min_samples_max - min_samples_min ) + min_samples_min ),0))

    # min_cluster_size is fixed (deriving it from a network output is disabled).
    min_cluster_size = 2

    try:

        #################################
        #################################
        # --> OVER RIDE DEFAULT HERE <--
        #n_components = 1
        #n_components = 2
        #n_components = 3
        #n_components = 10
        #n_components = 100
        # --> OVER RIDE DEFAULT HERE <--
        #################################
        #################################

        print()
        print("===========================================")
        print("IN: n_neighbors = ", n_neighbors )
        print("IN: min_dist = ", min_dist )
        print("IN: n_components = ", n_components )
        print("IN: metric = ", metric )
        print("IN: min_samples = ", min_samples )
        print("IN: min_cluster_size = ", min_cluster_size )
        print("===========================================")
        print()

        (num_clusters_found,
        ratio_clustered,
        clustered_probabilities_sum,
        clustered_probabilities_sum_SE,
        clustered_outlier_scores_sum,
        clustered_outlier_scores_sum_SE,
        clustered_COMB_sum_SE,
        cluster_selection_epsilon,
        fit_UMAP_RTN,
        fit_HDBSCAN_RTN ) = draw_umap_hdbscan_fff(n_neighbors, min_dist, n_components, metric, min_samples, min_cluster_size, q_plot)

        print ( "OUT: ",
           "{a:5d},{b:10.5f},{c:10.5f},{d:10.2f},{e:10.5f},{f:10.2f},{g:15.2f}".format(
            a = num_clusters_found,
            b = ratio_clustered,
            c = clustered_probabilities_sum,
            d = clustered_probabilities_sum_SE,
            e = clustered_outlier_scores_sum,
            f = clustered_outlier_scores_sum_SE,
            g = clustered_COMB_sum_SE ))

        # Computed unconditionally (as before) so that a zero denominator is
        # reported through the exception path below for every cluster count.
        base_fitness = 10000.0 / abs( clustered_COMB_sum_SE + 1)
        if num_clusters_found == target_num_clusters:
            genome.fitness = base_fitness
        elif num_clusters_found == 0:
            genome.fitness = -99999.0
        else:
            # Penalise every cluster of deviation from the target count.
            genome.fitness = base_fitness - ( abs( num_clusters_found - target_num_clusters ) * 1000.0 )

        print("genome.fitness = ", genome.fitness )

        # check for best fitness so far and write files if appropriate
        if ( genome.fitness > best_fitness_so_far ):

            best_fitness_so_far = genome.fitness
            print()
            print()
            print("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$")
            print( "New best_fitness_so_far = ", best_fitness_so_far , genome_id )
            print()
            print( "New best: metric                    = ", metric )
            print( "New best: n_neighbors               = ",  n_neighbors )
            print( "New best: min_dist                  = ",  min_dist )
            print( "New best: n_components              = ",  n_components )
            print( "New best: min_samples               = ", min_samples )
            print( "New best: min_cluster_size          = ", min_cluster_size )
            print( "New best: cluster_selection_epsilon = ", cluster_selection_epsilon )
            print()

            # Report the values returned for THIS evaluation.  The previous
            # code printed clusterer_* names, which are not locals of this
            # function, so the log could disagree with the fitness (or raise
            # NameError and discard the genome).
            print("OUT: num_clusters_found              = ", num_clusters_found )
            print("OUT: ratio_clustered                 = ", ratio_clustered )
            print("OUT: clusterer_probabilities_sum     = ", clustered_probabilities_sum )
            print("OUT: clusterer_probabilities_sum_SE  = ", clustered_probabilities_sum_SE )
            print("OUT: clusterer_outlier_scores_sum    = ", clustered_outlier_scores_sum )
            print("OUT: clusterer_outlier_scores_sum_SE = ", clustered_outlier_scores_sum_SE )
            print("OUT: clustered_COMB_sum_SE           = ", clustered_COMB_sum_SE )
            print()

            # write out the current best genome under a fitness-specific name
            with open('best_genome_' + str(round(best_fitness_so_far,0)), 'wb') as f1:
                pickle.dump( genome , f1 )

            # ... and under a fixed name so that run() can reload it
            with open('best_genome_current' , 'wb') as f2:
                pickle.dump( genome , f2 )

            input_tpl = ( metric,
                          n_neighbors,
                          min_dist,
                          n_components,
                          min_samples,
                          min_cluster_size,
                          cluster_selection_epsilon )

            output_tpl = ( num_clusters_found,
                           ratio_clustered,
                           clustered_probabilities_sum,
                           clustered_probabilities_sum_SE,
                           clustered_outlier_scores_sum,
                           clustered_outlier_scores_sum_SE,
                           clustered_COMB_sum_SE,
                           cluster_selection_epsilon,
                           fit_UMAP_RTN,
                           fit_HDBSCAN_RTN )

            NEAT_dict[ str( best_fitness_so_far ) ] = ( input_tpl , output_tpl )

            # write the latest NEAT dict
            with open( filename_NEAT , 'wb') as f3:
                pickle.dump( NEAT_dict , f3 )
            print( "*** Flushed contents to NEAT_dict file: ", filename_NEAT )
            print("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$")
            print()
            print()

        else:
            print()
            print("  --->>> OLD best_fitness_so_far = ",  best_fitness_so_far, "<<<---" )
            print()

    except Exception as exc:
        # Best effort: a failed evaluation must not abort the evolutionary
        # run, so the genome just gets the worst fitness.  Catching Exception
        # (instead of a bare except) lets KeyboardInterrupt stop the run, and
        # the error is shown instead of being silently dropped.
        print("Exception:", repr(exc))
        genome.fitness = -99999.9

def eval_genomes(genomes, config):
    """Fitness callback for NEAT: evaluate every genome of a generation.

    ``genomes`` is an iterable of ``(genome_id, genome)`` pairs; each pair is
    handed to ``eval_genome`` with the winner flag set to 0.
    """
    not_a_winner = 0
    for entry in genomes:
        gid, candidate = entry
        eval_genome( gid , candidate , config , not_a_winner )


def run(config_file):
    """Run the NEAT search and re-evaluate the best genome that was saved.

    Builds a NEAT population from ``config_file``, attaches stdout,
    statistics and checkpoint (interval 5) reporters, and runs
    ``eval_genomes`` for up to 100 generations.  Afterwards the genome stored
    in ``best_genome_current`` (written by ``eval_genome`` whenever a new
    best fitness is found) is loaded and evaluated once more.

    Parameters:
        config_file: path of the NEAT configuration file.

    Returns:
        None.
    """
    # Load configuration.
    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         config_file)

    # Create the population, which is the top-level object for a NEAT run.
    p = neat.Population(config)

    # Add a stdout reporter to show progress in the terminal.
    p.add_reporter(neat.StdOutReporter(True))
    p.add_reporter(neat.StatisticsReporter())
    p.add_reporter(neat.Checkpointer(5))

    # Run for up to 100 generations.
    winner = p.run(eval_genomes, 100)

    # Display the winning genome.
    print('\nBest genome:\n{!s}'.format(winner))

    # Load the best genome saved by eval_genome.  The file only exists if
    # some genome improved on the initial best fitness (in this run or an
    # earlier one), so a missing file is reported instead of raising.
    # NOTE: pickle.load executes arbitrary code from the file -- only use
    # this with files written by this notebook.
    try:
        with open('best_genome_current', 'rb') as f:
            c_loaded_genome = pickle.load(f)
    except FileNotFoundError:
        print("No 'best_genome_current' file found; skipping re-evaluation.")
        return

    print('Loaded genome:')
    print(c_loaded_genome)

    eval_genome( 1 , c_loaded_genome , config , 1 )
    print("c_loaded_genome.fitness = ", c_loaded_genome.fitness)
    
# Driver: locate the NEAT configuration file 'config_test' in the current
# working directory (path.abspath('') resolves to the cwd) and start the run.
#local_dir = os.path.abspath('')
local_dir = path.abspath('')
config_path = path.join(local_dir, 'config_test')
run(config_path)


###############################################################################################
###############################################################################################
# n_components              =  1
###############################################################################################
###############################################################################################

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  9.724927926824497 70
# New best: metric                    =  jaccard
# New best: n_neighbors               =  94
# New best: min_dist                  =  0.006959148742128127
# New best: n_components              =  1
# New best: min_samples               =  2
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  0.0382753180817047
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.9986408426775399
# OUT: clusterer_probabilities_sum     =  0.998479477403332
# OUT: clusterer_probabilities_sum_SE  =  13.85185011673148
# OUT: clusterer_outlier_scores_sum    =  0.18020072309768087
# OUT: clusterer_outlier_scores_sum_SE =  1011.4472895662788
# OUT: clustered_COMB_sum_SE           =  1025.2991396830103
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590335180.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  10.989274448928889 79
# New best: metric                    =  jaccard
# New best: n_neighbors               =  99
# New best: min_dist                  =  0.0033716231190036076
# New best: n_components              =  1
# New best: min_samples               =  82
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  0.02044046515895938
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.9466530750934421
# OUT: clusterer_probabilities_sum     =  0.9232328486460644
# OUT: clusterer_probabilities_sum_SE  =  623.2161048299381
# OUT: clusterer_outlier_scores_sum    =  0.08204544696970589
# OUT: clusterer_outlier_scores_sum_SE =  285.7620785773101
# OUT: clustered_COMB_sum_SE           =  908.9781834072483
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590335180.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  19.81631092796434 136
# New best: metric                    =  jaccard
# New best: n_neighbors               =  67
# New best: min_dist                  =  0.00290672171279422
# New best: n_components              =  1
# New best: min_samples               =  28
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  0.022527093274155204
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.9826707441386341
# OUT: clusterer_probabilities_sum     =  0.9703969671713213
# OUT: clusterer_probabilities_sum_SE  =  212.8673202643525
# OUT: clusterer_outlier_scores_sum    =  0.09204311017667792
# OUT: clusterer_outlier_scores_sum_SE =  290.7674745948774
# OUT: clustered_COMB_sum_SE           =  503.6347948592299
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590335180.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  21.26725476883598 230
# New best: metric                    =  jaccard
# New best: n_neighbors               =  100
# New best: min_dist                  =  0.002929070004939838
# New best: n_components              =  1
# New best: min_samples               =  27
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  0.029290700049398377
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.9833503227998641
# OUT: clusterer_probabilities_sum     =  0.9770774271934677
# OUT: clusterer_probabilities_sum_SE  =  218.64348789777074
# OUT: clusterer_outlier_scores_sum    =  0.09440671790199935
# OUT: clusterer_outlier_scores_sum_SE =  301.49263080863614
# OUT: clustered_COMB_sum_SE           =  520.1361187064069
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590335180.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  22.710117987797858 298
# New best: metric                    =  jaccard
# New best: n_neighbors               =  100
# New best: min_dist                  =  0.007416544133236732
# New best: n_components              =  1
# New best: min_samples               =  15
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  0.04079099273280203
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.9918450560652395
# OUT: clusterer_probabilities_sum     =  0.9864897759543961
# OUT: clusterer_probabilities_sum_SE  =  106.53649091085505
# OUT: clusterer_outlier_scores_sum    =  0.09384150425292966
# OUT: clusterer_outlier_scores_sum_SE =  330.6606568379162
# OUT: clustered_COMB_sum_SE           =  437.1971477487712
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590335180.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  26.64879253146397 333
# New best: metric                    =  jaccard
# New best: n_neighbors               =  100
# New best: min_dist                  =  0.00293584926790005
# New best: n_components              =  1
# New best: min_samples               =  25
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  0.026055662252612948
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.9860686374447842
# OUT: clusterer_probabilities_sum     =  0.9843815198238788
# OUT: clusterer_probabilities_sum_SE  =  118.8315291780387
# OUT: clusterer_outlier_scores_sum    =  0.06884192025375332
# OUT: clusterer_outlier_scores_sum_SE =  201.19009883602945
# OUT: clustered_COMB_sum_SE           =  320.02162801406814
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590335180.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  34.69399169309408 552
# New best: metric                    =  jaccard
# New best: n_neighbors               =  100
# New best: min_dist                  =  0.0029055391133648903
# New best: n_components              =  1
# New best: min_samples               =  24
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  0.029055391133648904
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.9976214746856948
# OUT: clusterer_probabilities_sum     =  0.9953936640984699
# OUT: clusterer_probabilities_sum_SE  =  26.081173629541937
# OUT: clusterer_outlier_scores_sum    =  0.07805018686635841
# OUT: clusterer_outlier_scores_sum_SE =  261.39352461989614
# OUT: clustered_COMB_sum_SE           =  287.47469824943806
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590335180.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  40.40663617415181 1207
# New best: metric                    =  jaccard
# New best: n_neighbors               =  100
# New best: min_dist                  =  0.004727453816151321
# New best: n_components              =  1
# New best: min_samples               =  21
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  0.03530817068938018
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.9969418960244648
# OUT: clusterer_probabilities_sum     =  0.99234361711406
# OUT: clusterer_probabilities_sum_SE  =  40.24036334429631
# OUT: clusterer_outlier_scores_sum    =  0.07182828739274436
# OUT: clusterer_outlier_scores_sum_SE =  209.64112228227452
# OUT: clustered_COMB_sum_SE           =  249.88148562657085
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590335180.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# Best genome:
# Key: 1207
# Fitness: 40.40663617415181
# Nodes:
# 	0 DefaultNodeGene(key=0, bias=0.06973346987756822, response=1.0, activation=sigmoid, aggregation=sum)
# 	1 DefaultNodeGene(key=1, bias=-1.0733293250455875, response=1.0, activation=sigmoid, aggregation=sum)
# 	2 DefaultNodeGene(key=2, bias=0.9534932612828289, response=1.0, activation=sigmoid, aggregation=sum)
# 	3 DefaultNodeGene(key=3, bias=-0.7934406415849313, response=1.0, activation=sigmoid, aggregation=sum)
# 	9 DefaultNodeGene(key=9, bias=-1.0239343422802425, response=1.0, activation=sigmoid, aggregation=sum)
# 	66 DefaultNodeGene(key=66, bias=-2.596775177797545, response=1.0, activation=sigmoid, aggregation=sum)
# Connections:
# 	DefaultConnectionGene(key=(-1, 66), weight=1.245141710051548, enabled=True)
# 	DefaultConnectionGene(key=(9, 1), weight=0.918336962764926, enabled=True)
# 	DefaultConnectionGene(key=(9, 3), weight=0.12425217362823236, enabled=True)
# 	DefaultConnectionGene(key=(66, 0), weight=1.8733609822347028, enabled=False)
# 	DefaultConnectionGene(key=(66, 9), weight=-1.0892795350319977, enabled=True)


###############################################################################################
###############################################################################################
# n_components              =  2
###############################################################################################
###############################################################################################

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  22.715844775657306 64
# New best: metric                    =  jaccard
# New best: n_neighbors               =  32
# New best: min_dist                  =  0.09689743826574919
# New best: n_components              =  2
# New best: min_samples               =  2
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  0.18504193411003175
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.9904858987427795
# OUT: clusterer_probabilities_sum     =  0.9895535743475289
# OUT: clusterer_probabilities_sum_SE  =  97.05919260747149
# OUT: clusterer_outlier_scores_sum    =  0.09560245525215605
# OUT: clusterer_outlier_scores_sum_SE =  332.5270792109775
# OUT: clustered_COMB_sum_SE           =  429.58627181844895
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590336040.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  29.57831388878229 82
# New best: metric                    =  jaccard
# New best: n_neighbors               =  100
# New best: min_dist                  =  0.043208220111894825
# New best: n_components              =  2
# New best: min_samples               =  2
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  0.1951121189427751
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.9928644240570846
# OUT: clusterer_probabilities_sum     =  0.9915167163550114
# OUT: clusterer_probabilities_sum_SE  =  80.3529319008717
# OUT: clusterer_outlier_scores_sum    =  0.07988962809275599
# OUT: clusterer_outlier_scores_sum_SE =  263.5398934346695
# OUT: clustered_COMB_sum_SE           =  343.8928253355412
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590336040.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  30.28348751458714 211
# New best: metric                    =  jaccard
# New best: n_neighbors               =  100
# New best: min_dist                  =  0.04785423506748781
# New best: n_components              =  2
# New best: min_samples               =  2
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  0.16898526758206628
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.9921848453958546
# OUT: clusterer_probabilities_sum     =  0.9924700019853071
# OUT: clusterer_probabilities_sum_SE  =  70.14715889360406
# OUT: clusterer_outlier_scores_sum    =  0.07799147429250188
# OUT: clusterer_outlier_scores_sum_SE =  248.8626231975411
# OUT: clustered_COMB_sum_SE           =  319.00978209114515
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590336040.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  30.805985392151467 477
# New best: metric                    =  jaccard
# New best: n_neighbors               =  69
# New best: min_dist                  =  0.030519316409752313
# New best: n_components              =  2
# New best: min_samples               =  2
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  0.23652470217558041
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.9976214746856948
# OUT: clusterer_probabilities_sum     =  0.9967384728087028
# OUT: clusterer_probabilities_sum_SE  =  26.984460839744298
# OUT: clusterer_outlier_scores_sum    =  0.08699682603701617
# OUT: clusterer_outlier_scores_sum_SE =  296.62778164180054
# OUT: clustered_COMB_sum_SE           =  323.61224248154485
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590336040.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  32.42479665344004 521
# New best: metric                    =  jaccard
# New best: n_neighbors               =  100
# New best: min_dist                  =  0.14712960253799498
# New best: n_components              =  2
# New best: min_samples               =  2
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  0.24023505414406993
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.9972816853550799
# OUT: clusterer_probabilities_sum     =  0.9969101491757029
# OUT: clusterer_probabilities_sum_SE  =  28.41481746769128
# OUT: clusterer_outlier_scores_sum    =  0.07892196255144118
# OUT: clusterer_outlier_scores_sum_SE =  278.8996400433249
# OUT: clustered_COMB_sum_SE           =  307.31445751101614
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590336040.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# Best genome:
# Key: 521
# Fitness: 32.42479665344004
# Nodes:
# 	0 DefaultNodeGene(key=0, bias=-0.024470720300238252, response=1.0, activation=sigmoid, aggregation=sum)
# 	1 DefaultNodeGene(key=1, bias=-1.8464893852474893, response=1.0, activation=sigmoid, aggregation=sum)
# 	2 DefaultNodeGene(key=2, bias=-1.2897681010432893, response=1.0, activation=sigmoid, aggregation=sum)
# 	3 DefaultNodeGene(key=3, bias=2.3151029942881975, response=1.0, activation=sigmoid, aggregation=sum)
# 	15 DefaultNodeGene(key=15, bias=-0.40634025248434374, response=1.0, activation=sigmoid, aggregation=sum)
# 	121 DefaultNodeGene(key=121, bias=-0.20941379482807876, response=1.0, activation=sigmoid, aggregation=sum)
# Connections:
# 	DefaultConnectionGene(key=(-1, 15), weight=0.30821764746284175, enabled=True)
# 	DefaultConnectionGene(key=(15, 1), weight=1.0521605835488685, enabled=False)
# 	DefaultConnectionGene(key=(15, 2), weight=0.7708819963230386, enabled=True)
# 	DefaultConnectionGene(key=(15, 121), weight=1.3198269768408015, enabled=True)
# 	DefaultConnectionGene(key=(121, 1), weight=1.8455040513977314, enabled=True)


###############################################################################################
###############################################################################################
# n_components              =  3
###############################################################################################
###############################################################################################

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  44.81515585316241 49
# New best: metric                    =  jaccard
# New best: n_neighbors               =  100
# New best: min_dist                  =  0.03076266133976533
# New best: n_components              =  3
# New best: min_samples               =  3
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  0.2730186193904174
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.9918450560652395
# OUT: clusterer_probabilities_sum     =  0.9899032718119612
# OUT: clusterer_probabilities_sum_SE  =  84.41006270172551
# OUT: clusterer_outlier_scores_sum    =  0.056757620615627724
# OUT: clusterer_outlier_scores_sum_SE =  137.72873509170768
# OUT: clustered_COMB_sum_SE           =  222.13879779343318
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590336534.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  61.56178912242786 64
# New best: metric                    =  jaccard
# New best: n_neighbors               =  100
# New best: min_dist                  =  0.04161927222388945
# New best: n_components              =  3
# New best: min_samples               =  2
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  0.3927818816129567
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.9972816853550799
# OUT: clusterer_probabilities_sum     =  0.9970477604960263
# OUT: clusterer_probabilities_sum_SE  =  27.50193701788292
# OUT: clusterer_outlier_scores_sum    =  0.051386383430435764
# OUT: clusterer_outlier_scores_sum_SE =  133.9364869225424
# OUT: clustered_COMB_sum_SE           =  161.4384239404253
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590336534.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  63.29257919549846 652
# New best: metric                    =  jaccard
# New best: n_neighbors               =  100
# New best: min_dist                  =  0.08317403874913921
# New best: n_components              =  3
# New best: min_samples               =  2
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  0.45745721312026577
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.9959225280326198
# OUT: clusterer_probabilities_sum     =  0.9953376778840423
# OUT: clusterer_probabilities_sum_SE  =  41.63558634299609
# OUT: clusterer_outlier_scores_sum    =  0.04920422902839621
# OUT: clusterer_outlier_scores_sum_SE =  115.36081903514383
# OUT: clustered_COMB_sum_SE           =  156.9964053781399
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590336534.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# Best genome:
# Key: 652
# Fitness: 63.29257919549846
# Nodes:
# 	0 DefaultNodeGene(key=0, bias=0.17430019791621187, response=1.0, activation=sigmoid, aggregation=sum)
# 	1 DefaultNodeGene(key=1, bias=-1.2422658068666113, response=1.0, activation=sigmoid, aggregation=sum)
# 	2 DefaultNodeGene(key=2, bias=-1.1937942136771873, response=1.0, activation=sigmoid, aggregation=sum)
# 	3 DefaultNodeGene(key=3, bias=0.10691633006466761, response=1.0, activation=sigmoid, aggregation=sum)
# 	5 DefaultNodeGene(key=5, bias=0.643781047998245, response=1.0, activation=sigmoid, aggregation=sum)
# Connections:
# 	DefaultConnectionGene(key=(-1, 5), weight=-0.6080711053784972, enabled=True)
# 	DefaultConnectionGene(key=(5, 0), weight=-0.3464310129909578, enabled=False)
# 	DefaultConnectionGene(key=(5, 1), weight=1.4039224411836266, enabled=True)
# 	DefaultConnectionGene(key=(5, 2), weight=-0.3935648741308501, enabled=True)


###############################################################################################
###############################################################################################
# n_components              =  10
###############################################################################################
###############################################################################################

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  41.82579263351763 11
# New best: metric                    =  jaccard
# New best: n_neighbors               =  92
# New best: min_dist                  =  0.9899242938848881
# New best: n_components              =  10
# New best: min_samples               =  2
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  1.0247263198417786
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.9789330615018689
# OUT: clusterer_probabilities_sum     =  0.9796898813550546
# OUT: clusterer_probabilities_sum_SE  =  200.79158690953344
# OUT: clusterer_outlier_scores_sum    =  0.02019656878973271
# OUT: clusterer_outlier_scores_sum_SE =  27.049418240250475
# OUT: clustered_COMB_sum_SE           =  227.8410051497839
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590353500.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  79.77923879360532 39
# New best: metric                    =  jaccard
# New best: n_neighbors               =  90
# New best: min_dist                  =  0.0
# New best: n_components              =  10
# New best: min_samples               =  2
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  0.0
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.9928644240570846
# OUT: clusterer_probabilities_sum     =  0.9914677725397337
# OUT: clusterer_probabilities_sum_SE  =  73.33524290544432
# OUT: clusterer_outlier_scores_sum    =  0.031655867017552033
# OUT: clusterer_outlier_scores_sum_SE =  51.01065097893737
# OUT: clustered_COMB_sum_SE           =  124.3458938843817
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590353500.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  81.65634553315252 335
# New best: metric                    =  jaccard
# New best: n_neighbors               =  100
# New best: min_dist                  =  0.9899906397293243
# New best: n_components              =  10
# New best: min_samples               =  2
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  1.1563989784631232
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.9898063200815495
# OUT: clusterer_probabilities_sum     =  0.9893691322795627
# OUT: clusterer_probabilities_sum_SE  =  102.59516295848117
# OUT: clusterer_outlier_scores_sum    =  0.018516729562137835
# OUT: clusterer_outlier_scores_sum_SE =  18.8692938416336
# OUT: clustered_COMB_sum_SE           =  121.46445680011476
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590353500.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  88.85634824362533 421
# New best: metric                    =  jaccard
# New best: n_neighbors               =  100
# New best: min_dist                  =  0.04763278208278601
# New best: n_components              =  10
# New best: min_samples               =  2
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  0.47632782082786007
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.9932042133876996
# OUT: clusterer_probabilities_sum     =  0.992349511824376
# OUT: clusterer_probabilities_sum_SE  =  68.81733814768573
# OUT: clusterer_outlier_scores_sum    =  0.029659883771645735
# OUT: clusterer_outlier_scores_sum_SE =  42.723861188822134
# OUT: clustered_COMB_sum_SE           =  111.54119933650787
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590353500.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  91.65752893116469 808
# New best: metric                    =  jaccard
# New best: n_neighbors               =  56
# New best: min_dist                  =  0.0
# New best: n_components              =  10
# New best: min_samples               =  2
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  0.0
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.9938837920489296
# OUT: clusterer_probabilities_sum     =  0.9927840940787748
# OUT: clusterer_probabilities_sum_SE  =  62.467465414850004
# OUT: clusterer_outlier_scores_sum    =  0.030404426433937468
# OUT: clusterer_outlier_scores_sum_SE =  45.63431942063312
# OUT: clustered_COMB_sum_SE           =  108.10178483548313
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590353500.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  93.91378731658149 928
# New best: metric                    =  jaccard
# New best: n_neighbors               =  22
# New best: min_dist                  =  0.0
# New best: n_components              =  10
# New best: min_samples               =  2
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  0.0
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.9969418960244648
# OUT: clusterer_probabilities_sum     =  0.9960345504192898
# OUT: clusterer_probabilities_sum_SE  =  34.52472429328828
# OUT: clusterer_outlier_scores_sum    =  0.0385584479597457
# OUT: clusterer_outlier_scores_sum_SE =  70.95591380823137
# OUT: clustered_COMB_sum_SE           =  105.48063810151965
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590353500.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# Best genome:
# Key: 928
# Fitness: 93.91378731658149
# Nodes:
# 	0 DefaultNodeGene(key=0, bias=-0.04403189625803773, response=1.0, activation=sigmoid, aggregation=sum)
# 	1 DefaultNodeGene(key=1, bias=1.957927827683601, response=1.0, activation=sigmoid, aggregation=sum)
# 	2 DefaultNodeGene(key=2, bias=1.2993913033308864, response=1.0, activation=sigmoid, aggregation=sum)
# 	3 DefaultNodeGene(key=3, bias=-0.59618767209586, response=1.0, activation=sigmoid, aggregation=sum)
# 	14 DefaultNodeGene(key=14, bias=-1.856549028447144, response=1.0, activation=sigmoid, aggregation=sum)
# 	100 DefaultNodeGene(key=100, bias=0.6472929664251026, response=1.0, activation=sigmoid, aggregation=sum)
# 	147 DefaultNodeGene(key=147, bias=0.8786033216013412, response=1.0, activation=sigmoid, aggregation=sum)
# Connections:
# 	DefaultConnectionGene(key=(-1, 100), weight=1.3640575047349257, enabled=True)
# 	DefaultConnectionGene(key=(1, 147), weight=0.5920257235004911, enabled=True)
# 	DefaultConnectionGene(key=(14, 0), weight=0.6804537908138908, enabled=True)
# 	DefaultConnectionGene(key=(14, 3), weight=-0.8404856916266954, enabled=False)
# 	DefaultConnectionGene(key=(14, 147), weight=0.13500241759133047, enabled=True)
# 	DefaultConnectionGene(key=(100, 14), weight=1.8218481670195483, enabled=True)
# 	DefaultConnectionGene(key=(147, 3), weight=-2.6341209511042085, enabled=True)


###############################################################################################
###############################################################################################
# n_components              =  100
###############################################################################################
###############################################################################################

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  32.12881660134433 15
# New best: metric                    =  jaccard
# New best: n_neighbors               =  3
# New best: min_dist                  =  0.0027112730112158825
# New best: n_components              =  100
# New best: min_samples               =  2
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  0.00576145514883375
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.9969418960244648
# OUT: clusterer_probabilities_sum     =  0.9930956269171854
# OUT: clusterer_probabilities_sum_SE  =  51.589760033885895
# OUT: clusterer_outlier_scores_sum    =  0.0746307145701497
# OUT: clusterer_outlier_scores_sum_SE =  258.6573090405966
# OUT: clustered_COMB_sum_SE           =  310.2470690744825
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590512224.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  55.6889504095446 19
# New best: metric                    =  jaccard
# New best: n_neighbors               =  84
# New best: min_dist                  =  0.444983032759282
# New best: n_components              =  100
# New best: min_samples               =  2
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  1.0707404225770223
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.9847094801223242
# OUT: clusterer_probabilities_sum     =  0.984628547675529
# OUT: clusterer_probabilities_sum_SE  =  152.94989563741382
# OUT: clusterer_outlier_scores_sum    =  0.01877352633592938
# OUT: clusterer_outlier_scores_sum_SE =  25.618940306854334
# OUT: clustered_COMB_sum_SE           =  178.56883594426816
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590512224.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  90.48505002686002 45
# New best: metric                    =  jaccard
# New best: n_neighbors               =  100
# New best: min_dist                  =  0.9886878667700282
# New best: n_components              =  100
# New best: min_samples               =  2
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  1.544824791828169
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.9915052667346246
# OUT: clusterer_probabilities_sum     =  0.9914306988489142
# OUT: clusterer_probabilities_sum_SE  =  84.99068061232761
# OUT: clusterer_outlier_scores_sum    =  0.017438948284417378
# OUT: clusterer_outlier_scores_sum_SE =  24.52481335083593
# OUT: clustered_COMB_sum_SE           =  109.51549396316354
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590512224.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  93.1956444368298 61
# New best: metric                    =  jaccard
# New best: n_neighbors               =  100
# New best: min_dist                  =  0.9897755396442393
# New best: n_components              =  100
# New best: min_samples               =  2
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  1.5813210770097414
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.9915052667346246
# OUT: clusterer_probabilities_sum     =  0.9914504234140255
# OUT: clusterer_probabilities_sum_SE  =  84.98815752375627
# OUT: clusterer_outlier_scores_sum    =  0.017450330210498913
# OUT: clusterer_outlier_scores_sum_SE =  21.313187046746506
# OUT: clustered_COMB_sum_SE           =  106.30134457050278
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590512224.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  187.1589510789491 79
# New best: metric                    =  jaccard
# New best: n_neighbors               =  46
# New best: min_dist                  =  0.9899219120532323
# New best: n_components              =  100
# New best: min_samples               =  2
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  1.3379413342594468
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.9972816853550799
# OUT: clusterer_probabilities_sum     =  0.9982006022461797
# OUT: clusterer_probabilities_sum_SE  =  17.064678138848162
# OUT: clusterer_outlier_scores_sum    =  0.019343932836663524
# OUT: clusterer_outlier_scores_sum_SE =  25.172916474904188
# OUT: clustered_COMB_sum_SE           =  42.23759461375235
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590512224.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  248.14189326687662 871
# New best: metric                    =  jaccard
# New best: n_neighbors               =  33
# New best: min_dist                  =  0.988788809368383
# New best: n_components              =  100
# New best: min_samples               =  2
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  0.988788809368383
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.99932042133877
# OUT: clusterer_probabilities_sum     =  0.9992504519583119
# OUT: clusterer_probabilities_sum_SE  =  6.822906840761608
# OUT: clusterer_outlier_scores_sum    =  0.02124505278294496
# OUT: clusterer_outlier_scores_sum_SE =  32.476616421297976
# OUT: clustered_COMB_sum_SE           =  39.29952326205958
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590512224.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  267.1628371038339 1000
# New best: metric                    =  jaccard
# New best: n_neighbors               =  36
# New best: min_dist                  =  0.9899939168248885
# New best: n_components              =  100
# New best: min_samples               =  2
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  1.2684297059318883
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.99932042133877
# OUT: clusterer_probabilities_sum     =  0.99932042133877
# OUT: clusterer_probabilities_sum_SE  =  6.795786612300374
# OUT: clusterer_outlier_scores_sum    =  0.022083905717332715
# OUT: clusterer_outlier_scores_sum_SE =  29.630965233668103
# OUT: clustered_COMB_sum_SE           =  36.426751845968475
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590512224.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  292.28261015286785 1495
# New best: metric                    =  jaccard
# New best: n_neighbors               =  33
# New best: min_dist                  =  0.8147831297521589
# New best: n_components              =  100
# New best: min_samples               =  2
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  1.0439408849949536
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.9996602106693849
# OUT: clusterer_probabilities_sum     =  0.9991358866432054
# OUT: clusterer_probabilities_sum_SE  =  4.5914813788567
# OUT: clusterer_outlier_scores_sum    =  0.020176569044927022
# OUT: clusterer_outlier_scores_sum_SE =  28.778213770444925
# OUT: clustered_COMB_sum_SE           =  33.36969514930163
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590512224.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# Best genome:
# Key: 1495
# Fitness: 292.28261015286785
# Nodes:
# 	0 DefaultNodeGene(key=0, bias=0.540255542736279, response=1.0, activation=sigmoid, aggregation=sum)
# 	1 DefaultNodeGene(key=1, bias=1.226467932367567, response=1.0, activation=sigmoid, aggregation=sum)
# 	2 DefaultNodeGene(key=2, bias=-0.4437046633132916, response=1.0, activation=sigmoid, aggregation=sum)
# 	3 DefaultNodeGene(key=3, bias=-0.27471912142665267, response=1.0, activation=sigmoid, aggregation=sum)
# 	6 DefaultNodeGene(key=6, bias=-0.2219183460551772, response=1.0, activation=sigmoid, aggregation=sum)
# Connections:
# 	DefaultConnectionGene(key=(-1, 6), weight=3.7000889405722774, enabled=True)
# 	DefaultConnectionGene(key=(6, 0), weight=-0.3853478345687985, enabled=True)
# 	DefaultConnectionGene(key=(6, 1), weight=-0.9190884552811565, enabled=True)


###############################################################################################
###############################################################################################
# n_components              =  1 to 1000
###############################################################################################
###############################################################################################

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  101.14881986496701 22
# New best: metric                    =  jaccard
# New best: n_neighbors               =  62
# New best: min_dist                  =  0.9899999999632542
# New best: n_components              =  999
# New best: min_samples               =  2
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  1.3728515624490438
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.9928644240570846
# OUT: clusterer_probabilities_sum     =  0.9927767523289678
# OUT: clusterer_probabilities_sum_SE  =  71.45859895164133
# OUT: clusterer_outlier_scores_sum    =  0.018222440792311813
# OUT: clusterer_outlier_scores_sum_SE =  26.40562915650596
# OUT: clustered_COMB_sum_SE           =  97.8642281081473
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590791478.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  125.61428112290648 102
# New best: metric                    =  jaccard
# New best: n_neighbors               =  93
# New best: min_dist                  =  0.9891309139801236
# New best: n_components              =  1000
# New best: min_samples               =  2
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  1.5846379535003836
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.9938837920489296
# OUT: clusterer_probabilities_sum     =  0.994167647814377
# OUT: clusterer_probabilities_sum_SE  =  57.80844935446099
# OUT: clusterer_outlier_scores_sum    =  0.014145916527693871
# OUT: clusterer_outlier_scores_sum_SE =  17.402520321869858
# OUT: clustered_COMB_sum_SE           =  75.21096967633085
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590791478.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  134.1364019399053 228
# New best: metric                    =  jaccard
# New best: n_neighbors               =  100
# New best: min_dist                  =  0.987741530691884
# New best: n_components              =  1000
# New best: min_samples               =  2
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  1.612796718082842
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.9942235813795447
# OUT: clusterer_probabilities_sum     =  0.9941198963431043
# OUT: clusterer_probabilities_sum_SE  =  57.78852079851655
# OUT: clusterer_outlier_scores_sum    =  0.013679819526548695
# OUT: clusterer_outlier_scores_sum_SE =  15.762457574081507
# OUT: clustered_COMB_sum_SE           =  73.55097837259805
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590791478.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  138.0444586114615 351
# New best: metric                    =  jaccard
# New best: n_neighbors               =  100
# New best: min_dist                  =  0.961125105171662
# New best: n_components              =  981
# New best: min_samples               =  2
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  1.5693370857881042
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.9955827387020048
# OUT: clusterer_probabilities_sum     =  0.9954610596955206
# OUT: clusterer_probabilities_sum_SE  =  44.25639346489053
# OUT: clusterer_outlier_scores_sum    =  0.016079211815729424
# OUT: clusterer_outlier_scores_sum_SE =  27.184036963004424
# OUT: clustered_COMB_sum_SE           =  71.44043042789495
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590791478.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  150.855983433178 361
# New best: metric                    =  jaccard
# New best: n_neighbors               =  100
# New best: min_dist                  =  0.9899901267062072
# New best: n_components              =  1000
# New best: min_samples               =  2
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  1.5468595729784487
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.9952429493713897
# OUT: clusterer_probabilities_sum     =  0.9952081055803252
# OUT: clusterer_probabilities_sum_SE  =  47.57568696654925
# OUT: clusterer_outlier_scores_sum    =  0.014167011017821952
# OUT: clusterer_outlier_scores_sum_SE =  17.71270128574385
# OUT: clustered_COMB_sum_SE           =  65.2883882522931
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590791478.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# New best_fitness_so_far =  279.2884344841309 517
# New best: metric                    =  jaccard
# New best: n_neighbors               =  100
# New best: min_dist                  =  0.9899737771808268
# New best: n_components              =  365
# New best: min_samples               =  2
# New best: min_cluster_size          =  2
# New best: cluster_selection_epsilon =  1.6512453236570825
# OUT: num_clusters_found              =  7
# OUT: ratio_clustered                 =  0.9986408426775399
# OUT: clusterer_probabilities_sum     =  0.9986408426775399
# OUT: clusterer_probabilities_sum_SE  =  13.591573224600747
# OUT: clusterer_outlier_scores_sum    =  0.01704970508292765
# OUT: clusterer_outlier_scores_sum_SE =  21.213704636155775
# OUT: clustered_COMB_sum_SE           =  34.80527786075652
# *** Flushed contents to NEAT_dict file:  NEAT_dict_1590791478.pkl
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

#   --->>> OLD best_fitness_so_far =  279.2884344841309 <<<---
# 
# Population's average fitness: -267397.56087 stdev: 265949.22173
# Best fitness: 279.28843 - size: (7, 8) - species 1 - id 517
# Average adjusted fitness: 0.526
# Mean genetic distance 1.051, standard deviation 0.372
# Population of 20 members in 1 species:
#    ID   age  size  fitness  adj fit  stag
#   ====  ===  ====  =======  =======  ====
#      1   86    20    279.3    0.526    58
# Total extinctions: 0
# Generation time: 6955.156 sec (10290.515 average)
# Saving checkpoint to neat-checkpoint-86
# 
 # ****** Running generation 87 ****** 