### Tests
Use of the agglomerative clustering with HR diagram.

Here we test metrics for identifying good clustering solutions.

In [11]:
import sys, os
sys.path.append('../../src')

from numba import jit

import matplotlib.pyplot as plt
from pylab import rcParams
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import pandas as pd
import pickle

from math import ceil
import math
import gaia_utils as gu
from sklearn import cluster
from sklearn.neighbors import kneighbors_graph
from astroML.correlation import two_point
from astroML.correlation import bootstrap_two_point_angular

%matplotlib inline

## Directories.
## NOTE(review): rootdir is an absolute, machine-specific path; adjust it before
## running this notebook elsewhere.
rootdir = "/home/stephane/Science/GAIA"
wdir    = "%s/products"%(rootdir)
datadir = "%s/master/notebooks/data"%(rootdir)

# All relative file names used below (VOTable, pickle files) are resolved from wdir.
os.chdir(wdir)
# Default figure size (width, height); plot2d overrides this value when it is called.
rcParams['figure.figsize'] = 9, 6
###################################

# Cluster name handed to gu.source(), and the VOTable file read for it.
clustername = "NGC 1647"
# voname = 'NGC 752-1.0deg.vot'
# voname = 'NGC 2682-3.0deg.vot'
voname = 'NGC 1647-2.0deg.vot'
# voname = "Ruprecht 1-2.0deg.vot"
# RADIUS is only referenced by the commented-out source.query() call in the data-loading cell.
RADIUS   = 2.0
# Number of clusters used by the KMeans call in iter_parameters.
kCluster = 8
# NOTE(review): votable_disk and distclust are not used by any code visible in this notebook.
votable_disk = False
distclust = 572.0
# Weights of the 8 clustering features. The scan functions below build the same list as
# [angle, angle, distance, velocity, velocity, magnitude, magnitude, magnitude].
# NOTE: the first assignment is dead -- it is immediately overwritten by the next line.
WEIGHT = [3.,3.,11.,5.,5., 2., 2., 2.]
WEIGHT = [4.87863010104081, 4.87863010104081, 4.306272782136562, 2.5786331381796077, 2.5786331381796077, 1.4117964989460319, 1.4117964989460319, 1.4117964989460319]

## DBSCAN parameters (passed as eps / min_samples to cluster.DBSCAN in random_weighting)
eps = 1.5
min_samples = 20
## Ward: number of neighbours of the kneighbors_graph connectivity in random_weighting
neighbors = 30

In [12]:
## plot2D and plot3D


def plot2d(df, labels, ilab, cmap = "gist_stern" ,color = False):
    """Show a 2x2 grid of scatter plots for the stars carrying label ``ilab``.

    Panels (column indices of ``df`` as used in the calls below):
      * top-left     : column 0 vs column 1  ("l", "b")
      * bottom-left  : column 0 vs column 2  ("l", "d (pc)")
      * top-right    : column 3 vs column 4  ("Vdra", "Vdec")
      * bottom-right : column 6 vs column 5  ("G-R", "G"), with fixed limits and
        a reversed y axis (27 at the bottom, 10 at the top)

    Parameters
    ----------
    df : 2-D numpy array with at least 7 columns, one row per star.
    labels : array of cluster labels, one per row of ``df``.
    ilab : the label to display.
    cmap : colormap applied wherever points are coloured by column 2.
    color : when true the top-left panel is coloured by column 2, otherwise it
        is drawn in black. The three other panels are always coloured by column 2.

    Side effect: sets the global ``rcParams['figure.figsize']`` to (14, 14).
    """
    rcParams['figure.figsize'] = 14, 14
    fig, panels = plt.subplots(2, 2)

    # Index tuple selecting the rows that belong to the requested label.
    chosen = np.where(labels == ilab)

    def column(j):
        return df[chosen, j]

    # Sky-position panel: the only one whose colouring is optional.
    top_left = panels[0,0]
    if not color:
        top_left.scatter(column(0), column(1),  s = 0.5, c = "k")
    else:
        top_left.scatter(column(0), column(1),  s = 0.5, c= column(2), cmap=cmap )
    top_left.set_xlabel("l")
    top_left.set_ylabel("b")

    # Remaining panels: (grid position, x column, y column, x label, y label).
    layout = (
        ((1,0), 0, 2, "l", "d (pc)"),
        ((0,1), 3, 4, "Vdra", "Vdec"),
        ((1,1), 6, 5, "G-R", "G"),
    )
    for position, xcol, ycol, xname, yname in layout:
        ax = panels[position]
        ax.scatter(column(xcol), column(ycol), s = 0.5, c= column(2), cmap=cmap)
        ax.set_xlabel(xname)
        ax.set_ylabel(yname)

    # Colour-magnitude panel: fixed window, bright stars at the top.
    panels[1,1].set_xlim(-1.,1.5)
    panels[1,1].set_ylim(27.,10)

    plt.show()
    

In [13]:
## astrometric conversion
## 
def convert_to_cartesian(lgal, bga, dist, offCenter = [0., 0.]):
    """Convert galactic longitude/latitude (degrees) and distance to Cartesian coordinates.

    The offset ``offCenter = [l0, b0]`` (degrees) is subtracted from the
    longitudes and latitudes before the conversion, so a star located at
    (l0, b0) ends up on the +x axis.

    The conversion is the plain spherical-to-Cartesian transform

        x = d cos(b) cos(l),   y = d cos(b) sin(l),   z = d sin(b)

    evaluated with NumPy on whole arrays. Unlike a coordinate object that
    validates its inputs, no range check is applied to the offset latitude.

    Parameters
    ----------
    lgal : array-like, galactic longitude in degrees.
    bga : array-like, galactic latitude in degrees.
    dist : array-like, distance (pc in the callers' convention); the outputs
        carry the same unit.
    offCenter : two-element sequence [l0, b0] in degrees (never modified here).

    Returns
    -------
    (xx, yy, zz) : tuple of 1-D numpy arrays.

    Side effect: prints the offsets and the min/max of the intermediate and
    output arrays (diagnostic output).
    """
    # The latitude parameter is named `bga`; it is the value that must be used here.
    lgalOff = np.atleast_1d(np.asarray(lgal, dtype=float)) - offCenter[0]
    bgalOff = np.atleast_1d(np.asarray(bga, dtype=float)) - offCenter[1]
    distance = np.atleast_1d(np.asarray(dist, dtype=float))

    print(offCenter[0])
    print(offCenter[1])
    print(min(lgalOff))
    print(max(lgalOff))
    print(min(bgalOff))
    print(max(bgalOff))

    lrad = np.radians(lgalOff)
    brad = np.radians(bgalOff)
    cosb = np.cos(brad)

    xx = distance * cosb * np.cos(lrad)
    yy = distance * cosb * np.sin(lrad)
    zz = distance * np.sin(brad)

    print("## XX")
    print("min, max: %f , %f"%(min(xx),max(xx)))
    print("## YY")
    print("min, max: %f , %f"%(min(yy),max(yy)))
    print("## ZZ")
    print("min, max: %f , %f"%(min(zz),max(zz)))

    return(xx,yy,zz)

In [14]:
## Read the data and do the conversion
## (builds the global `source` object for the cluster selected in the configuration cell)


# gu is the project-local gaia_utils module; its source class is not shown in this notebook.
source = gu.source(clustername)
# Per-feature weights -- presumably consumed by the conversion/normalization calls
# below (the scan functions set them in the same order); verify in gaia_utils.
source.weight = WEIGHT
#source.query(RADIUS, errtol = 0.2, dump = True)
source.read_votable(voname)
# Filter the catalogue; the recorded cell output reports 18658 stars selected out of 20719.
source.convert_filter_data(mag_range = [0., 40])
# "Normal-Gauss" normalization according to the message it prints; later cells read
# source.df and source.dfnorm -- presumably set by these two calls, TODO confirm.
source.normalization_normal()
#source.normalization_minmax()

## NGC 1647-2.0deg.vot read...
## Total stars: 20719
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..


()

### Metrics

Metric to quantify goodness-of-solution for the clustering.

In [15]:
def metric1(df, labels, APERTURE = 0.2 , MAXRADIUS = 1. , NBOOTSTRAP =20 ):
    """Density-contrast quality metric, assuming the open cluster sits at the field centre.

    For every label 0 .. max(labels), the stars with that label are counted
    inside a circular aperture centred on the mean position of *all* rows of
    ``df`` (columns 0 and 1). This count is compared with the counts found in
    ``NBOOTSTRAP`` apertures of the same size placed at random angles and at
    random distances in [APERTURE, MAXRADIUS - APERTURE) from the centre.
    An empty comparison aperture is counted as one star to avoid dividing by zero.

    Returns a dict with three parallel lists:
      'label' : the label index,
      'Q'     : mean of the NBOOTSTRAP density ratios (centre / outside),
      'Q_err' : standard deviation of those ratios.

    Uses the global ``np.random`` state.
    """
    centre_x = np.mean(df[:,0])
    centre_y = np.mean(df[:,1])
    aper_sq = APERTURE*APERTURE

    def stars_within(points, x0, y0):
        # Number of points strictly closer than APERTURE to (x0, y0).
        dx = points[:,0] - x0
        dy = points[:,1] - y0
        return np.count_nonzero(dx*dx + dy*dy < aper_sq)

    result = {'label': [], 'Q': [], 'Q_err': []}

    for lab in range(max(labels)+1):
        members = df[np.where(labels == lab)[0]]
        density_in = stars_within(members, centre_x, centre_y) / aper_sq

        # Draw order (angles first, then radii) is kept so the random stream is unchanged.
        theta = np.random.uniform(0., 2*math.pi, NBOOTSTRAP)
        rho   = np.random.uniform(APERTURE,MAXRADIUS-APERTURE, NBOOTSTRAP)

        ratios = []
        for t, r in zip(theta, rho):
            x_out = centre_x + r*math.cos(t)
            y_out = centre_y + r*math.sin(t)
            density_out = max(1, stars_within(members, x_out, y_out)) / aper_sq
            ratios.append(density_in / density_out)
        contrast = np.array(ratios, dtype=float)

        result['label'].append(lab)
        result['Q'].append(np.mean(contrast))
        result['Q_err'].append(np.std(contrast))

    return(result)
                          
    
def metric2(df, labels, APERTURE = 0.2 , MAXRADIUS = 1. , NBOOTSTRAP = 50 , SIGCLIP = 0.):
    """Density-contrast quality metric with a cut on the comparison apertures.

    Same geometry as metric1: the cluster is assumed to sit at the mean
    position of all rows of ``df`` and the surrounding distribution is assumed
    regular (no holes). For every label 0 .. max(labels):

      1. count the labelled stars inside the central aperture;
      2. count them inside ``NBOOTSTRAP`` randomly placed apertures, adding a
         random value in [1, 1.1) to each count (so a count is never zero);
      3. keep only the apertures whose count lies more than ``SIGCLIP``
         standard deviations above the mean count;
      4. Q is the mean ratio central density / kept outside densities and
         Q_err the standard deviation of those ratios.

    Returns a dict of three parallel lists: 'label', 'Q' and 'Q_err'.
    Uses the global ``np.random`` state.
    """
    jitter_width = 0.1
    centre_x = np.mean(df[:,0])
    centre_y = np.mean(df[:,1])
    aper_sq = APERTURE*APERTURE

    def stars_within(points, x0, y0):
        # Number of points strictly closer than APERTURE to (x0, y0).
        dx = points[:,0] - x0
        dy = points[:,1] - y0
        return np.count_nonzero(dx*dx + dy*dy < aper_sq)

    result = {'label': [], 'Q': [], 'Q_err': []}

    for lab in range(max(labels)+1):
        members = df[np.where(labels == lab)[0]]
        density_in = stars_within(members, centre_x, centre_y) / aper_sq

        # Angles are drawn before radii, and one jitter value per aperture inside
        # the loop, which keeps the random stream in the original order.
        theta = np.random.uniform(0., 2*math.pi, NBOOTSTRAP)
        rho   = np.random.uniform(APERTURE,MAXRADIUS-APERTURE, NBOOTSTRAP)

        counts = np.zeros(NBOOTSTRAP)
        for k, (t, r) in enumerate(zip(theta, rho)):
            x_out = centre_x + r*math.cos(t)
            y_out = centre_y + r*math.sin(t)
            counts[k] = stars_within(members, x_out, y_out) + np.random.uniform(1., 1.+ jitter_width)

        # Keep the apertures sitting more than SIGCLIP sigma above the mean count.
        deviation = (counts - np.mean(counts)) / np.std(counts)
        kept = counts[deviation > SIGCLIP]

        contrast = density_in / (kept / aper_sq)

        result['label'].append(lab)
        result['Q'].append(np.mean(contrast))
        result['Q_err'].append(np.std(contrast))

    return(result)

In [16]:
def iter_parameters(angmin,angmax,dmin,dmax,vmin,vmax,magmin,magmax, kmin, kmax ,ntrial, von = "test.vot", radius = 1):
    """Grid scan of the feature weights and of the number of k-means clusters.

    Each weight group (angle, distance, velocity, magnitude) is sampled on
    ``ntrial`` evenly spaced values between its min and max (both included),
    and the number of clusters runs over ``range(kmin, kmax)`` (``kmax``
    excluded). For every combination the data are re-weighted and
    re-normalized, clustered with KMeans, and scored with ``metric1``.

    Parameters
    ----------
    angmin, angmax, dmin, dmax, vmin, vmax, magmin, magmax :
        bounds of the weight of each feature group.
    kmin, kmax : range of the number of clusters (kmax excluded).
    ntrial : number of grid points per weight group.
    von : VOTable file name to read.
    radius : passed to metric1 as MAXRADIUS.

    Returns
    -------
    dict ``{'kmeans': {'weight': [...], 'metric': [...]}}`` with one entry per
    combination. The number of clusters of an entry is not stored separately;
    it equals ``len(entry_metric['label'])``.

    Relies on the notebook globals ``clustername``, ``gu``, ``cluster`` and
    ``metric1``. The total cost is ntrial**4 * (kmax - kmin) KMeans fits.
    """
    from itertools import product

    s = gu.source(clustername)
    s.read_votable(von)

    angle = np.linspace(angmin,angmax,ntrial)
    distance = np.linspace(dmin,dmax,ntrial)
    vel   = np.linspace(vmin, vmax,ntrial)
    mag   = np.linspace(magmin, magmax,ntrial)
    kclus = range(kmin,kmax)

    metric = {'kmeans': {'weight': [], 'metric': []}}

    # Same nesting order as the explicit loops: angle (outermost), velocity,
    # magnitude, distance, number of clusters (innermost).
    for a, v, m, d, k in product(angle, vel, mag, distance, kclus):
        WEIGHT = [a,a,d,v,v,m,m,m]
        s.weight = WEIGHT
        s.convert_filter_data(mag_range = [0., 40])
        s.normalization_normal()

        print(WEIGHT)

        # Use the scanned k here; the global kCluster would make every
        # iteration of the k loop repeat the same clustering.
        kmeans = cluster.KMeans(n_clusters= k, max_iter = 2000, n_init = 50)
        kmeans.fit(s.dfnorm)
        labels_k = kmeans.labels_
        qk = metric1(s.df, labels_k, APERTURE = 0.2 , MAXRADIUS = radius , NBOOTSTRAP =10 )
        metric['kmeans']['weight'].append(WEIGHT)
        metric['kmeans']['metric'].append(qk)

    return(metric)


def random_weighting(angmin,angmax,dmin,dmax,vmin,vmax,magmin,magmax, kmin, kmax , von = "test.vot", radius = 1, NBOOTSTRAP = 100, SCAN = None):
    """Random scan of the feature weights, scored for several clustering methods.

    ``NBOOTSTRAP`` trials are drawn: each weight group (angle, distance,
    velocity, magnitude) uniformly within its [min, max) range and the number
    of clusters as an integer in [kmin, kmax) (``kmax`` excluded). For every
    trial the data are re-weighted and re-normalized, then clustered with
    k-means, Ward agglomerative clustering and DBSCAN; each result is scored
    with ``metric2``.

    Parameters
    ----------
    angmin ... magmax : bounds of the weight of each feature group.
    kmin, kmax : bounds of the number of clusters (kmax excluded).
    von : VOTable file name to read.
    radius : field radius; metric2 receives MAXRADIUS = 0.9 * radius.
    NBOOTSTRAP : number of random trials.
    SCAN : a dict returned by a previous call, to be extended. It is modified
        in place and returned. ``None`` starts a new scan.

    Returns
    -------
    dict with keys 'kmeans', 'ward', 'spectral', 'dbscan'; each holds the
    parallel lists 'weight' (the 8 weights of a trial) and 'metric' (the
    metric2 result of that trial).

    Notes
    -----
    * The 'spectral' entry is currently a copy of the Ward labelling scored
      again with metric2 (spectral clustering is disabled).
    * When DBSCAN finds no cluster, a placeholder result with Q = 0 is stored.
    * Relies on the notebook globals ``clustername``, ``neighbors``, ``eps``,
      ``min_samples``, ``gu``, ``cluster``, ``kneighbors_graph``, ``metric2``.
    * The random generator is re-seeded from the OS, so runs are not reproducible.
    """
    np.random.seed()

    s = gu.source(clustername)
    s.read_votable(von)

    aper = 0.5

    if SCAN is None:
        metric = {}
    else:
        print("## Adding to the previous metric ...")
        metric = SCAN

    # Create whatever is missing, so that a partial previous scan can be extended too.
    for method in ('kmeans', 'ward', 'spectral', 'dbscan'):
        entry = metric.setdefault(method, {})
        entry.setdefault('weight', [])
        entry.setdefault('metric', [])

    def _quality(labels):
        # Score one labelling on the un-normalized data of this scan.
        return metric2(s.df, labels, APERTURE = aper , MAXRADIUS = 0.9 * radius , NBOOTSTRAP =50 )

    def _record(method, weight, quality):
        # Store one trial for `method` and report its best Q.
        metric[method]['weight'].append(weight)
        metric[method]['metric'].append(quality)
        print("## Best Q: %3.1f"%(max(quality['Q'])))

    angle     = np.random.uniform(angmin, angmax, NBOOTSTRAP)
    distance  = np.random.uniform(dmin, dmax, NBOOTSTRAP)
    velocity  = np.random.uniform(vmin, vmax, NBOOTSTRAP)
    magnitude = np.random.uniform(magmin, magmax, NBOOTSTRAP)
    ncluster  = np.random.randint(kmin, kmax, NBOOTSTRAP)

    for i in range(NBOOTSTRAP):
        WEIGHT = [angle[i],angle[i],distance[i],velocity[i],velocity[i], magnitude[i],magnitude[i], magnitude[i]]
        nclust = int(ncluster[i])

        s.weight = WEIGHT
        s.convert_filter_data(mag_range = [0., 40])
        s.normalization_normal()

        print(WEIGHT)
        print(i)

        # kmeans
        kmeans = cluster.KMeans(n_clusters= nclust, max_iter = 2000, n_init = 50)
        kmeans.fit(s.dfnorm)
        _record('kmeans', WEIGHT, _quality(kmeans.labels_))
        print("# k-means done")

        # ward
        # The connectivity must come from the data being clustered (s), not from the
        # notebook-level `source` object, which was normalized with other weights.
        connectivity = kneighbors_graph(s.dfnorm, n_neighbors= neighbors, include_self=False)
        # make connectivity symmetric
        connectivity = 0.5 * (connectivity + connectivity.T)
        ward = cluster.AgglomerativeClustering(n_clusters= nclust, linkage='ward', connectivity=connectivity)
        ward.fit(s.dfnorm)
        _record('ward', WEIGHT, _quality(ward.labels_))
        print("# Ward done")

        # Spectral
        # TODO: spectral clustering is disabled; the Ward labels are scored again instead.
        # To re-enable:
        #   spectral = cluster.SpectralClustering(n_clusters = nclust, eigen_solver='arpack', affinity="nearest_neighbors")
        #   spectral.fit(s.dfnorm); labels_s = spectral.labels_
        print("# !!! Spectral = Ward")
        labels_s = ward.labels_
        _record('spectral', WEIGHT, _quality(labels_s))
        print("# Spectral done")

        # DBSCAN
        dbscan = cluster.DBSCAN(eps = eps, min_samples = min_samples)
        dbscan.fit(s.dfnorm)
        labels_d = dbscan.labels_
        unique_labels = set(labels_d)
        print(unique_labels)
        # Label -1 marks noise points and is not a cluster.
        n_clusters_ = len(unique_labels) - (1 if -1 in labels_d else 0)
        if n_clusters_ > 0:
            qd = _quality(labels_d)
            # `lab` (not `i`) so the trial counter of the outer loop is not shadowed.
            for lab in range(max(labels_d)+1):
                in_cluster = np.where(labels_d == lab)
                print("# Label %5d : %5d  Dist: %6.1f (%5.1f)"%(lab,len(labels_d[in_cluster]), np.median(s.df[in_cluster,2]), np.std(s.df[in_cluster,2]) ))
            print("##")
        else:
            # Placeholder so that max(qd['Q']) and later analysis still work.
            qd = {}
            qd['Q'] = [0., 0.]
            qd['Q_err'] = [0.,0.]
            qd['label'] = [0,1]

        _record('dbscan', WEIGHT, qd)
        print("# DBSCAN done")

    return(metric)
    

### Clustering

In [None]:
## testing loop on parameters ..
## Could be very long!!!
SCAN_FILE = 'dataQran.pickle'

## To continue a previous scan, reload it; start a new scan when no file exists yet.
## NOTE: pickle.load can execute arbitrary code -- only load scan files produced by this notebook.
if os.path.exists(SCAN_FILE):
    with open(SCAN_FILE, 'rb') as f:
        previousMetric = pickle.load(f)
else:
    previousMetric = None

# q = iter_parameters(2.,5.,7.,12.,2.,4.,1.,3., 7,8 ,2, von = voname, radius = 2.)
q = random_weighting(1.,7.,3.,15.,1.,7.,1.,6., 5 ,12 , von = voname, radius = 2., NBOOTSTRAP = 450, 
                     SCAN = previousMetric)

## The scan file is overwritten with the extended result.
with open(SCAN_FILE, 'wb') as f:
    pickle.dump(q, f, pickle.HIGHEST_PROTOCOL)

## NGC 1647-2.0deg.vot read...
## Total stars: 20719
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[6.760378601963096, 6.760378601963096, 4.873485187026343, 1.318268053408259, 1.318268053408259, 1.3490518956894695, 1.3490518956894695, 1.3490518956894695]
0
## Best Q: 2.6
# k-means done
## Best Q: 1.6
# Ward done
# !!! Spectral = Ward
## Best Q: 1.6
# Spectral done
{0, -1}
# Label     0 :   329  Dist:  594.3 ( 40.6)
##
## Best Q: 4.1
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[4.833946146279689, 4.833946146279689, 7.8382816765758925, 1.669717081266935, 1.669717081266935, 5.74033546990242, 5.74033546990242, 5.74033546990242]
1
## Best Q: 1.8
# k-means done
## Best Q: 1.5
# Ward done
# !!! Spectral = Ward
## Best Q: 1.3
# Spectral done
{0, -1}
# Label     0 :    22  Dist:  597.0 ( 20.7)
##
## Best Q: 5.7
# DBSCAN done
## Conversion done...
## Stars selected: 18658

## Best Q: 1.8
# k-means done
## Best Q: 1.7
# Ward done
# !!! Spectral = Ward
## Best Q: 1.8
# Spectral done
{0, -1}
# Label     0 :    27  Dist:  596.8 ( 20.9)
##
## Best Q: 3.7
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[5.282880436144016, 5.282880436144016, 3.7958796738631513, 1.516448663768312, 1.516448663768312, 2.0120789259505623, 2.0120789259505623, 2.0120789259505623]
17
## Best Q: 2.3
# k-means done
## Best Q: 1.7
# Ward done
# !!! Spectral = Ward
## Best Q: 1.5
# Spectral done
{0, 1, -1}
# Label     0 :   332  Dist:  594.3 ( 45.6)
# Label     1 :    17  Dist:  601.0 ( 34.7)
##
## Best Q: 5.1
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[2.1435072388359444, 2.1435072388359444, 6.325663434104613, 5.242679823078515, 5.242679823078515, 5.8685230807596165, 5.8685230807596165, 5.8685230807596165]
18
## Best Q: 1.8
# k-means done
## Best Q: 1

## Best Q: 1.4
# Ward done
# !!! Spectral = Ward
## Best Q: 1.4
# Spectral done
{0, 1, -1}
# Label     0 :   394  Dist:  594.5 ( 33.1)
# Label     1 :    45  Dist:  595.8 ( 22.6)
##
## Best Q: 3.6
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[2.0070872860495177, 2.0070872860495177, 6.2377964309204925, 1.4744008456038453, 1.4744008456038453, 3.7363139508621077, 3.7363139508621077, 3.7363139508621077]
34
## Best Q: 1.5
# k-means done
## Best Q: 1.8
# Ward done
# !!! Spectral = Ward
## Best Q: 1.7
# Spectral done
{0, -1}
# Label     0 :   540  Dist:  596.3 ( 40.0)
##
## Best Q: 2.8
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[5.967867675131016, 5.967867675131016, 9.244742885471734, 4.681884492978569, 4.681884492978569, 2.727780246136754, 2.727780246136754, 2.727780246136754]
35
## Best Q: 1.2
# k-means done
## Best Q: 1.2
# Ward done
# !!! Spectral =

## Best Q: 1.8
# k-means done
## Best Q: 1.6
# Ward done
# !!! Spectral = Ward
## Best Q: 1.6
# Spectral done
{0, -1}
# Label     0 :   256  Dist:  594.3 ( 29.0)
##
## Best Q: 4.1
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[1.0709249115543928, 1.0709249115543928, 9.666595912096493, 1.603569788045598, 1.603569788045598, 5.802170818531372, 5.802170818531372, 5.802170818531372]
52
## Best Q: 1.6
# k-means done
## Best Q: 1.8
# Ward done
# !!! Spectral = Ward
## Best Q: 1.8
# Spectral done
{0, -1}
# Label     0 :   444  Dist:  595.6 ( 30.3)
##
## Best Q: 2.8
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[4.422244137663981, 4.422244137663981, 12.468046520432448, 5.320644414137732, 5.320644414137732, 5.632895712502246, 5.632895712502246, 5.632895712502246]
53
## Best Q: 1.7
# k-means done
## Best Q: 1.5
# Ward done
# !!! Spectral = Ward
## Best Q: 1.5
#

## Best Q: 1.8
# k-means done
## Best Q: 1.8
# Ward done
# !!! Spectral = Ward
## Best Q: 1.8
# Spectral done
{0, 1, -1}
# Label     0 :   308  Dist:  594.3 ( 25.9)
# Label     1 :    18  Dist:  581.0 ( 13.9)
##
## Best Q: 3.6
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[5.790479794420979, 5.790479794420979, 11.957652636452154, 6.496945576060829, 6.496945576060829, 3.691533247633326, 3.691533247633326, 3.691533247633326]
70
## Best Q: 1.7
# k-means done
## Best Q: 1.9
# Ward done
# !!! Spectral = Ward
## Best Q: 2.0
# Spectral done
{0, -1}
# Label     0 :    31  Dist:  595.1 ( 21.3)
##
## Best Q: 5.0
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[6.123879855606702, 6.123879855606702, 14.94664385450437, 3.8707999134898063, 3.8707999134898063, 4.853683881782828, 4.853683881782828, 4.853683881782828]
71
## Best Q: 1.8
# k-means done
## Best Q: 1.8
# W

## Best Q: 1.3
# k-means done
## Best Q: 2.2
# Ward done
# !!! Spectral = Ward
## Best Q: 2.4
# Spectral done
{-1}
## Best Q: 0.0
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[4.771442622948524, 4.771442622948524, 4.848664956654595, 2.9317510436612677, 2.9317510436612677, 5.010104347580504, 5.010104347580504, 5.010104347580504]
88
## Best Q: 1.5
# k-means done
## Best Q: 1.9
# Ward done
# !!! Spectral = Ward
## Best Q: 2.0
# Spectral done
{0, 1, -1}
# Label     0 :    57  Dist:  592.6 ( 24.6)
# Label     1 :    25  Dist:  603.1 ( 30.9)
##
## Best Q: 5.7
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[2.235834766833777, 2.235834766833777, 3.7109375275726824, 5.7870651335341545, 5.7870651335341545, 2.3063858285933487, 2.3063858285933487, 2.3063858285933487]
89
## Best Q: 1.8
# k-means done
## Best Q: 1.9
# Ward done
# !!! Spectral = Ward
## Best Q: 2.1

## Best Q: 2.6
# k-means done
## Best Q: 1.6
# Ward done
# !!! Spectral = Ward
## Best Q: 1.9
# Spectral done
{0, -1}
# Label     0 :   233  Dist:  594.0 ( 30.7)
##
## Best Q: 4.6
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[2.6877398112144455, 2.6877398112144455, 3.283357962134796, 2.467979613410483, 2.467979613410483, 1.5248443978739274, 1.5248443978739274, 1.5248443978739274]
104
## Best Q: 1.3
# k-means done
## Best Q: 1.5
# Ward done
# !!! Spectral = Ward
## Best Q: 1.6
# Spectral done
{0, 1, -1}
# Label     0 :   601  Dist:  596.7 ( 56.3)
# Label     1 :    21  Dist:  943.2 ( 68.7)
##
## Best Q: 3.3
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[6.575836897317096, 6.575836897317096, 11.153975961349285, 6.110255577750824, 6.110255577750824, 2.8057999357073298, 2.8057999357073298, 2.8057999357073298]
105
## Best Q: 1.9
# k-means done
## Best Q:

## Best Q: 1.8
# k-means done
## Best Q: 1.9
# Ward done
# !!! Spectral = Ward
## Best Q: 1.9
# Spectral done
{0, 1, -1}
# Label     0 :    39  Dist:  595.5 ( 18.8)
# Label     1 :    30  Dist:  595.1 ( 17.1)
##
## Best Q: 5.0
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[2.5760098268940315, 2.5760098268940315, 7.047156756860279, 5.64264370123399, 5.64264370123399, 2.6001058317178756, 2.6001058317178756, 2.6001058317178756]
122
## Best Q: 1.5
# k-means done
## Best Q: 1.5
# Ward done
# !!! Spectral = Ward
## Best Q: 1.6
# Spectral done
{0, -1}
# Label     0 :   418  Dist:  595.1 ( 28.6)
##
## Best Q: 5.5
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[6.479636432994332, 6.479636432994332, 9.214496181827217, 4.8889938157208555, 4.8889938157208555, 5.873466644204657, 5.873466644204657, 5.873466644204657]
123
## Best Q: 1.2
# k-means done
## Best Q: 1.7

## Best Q: 1.7
# Ward done
# !!! Spectral = Ward
## Best Q: 1.7
# Spectral done
{0, -1}
# Label     0 :   266  Dist:  594.3 ( 23.0)
##
## Best Q: 3.3
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[3.3914379602536155, 3.3914379602536155, 10.341003127492156, 6.134360844859167, 6.134360844859167, 4.909226066964147, 4.909226066964147, 4.909226066964147]
140
## Best Q: 1.7
# k-means done
## Best Q: 1.8
# Ward done
# !!! Spectral = Ward
## Best Q: 1.8
# Spectral done
{0, 1, -1}
# Label     0 :   106  Dist:  594.3 ( 20.8)
# Label     1 :    21  Dist:  596.5 ( 17.9)
##
## Best Q: 4.9
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[1.351355054603015, 1.351355054603015, 3.9628848946141177, 3.242105231209799, 3.242105231209799, 1.9738896604679486, 1.9738896604679486, 1.9738896604679486]
141
## Best Q: 1.5
# k-means done
## Best Q: 1.7
# Ward done
# !!! Spectral 

## Best Q: 1.7
# k-means done
## Best Q: 2.7
# Ward done
# !!! Spectral = Ward
## Best Q: 2.9
# Spectral done
{0, 1, 2, -1}
# Label     0 :    65  Dist:  594.0 ( 17.9)
# Label     1 :    80  Dist:  598.6 ( 23.4)
# Label     2 :    20  Dist:  597.2 ( 16.5)
##
## Best Q: 5.4
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[4.012100596821671, 4.012100596821671, 13.485315869082765, 1.1710211752902624, 1.1710211752902624, 4.867594686459698, 4.867594686459698, 4.867594686459698]
157
## Best Q: 1.9
# k-means done
## Best Q: 2.0
# Ward done
# !!! Spectral = Ward
## Best Q: 1.6
# Spectral done
{0, 1, -1}
# Label     0 :    40  Dist:  592.1 ( 18.9)
# Label     1 :    27  Dist:  599.1 ( 17.0)
##
## Best Q: 7.2
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[4.256253896908162, 4.256253896908162, 6.197986942383988, 1.3397742660525476, 1.3397742660525476, 1.226020722

## Best Q: 1.7
# k-means done
## Best Q: 2.0
# Ward done
# !!! Spectral = Ward
## Best Q: 1.9
# Spectral done
{0, -1}
# Label     0 :   610  Dist:  597.2 ( 39.2)
##
## Best Q: 2.7
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[1.6541182211014542, 1.6541182211014542, 8.103443325346891, 2.364588079034932, 2.364588079034932, 5.526087219296292, 5.526087219296292, 5.526087219296292]
175
## Best Q: 1.7
# k-means done
## Best Q: 1.6
# Ward done
# !!! Spectral = Ward
## Best Q: 1.7
# Spectral done
{0, -1}
# Label     0 :   340  Dist:  594.3 ( 27.7)
##
## Best Q: 3.3
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[2.0983370992905748, 2.0983370992905748, 9.532982644661313, 1.0628919550970306, 1.0628919550970306, 1.212673562688161, 1.212673562688161, 1.212673562688161]
176
## Best Q: 1.9
# k-means done
## Best Q: 1.9
# Ward done
# !!! Spectral = Ward
## Best Q: 

{0, 1, -1}
# Label     0 :   182  Dist:  594.1 ( 28.4)
# Label     1 :    35  Dist:  595.9 ( 21.3)
##
## Best Q: 3.7
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[2.483820471578634, 2.483820471578634, 5.217907903827609, 5.800831341672774, 5.800831341672774, 4.412814413035487, 4.412814413035487, 4.412814413035487]
191
## Best Q: 1.7
# k-means done
## Best Q: 1.9
# Ward done
# !!! Spectral = Ward
## Best Q: 1.8
# Spectral done
{0, -1}
# Label     0 :   303  Dist:  594.3 ( 28.7)
##
## Best Q: 3.9
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[1.6869160251521063, 1.6869160251521063, 5.576040482720751, 5.891160838266524, 5.891160838266524, 3.6503567409143707, 3.6503567409143707, 3.6503567409143707]
192
## Best Q: 1.7
# k-means done
## Best Q: 1.7
# Ward done
# !!! Spectral = Ward
## Best Q: 1.6
# Spectral done
{0, -1}
# Label     0 :   440  Dist:  594.5 

## Best Q: 1.8
# k-means done
## Best Q: 2.3
# Ward done
# !!! Spectral = Ward
## Best Q: 2.1
# Spectral done
{0, -1}
# Label     0 :   459  Dist:  595.8 ( 28.0)
##
## Best Q: 4.5
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[1.1332262822046353, 1.1332262822046353, 5.650969494398529, 3.4263866171111257, 3.4263866171111257, 3.114536160944849, 3.114536160944849, 3.114536160944849]
209
## Best Q: 1.6
# k-means done
## Best Q: 2.1
# Ward done
# !!! Spectral = Ward
## Best Q: 1.8
# Spectral done
{0, -1}
# Label     0 :   603  Dist:  595.8 ( 40.7)
##
## Best Q: 4.0
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[3.119499777798543, 3.119499777798543, 13.380986196523544, 3.0788835204411695, 3.0788835204411695, 2.814463758507932, 2.814463758507932, 2.814463758507932]
210
## Best Q: 1.9
# k-means done
## Best Q: 2.2
# Ward done
# !!! Spectral = Ward
## Best Q:

{-1}
## Best Q: 0.0
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[2.615121115601144, 2.615121115601144, 6.47800839429744, 5.816451279862823, 5.816451279862823, 5.982224489660069, 5.982224489660069, 5.982224489660069]
227
## Best Q: 1.6
# k-means done
## Best Q: 2.0
# Ward done
# !!! Spectral = Ward
## Best Q: 1.9
# Spectral done
{0, 1, -1}
# Label     0 :   179  Dist:  594.3 ( 28.6)
# Label     1 :    27  Dist:  594.2 ( 20.9)
##
## Best Q: 4.8
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[6.36780964418951, 6.36780964418951, 12.013387135870888, 5.854721884603709, 5.854721884603709, 2.4974888563134248, 2.4974888563134248, 2.4974888563134248]
228
## Best Q: 1.9
# k-means done
## Best Q: 1.7
# Ward done
# !!! Spectral = Ward
## Best Q: 1.6
# Spectral done
{0, -1}
# Label     0 :    83  Dist:  594.0 ( 20.1)
##
## Best Q: 5.9
# DBSCAN done
## Conversion 

## Best Q: 1.9
# k-means done
## Best Q: 1.7
# Ward done
# !!! Spectral = Ward
## Best Q: 2.0
# Spectral done
{0, -1}
# Label     0 :   452  Dist:  595.3 ( 34.0)
##
## Best Q: 3.2
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[3.234948772265726, 3.234948772265726, 11.478136747833624, 3.5373724457333986, 3.5373724457333986, 2.7958029210171733, 2.7958029210171733, 2.7958029210171733]
244
## Best Q: 2.2
# k-means done
## Best Q: 1.6
# Ward done
# !!! Spectral = Ward
## Best Q: 1.7
# Spectral done
{0, 1, -1}
# Label     0 :    20  Dist:  594.4 ( 18.5)
# Label     1 :   294  Dist:  594.0 ( 24.3)
##
## Best Q: 3.9
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[5.782877431243116, 5.782877431243116, 7.986619019021246, 1.9286665728444694, 1.9286665728444694, 2.057913378220572, 2.057913378220572, 2.057913378220572]
245
## Best Q: 0.9
# k-means done
## Best Q: 

## Best Q: 1.7
# k-means done
## Best Q: 1.6
# Ward done
# !!! Spectral = Ward
## Best Q: 1.7
# Spectral done
{0, -1}
# Label     0 :    25  Dist:  597.3 ( 21.9)
##
## Best Q: 4.9
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[4.322664040177257, 4.322664040177257, 7.280702127130851, 1.7430848914831412, 1.7430848914831412, 5.819532798139521, 5.819532798139521, 5.819532798139521]
262
## Best Q: 1.6
# k-means done
## Best Q: 1.4
# Ward done
# !!! Spectral = Ward
## Best Q: 1.4
# Spectral done
{0, 1, -1}
# Label     0 :    38  Dist:  594.7 ( 24.0)
# Label     1 :    20  Dist:  606.2 ( 24.9)
##
## Best Q: 6.0
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[5.917758494213831, 5.917758494213831, 10.31490700514395, 1.1841486883752779, 1.1841486883752779, 4.069337703677162, 4.069337703677162, 4.069337703677162]
263
## Best Q: 1.2
# k-means done
## Best Q: 2.1


## Best Q: 1.8
# k-means done
## Best Q: 1.8
# Ward done
# !!! Spectral = Ward
## Best Q: 1.7
# Spectral done
{0, -1}
# Label     0 :   467  Dist:  595.8 ( 26.2)
##
## Best Q: 3.6
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[5.308067578650327, 5.308067578650327, 14.43262568770087, 1.5629210186214686, 1.5629210186214686, 5.652367496377031, 5.652367496377031, 5.652367496377031]
279
## Best Q: 1.7
# k-means done
## Best Q: 1.6
# Ward done
# !!! Spectral = Ward
## Best Q: 1.6
# Spectral done
{-1}
## Best Q: 0.0
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[4.401064619385735, 4.401064619385735, 13.056793683360926, 3.260268577649046, 3.260268577649046, 1.8671871361735524, 1.8671871361735524, 1.8671871361735524]
280
## Best Q: 1.4
# k-means done
## Best Q: 1.6
# Ward done
# !!! Spectral = Ward
## Best Q: 1.8
# Spectral done
{0, -1}
# Label     0 :   292 

## Best Q: 1.8
# k-means done
## Best Q: 2.1
# Ward done
# !!! Spectral = Ward
## Best Q: 1.8
# Spectral done
{0, 1, -1}
# Label     0 :   142  Dist:  592.3 ( 21.2)
# Label     1 :    25  Dist:  596.5 ( 14.2)
##
## Best Q: 5.1
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[6.660574822289667, 6.660574822289667, 7.5806676331181215, 4.858087852815481, 4.858087852815481, 5.478788179070456, 5.478788179070456, 5.478788179070456]
297
## Best Q: 1.5
# k-means done
## Best Q: 2.6
# Ward done
# !!! Spectral = Ward
## Best Q: 2.2
# Spectral done
{-1}
## Best Q: 0.0
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[3.357831913344115, 3.357831913344115, 9.799158877380194, 6.8282671737107625, 6.8282671737107625, 1.3398943171918019, 1.3398943171918019, 1.3398943171918019]
298
## Best Q: 1.8
# k-means done
## Best Q: 1.9
# Ward done
# !!! Spectral = Ward
## Best Q: 2.0

{0, 1, -1}
# Label     0 :   187  Dist:  594.1 ( 30.7)
# Label     1 :    36  Dist:  596.2 ( 21.9)
##
## Best Q: 5.1
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[2.2063009036205425, 2.2063009036205425, 7.568509810829108, 1.7258188696194643, 1.7258188696194643, 2.6652406443650865, 2.6652406443650865, 2.6652406443650865]
315
## Best Q: 1.9
# k-means done
## Best Q: 1.5
# Ward done
# !!! Spectral = Ward
## Best Q: 1.6
# Spectral done
{0, -1}
# Label     0 :   539  Dist:  596.3 ( 34.4)
##
## Best Q: 3.2
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[3.365261653152412, 3.365261653152412, 9.533866576321598, 6.490379916336549, 6.490379916336549, 3.1697384844787115, 3.1697384844787115, 3.1697384844787115]
316
## Best Q: 1.7
# k-means done
## Best Q: 1.5
# Ward done
# !!! Spectral = Ward
## Best Q: 1.6
# Spectral done
{0, -1}
# Label     0 :   263  Dist:  5

## Best Q: 2.4
# k-means done
## Best Q: 2.2
# Ward done
# !!! Spectral = Ward
## Best Q: 2.1
# Spectral done
{0, -1}
# Label     0 :   148  Dist:  593.5 ( 25.0)
##
## Best Q: 5.2
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[1.574030421275283, 1.574030421275283, 4.9324217130685675, 6.679120563497788, 6.679120563497788, 4.494496293357347, 4.494496293357347, 4.494496293357347]
333
## Best Q: 1.7
# k-means done
## Best Q: 1.7
# Ward done
# !!! Spectral = Ward
## Best Q: 1.8
# Spectral done
{0, 1, 2, -1}
# Label     0 :   354  Dist:  594.5 ( 30.0)
# Label     1 :    22  Dist:  600.1 ( 19.8)
# Label     2 :    11  Dist:  580.3 ( 10.9)
##
## Best Q: 3.5
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[5.822379218869563, 5.822379218869563, 12.881076795890142, 6.176470312496119, 6.176470312496119, 3.481185680376075, 3.481185680376075, 3.481185680376075]
334


## Best Q: 1.5
# k-means done
## Best Q: 1.1
# Ward done
# !!! Spectral = Ward
## Best Q: 1.0
# Spectral done
{0, -1}
# Label     0 :    29  Dist:  596.8 ( 23.4)
##
## Best Q: 5.7
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[2.1102774319772757, 2.1102774319772757, 11.775581316988287, 5.356515358380092, 5.356515358380092, 3.5719984524064716, 3.5719984524064716, 3.5719984524064716]
351
## Best Q: 1.6
# k-means done
## Best Q: 1.9
# Ward done
# !!! Spectral = Ward
## Best Q: 1.8
# Spectral done
{0, 1, -1}
# Label     0 :   321  Dist:  594.2 ( 24.8)
# Label     1 :    32  Dist:  589.9 ( 17.0)
##
## Best Q: 3.8
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[1.2269908362840802, 1.2269908362840802, 4.785580113426569, 1.6000663324863382, 1.6000663324863382, 1.0809960526608922, 1.0809960526608922, 1.0809960526608922]
352
## Best Q: 2.1
# k-means done
## Bes

## Best Q: 1.2
# k-means done
## Best Q: 1.4
# Ward done
# !!! Spectral = Ward
## Best Q: 1.4
# Spectral done
{0, 1, -1}
# Label     0 :   147  Dist:  592.7 ( 28.7)
# Label     1 :    24  Dist:  593.9 ( 35.7)
##
## Best Q: 3.7
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[6.91179274027348, 6.91179274027348, 9.913475762465712, 6.435553028775654, 6.435553028775654, 1.8218816111267406, 1.8218816111267406, 1.8218816111267406]
369
## Best Q: 2.2
# k-means done
## Best Q: 1.5
# Ward done
# !!! Spectral = Ward
## Best Q: 1.5
# Spectral done
{0, -1}
# Label     0 :   131  Dist:  594.6 ( 22.7)
##
## Best Q: 5.5
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[5.337267108672115, 5.337267108672115, 8.632831094581839, 6.335347209171216, 6.335347209171216, 5.7324697000545966, 5.7324697000545966, 5.7324697000545966]
370
## Best Q: 1.7
# k-means done
## Best Q: 1.9


## Best Q: 1.8
# Ward done
# !!! Spectral = Ward
## Best Q: 1.6
# Spectral done
{0, 1, -1}
# Label     0 :   185  Dist:  592.5 ( 22.4)
# Label     1 :    53  Dist:  594.2 ( 18.3)
##
## Best Q: 4.4
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[3.4084875380500437, 3.4084875380500437, 7.526956517372215, 3.0608646979765584, 3.0608646979765584, 1.7844507694544056, 1.7844507694544056, 1.7844507694544056]
386
## Best Q: 1.3
# k-means done
## Best Q: 1.7
# Ward done
# !!! Spectral = Ward
## Best Q: 1.6
# Spectral done
{0, -1}
# Label     0 :   434  Dist:  595.8 ( 31.6)
##
## Best Q: 3.3
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[3.085849317415127, 3.085849317415127, 11.128286321944824, 2.8963911292407394, 2.8963911292407394, 2.712144070055967, 2.712144070055967, 2.712144070055967]
387
## Best Q: 1.9
# k-means done
## Best Q: 1.5
# Ward done
# !!! Spectr

## Best Q: 1.3
# k-means done
## Best Q: 1.9
# Ward done
# !!! Spectral = Ward
## Best Q: 1.8
# Spectral done
{0, -1}
# Label     0 :   303  Dist:  594.1 ( 32.2)
##
## Best Q: 3.9
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[4.9549325800712385, 4.9549325800712385, 7.092543337165218, 3.1972705466006217, 3.1972705466006217, 4.70455409283697, 4.70455409283697, 4.70455409283697]
404
## Best Q: 1.9
# k-means done
## Best Q: 2.1
# Ward done
# !!! Spectral = Ward
## Best Q: 2.3
# Spectral done
{0, 1, -1}
# Label     0 :    54  Dist:  591.6 ( 25.0)
# Label     1 :    20  Dist:  601.1 ( 24.9)
##
## Best Q: 6.5
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[1.4600370970716106, 1.4600370970716106, 5.675536433171143, 1.2346056952638154, 1.2346056952638154, 3.5118594197657593, 3.5118594197657593, 3.5118594197657593]
405
## Best Q: 1.6
# k-means done
## Best Q: 

## Best Q: 1.7
# k-means done
## Best Q: 1.8
# Ward done
# !!! Spectral = Ward
## Best Q: 1.9
# Spectral done
{0, -1}
# Label     0 :   378  Dist:  594.5 ( 26.5)
##
## Best Q: 3.4
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[6.473155377530483, 6.473155377530483, 3.349665343911026, 4.17903493100085, 4.17903493100085, 2.6561995676796917, 2.6561995676796917, 2.6561995676796917]
419
## Best Q: 1.7
# k-means done
## Best Q: 1.0
# Ward done
# !!! Spectral = Ward
## Best Q: 1.0
# Spectral done
{0, -1}
# Label     0 :   124  Dist:  594.0 ( 29.8)
##
## Best Q: 6.7
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[3.784551986023501, 3.784551986023501, 4.392162962193093, 6.316756708059911, 6.316756708059911, 4.4250012735308655, 4.4250012735308655, 4.4250012735308655]
420
## Best Q: 1.7
# k-means done
## Best Q: 1.8
# Ward done
# !!! Spectral = Ward
## Best Q: 1.

## Best Q: 1.7
# k-means done
## Best Q: 1.5
# Ward done
# !!! Spectral = Ward
## Best Q: 1.7
# Spectral done
{0, -1}
# Label     0 :   287  Dist:  594.0 ( 24.9)
##
## Best Q: 3.8
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[2.410247146426695, 2.410247146426695, 12.998884852736731, 6.2658484051285805, 6.2658484051285805, 4.079381227519595, 4.079381227519595, 4.079381227519595]
437
## Best Q: 1.9
# k-means done
## Best Q: 1.8
# Ward done
# !!! Spectral = Ward
## Best Q: 1.9
# Spectral done
{0, -1}
# Label     0 :   270  Dist:  593.2 ( 22.5)
##
## Best Q: 4.7
# DBSCAN done
## Conversion done...
## Stars selected: 18658
## Normalization Normal-Gauss done on filtered data..
[2.9737293394717246, 2.9737293394717246, 4.23530632634902, 1.2596362442517832, 1.2596362442517832, 3.919949981939596, 3.919949981939596, 3.919949981939596]
438
## Best Q: 1.4
# k-means done
## Best Q: 1.7
# Ward done
# !!! Spectral = Ward
## Best Q: 

In [None]:
def _print_label_stats(df, labels, n_labels):
    """Print one summary line per cluster label in ``range(n_labels)``.

    For each label the line gives the number of members, then the median
    and (in parentheses) the standard deviation of column 2 of ``df`` over
    those members. That column is the one reported as "Dist" in the output.

    Parameters
    ----------
    df : array indexable as ``df[rows, 2]``
        Un-normalised data, one row per star (same row order as ``labels``).
    labels : numpy array of int
        Cluster label of each row, as returned by a scikit-learn estimator.
    n_labels : int
        Labels ``0 .. n_labels - 1`` are reported; negative labels (DBSCAN
        noise) are therefore never listed.
    """
    for i in range(n_labels):
        members = np.flatnonzero(labels == i)
        dist = df[members, 2]
        print("# Label %5d : %5d  Dist: %6.1f (%5.1f)"%(i, len(members), np.median(dist), np.std(dist)))


print("## k-means...")

# KMeans on the normalised data
kmeans = cluster.KMeans(n_clusters= kCluster, max_iter = 2000, n_init = 50)
kmeans.fit(source.dfnorm)
labels_k = kmeans.labels_
_print_label_stats(source.df, labels_k, kCluster)
print("##")

###########
print("## Ward... ")
# connectivity matrix for structured Ward

connectivity = kneighbors_graph(source.dfnorm, n_neighbors= neighbors, include_self=False)
# make connectivity symmetric
connectivity = 0.5 * (connectivity + connectivity.T)

ward = cluster.AgglomerativeClustering(n_clusters= kCluster, linkage='ward', connectivity=connectivity)
ward.fit(source.dfnorm)
labels_w = ward.labels_
_print_label_stats(source.df, labels_w, kCluster)
print("##")

#############
print("## Spectral...")
spectral = cluster.SpectralClustering(n_clusters = kCluster, eigen_solver='arpack', affinity="nearest_neighbors")
spectral.fit(source.dfnorm)
labels_s = spectral.labels_
_print_label_stats(source.df, labels_s, kCluster)
print("##")


#############
print("## DBSCAN...")
# Pass the parameters by keyword: recent scikit-learn releases accept only
# `eps` positionally, so DBSCAN(eps, min_samples) raises a TypeError there.
dbscan = cluster.DBSCAN(eps=eps, min_samples=min_samples)
dbscan.fit(source.dfnorm)
labels_d = dbscan.labels_
unique_labels = set(labels_d)
print(unique_labels)
# DBSCAN chooses the number of clusters itself; label -1 marks noise, so when
# every point is noise max(labels_d) + 1 == 0 and nothing is listed.
_print_label_stats(source.df, labels_d, max(labels_d)+1)
print("##")


## k-means...
# Label     0 :  2366  Dist:  541.6 (234.5)
# Label     1 :  1915  Dist: 1288.8 (299.3)
# Label     2 :  2387  Dist:  554.2 (245.2)
# Label     3 :  2187  Dist: 1305.9 (293.6)
# Label     4 :  2077  Dist: 1311.3 (302.3)
# Label     5 :  2898  Dist:  550.5 (222.2)
# Label     6 :  2226  Dist: 1287.3 (305.5)
# Label     7 :  2602  Dist:  555.9 (225.0)
##
## Ward... 
# Label     0 :  4040  Dist:  552.2 (238.5)
# Label     1 :  3243  Dist:  668.8 (329.4)
# Label     2 :  3041  Dist: 1391.9 (320.9)
# Label     3 :  1183  Dist:  835.7 (331.9)
# Label     4 :  2250  Dist:  433.8 (203.5)
# Label     5 :  1819  Dist: 1156.1 (320.8)
# Label     6 :  2208  Dist: 1150.9 (354.7)
# Label     7 :   874  Dist: 1584.2 (240.5)
##
## Spectral...


In [None]:
## Metrics of the solutions
# Evaluate metric2 on the labels of each clustering method and plot the
# resulting Q value (with its error bar) per label on a logarithmic y axis.
np.random.seed(0)

# Same metric settings for every method, so the Q values are comparable.
metric_kwargs = dict(APERTURE = 0.5, MAXRADIUS = 0.9 * RADIUS, SIGCLIP = 0.0)

# Call order is kept (k-means, Ward, Spectral, DBSCAN) because the global
# numpy seed is set once above and metric2 may consume random numbers.
qk = metric2(source.df, labels_k, **metric_kwargs)
qw = metric2(source.df, labels_w, **metric_kwargs)
qs = metric2(source.df, labels_s, **metric_kwargs)
qd = metric2(source.df, labels_d, **metric_kwargs)
# The original cell left `labs` bound to the DBSCAN labels; keep that in case
# a later cell reads it.
labs = labels_d

# `nonposy` is the historical keyword; newer matplotlib releases only accept
# `nonpositive` and raise TypeError on the old name. Try the old spelling first
# so behaviour is unchanged wherever the original cell already worked.
try:
    plt.yscale("log", nonposy='clip')
except TypeError:
    plt.yscale("log", nonpositive='clip')
plt.xlim([-1,kCluster+1])

for q, method, fmt in (
    (qk, 'k-means', '.k'),
    (qw, 'Ward', '*r'),
    (qs, 'Spectral', 'Db'),
    (qd, 'DBSCAN', 'og'),
):
    plt.errorbar(q['label'], q['Q'], q['Q_err'], label=method, fmt=fmt, ecolor='gray', lw=1, capsize=5)

plt.legend(loc='upper right', shadow=True)
plt.xlabel("Label")
plt.ylabel("Q")
plt.show()

In [None]:
## separation distance
# Length in parsec subtended by one degree at the cluster distance `distclust`.
# NOTE(review): the constants look like arcsec per degree, km per AU and km per
# parsec (small-angle rule: 1 arcsec at d pc spans d AU) -- confirm.
arcsec_per_deg = 3600.
km_per_au = 150e6
km_per_pc = 3.1e13
angl2pc = arcsec_per_deg * km_per_au * distclust / km_per_pc
print("## Angular distance (1deg) : %3.1f pc" % angl2pc)

# 2D projections of the stars that DBSCAN assigned to label 0.
plot2d(source.df, labels_d, 0, cmap="hsv")