In [1]:
import os
import numpy as np
import pandas as pd

In [2]:
from time import time

In [3]:
# Switch the working directory to the parent of the folder the kernel started
# in (presumably so the project-local modules imported below resolve).
# The starting directory is remembered the first time this cell runs, so
# re-executing the cell lands in the same place instead of climbing one
# level higher on every run.
if 'NOTEBOOK_DIR' not in globals():
    NOTEBOOK_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_DIR, os.pardir))

In [4]:
from miscellaneous import centering
from clustering import Kmeans, RandomSwap
from experiment import AlgorithmPipeline

### Generate Dataset

In [5]:
from generation import generate_set

In [6]:
%%time
# Build the synthetic dataset: X is the data that gets clustered below, Y is
# not used again in the visible cells.
# NOTE(review): the meaning of the positional arguments is not visible here;
# 1000 presumably is the number of points (it matches the last axis of
# L.shape further down) - confirm the rest against generation.generate_set.
# NOTE(review): no random seed is set in this notebook, so if generate_set
# draws random numbers the recorded outputs will not reproduce - confirm.
X, Y = generate_set(1000, 10, 5, 100, 0.6)

Wall time: 0 ns


### Run Kmeans

In [7]:
%%time
km_pipe = AlgorithmPipeline(data = centering(X),
                            algorithm=Kmeans)

Wall time: 0 ns


In [8]:
%%time
L = km_pipe.run(k_range = np.arange(2,31),
                exec_number=30,
                max_iter = 100)

Wall time: 10.3 s


In [9]:
# Sanity check on the result of run(): presumably
# (len(k_range), exec_number, number of points) - confirm in AlgorithmPipeline.
L.shape

(29, 30, 1000)

### Run RandomSwap

In [10]:
%%time
rs_pipe = AlgorithmPipeline(data = centering(X),
                            algorithm=RandomSwap)

Wall time: 0 ns


In [11]:
%%time
L = km_pipe.run(k_range = np.arange(2,31),
                exec_number=30,
                max_iter = 100)

Wall time: 10.1 s


In [12]:
# Shape of the labels returned by the run() call in the previous cell;
# presumably (len(k_range), exec_number, number of points) - confirm.
L.shape

(29, 30, 1000)

### Check indices

In [13]:
from metrics import bss_matrix, wss_matrix

In [14]:
# Within-cluster (SSW) and between-cluster (SSB) matrices computed from the
# centered data and the labels in L; each call is timed separately.
# The shapes printed in the next cell are (29, 30), i.e. presumably one value
# per (k, execution) pair. 'conventional' selects a mode inside metrics -
# its meaning is not visible here.
%time SSW = wss_matrix(centering(X), L, 'conventional')
%time SSB = bss_matrix(centering(X), L, 'conventional')

Wall time: 627 ms
Wall time: 973 ms


In [15]:
# Both matrices are expected to have the same shape (see recorded output).
print(f'{SSW.shape} and {SSB.shape}')

(29, 30) and (29, 30)


In [16]:
from metrics import calinski_harabasz_matrix, elbow, hartigan, wb_index_matrix, xu_index_matrix, silhouette_matrix, silhouette_wss

In [17]:
%%time
# Calinski-Harabasz index built from the precomputed SSW/SSB matrices.
# The recorded result has 29 entries, one per k in k_range; np.mean is
# presumably the aggregation applied across the 30 executions - confirm.
calinski_harabasz_matrix(centering(X), L, SSW, SSB, np.mean)

Wall time: 14 ms


array([ 593.06907021,  665.58339156, 1082.62421459, 4243.37020494,
       3481.12486082, 2963.98093594, 2609.63619205, 2344.34271304,
       2108.60461983, 1942.87307402, 1806.82289067, 1688.06359136,
       1577.37970445, 1480.54657309, 1404.49559631, 1324.13169174,
       1276.90440618, 1216.72749831, 1164.77715286, 1121.35963062,
       1075.62902356, 1036.63684635, 1003.16632033,  975.43640932,
        941.25320951,  912.82391029,  889.14396757,  864.41087486,
        837.04358197])

In [18]:
%%time
# Elbow score from SSW with window (1,1). Judging by the recorded outputs of
# the elbow cells, the tuple is presumably (neighbours to the left,
# neighbours to the right): here the first and the last entry are -inf.
elbow(SSW, (1,1), np.mean)

Wall time: 0 ns


array([        -inf,   1.0282866 ,   1.0793349 , 132.64036334,
         1.1872598 ,   0.82692515,   1.04386137,   2.17205274,
         0.53160996,   1.0559937 ,   1.20775131,   1.53639151,
         1.1377903 ,   0.69030345,   2.63261622,   0.26580122,
         2.59862314,   0.87325826,   0.80885156,   1.77043818,
         0.77658464,   0.84809913,   0.82430668,   2.54387795,
         0.64938749,   0.77506921,   1.37275708,   2.3399245 ,
               -inf])

In [19]:
%%time
# Elbow score with window (2,2): in the recorded output the first two and the
# last two entries are -inf, presumably because those k values lack two
# neighbours on one side - confirm in metrics.elbow.
elbow(SSW, (2,2), np.mean)

Wall time: 0 ns


array([        -inf,         -inf,   2.17281924, 149.70818265,
        71.81726417,   0.92375747,   1.30585076,   1.54087259,
         0.86611001,   0.88478322,   1.50412839,   1.80529915,
         1.17856594,   1.06948127,   0.93442335,   0.6972418 ,
         1.53339434,   1.40522209,   0.96827515,   1.3998664 ,
         0.98732155,   0.68080618,   1.09353225,   1.82715479,
         1.00486534,   0.7396114 ,   1.70715981,         -inf,
               -inf])

In [20]:
%%time
# Elbow score with the asymmetric window (2,1): the recorded output has -inf
# in the first two entries and in the last entry only.
elbow(SSW, (2,1), np.mean)

Wall time: 0 ns


array([        -inf,         -inf,   2.18920052, 275.80373711,
       158.66583092,   1.80870014,   1.90705659,   4.43937468,
         1.68629484,   1.61737048,   2.4831291 ,   3.39197038,
         2.88588167,   1.47572402,   4.44992028,   0.96555381,
         3.28934032,   3.14252737,   1.51518787,   3.20245987,
         2.15147974,   1.50671989,   1.52340046,   4.64081354,
         2.30135   ,   1.27838946,   2.43673883,   5.55207243,
               -inf])

In [21]:
%%time
# Elbow score with the asymmetric window (1,2): the recorded output has -inf
# in the first entry and in the last two entries.
elbow(SSW, (1,2), np.mean)

Wall time: 0 ns


array([       -inf,  0.53375991,  1.07125849, 71.99810978,  0.53739202,
        0.4223355 ,  0.71478066,  0.75390269,  0.27304401,  0.57768181,
        0.73158219,  0.81770946,  0.46466247,  0.50027417,  0.55281397,
        0.1919393 ,  1.21140217,  0.39048882,  0.51689357,  0.77389789,
        0.3563774 ,  0.38321067,  0.59170649,  1.0015612 ,  0.28354965,
        0.44841579,  0.96174268,        -inf,        -inf])

In [22]:
%%time
# Hartigan index from the centered data, the labels and SSW.
# The recorded result ends with inf, presumably because the largest k has no
# k+1 partition to compare against - confirm in metrics.hartigan.
hartigan(centering(X), L, SSW, np.mean)

Wall time: 31.3 ms


array([ 463.34543255,  821.3702402 , 3222.04617425,   24.87445946,
         21.3807283 ,   26.52019352,   26.04733881,   12.12668131,
         23.32565791,   22.57059132,   19.02913433,   12.53029294,
         11.1259301 ,   16.36892832,    6.25092171,   24.06918734,
          9.34095713,   10.80356857,   13.5274158 ,    7.69295218,
          9.9972679 ,   11.91960193,   14.66257554,    5.79219738,
          8.99267142,   11.73036367,    8.61203189,    3.69067297,
                 inf])

In [23]:
%%time
# WB index; this call takes the labels and the precomputed SSW/SSB matrices
# but not the data matrix. One value per k in the recorded result.
wb_index_matrix(L, SSW, SSB, np.mean)

Wall time: 16 ms


array([3.36554391, 2.24690102, 1.22664908, 0.29310429, 0.34264786,
       0.39085946, 0.43443385, 0.47555974, 0.52167201, 0.55994394,
       0.59652675, 0.63341808, 0.67317092, 0.7128159 , 0.74731455,
       0.78877162, 0.81428547, 0.85105334, 0.8856449 , 0.91669967,
       0.95253234, 0.98531042, 1.01522027, 1.04120063, 1.07618225,
       1.10692004, 1.13367468, 1.16342656, 1.19880051])

In [24]:
%%time
# Xu index from the data, the labels and SSW.
# NOTE(review): this is the only metric call that receives the raw X instead
# of centering(X), while SSW was computed on the centered data. Harmless if
# the function only reads the shape of X - verify against metrics.
xu_index_matrix(X, L, SSW, np.mean)

Wall time: 15.6 ms


array([-68.5354446 , -70.70370339, -74.62683945, -84.7222333 ,
       -84.63749161, -84.56876728, -84.566435  , -84.58365934,
       -84.51947857, -84.55013026, -84.58753515, -84.60980841,
       -84.59398596, -84.57547236, -84.60137261, -84.55963521,
       -84.65184602, -84.64220435, -84.64729035, -84.67589149,
       -84.66529652, -84.67460382, -84.70076573, -84.74954457,
       -84.73573119, -84.74764567, -84.78171135, -84.79478126,
       -84.77326554])

In [25]:
%%time
# Silhouette score guided by SSW. 'mean' and np.argmin are passed through
# as-is; presumably the silhouette is evaluated only on the execution that
# np.argmin selects from SSW for each k, which would explain why this cell is
# much faster than silhouette_matrix below - confirm in metrics.
silhouette_wss(centering(X), L, SSW, 'mean', np.argmin)

Wall time: 7.51 s


array([0.45548343, 0.55019358, 0.66737595, 0.76676761, 0.50766136,
       0.40273498, 0.38551156, 0.28008027, 0.20220361, 0.10371281,
       0.10235467, 0.10124305, 0.09753452, 0.09823959, 0.09780396,
       0.09173158, 0.09567475, 0.09466396, 0.08960068, 0.09197556,
       0.0920889 , 0.09319207, 0.09203726, 0.0944339 , 0.09005058,
       0.09491074, 0.09142941, 0.08995559, 0.09272456])

In [26]:
%%time
# Silhouette score over the labels in L, reduced with np.max.
# Slow cell: the recorded wall time is 3min 44s.
# NOTE(review): the recorded output contains a warning fragment pointing at
# the division np.sum(distances * bool_matrix, axis=1) / np.sum(bool_matrix, axis=1),
# presumably a zero denominator for some rows - verify in metrics.
silhouette_matrix(centering(X), L, 'mean', np.max)

  a = np.sum(distances * bool_matrix, axis=1) / np.sum(bool_matrix, axis=1)


Wall time: 3min 44s


array([0.45548343, 0.55019358, 0.66737595, 0.76676761, 0.6892881 ,
       0.68673403, 0.57834397, 0.48040133, 0.47776043, 0.47619841,
       0.35402891, 0.29831804, 0.22029837, 0.22005699, 0.21615625,
       0.22012977, 0.20297094, 0.21496231, 0.21954665, 0.09737905,
       0.21611903, 0.21523121, 0.21571921, 0.19049256, 0.21575842,
       0.21788616, 0.09434203, 0.09631915, 0.09426517])