In [1]:
import os
import numpy as np
import pandas as pd

In [2]:
from time import time

In [3]:
# Switch the working directory to the parent folder, presumably so that the
# project-local modules imported in the following cells can be found.
# The guard makes the cell idempotent: a bare os.chdir('..') climbs one more
# level every time the cell is re-run in the same kernel.
if 'PROJECT_ROOT' not in globals():
    PROJECT_ROOT = os.path.abspath('..')
    os.chdir(PROJECT_ROOT)

In [4]:
from miscellaneous import centering
from clustering import Kmeans, RandomSwap
from experiment import AlgorithmPipeline

### Generate Dataset

In [5]:
from generation import generate_set

In [6]:
%%time
# Build the dataset used by every cell below. X is what the pipelines and
# metrics consume; Y is not referenced again in the cells shown here.
# NOTE(review): the five positional arguments are not explained in this
# notebook — check generate_set's signature before changing any of them.
# NOTE(review): no random seed is set before this call; if generate_set is
# stochastic, a re-run will not reproduce the recorded outputs — confirm.
X, Y = generate_set(1000, 10, 5, 100, 0.6)

Wall time: 1.99 ms


### Run Kmeans

In [7]:
%%time
km_pipe = AlgorithmPipeline(data = centering(X),
                            algorithm=Kmeans)

Wall time: 996 µs


In [8]:
%%time
L = km_pipe.run(k_range = np.arange(2,31),
                exec_number=30,
                max_iter = 100)

Wall time: 9.84 s


In [9]:
# Sanity check on the result of the run above. The recorded (29, 30, 1000)
# matches 29 values of k and exec_number=30; the last axis is presumably one
# entry per data point — TODO confirm.
L.shape

(29, 30, 1000)

### Run RandomSwap

In [10]:
%%time
rs_pipe = AlgorithmPipeline(data = centering(X),
                            algorithm=RandomSwap)

Wall time: 0 ns


In [11]:
%%time
L = km_pipe.run(k_range = np.arange(2,31),
                exec_number=30,
                max_iter = 100)

Wall time: 10.6 s


In [12]:
# Sanity check on the rebound `L`. The recorded (29, 30, 1000) matches 29
# values of k and exec_number=30; the last axis is presumably one entry per
# data point — TODO confirm.
L.shape

(29, 30, 1000)

### Check indices

In [13]:
from metrics import bss_matrix, wss_matrix

In [14]:
# Compute the two sum-of-squares matrices reused by the index cells below,
# each timed separately. Both are evaluated on the centered data with the
# 'conventional' option.
# NOTE(review): SSW / SSB presumably stand for within- and between-cluster
# sums of squares, judging by the wss_/bss_ helper names — confirm in metrics.
%time SSW = wss_matrix(centering(X), L, 'conventional')
%time SSB = bss_matrix(centering(X), L, 'conventional')

Wall time: 708 ms
Wall time: 1.11 s


In [15]:
# Both matrices should have one row per k and one column per execution; the
# recorded output is (29, 30) for each.
print(f'{SSW.shape} and {SSB.shape}')

(29, 30) and (29, 30)


In [16]:
from metrics import calinski_harabasz_matrix, elbow, hartigan, wb_index_matrix, xu_index_matrix, silhouette_matrix

In [17]:
%%time
# Calinski-Harabasz index from the precomputed SSW / SSB, with np.mean passed
# as the aggregator. The recorded result has one value per k (29 entries), so
# the aggregation is presumably over the 30 executions — TODO confirm.
calinski_harabasz_matrix(centering(X), L, SSW, SSB, np.mean)

Wall time: 28.9 ms


array([ 829.03665279,  885.6796091 , 1596.22919782, 3323.48704826,
       2740.28724056, 2343.67244009, 2045.40005985, 1824.20017195,
       1652.89297369, 1510.29109625, 1391.48022569, 1301.1461963 ,
       1212.08870262, 1142.2179115 , 1086.38505971, 1033.58423556,
        978.21021927,  936.41801002,  899.60204895,  869.45004128,
        830.92462899,  799.72384847,  775.35690996,  742.32564683,
        722.75118366,  703.75504972,  681.88918478,  663.00763835,
        641.9683739 ])

In [18]:
%%time
# Elbow criterion on SSW with the setting (1, 1) and np.mean as aggregator.
# NOTE(review): in the recorded output the first 1 and last 1 entries are
# -inf, so the tuple appears to be a (left, right) offset around each k for
# which the statistic is undefined at the borders — confirm in metrics.elbow.
elbow(SSW, (1,1), np.mean)

Wall time: 0 ns


array([          -inf, 9.90084081e-01, 1.83298457e+00, 5.14216293e+01,
       1.18557386e+00, 1.44641721e+00, 9.63716129e-01, 1.01319488e+00,
       1.26971484e+00, 1.13738644e+00, 6.99100735e-01, 2.08642765e+00,
       6.52248121e-01, 8.04303469e-01, 1.28456123e+00, 2.41502503e+00,
       4.57543101e-01, 9.81889058e-01, 8.34179775e-01, 4.09957092e+00,
       4.87361811e-01, 6.44724420e-01, 2.96707446e+02, 3.28179824e-03,
       1.12548522e+00, 1.90708047e+00, 7.79186232e-01, 2.41901115e+00,
                 -inf])

In [19]:
%%time
# Elbow criterion on SSW with the setting (2, 2) and np.mean as aggregator.
# NOTE(review): the recorded output has -inf in the first 2 and last 2
# entries, consistent with the tuple being a (left, right) offset around each
# k — confirm in metrics.elbow.
elbow(SSW, (2,2), np.mean)

Wall time: 0 ns


array([       -inf,        -inf,  3.57820775, 79.02293752, 36.7453331 ,
        1.55142037,  1.18655213,  1.11302929,  1.3602446 ,  1.06218453,
        1.01011318,  1.39945851,  0.89738601,  0.74721781,  1.63904976,
        1.73194877,  0.77412124,  0.65088098,  1.32905748,  2.46385515,
        0.97424034,  0.9557174 ,  1.59628536,  0.5173486 ,  0.74075517,
        1.77519869,  1.60263889,        -inf,        -inf])

In [20]:
%%time
# Elbow criterion on SSW with the asymmetric setting (2, 1) and np.mean as
# aggregator.
# NOTE(review): the recorded output has -inf in the first 2 and last 1
# entries, consistent with the tuple being a (left, right) offset around each
# k — confirm in metrics.elbow.
elbow(SSW, (2,1), np.mean)

Wall time: 0 ns


array([        -inf,         -inf,   3.64779341, 145.6766822 ,
        62.1497135 ,   3.16125165,   2.35765173,   1.98962714,
         2.55618342,   2.58154287,   1.49424843,   3.54505075,
         2.01311664,   1.3289089 ,   2.31773828,   5.51727254,
         1.56252114,   1.43114562,   1.65325177,   7.51935006,
         2.48533612,   0.95893848, 488.00198209,   0.97701577,
         1.12917883,   4.05347136,   2.26515708,   4.30387133,
               -inf])

In [21]:
%%time
# Elbow criterion on SSW with the asymmetric setting (1, 2) and np.mean as
# aggregator.
# NOTE(review): the recorded output has -inf in the first 1 and last 2
# entries, consistent with the tuple being a (left, right) offset around each
# k — confirm in metrics.elbow.
elbow(SSW, (1,2), np.mean)

Wall time: 0 ns


array([          -inf, 6.40599621e-01, 1.79801838e+00, 2.78938821e+01,
       7.00957479e-01, 7.09845775e-01, 4.85016260e-01, 5.66797449e-01,
       6.75664640e-01, 4.67981490e-01, 4.72592676e-01, 8.23646580e-01,
       2.90752324e-01, 4.52243100e-01, 9.08411354e-01, 7.58110028e-01,
       2.26680985e-01, 4.46560367e-01, 6.70601350e-01, 1.34330080e+00,
       1.91043592e-01, 6.42558789e-01, 9.70548828e-01, 1.73777515e-03,
       7.38332117e-01, 8.35196913e-01, 5.51288107e-01,           -inf,
                 -inf])

In [22]:
%%time
# Hartigan index from the centered data, the labels and SSW, with np.mean as
# aggregator.
# NOTE(review): the last recorded entry is inf, presumably because the index
# for a given k needs the value at the next k, which does not exist for the
# largest k — confirm in metrics.hartigan.
hartigan(centering(X), L, SSW, np.mean)

Wall time: 27.9 ms


array([5.15187780e+02, 1.08730389e+03, 1.46525255e+03, 2.93054823e+01,
       2.53232553e+01, 1.78038416e+01, 1.88057546e+01, 1.88960284e+01,
       1.50939707e+01, 1.34376424e+01, 1.95828502e+01, 9.46633782e+00,
       1.47152816e+01, 1.86230186e+01, 1.46994093e+01, 6.11834117e+00,
       1.35429645e+01, 1.39751944e+01, 1.70272036e+01, 4.16684651e+00,
       8.61638443e+00, 1.35359253e+01, 4.55758317e-02, 1.40736724e+01,
       1.26541577e+01, 6.67405001e+00, 8.63267084e+00, 3.57815296e+00,
                  inf])

In [23]:
%%time
# WB index from the labels and the precomputed SSW / SSB, with np.mean as
# aggregator. Unlike the cells above, this one takes no data matrix.
wb_index_matrix(L, SSW, SSB, np.mean)

Wall time: 29.9 ms


array([2.4076137 , 1.68853385, 0.83196072, 0.37423043, 0.43528284,
       0.49430969, 0.55427508, 0.61115826, 0.66549983, 0.72032471,
       0.7745839 , 0.82177545, 0.87604657, 0.92395429, 0.96613994,
       1.01050061, 1.06292562, 1.10580957, 1.14670587, 1.1822991 ,
       1.23304978, 1.27720224, 1.31350449, 1.36816639, 1.40153351,
       1.43575961, 1.47824606, 1.51684312, 1.56308055])

In [24]:
%%time
# Xu index from the data, the labels and SSW, with np.mean as aggregator.
# NOTE(review): this call passes raw X, while SSW and the labels in L were
# computed from centering(X), and the other index cells pass centering(X).
# Confirm whether the raw data is intended here.
xu_index_matrix(X, L, SSW, np.mean)

Wall time: 30.9 ms


array([-70.90576185, -73.3256588 , -78.23389957, -84.44216141,
       -84.38872267, -84.34798014, -84.28365029, -84.2493295 ,
       -84.23371178, -84.2054676 , -84.17738493, -84.20362738,
       -84.1656366 , -84.17306867, -84.21520447, -84.23481065,
       -84.19715254, -84.21805284, -84.24619275, -84.30018485,
       -84.26373895, -84.262947  , -84.30090115, -84.24234464,
       -84.28924533, -84.32800616, -84.3248994 , -84.33812129,
       -84.31577193])

In [25]:
%%time
# Silhouette score with the 'mean' option and np.max as aggregator. This is
# by far the slowest cell (about 4 minutes in the recorded run).
# NOTE(review): this call passes raw X, while the labels in L were fit on
# centering(X) — confirm whether that is intended.
# NOTE(review): the recorded output is NaN for most larger k and comes with a
# warning on a division inside the metric. np.max propagates NaN, so one
# NaN execution would make the whole entry NaN — verify the cause in
# silhouette_matrix before relying on these values.
silhouette_matrix(X, L, 'mean', np.max)

  a = np.sum(distances * bool_matrix, axis=1) / np.sum(bool_matrix, axis=1)


Wall time: 4min 5s


array([0.47342076, 0.58865396, 0.65485474, 0.72237253, 0.59241904,
       0.51377289, 0.41191009, 0.50977879, 0.3418199 ,        nan,
              nan, 0.33942444,        nan,        nan,        nan,
              nan,        nan,        nan,        nan,        nan,
              nan,        nan,        nan,        nan,        nan,
              nan,        nan,        nan,        nan])