### Libraries

In [1]:
import os

from sklearn.metrics.pairwise import pairwise_kernels
from KernelKMeans import KernelKMeans
from GlobalKernelKMeans import GlobalKernelKMeans, GlobalKernelKMeansPP
from Datasets.Datasets_Functions import *
from Common_Modules.Visualization import Visualization
from Common_Modules.General_Functions import General_Functions

### Switches

In [2]:
# Which clustering experiments to run; each flag guards one experiment cell.
kernelKMeans_is_enabled = True
globalKernelKMeans_is_enabled = True
globalKernelKMeansPP_is_enabled = True

# Whether the experiment cells also call `visualization.plot` on their labels.
plot_is_enabled = False

### Global Variables

In [3]:
# Number of restarts handed to KernelKMeans; the same value is reused as the
# number of candidates handed to GlobalKernelKMeansPP.
n_init = 100
n_candidates = n_init

# Kernel names passed as `metric` to sklearn's pairwise_kernels.
kernels = ["cosine", "polynomial", "rbf"]
kernel = kernels[kernels.index("rbf")]  # select by name rather than by position

### Visualization

In [4]:
# Single Visualization instance shared by the experiment cells, which call
# `visualization.plot(...)` only when `plot_is_enabled` is True.
visualization = Visualization()

### Load Dataset

In [5]:
# --- Dataset selection -------------------------------------------------------
# Change the index to switch datasets; the name is also used in result paths.
dataset_names = ["avila", "breast_cancer", "dermatology", "ecoli", "iris", "olivetti_faces", "pendigits", "waveform_v1", "wine"]
dataset_name = dataset_names[4]  # "iris"
module_name = "Datasets.Datasets"
dataloader, input_dim, X, y = function_get_dataset(dataset_name, module_name)

# --- Kernel matrix -----------------------------------------------------------
# The Gram matrix is computed once here and passed to every clustering
# algorithm in the cells below via `kernel_matrix=`.
gamma = General_Functions.calculate_gamma_scale(X)
if kernel == "rbf":
    kernel_matrix = pairwise_kernels(X, X, metric=kernel, gamma=gamma)
elif kernel == "polynomial":
    degree = 3
    coef0 = 1
    kernel_matrix = pairwise_kernels(X, X, metric=kernel, gamma=gamma, degree=degree, coef0=coef0)
else:
    # Any other kernel (e.g. "cosine") is computed without extra parameters.
    kernel_matrix = pairwise_kernels(X, X, metric=kernel)

# --- Output location ---------------------------------------------------------
# Layout: Results/<dataset>/<kernel>/<n_init>/<gamma>/
# Built with os.path.join so the separators are correct on every OS (the
# previous hard-coded "\\" only formed a directory tree on Windows). The final
# "" component keeps the trailing separator, because later cells append file
# names directly to this string.
# NOTE: gamma is part of the path even when the selected kernel does not use it.
data_dir_path = os.path.join("Results", dataset_name, kernel, f"{n_init}", f"{gamma}", "")
General_Functions().create_directory(data_dir_path)

# Largest number of clusters the experiment cells solve for.
K = 50

Directory 'Results\iris\rbf\100\3.633944119437154\' already exists.


### Kernel kMeans++

In [6]:
if kernelKMeans_is_enabled:
    # Baseline sweep: fit KernelKMeans separately for every number of clusters
    # from 2 up to and including K, appending one CSV row per fit.
    initialization_methods = ["forgy", "random", "k-means++"]
    init = initialization_methods[1]  # "random"

    # Per-k summaries, keyed by the number of clusters.
    n_iters_ = {}
    execution_times_ = {}
    inertias_ = {}

    # All rows of the sweep go to a single file named after the largest K.
    results_csv_path = f"{data_dir_path}KernelKMeans_Dataset_{dataset_name}_Init_{init}_Kernel_{kernel}_K_{K}_Inits_{n_init}_Results.csv"

    # Use a dedicated loop variable instead of temporarily overwriting the
    # notebook-global K: if the sweep is interrupted or raises, K still holds
    # the configured value for the cells that follow. range() is also simply
    # empty when K < 2, where the old `while K != prev_K + 1` could spin forever.
    for n_clusters in range(2, K + 1):
        kernelKMeans = KernelKMeans(n_clusters=n_clusters, kernel_matrix=kernel_matrix, n_init=n_init, init=init, verbose=0)
        kernelKMeans.fit()

        if plot_is_enabled:
            visualization.plot(X, kernelKMeans.labels_, data_dir_path=data_dir_path + "KernelKMeans_" + init)

        # `n_iters_` and `execution_times_` on the estimator are dicts; their
        # values are summed (presumably one entry per restart - confirm in KernelKMeans).
        n_iters_[n_clusters] = sum(kernelKMeans.n_iters_.values())
        execution_times_[n_clusters] = sum(kernelKMeans.execution_times_.values())
        inertias_[n_clusters] = kernelKMeans.inertia_

        new_row = {
            "K": n_clusters,
            "MSE": inertias_[n_clusters],
            "ITERATIONS": n_iters_[n_clusters],
            "EXECUTION TIME": execution_times_[n_clusters],
        }
        General_Functions.append_to_csv(results_csv_path, new_row)

### Global Kernel kMeans

In [7]:
if globalKernelKMeans_is_enabled:
    # The estimator receives the results CSV path through its `data_dir_path`
    # argument (presumably it logs per-k results there - confirm in GlobalKernelKMeans).
    global_results_csv = (
        f"{data_dir_path}GlobalKernelKMeans_Dataset_{dataset_name}"
        f"_Kernel_{kernel}_K_{K}_Inits_{n_init}_Results.csv"
    )
    globalKernelKMeans = GlobalKernelKMeans(
        n_clusters=K,
        kernel_matrix=kernel_matrix,
        data_dir_path=global_results_csv,
        verbose=1,
    )
    globalKernelKMeans.fit()

    if plot_is_enabled:
        # `labels_` is indexed by number of clusters; plot the K-cluster solution.
        visualization.plot(
            X,
            globalKernelKMeans.labels_[K],
            data_dir_path=data_dir_path + "GlobalKernelKMeans",
        )

Solving Kernel 2-means
Solved 2-means MSE: 52.88282712016293 in 0.10158300399780273s
Solving Kernel 3-means
Solved 3-means MSE: 38.31369052869924 in 0.11027836799621582s
Solving Kernel 4-means
Solved 4-means MSE: 31.780724228837997 in 0.12830448150634766s
Solving Kernel 5-means
Solved 5-means MSE: 27.230008766760097 in 0.1244816780090332s
Solving Kernel 6-means
Solved 6-means MSE: 24.041238378368874 in 0.135528564453125s
Solving Kernel 7-means
Solved 7-means MSE: 21.535352533949805 in 0.14791226387023926s
Solving Kernel 8-means
Solved 8-means MSE: 19.604115937888025 in 0.1473698616027832s
Solving Kernel 9-means
Solved 9-means MSE: 17.77666405180515 in 0.153151273727417s
Solving Kernel 10-means
Solved 10-means MSE: 16.33047586995295 in 0.14740753173828125s
Solving Kernel 11-means
Solved 11-means MSE: 14.889523477408813 in 0.16533803939819336s
Solving Kernel 12-means
Solved 12-means MSE: 13.998885601532727 in 0.15471887588500977s
Solving Kernel 13-means
Solved 13-means MSE: 13.1348989535

In [8]:
if globalKernelKMeansPP_is_enabled:
    sampling_methods = ["sequential", "batch"]
    sampling = sampling_methods[0]  # "sequential"

    # The estimator receives the results CSV path through its `data_dir_path`
    # argument; the file name carries the sampling method.
    pp_results_csv = (
        f"{data_dir_path}GlobalKernelKMeans_Dataset_{dataset_name}"
        f"_Sampling_{sampling}_Kernel_{kernel}_K_{K}_Inits_{n_init}_Results.csv"
    )
    globalKernelKMeansPP = GlobalKernelKMeansPP(
        n_clusters=K,
        kernel_matrix=kernel_matrix,
        n_candidates=n_candidates,
        sampling=sampling,
        data_dir_path=pp_results_csv,
        verbose=1,
    )
    globalKernelKMeansPP.fit()

    if plot_is_enabled:
        # `labels_` is indexed by number of clusters; plot the K-cluster solution.
        visualization.plot(
            X,
            globalKernelKMeansPP.labels_[K],
            data_dir_path=data_dir_path + "GlobalKernelKMeans_" + sampling + "_",
        )

Solving 2-means
Solved 2-means MSE: 52.88282712016293 in 0.07924079895019531s
Solving 3-means
Solved 3-means MSE: 38.31369052869924 in 0.0770101547241211s
Solving 4-means
Solved 4-means MSE: 31.780724228837997 in 0.09595346450805664s
Solving 5-means
Solved 5-means MSE: 27.230008766760097 in 0.09512519836425781s
Solving 6-means
Solved 6-means MSE: 24.041238378368874 in 0.09900641441345215s
Solving 7-means
Solved 7-means MSE: 21.535352533949805 in 0.10484552383422852s
Solving 8-means
Solved 8-means MSE: 19.604115937888025 in 0.10194778442382812s
Solving 9-means
Solved 9-means MSE: 17.77666405180515 in 0.1055154800415039s
Solving 10-means
Solved 10-means MSE: 16.33047586995295 in 0.10446596145629883s
Solving 11-means
Solved 11-means MSE: 14.889523477408813 in 0.11458230018615723s
Solving 12-means
Solved 12-means MSE: 13.998885601532727 in 0.10906267166137695s
Solving 13-means
Solved 13-means MSE: 13.134898953548444 in 0.11740469932556152s
Solving 14-means
Solved 14-means MSE: 12.325649638