###  Libraries

In [1]:
import os

import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import pairwise_kernels
from tslearn.clustering import KernelKMeans as TSKernelKMeans
from KernelKMeans import KernelKMeans
from GlobalKernelKMeans import GlobalKernelKMeans, GlobalKernelKMeansPP
from Datasets.Datasets_Functions import *
from Rings import Rings
from Common_Modules.Evaluation import Evaluator
from Common_Modules.Graph import Graph
from Common_Modules.Visualization import Visualization
from Common_Modules.General_Functions import General_Functions

KeyboardInterrupt: 

### Global Variables

In [None]:
# Notebook-wide configuration. Everything a reader might want to tune lives here.

# Seed NumPy's global RNG up front so that stochastic steps that draw from it
# are repeatable across "Restart & Run All".
seed = 42
np.random.seed(seed)

evaluator = Evaluator()

# Switches for the individual experiment cells further down.
graph_is_enabled = False
kMeans_is_enabled = False
kernelKMeans_is_enabled = False
kernelKMeansPP_is_enabled = False
globalKernelKMeans_is_enabled = False
globalKernelKMeansPP_is_enabled = True

# Number of candidates (global kernel k-means++) and number of restarts
# (kernel k-means) are deliberately kept equal.
n_candidates = n_init = 100

# Kernel used to build the kernel matrix of the real dataset.
kernel = 'cosine'
gamma = 1.0

### Visualization

In [None]:
# Palette shared by the dataset generators / graph plotting below.
colors = [
    'red', 'blue', 'green', 'orange', 'purple',
    'yellow', 'black', 'cyan', 'gray', 'lime',
]
# Plotting helper used by the clustering cells.
visualization = Visualization()

### Load Dataset

### 2Rings

In [None]:
if False:  # disabled; switch to True to use this synthetic dataset
    # centers_coordinates = [(-1, 2), (2, 2), (2, 5), (2, -1), (5, 2), (6, 6), (-2, 6), (2, 8), (6, -2), (2, -4), (-2, -2)]
    centers_coordinates = [(-1, 2), (2, 2)]
    rings = Rings(colors)
    X, y = rings.make_rings_pairs(
        centers_coordinates=centers_coordinates,
        n_samples=100,
        factor=0.2,
        noise=0.0,
    )

    # Two clusters are requested per centre (presumably one per ring of a pair).
    K = len(centers_coordinates) * 2
    kernel_matrix = pairwise_kernels(X, X, metric='rbf', gamma=1)

### 3Rings

In [None]:
if (False):
    import numpy as np
    import matplotlib.pyplot as plt
    from matplotlib.colors import ListedColormap

    class Rings:
        """Generator for 2-D toy datasets made of concentric rings.

        Parameters
        ----------
        colors : sequence
            Colours used to build the colormap in :meth:`plot`.
        seed : int, default 42
            Seed applied to NumPy's global RNG at construction time so that
            noisy rings are reproducible.
        """

        def __init__(self, colors, seed=42):
            self.colors = colors
            self.seed = seed
            # The seed was previously stored but never applied.
            np.random.seed(self.seed)

        def plot(self, X, labels_):
            """Scatter-plot the points of ``X`` coloured by ``labels_``."""
            plt.scatter(X[:, 0], X[:, 1], c=labels_, cmap=ListedColormap(self.colors))
            plt.show()

        def move_rings(self, center_coordinates, X):
            """Translate ``X`` in place by ``center_coordinates`` and return it."""
            X[:, 0] += center_coordinates[0]
            X[:, 1] += center_coordinates[1]
            return X

        def generate_circle(self, n_samples, radius, noise):
            """Return ``n_samples`` points evenly spaced on a circle of ``radius``.

            Gaussian noise scaled by ``noise`` is added to every coordinate.
            """
            angles = np.linspace(0, 2 * np.pi, n_samples, endpoint=False)
            X = np.c_[radius * np.cos(angles), radius * np.sin(angles)]
            X += noise * np.random.randn(n_samples, 2)
            return X

        def make_concentric_rings(self, centers_coordinates, n_samples=300, radii=(1, 2, 3), noise=0.05):
            """Build one group of concentric rings around every centre.

            Each ring receives ``n_samples // len(radii)`` points. Every ring of
            every centre gets its own label: the labels of the k-th centre start
            at ``k * len(radii)``.

            Returns
            -------
            (X, y) : the stacked points and their integer labels. The result is
            also plotted (see :meth:`concatenate_pairs`).
            """
            pairs = []
            label = 0  # first label of the current centre

            for center_coordinates in centers_coordinates:
                X_list, y_list = [], []
                for i, radius in enumerate(radii):
                    X = self.generate_circle(n_samples // len(radii), radius, noise)
                    # Offset by `label` so rings of different centres do not
                    # share a label.
                    y = np.full(X.shape[0], label + i)
                    X_list.append(X)
                    y_list.append(y)

                # Concatenate all rings of this centre
                X = np.concatenate(X_list)
                y = np.concatenate(y_list)

                X = self.move_rings(center_coordinates, X)

                pairs.append((X, y))
                label += len(radii)

            return self.concatenate_pairs(pairs)

        def concatenate_pairs(self, pairs):
            """Stack a list of ``(X, y)`` pairs, plot the result and return it."""
            X = np.concatenate([X for X, _ in pairs])
            y = np.concatenate([y for _, y in pairs])
            self.plot(X, y)
            return X, y

    # Example usage: a single centre at the origin carrying three concentric rings.
    centers_coordinates = [(0, 0)]

    # NOTE: this rebinds the notebook-level `colors` palette to three entries.
    colors = ['#1f77b4', '#ff7f0e', '#2ca02c']
    rings = Rings(colors=colors)

    # Generate the rings (the generator also plots them)
    X, y = rings.make_concentric_rings(
        centers_coordinates=centers_coordinates,
        n_samples=150,
        radii=[1, 10, 25],
        noise=0,
    )

    # Three rings per centre -> three clusters per centre
    K = len(centers_coordinates) * 3
    kernel_matrix = pairwise_kernels(X, X, metric='sigmoid')
    print(X)

### 1 Ring with 2 Gaussians Inside

In [None]:
if(False):
    import numpy as np
    import matplotlib.pyplot as plt
    from matplotlib.colors import ListedColormap
    from sklearn.metrics.pairwise import pairwise_kernels
    import copy
    
    class Rings:
        """Toy-data generator: per centre, one circular ring with two Gaussian blobs inside."""

        def __init__(self, colors, seed=42):
            self.colors, self.seed = colors, seed
            # Fix NumPy's global RNG so the sampled points are repeatable.
            np.random.seed(self.seed)

        def plot(self, X, labels_):
            """Scatter-plot ``X`` coloured by ``labels_`` with the axes hidden."""
            palette = ListedColormap(self.colors)
            plt.scatter(X[:, 0], X[:, 1], c=labels_, cmap=palette)
            plt.axis('off')
            plt.show()

        def generate_circle(self, n_samples, radius, noise):
            """Return ``n_samples`` evenly spaced points on a circle, plus scaled Gaussian noise."""
            theta = np.linspace(0, 2 * np.pi, n_samples, endpoint=False)
            points = np.column_stack((radius * np.cos(theta), radius * np.sin(theta)))
            points += noise * np.random.randn(n_samples, 2)
            return points

        def generate_gaussians(self, n_samples, means, cov, labels):
            """Draw ``n_samples`` points per mean from N(mean, cov); label them with ``labels``."""
            blobs = []
            for mean in means:
                blobs.append(np.random.multivariate_normal(mean, cov, n_samples))
            tags = []
            for label in labels:
                tags.append([label] * n_samples)
            return np.vstack(blobs), np.hstack(tags)

        def make_multiple_rings_with_gaussians(self, centers_coordinates, n_samples=100, radius=10, noise=0.05, gaussian_samples=50):
            """Build, plot and return ``(X, y)`` for one ring + two blobs per centre.

            The k-th centre uses labels ``3k`` (ring), ``3k + 1`` and ``3k + 2``
            (blobs placed one unit left / right of the centre).
            """
            X_parts, y_parts = [], []

            for idx, center in enumerate(centers_coordinates):
                base = 3 * idx

                # Ring: sampled around the origin, then shifted onto the centre
                ring = self.generate_circle(n_samples, radius, noise)
                ring_labels = np.full(ring.shape[0], base)
                ring = self.move_rings(center, ring)
                X_parts.append(ring)
                y_parts.append(ring_labels)

                # Two Gaussian blobs near the centre of the ring
                cx, cy = center[0], center[1]
                blob_X, blob_y = self.generate_gaussians(
                    gaussian_samples,
                    means=[(cx - 1, cy), (cx + 1, cy)],
                    cov=[[0.1, 0], [0, 0.1]],
                    labels=[base + 1, base + 2],
                )
                X_parts.append(blob_X)
                y_parts.append(blob_y)

            X = np.concatenate(X_parts)
            y = np.concatenate(y_parts)

            self.plot(X, y)
            return X, y

        def move_rings(self, center_coordinates, X):
            """Shift ``X`` in place by ``center_coordinates`` and return the same array."""
            dx, dy = center_coordinates[0], center_coordinates[1]
            X[:, 0] += dx
            X[:, 1] += dy
            return X
    
    # Usage example: two centres, each with a ring and two Gaussian blobs.
    # NOTE: this rebinds the notebook-level `colors` palette to six entries.
    colors = ['purple', 'orange', 'blue', 'red', 'green', 'lime']
    centers_coordinates = [(5, 5), (-5, 5)]

    rings = Rings(colors)
    X, y = rings.make_multiple_rings_with_gaussians(
        centers_coordinates,
        n_samples=100,
        radius=3,
        noise=0.0,
        gaussian_samples=50,
    )

    # Three clusters per centre: the ring plus its two blobs
    K = len(centers_coordinates) * 3
    kernel_matrix = pairwise_kernels(X, X, metric='rbf', gamma=0.5)

### 3Rings Paper

In [None]:
if False:  # disabled; switch to True to use the pre-generated three-circles dataset
    import scipy.io
    import numpy as np

    # Points and the matching precomputed kernel matrix are read from .mat files.
    X_mat = scipy.io.loadmat('3circles_dataset.mat')
    X = np.array(X_mat['Dataset'])
    kernel_matrix_mat = scipy.io.loadmat('3circles_kernel_matrix.mat')
    kernel_matrix = np.array(kernel_matrix_mat['K'])

    # Labels are stored as plain text; cast to int for use as class ids.
    y = np.loadtxt('array.txt').astype(int)
    K = 3

    # Build the plotter here instead of relying on a `rings` object left behind
    # by one of the other (disabled) dataset cells, which would raise NameError
    # on a fresh kernel.
    # NOTE(review): assumes the `Rings` in scope exposes `plot(X, labels)` like
    # the inline variants in this notebook do - confirm for the imported one.
    rings = Rings(colors)
    rings.plot(X, y)
    print(y)

### Graphs

In [None]:
if graph_is_enabled:
    # One cluster is requested per community.
    n_communities = 10
    K = n_communities

    n_nodes_per_community = 10  # nodes in every community
    p_intra = 1                 # probability of an intra-community edge
    p_inter = 0.01              # probability of an inter-community edge

    graph = Graph(n_communities, n_nodes_per_community, p_intra, p_inter, colors)
    graph.create_adj_matrix()

    # The linear kernel of the adjacency matrix is used as the kernel matrix and
    # is also written back over `graph.adj_matrix`.
    # NOTE(review): confirm that overwriting the adjacency matrix before
    # create_kernel_matrix_from_adj_matrix() is intended.
    kernel_matrix = pairwise_kernels(graph.adj_matrix, graph.adj_matrix, metric='linear')
    graph.adj_matrix = kernel_matrix
    graph.create_kernel_matrix_from_adj_matrix()

### Real Dataset

In [None]:
dataset_name = "waveform_v1" # The available datasets are: breast_cancer, dermatology, ecoli, iris, olivetti_faces, pendigits, waveform_v1, wine
module_name = "Datasets.Datasets"
batch_size = 16
dataloader, input_dim, X, y = function_get_dataset(dataset_name, module_name, batch_size)
print('Data_Shape is:', input_dim)
print('Batches Number is:', len(dataloader))

# Number of distinct ground-truth labels (overridden at the end of this cell).
K = len(np.unique(y))
kernel_matrix = pairwise_kernels(X, X, metric=kernel)

# Build the results directory with os.path.join instead of hard-coded "\\"
# separators so the notebook also works outside Windows. The trailing ""
# keeps the trailing separator that later cells rely on when they append a
# file name directly to `data_dir_path`.
data_dir_path = os.path.join("Results", dataset_name, kernel, str(n_init), "")
General_Functions().create_directory(data_dir_path)

# Deliberate override: the experiments below run up to K = 50 clusters rather
# than the number of ground-truth classes computed above.
K = 50

Data_Shape is: 22
Batches Number is: 313
Directory 'Results\waveform_v1\cosine\100\' already exists.


  y = column_or_1d(y, warn=True)


### kMeans

In [None]:
if kMeans_is_enabled:
    # Pass the configured seed so the k-means initialisation is reproducible.
    kmeans = KMeans(n_clusters=K, random_state=seed).fit(X)
    acc, pur, nmi, ari = evaluator.evaluate_model(y, kmeans.labels_)
    evaluator.print_evaluation()
    # os.path.join keeps the figure path portable (same string as before on Windows).
    visualization.plot_tsne(X, y, data_dir_path=os.path.join("Figures", dataset_name))

### Kernel kMeans

In [None]:
if kernelKMeans_is_enabled:
    # tslearn's kernel k-means on the precomputed kernel matrix; seeded so the
    # random initialisations are reproducible.
    kernel_kmeans = TSKernelKMeans(n_clusters=K, n_init=10, kernel='precomputed', random_state=seed).fit(kernel_matrix)
    acc, pur, nmi, ari = evaluator.evaluate_model(y, kernel_kmeans.labels_)
    evaluator.print_evaluation()
    # os.path.join keeps the figure path portable (same string as before on Windows).
    visualization.plot_tsne(X, y, data_dir_path=os.path.join("Figures", dataset_name))

### Kernel kMeans++

In [None]:
if kernelKMeansPP_is_enabled:
    # Per-K bookkeeping of the sweep: total iterations, total run time, final inertia.
    n_iters_ = {}
    execution_times_ = {}
    inertias_ = {}
    #init = "random"
    init = "k-means++"
    #init = "forgy"

    # Sweep K from `start_K` up to (and including) the notebook-level K.
    prev_K = K
    start_K = 47
    # All rows of one sweep go to the same CSV, named after the target K.
    results_csv_path = f"{data_dir_path}KernelKMeans_Dataset_{dataset_name}_Init_{init}_Kernel_{kernel}_K_{prev_K}_Inits_{n_init}_Results.csv"

    try:
        # A bounded range replaces `while K != prev_K + 1`, which never
        # terminated whenever prev_K was smaller than start_K - 1.
        for K in range(start_K, prev_K + 1):
            print(K)
            kernelKMeans = KernelKMeans(n_clusters=K, kernel_matrix=kernel_matrix, n_init=n_init, init=init, verbose=0)
            kernelKMeans.fit()
            if graph_is_enabled:
                graph.plot_clusters(kernelKMeans.labels_)
            #else:
                #evaluation_results = General_Functions().append_to_file(y, kernelKMeans.labels_, kernelKMeans.inertia_, kernelKMeans.execution_times_, f"{data_dir_path}KernelKMeans_{init}_Results.txt")
                #visualization.plot_tsne(X, y, data_dir_path=data_dir_path + "KernelKMeans_" + init)
                #General_Functions.create_csv(kernelKMeans.inertia_, kernelKMeans.n_iters_, kernelKMeans.execution_times_, f"{data_dir_path}KernelKMeans_{init}_Results.csv")
                # That's the function of ground truth error
                #ground_truth_error = kernelKMeans.calculate_ground_truth_error(y)
                #print(f"Ground Truth Error is: {ground_truth_error}")
            n_iters_[K] = sum(kernelKMeans.n_iters_.values())
            execution_times_[K] = sum(kernelKMeans.execution_times_.values())
            inertias_[K] = kernelKMeans.inertia_
            new_row = { "K": K, "MSE": inertias_[K], "ITERATIONS": n_iters_[K], "EXECUTION TIME": execution_times_[K]}
            General_Functions.append_to_csv(results_csv_path, new_row)
    finally:
        # Always restore the notebook-level K, even if the sweep is interrupted,
        # so later cells do not silently run with a leftover loop value.
        K = prev_K

### Global Kernel kMeans

In [None]:
if globalKernelKMeans_is_enabled:
    # The CSV path is handed to the solver itself via `data_dir_path`.
    globalKernelKMeans = GlobalKernelKMeans(
        n_clusters=K,
        kernel_matrix=kernel_matrix,
        data_dir_path=f"{data_dir_path}GlobalKernelKMeans_Dataset_{dataset_name}_Kernel_{kernel}_K_{K}_Inits_{n_init}_Results.csv",
        verbose=1,
    )
    globalKernelKMeans.fit()

    # Results are indexed by number of clusters; report the solution for K.
    if not graph_is_enabled:
        evaluation_results = General_Functions().append_to_file(
            y,
            globalKernelKMeans.labels_[K],
            globalKernelKMeans.inertia_[K],
            globalKernelKMeans.execution_times_,
            f"{data_dir_path}GlobalKernelKMeans_Results.txt",
        )
        #visualization.plot_tsne(X, y, data_dir_path=data_dir_path + "\\GlobalKernelKMeans")
        #General_Functions.create_csv(globalKernelKMeans.inertia_, globalKernelKMeans.n_iters_, globalKernelKMeans.execution_times_, f"{data_dir_path}GlobalKernelKMeans_Dataset_{dataset_name}_Kernel_{kernel}_K_{K}_Inits_{n_init}_Results.csv")
    else:
        graph.plot_clusters(globalKernelKMeans.labels_[K])
# 0.265

Solving Kernel 2-means
Solved 2-means MSE: 374.123699923039 in 25762.097885370255s
Solving Kernel 3-means


KeyboardInterrupt: 

### Global Kernel kMeans++

In [None]:
# Guarded by the configuration flag (like every other experiment cell) instead
# of a literal `True`, so the switch in the Global Variables cell takes effect.
if globalKernelKMeansPP_is_enabled:
    #sampling = 'batch'
    sampling = 'sequential'
    globalKernelKMeansPP = GlobalKernelKMeansPP(n_clusters=K, kernel_matrix=kernel_matrix, n_candidates=n_candidates, sampling=sampling, data_dir_path=f"{data_dir_path}GlobalKernelKMeans_Dataset_{dataset_name}_Sampling_{sampling}_Kernel_{kernel}_K_{K}_Inits_{n_init}_Results.csv", verbose=1)
    globalKernelKMeansPP.fit()
    # Results are indexed by number of clusters; report the solution for K.
    if graph_is_enabled:
        graph.plot_clusters(globalKernelKMeansPP.labels_[K])
    else:
        acc, pur, nmi, ari = evaluator.evaluate_model(y, globalKernelKMeansPP.labels_[K])
        evaluation_results = General_Functions().append_to_file(y, globalKernelKMeansPP.labels_[K], globalKernelKMeansPP.inertia_[K], globalKernelKMeansPP.execution_times_,  f"{data_dir_path}GlobalKernelKMeans_{sampling}_Results.txt")
        #General_Functions.create_csv(globalKernelKMeansPP.inertia_, globalKernelKMeansPP.n_iters_, globalKernelKMeansPP.execution_times_, f"{data_dir_path}GlobalKernelKMeans_Dataset_{dataset_name}_Sampling_{sampling}_Kernel_{kernel}_K_{K}_Inits_{n_init}_Results.csv")
        #visualization.plot_tsne(X, y, data_dir_path= f"{data_dir_path}GlobalKernelKMeans_{sampling}_")

Solving 2-means


KeyError: 1