In [9]:
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import HBox, VBox, Label, IntSlider, FloatSlider, Button, Output
from IPython.display import display

def rand_2D_points(num_points, num_centroids, **kwargs):
    """Generate 2-D points scattered around randomly placed cluster centres.

    Each centre is drawn uniformly from the square
    ``[-space/2, space/2) x [-space/2, space/2)``.  Every point picks one
    centre uniformly at random and is sampled from an isotropic normal
    distribution around it with standard deviation ``noise * space``.

    Parameters
    ----------
    num_points : int
        Number of points to generate.
    num_centroids : int
        Number of cluster centres to scatter the points around.
    **kwargs
        space : float, default 20
            Side length of the square the centres are drawn from.
        noise : float, default 0.1
            Standard deviation of each cluster, as a fraction of ``space``.
        seed : int or None, default None
            Seed for a private random generator.  ``None`` seeds from OS
            entropy (a different cloud on every call); an integer makes the
            result reproducible.
        Unrecognised keywords are ignored.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(num_points, 2)``.

    Raises
    ------
    ValueError
        If either count is negative, or if points are requested but
        ``num_centroids`` is 0.
    """
    space = kwargs.get('space', 20)
    noise = kwargs.get('noise' , 0.1)
    seed  = kwargs.get('seed', None)

    if num_points < 0 or num_centroids < 0:
        raise ValueError('num_points and num_centroids must be non-negative')
    if num_points == 0:
        return np.zeros(shape = (0, 2))
    if num_centroids == 0:
        raise ValueError('num_centroids must be at least 1 when num_points > 0')

    # A private generator keeps the global NumPy random state untouched, so a
    # seed set by the caller elsewhere is not clobbered by this function.
    rng = np.random.RandomState(seed)

    centroids = space * (rng.rand(num_centroids, 2) - 0.5)

    # One owning centre per point, then a single vectorised normal draw.
    owners = rng.randint(num_centroids, size = num_points)

    return rng.normal(loc = centroids[owners], scale = noise * space)

def find_dist(a, b):
    """Return the Euclidean distance between two indexable sequences.

    Only the first ``len(a)`` coordinates are compared, so ``b`` must be at
    least as long as ``a``; any further elements of ``b`` are ignored.
    """
    squared_gaps = []
    for axis in range(len(a)):
        gap = a[axis] - b[axis]
        squared_gaps.append(gap ** 2)

    total = np.sum(squared_gaps)
    return np.sqrt(total)

def kmeans(data, centroids, max_iter = 100):
    """Cluster ``data`` with Lloyd's k-means algorithm.

    Parameters
    ----------
    data : array-like, shape (n, d)
        The points to cluster.
    centroids : int or array-like, shape (k, d)
        Either the number of clusters, in which case the starting centroids
        are distinct rows of ``data`` chosen at random (rows repeat only when
        more clusters than points are requested), or explicit starting
        centroids.  An array passed in is copied and never modified.
    max_iter : int, default 100
        Maximum number of assign/update rounds.

    Returns
    -------
    s : numpy.ndarray of int, shape (n,)
        Index of the centroid each point was assigned to in the last round.
    centroids : numpy.ndarray of float, shape (k, d)
        Final centroid positions.
    c_new : float
        Coherence: the sum over all points of the distance to the assigned
        centroid (``0`` when ``max_iter`` is 0).

    Raises
    ------
    ValueError
        If fewer than one centroid is requested or supplied.
    """
    data = np.asarray(data, dtype = float)

    n = data.shape[0]
    s = np.zeros(n, dtype = int)

    if isinstance(centroids, (int, np.integer)):
        k = int(centroids)
        if k < 1:
            raise ValueError('kmeans needs at least one centroid')
        ## pick the centroids at random; sampling without replacement avoids
        ## duplicate starting centroids, which would leave clusters empty
        centroids = data[np.random.choice(n, k, replace = k > n), :]
    else:
        ## work on a float copy so the caller's array is left untouched and
        ## integer inputs do not truncate the updated means
        centroids = np.array(centroids, dtype = float)

    num = centroids.shape[0]
    if num < 1:
        raise ValueError('kmeans needs at least one centroid')

    c_new = 0

    for _ in range(max_iter):

        c_old = c_new

        ## assign every point to its nearest centroid (n x num distance table)
        dists = np.linalg.norm(
            data[:, np.newaxis, :] - centroids[np.newaxis, :, :],
            axis = 2
        )
        s = np.argmin(dists, axis = 1)

        ## update centroids; a cluster that lost all its points keeps its
        ## previous position instead of becoming NaN
        for j in range(num):
            members = data[s == j]
            if len(members) > 0:
                centroids[j] = np.mean(members, axis = 0)

        ## calculate coherence
        c_new = np.sum(np.linalg.norm(data - centroids[s], axis = 1))

        ## an unchanged coherence means the assignment has stabilised
        if c_new == c_old: break

    return s, centroids, c_new

In [10]:
# Shared point cloud: starts empty and is replaced by generate_on_click,
# which rebinds it through `global points`.
points = np.zeros(shape = (0,))

def generate_on_click(b):
    """Button callback: build a new random point cloud and draw it.

    Reads the three slider values, stores the freshly generated cloud in the
    module-level ``points`` array and renders a scatter plot of it inside
    ``output_points``.  The button argument ``b`` is not used.
    """
    global points

    # Queue the clear so the old plot stays visible until the new one is shown.
    output_points.clear_output(wait = True)

    points = rand_2D_points(
        points_box.value,
        centroids_box.value,
        noise = noise_box.value
    )

    fig, ax = plt.subplots(figsize = (6,6))

    ax.plot(points[:,0], points[:,1], '.')

    # Hide the tick marks on both axes.
    ax.set_xticks([])
    ax.set_yticks([])

    ax.axis('equal')
    fig.tight_layout()

    with output_points:
        plt.show()
    
# Sliders whose values generate_on_click reads on every click.
points_box = IntSlider(value = 100, min = 10, max = 1000)
centroids_box = IntSlider(value = 3, min = 1, max = 8)
noise_box = FloatSlider(
    value = 0.1, min = 0.01, max = 0.20, step = 0.01, readout_format = '0.2f'
)

# Output area the callback draws the scatter plot into.
output_points = Output()

generate_button = Button(description = 'Generate Points')
generate_button.on_click(generate_on_click)

# Layout: a column of labels beside a column of sliders, then the button,
# then the plot area.
display(
    HBox([
        VBox([
            Label('Number of points: '),
            Label('Number of centroids: '),
            Label('Noise: ')
        ]),
        VBox([points_box, centroids_box, noise_box])
    ]),
    generate_button,
    output_points
)

HBox(children=(VBox(children=(Label(value='Number of points: '), Label(value='Number of centroids: '), Label(v…

Button(description='Generate Points', style=ButtonStyle())

Output()

In [11]:
import time

def cluster_on_click(b):
    """Button callback: animate k-means on the current point cloud.

    Runs ``kmeans`` one round at a time (at most 20 rounds), redrawing the
    clusters and their centroids inside ``output_clusters`` after every
    round and pausing ``speed_box.value`` seconds between frames.  The
    animation stops early once the coherence no longer changes.  If no
    points have been generated yet, a hint is shown instead.  The button
    argument ``b`` is not used.
    """
    max_iter = 20
    delay = speed_box.value

    num_k = num_k_box.value

    # `points` is empty until the generator has been run; clustering it would
    # raise inside the callback, where the error is easy to miss.
    if points.size == 0:
        output_clusters.clear_output(wait = True)
        with output_clusters:
            print('No points to cluster - click "Generate Points" first.')
        return

    # An int on the first round tells kmeans to pick random starting
    # centroids; afterwards the returned array is fed back in.
    centroids = num_k

    coherence = 0

    for i in range(max_iter):

        old_coherence = coherence

        s, centroids, coherence = kmeans(points, centroids, max_iter = 1)

        fig, ax = plt.subplots(figsize = (6, 6))

        for k in range(num_k):
            members = points[s == k]
            ax.plot(members[:,0], members[:,1], '.')
            ax.plot(*centroids[k], 'x', color = 'k')

        ax.set_xticks([])
        ax.set_yticks([])

        ax.axis('equal')
        fig.tight_layout()

        output_clusters.clear_output(wait = True)

        with output_clusters:
            print(f'Iteration {i + 1}')
            print(f'Coherence = {coherence:.2f}')
            plt.show()

        # One figure is created per frame; release it so repeated clicks do
        # not accumulate open figures.
        plt.close(fig)

        if coherence == old_coherence: break

        # No need to wait after the final frame.
        if i < max_iter - 1:
            time.sleep(delay)
    
# Sliders whose values cluster_on_click reads on every click.
num_k_box = IntSlider(value = 3, min = 1, max = 8)
speed_box = FloatSlider(
    value = 2.0, min = 0, max = 3, step = 0.1, readout_format = '0.1f'
)

# Output area the callback draws each animation frame into.
output_clusters = Output()

cluster_button = Button(description = 'Find clusters')
cluster_button.on_click(cluster_on_click)

# Layout: a column of labels beside a column of sliders, then the button,
# then the animation area.
display(
    HBox([
        VBox([
            Label('Number of centroids: '),
            Label('Speed of Animation: ')
        ]),
        VBox([num_k_box, speed_box])
    ]),
    cluster_button,
    output_clusters
)

HBox(children=(VBox(children=(Label(value='Number of centroids: '), Label(value='Speed of Animation: '))), VBo…

Button(description='Find clusters', style=ButtonStyle())

Output()