In [48]:
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import *
#from ipywidgets import HBox, VBox, Label, IntSlider, FloatSlider, Button, Output
from IPython.display import display

def rand_2D_points(num_points, num_centroids, **kwargs):
    """Sample 2-D points scattered around randomly placed cluster centres.

    Parameters
    ----------
    num_points : int
        Number of points to generate.
    num_centroids : int
        Number of cluster centres; must be at least 1 when points are
        requested.
    **kwargs
        space : float, default 20
            Side length of the origin-centred square in which the centres
            are drawn uniformly.
        noise : float, default 0.1
            Standard deviation of the Gaussian scatter around each centre,
            expressed as a fraction of ``space``.
        seed : int or None, default None
            Seed for the random generator. ``None`` draws fresh entropy so
            every call differs; an integer makes the output reproducible.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(num_points, 2)``.

    Raises
    ------
    ValueError
        If points are requested but ``num_centroids`` is less than 1.
    """
    space = kwargs.get('space', 20)
    noise = kwargs.get('noise', 0.1)

    # A local generator keeps the draw reproducible on request without
    # touching NumPy's global random state.
    rng = np.random.default_rng(kwargs.get('seed'))

    if num_points == 0:
        return np.zeros(shape = (0, 2))

    if num_centroids < 1:
        raise ValueError('num_centroids must be at least 1')

    # Centres are uniform in [-space/2, space/2) along each axis.
    centroids = space * (rng.random((num_centroids, 2)) - 0.5)

    # Each point picks one centre uniformly at random and is scattered
    # around it with isotropic Gaussian noise.
    owner = rng.integers(num_centroids, size = num_points)

    return rng.normal(loc = centroids[owner], scale = noise * space)

def euclid_dist(a, b):
    """Return the Euclidean (L2) distance between two points.

    ``a`` and ``b`` may be NumPy arrays or any array-like (tuple, list,
    scalar) of matching or broadcastable shape. Both are converted to float
    arrays first, so plain Python sequences work and unsigned/integer
    inputs cannot wrap around or overflow during the subtraction.
    """
    diff = np.asarray(a, dtype = float) - np.asarray(b, dtype = float)
    return np.sqrt(np.sum(diff**2))

## k-means algorithm
## if centroids is an int, they will be chosen randomly from points
def kmeans(data, centroids, max_iter = 100):
    """Cluster the rows of ``data`` with the k-means (Lloyd) iteration.

    Parameters
    ----------
    data : array-like of shape (n_points, n_dims)
        Points to cluster.
    centroids : int or array-like of shape (k, n_dims)
        Either the number of clusters, in which case the initial centroids
        are drawn at random from ``data``, or explicit starting centroids.
        An array passed in is copied and never modified.
    max_iter : int, default 100
        Maximum number of assign/update rounds.

    Returns
    -------
    s : numpy.ndarray of int, shape (n_points,)
        Index of the centroid each point was assigned to in the last round
        (the assignment is made before that round's centroid update).
    centroids : numpy.ndarray of float, shape (k, n_dims)
        Updated centroid positions.
    c_new : float
        Coherence: the sum of distances from every point to its assigned
        centroid (``0`` if no round was run).
    """
    data = np.asarray(data, dtype = float)

    n = data.shape[0]
    s = np.zeros(n, dtype = int)

    if isinstance(centroids, (int, np.integer)):
        ## pick the centroids at random; sample distinct points whenever
        ## there are enough of them, since duplicated starting centroids
        ## leave one of the clusters empty
        k = int(centroids)
        centroids = data[np.random.choice(n, k, replace = k > n)]
    else:
        ## work on a float copy so the caller's array is left untouched and
        ## integer centroids do not truncate the computed means
        centroids = np.array(centroids, dtype = float)

    num = centroids.shape[0]

    c_new = 0

    for i in range(max_iter):

        c_old = c_new

        ## assign every point to its nearest centroid
        diffs = data[:, None, :] - centroids[None, :, :]
        s = np.argmin(np.sqrt(np.sum(diffs**2, axis = 2)), axis = 1)

        ## update centroids; a cluster that attracted no points keeps its
        ## previous centroid instead of becoming NaN
        for j in range(num):
            members = data[s == j]
            if len(members) > 0:
                centroids[j] = np.mean(members, axis = 0)

        ## calculate coherence
        c_new = np.sum(np.sqrt(np.sum((data - centroids[s])**2, axis = 1)))

        ## identical coherence in two consecutive rounds means nothing moved
        if c_new == c_old: break

    return s, centroids, c_new

def kernel(d, h):
    """Evaluate a Gaussian kernel of bandwidth ``h`` at distance(s) ``d``.

    Computes ``exp(-0.5 * (d / h)**2) / (h * sqrt(2 * pi))``. ``d`` may be a
    scalar or a NumPy array, in which case the result has the same shape.
    """
    normaliser = 1 / (h * np.sqrt(2 * np.pi))
    scaled = d / h
    return normaliser * np.exp(-0.5 * scaled**2)

def kernel_density(xi, X, bandwidth):
    """Sum of Gaussian kernel values between ``xi`` and every row of ``X``.

    Parameters
    ----------
    xi : array-like of shape (n_dims,)
        Location at which the density is evaluated (a tuple works).
    X : array-like of shape (n_points, n_dims)
        Data points.
    bandwidth : float
        Kernel bandwidth, passed to ``kernel``.

    Returns
    -------
    float
        Unnormalised density estimate: the plain sum of the kernel values
        (it is not divided by the number of points). ``0.0`` for empty data.
    """
    X = np.asarray(X, dtype = float)

    if X.size == 0:
        return 0.0

    # One vectorised pass over all rows instead of a Python-level loop; this
    # function is evaluated once per grid node when drawing contours.
    offsets = X - np.asarray(xi, dtype = float)
    dist = np.sqrt(np.sum(offsets**2, axis = 1))

    return np.sum(kernel(dist, bandwidth))

def mean_shift(X, bandwidth, max_iter = 5):
    """Shift every point towards the local density maximum of ``X``.

    In each iteration every shifted point is replaced by the kernel-weighted
    mean of the ORIGINAL data ``X``, with weights given by ``kernel`` applied
    to the distance between the shifted point and each data point.

    Parameters
    ----------
    X : array-like of shape (n_points, n_dims)
        Data points. The input is not modified.
    bandwidth : float
        Kernel bandwidth, passed to ``kernel``.
    max_iter : int, default 5
        Number of shift iterations.

    Returns
    -------
    numpy.ndarray of float, shape (n_points, n_dims)
        Shifted positions.
    """
    # Work in floating point: copying an integer array would silently
    # truncate every shifted position back to an integer.
    X = np.asarray(X, dtype = float)
    new_X = X.copy()

    if X.size == 0:
        return new_X

    for iteration in range(max_iter):

        # Pairwise distances between shifted points (rows) and the fixed
        # data (columns); this builds an (n_points, n_points, n_dims)
        # temporary.
        diffs = new_X[:, None, :] - X[None, :, :]
        dist = np.sqrt(np.sum(diffs**2, axis = 2))

        weights = kernel(dist, bandwidth)

        # Row i of the product is sum_j w_ij * x_j; dividing by the row sum
        # of the weights gives the weighted mean.
        new_X = (weights @ X) / np.sum(weights, axis = 1, keepdims = True)

    return new_X
            
def mean_shift2(X, max_dist, bandwidth, max_iter = 5):
    """Mean shift with a distance cutoff, where the data set itself moves.

    Unlike ``mean_shift``, the positions produced by one iteration become
    the data set for the next one. In each iteration every point is replaced
    by the kernel-weighted mean of the points lying within ``max_dist`` of
    it, all measured on the positions from the previous iteration.

    Parameters
    ----------
    X : array-like of shape (n_points, n_dims)
        Data points. The input is not modified.
    max_dist : float
        Points farther apart than this do not influence each other.
    bandwidth : float
        Kernel bandwidth, passed to ``kernel``.
    max_iter : int, default 5
        Number of shift iterations.

    Returns
    -------
    numpy.ndarray of float, shape (n_points, n_dims)
        Shifted positions.
    """
    # Float working copy: avoids integer truncation of the shifted points
    # and guarantees the caller's array is never written to.
    current = np.array(X, dtype = float)

    if current.size == 0:
        return current

    for iteration in range(max_iter):

        diffs = current[:, None, :] - current[None, :, :]
        dist = np.sqrt(np.sum(diffs**2, axis = 2))

        # Pairs beyond the cutoff contribute nothing.
        weights = np.where(dist <= max_dist, kernel(dist, bandwidth), 0.0)
        totals = np.sum(weights, axis = 1)

        # Build the next positions in a separate array. Writing them into
        # the array that is being read would let points updated early in
        # the sweep influence points updated later in the same sweep.
        shifted = current.copy()

        # A point with no contributing neighbour (negative cutoff or fully
        # underflowed weights) stays where it is instead of dividing by 0.
        movable = totals > 0
        shifted[movable] = (weights @ current)[movable] / totals[movable, None]

        current = shifted

    return current
                

In [2]:
# Shared point cloud, replaced by generate_on_click. It starts as an empty
# (0, 2) array so that cells slicing points[:, 0] / points[:, 1] still run on
# a fresh kernel, before any points have been generated.
points = np.empty((0, 2))

def generate_on_click(b):
    """Button callback: draw a new random point cloud and plot it.

    Reads the point count, centroid count and noise level from the sliders,
    stores the generated array in the module-level ``points`` and renders a
    scatter plot into ``output_points``. ``b`` is the clicked button and is
    not used.
    """
    global points

    output_points.clear_output(wait = True)

    points = rand_2D_points(
        points_box.value,
        centroids_box.value,
        noise = noise_box.value,
    )

    fig, ax = plt.subplots(figsize = (6,6))

    ax.plot(points[:,0], points[:,1], '.')

    # Bare scatter: no tick marks, same scale on both axes.
    ax.set_xticks([])
    ax.set_yticks([])
    ax.axis('equal')
    fig.tight_layout()

    with output_points:
        plt.show()
    
# Sliders controlling the random point generator.
points_box = IntSlider(value = 100, min = 10, max = 1000)
centroids_box = IntSlider(value = 3, min = 1, max = 8)
noise_box = FloatSlider(
    value = 0.1, min = 0.01, max = 0.20, step = 0.01, readout_format = '0.2f'
)

# Button that triggers generation, and the area the scatter plot is drawn in.
generate_button = Button(description = 'Generate Points')
generate_button.on_click(generate_on_click)

output_points = Output()

# Layout: a column of captions beside the matching column of sliders.
generator_captions = VBox([
    Label('Number of points: '),
    Label('Number of centroids: '),
    Label('Noise: '),
])
generator_sliders = VBox([points_box, centroids_box, noise_box])

display(
    HBox([generator_captions, generator_sliders]),
    generate_button,
    output_points
)

HBox(children=(VBox(children=(Label(value='Number of points: '), Label(value='Number of centroids: '), Label(v…

Button(description='Generate Points', style=ButtonStyle())

Output()

In [3]:
import time

def cluster_on_click(b):
    """Button callback: animate k-means on the current ``points``.

    Runs ``kmeans`` one round at a time (at most 20 rounds), redrawing the
    clusters and centroids in ``output_clusters`` after each round and
    pausing for the number of seconds set on ``speed_box``. The animation
    stops early once the coherence no longer changes between rounds.
    ``b`` is the clicked button and is not used.
    """
    max_iter = 20
    delay = speed_box.value

    num_k = num_k_box.value

    # Nothing to cluster until the generator has been run; report it in the
    # output area instead of raising inside the widget callback.
    if points.ndim != 2 or len(points) == 0:
        output_clusters.clear_output(wait = True)
        with output_clusters:
            print('No points to cluster: click "Generate Points" first.')
        return

    # The first call passes the cluster count (random initial centroids);
    # later calls pass the centroid array returned by the previous round.
    centroids = num_k

    coherence = 0

    for i in range(max_iter):

        old_coherence = coherence

        s, centroids, coherence = kmeans(points, centroids, max_iter = 1)

        fig, ax = plt.subplots(figsize = (6, 6))

        for k in range(num_k):
            members = points[s == k]
            plt.plot(members[:,0], members[:,1], '.')
            plt.plot(*centroids[k], 'x', color = 'k')

        plt.xticks([])
        plt.yticks([])

        plt.axis('equal')
        plt.tight_layout()

        output_clusters.clear_output(wait = True)

        with output_clusters:
            print(f'Iteration {i + 1}')
            print(f'Coherence = {coherence:.2f}')
            plt.show()

        # One figure is created per round; release it explicitly so frames
        # do not pile up on backends that keep shown figures alive.
        plt.close(fig)

        if coherence == old_coherence: break

        time.sleep(delay)
    
# Number of clusters k to look for.
num_k_box = IntSlider(
    value = 3,
    min = 1,
    max = 8
)

# Pause between animation frames, in seconds. cluster_on_click passes this
# value straight to time.sleep, so larger values make the animation SLOWER;
# the caption below therefore calls it a delay rather than a speed.
speed_box = FloatSlider(
    value = 2.0,
    min = 0,
    max = 3,
    step = 0.1,
    readout_format = '0.1f'
)

cluster_button = Button(
    description = 'Find clusters'
)

cluster_button.on_click(cluster_on_click)

# Area the animation frames are rendered into.
output_clusters = Output()

display(
    HBox([VBox([Label('Number of centroids: '), Label('Delay between frames (s): ')]),
          VBox([num_k_box, speed_box])]),
    cluster_button,
    output_clusters
)

HBox(children=(VBox(children=(Label(value='Number of centroids: '), Label(value='Speed of Animation: '))), VBo…

Button(description='Find clusters', style=ButtonStyle())

Output()

In [56]:
#new_p = mean_shift(points, 2, max_iter = 10)

def contours_on_click(b):
    """Button callback: draw kernel-density contours over the points.

    Evaluates ``kernel_density`` on a grid spanning the scatter plot's
    initial axis limits, using the bandwidth from ``bandwidth_box``, and
    renders the points with the contour lines into ``output_contours``.
    ``progress_box`` is advanced once per grid column. ``b`` is the clicked
    button and is not used.
    """
    bandwidth = bandwidth_box.value

    # Without points there is nothing to plot, and a zero bandwidth makes
    # the kernel divide by zero; report both instead of failing.
    if points.ndim != 2 or len(points) == 0 or bandwidth <= 0:
        output_contours.clear_output(wait = True)
        with output_contours:
            print('KDE contours need generated points and a bandwidth greater than 0.')
        return

    fig, ax = plt.subplots(figsize = (6,6))

    plt.plot(points[:,0], points[:,1], '.')

    # Grid over the limits matplotlib chose for the scatter plot
    # (np.linspace defaults to 50 samples per axis).
    x1 = np.linspace(*ax.get_xlim())
    y1 = np.linspace(*ax.get_ylim())

    # contour(x, y, z) expects z indexed as [y index, x index], so the
    # array is (len(y1), len(x1)) to match the z[j, i] writes below.
    z = np.zeros((len(y1), len(x1)))

    progress_box.value = 0

    for i in range(len(x1)):
        for j in range(len(y1)):

            z[j, i] = kernel_density((x1[i], y1[j]), points, bandwidth)

        # One widget update per column is enough for a smooth bar, and the
        # bar reaches exactly 1.0 after the last column.
        progress_box.value = (i + 1) / len(x1)

    plt.contour(x1, y1, z)

    plt.xticks([])
    plt.yticks([])

    plt.title(f'KDE contour with bandwidth = {bandwidth}')

    plt.axis('equal')
    plt.tight_layout()

    output_contours.clear_output(wait = True)

    with output_contours:
        plt.show()

    plt.close(fig)

# Kernel bandwidth. The minimum is one slider step above zero because the
# Gaussian kernel divides by the bandwidth.
bandwidth_box = FloatSlider(
    value = 1.5,
    min = 0.1,
    max = 3,
    step = 0.1,
    readout_format = '0.1f'
)

# Progress of the grid evaluation, as a fraction between 0 and 1.
progress_box = FloatProgress(
    value = 0,
    min = 0,
    max = 1
)

contours_button = Button(
    description = 'Show KDE contours'
)

contours_button.on_click(contours_on_click)

output_contours = Output()

# Initial view: the raw points without contours. `points` only holds data
# after "Generate Points" has been clicked, so on a fresh kernel show a hint
# instead of indexing an empty array.
with output_contours:

    if points.ndim == 2 and len(points) > 0:

        fig, ax = plt.subplots(figsize = (6,6))

        plt.plot(points[:,0], points[:,1], '.')

        plt.xticks([])
        plt.yticks([])

        plt.title('Raw points')

        plt.axis('equal')
        plt.tight_layout()

        plt.show()

    else:
        print('No points yet: click "Generate Points" first.')

display(
    HBox([Label('Bandwidth: '), bandwidth_box]),
    contours_button,
    progress_box,
    output_contours
)


HBox(children=(Label(value='Bandwidth: '), FloatSlider(value=1.5, max=3.0, readout_format='0.1f')))

Button(description='Show KDE contours', style=ButtonStyle())

FloatProgress(value=0.0, max=1.0)

Output()