In [20]:
%matplotlib inline  

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

In [21]:
# Instead of arbitrarily generating points, this is a hack
# to generate random points in n distinct clusters

def create_samples(n_clusters, n_points, seed):
    """Build ops producing `n_clusters` blobs of `n_points` 2-D points each.

    Args:
        n_clusters: number of clusters (and therefore centroids) to create.
        n_points: number of points generated for every cluster.
        seed: seeds NumPy's global RNG (used for the centroid positions) and
            is also handed to every `tf.random_normal` op as its op seed.

    Returns:
        A `(centroids, samples)` pair. `centroids` is the row-wise
        concatenation of the per-cluster centres; `samples` is the row-wise
        concatenation of the per-cluster points, in cluster order.
    """
    np.random.seed(seed)

    # Each centroid coordinate is uniform in [-spread / 2, spread / 2).
    spread = 60
    centres = []
    clusters = []

    for idx in range(n_clusters):
        # Zero-mean Gaussian noise that is shifted onto the centroid below.
        # NOTE(review): every cluster op receives the same `seed` -- confirm
        # whether identical noise patterns across clusters are intended.
        noise = tf.random_normal((n_points, 2),
                                 mean=0.0,
                                 stddev=5.0,
                                 dtype=tf.float32,
                                 seed=seed,
                                 name="cluster_{}".format(idx))
        centre = (np.random.random((1, 2)) * spread) - (spread / 2)
        centres.append(centre)
        clusters.append(noise + centre)

    samples = tf.concat(0, clusters, name='samples')
    centroids = tf.concat(0, centres, name='centroids')
    return centroids, samples

In [22]:
def plot_clusters(all_samples, centroids, n_points):
    """Scatter-plot every cluster in its own colour and mark its centroid.

    Args:
        all_samples: indexable 2-D array of points; rows
            `i * n_points:(i + 1) * n_points` are taken as cluster `i`.
            Column 0 is plotted on the x axis and column 1 on the y axis.
        centroids: sequence of centroid coordinates, one entry per cluster;
            its length decides how many clusters are drawn.
        n_points: number of consecutive rows of `all_samples` per cluster.

    Returns:
        None. The figure is displayed with `plt.show()`.
    """
    # one evenly spaced rainbow colour per cluster
    colour = plt.cm.rainbow(np.linspace(0, 1, len(centroids)))
    for i, centroid in enumerate(centroids):
        samples = all_samples[i*n_points:(i+1)*n_points]
        # Pass the colour as a one-row 2-D array: a bare RGBA vector is
        # ambiguous for `c` and is colour-mapped as four scalar values
        # whenever the cluster happens to hold exactly four points.
        plt.scatter(samples[:, 0], samples[:, 1], c=colour[i:i + 1])
        plt.plot(centroid[0], centroid[1], markersize=4, marker="x", color='m', mew=5)
    plt.show()

In [23]:
# To implement the k-means algorithm,
# we first need to guess the initial positions of the centroids

def choose_random_centroids(samples, n_clusters, seed):
    """Select `n_clusters` rows of `samples` to serve as starting centroids.

    Args:
        samples: tensor whose first dimension indexes the individual points.
        n_clusters: how many rows to pick.
        seed: op seed forwarded to `tf.random_shuffle`.

    Returns:
        The rows of `samples` gathered at the chosen indices.
    """
    total = tf.shape(samples)[0]
    # Shuffle all row indices and keep the first `n_clusters` of them, so
    # the same row is never picked twice.
    shuffled = tf.random_shuffle(tf.range(0, total), seed=seed)
    chosen = tf.slice(shuffled, [0], [n_clusters])
    return tf.gather(samples, chosen)

In [24]:
# Then we must assign each point to its cluster,
# based on which centroid it is closest to

def assign_to_cluster(sample,centroids):
    """Return, for every point, the index of its nearest centroid.

    Only the `sample` argument is clustered; no notebook-level variable is
    consulted, so the function also works on a freshly started kernel.

    Args:
        sample: rank-2 tensor of points, one point per row.
        centroids: rank-2 tensor of centroids, one centroid per row, with
            the same number of columns as `sample`.

    Returns:
        A tensor with one entry per row of `sample`, holding the row index
        (into `centroids`) of the closest centroid.
    """
    # Broadcast to (n_centroids, n_points, n_dims) so that every centroid is
    # compared with every point in a single subtraction.
    expanded_vectors = tf.expand_dims(sample, 0)
    expanded_centroids = tf.expand_dims(centroids, 1)
    # Squared Euclidean distance: the square root is skipped because it does
    # not change which centroid is the nearest one.
    squared_distances = tf.reduce_sum(
        tf.square(expanded_vectors - expanded_centroids), 2)
    # The minimum along the centroid axis gives the winning index per point.
    nearest_indices = tf.argmin(squared_distances, 0)
    return nearest_indices

In [25]:
# After re-assigning we must update the centroids


def update_centroids(samples, nearest_indices, n_clusters):
    """Recompute each centroid as the mean of the samples assigned to it.

    Args:
        samples: tensor of points, one point per row.
        nearest_indices: per-sample cluster index; cast to int32 before use.
        n_clusters: number of partitions the samples are split into.

    Returns:
        The per-cluster means concatenated row-wise, one row per cluster.
    """
    cluster_ids = tf.to_int32(nearest_indices)
    groups = tf.dynamic_partition(samples, cluster_ids, n_clusters)

    # NOTE(review): a cluster with no assigned samples would average zero
    # rows -- confirm how callers are expected to deal with that case.
    means = []
    for group in groups:
        centre = tf.reduce_mean(group, 0)
        # restore a leading axis of length 1 so the means stack as rows
        means.append(tf.expand_dims(centre, 0))
    return tf.concat(0, means)