In [2]:
import pandas as pd
import numpy as np
import multiprocessing as mp

In [3]:
class Kmeans:
    """
    Minimal k-means (Lloyd's algorithm) clustering.

    * initialization: ``n_clusters`` distinct rows of the data are drawn at
      random and each one is shifted by the per-feature standard deviation
      of the data
    * iteration: every sample is assigned to its nearest center (Euclidean
      distance), then every center is moved to the mean of its samples
    * stopping: the loop ends once no center moved by ``epsilon`` or more,
      or after ``max_iter`` iterations

    Parameters
    ----------
    n_clusters : int
        Number of clusters to look for.
    epsilon : float
        Convergence threshold on the Euclidean displacement of each center
        between two consecutive iterations.
    max_iter : int or None
        Upper bound on the number of iterations; ``None`` disables the bound.

    Attributes (set by ``fit``)
    ---------------------------
    clusters_centers : ndarray of shape (n_clusters, n_features)
    clusters : ndarray of shape (n_samples,), index of the nearest center
    """
    def __init__(self,n_clusters=5,epsilon=0.01,max_iter=300):
        self.n_clusters=n_clusters
        self.epsilon=epsilon
        self.max_iter=max_iter

    #determine new means given clusters
    def __means(self,x):
        """
        Run one assignment + update step.

        Updates ``self.clusters`` and ``self.clusters_centers`` in place and
        returns an array with the distance each center moved.
        """
        #squared distance of every sample to every center, shape
        #(n_samples, n_clusters); the square root is skipped because it
        #does not change which center is the nearest
        diff=x[:,np.newaxis,:]-self.clusters_centers[np.newaxis,:,:]
        sq_distances=(diff*diff).sum(2)
        self.clusters=sq_distances.argmin(1)

        #re-evaluate cluster centers
        deviation=np.zeros(self.n_clusters)
        for i in range(self.n_clusters):
            members=x[self.clusters==i]
            if members.shape[0]==0:
                #an empty cluster has no mean: averaging it would write NaN
                #into the center, and NaN compares False against epsilon,
                #which would be reported as convergence. Keep the previous
                #center (displacement 0) instead.
                continue
            i_mean=members.mean(0)

            dev_=self.clusters_centers[i]-i_mean
            deviation[i]=np.sqrt((dev_*dev_).sum())

            self.clusters_centers[i]=i_mean

        return deviation

    def fit(self,x):
        """
        Cluster the rows of ``x``.

        Parameters
        ----------
        x : array-like of shape (n_samples, n_features)
            Finite numeric data.

        Raises
        ------
        ValueError
            If ``x`` is not 2-D, contains NaN/inf, or has fewer rows than
            ``n_clusters``.

        Prints 'Converged' when the stopping criterion is met and emits a
        RuntimeWarning when ``max_iter`` is reached first. The initial
        centers are drawn with NumPy's global random generator.
        """
        import warnings

        x=np.asarray(x,dtype=float)
        if x.ndim!=2:
            raise ValueError('x must be 2-D (n_samples, n_features), got %d-D'%x.ndim)
        if not np.isfinite(x).all():
            raise ValueError('x contains NaN or infinite values')
        if self.n_clusters>x.shape[0]:
            raise ValueError('n_clusters=%d exceeds the number of samples (%d)'
                             %(self.n_clusters,x.shape[0]))

        std_dev=x.std(0)
        idx=np.random.choice(np.arange(x.shape[0]),self.n_clusters,replace=False)
        #fancy indexing copies, so updating the centers never touches x
        self.clusters_centers=x[idx]+std_dev
        self.clusters=[]

        iteration=0
        while self.max_iter is None or iteration<self.max_iter:
            iteration+=1
            deviations=np.abs(self.__means(x))
            if not (deviations>=self.epsilon).any():
                print('Converged')
                return

        warnings.warn('Kmeans did not converge within %d iterations'%iteration,
                      RuntimeWarning)

In [4]:
# Seed NumPy's global generator so the synthetic data below -- and any later
# draw from the same global generator, such as np.random.choice -- is
# identical on every Restart & Run All.
np.random.seed(0)

# Five overlapping 2-D Gaussian blobs, 100 points each with unit standard
# deviation, centered at 0, 1, 2, 3 and 4 on both axes.
x1=np.random.normal(0,1,(100,2))
x2=np.random.normal(1,1,(100,2))
x3=np.random.normal(2,1,(100,2))
x4=np.random.normal(3,1,(100,2))
x5=np.random.normal(4,1,(100,2))

# Stack the blobs into a single (500, 2) sample matrix.
x=np.concatenate((x1,x2,x3,x4,x5))

In [5]:
# Five clusters; stop once no center moves by 0.001 or more.
kmeans = Kmeans(n_clusters=5, epsilon=0.001)

In [6]:
# Run the clustering on the synthetic data; fit() prints 'Converged' once
# every center has moved by less than epsilon in an iteration.
kmeans.fit(x)

Converged


In [None]:
# Index of the nearest center for each row of x, as assigned in the last
# iteration of fit().
kmeans.clusters

In [8]:
# Fitted cluster centers, one row per cluster.
kmeans.clusters_centers

array([[ 0.87682169,  1.79857893],
       [ 4.51959439,  3.91503735],
       [ 2.92177033,  1.52078147],
       [ 0.25534296, -0.24810257],
       [ 2.8146445 ,  3.8743067 ]])