In [1]:
%matplotlib notebook

import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split

# Make Data

In [2]:
X, y = make_blobs(n_samples=1000, centers=[[-5,-5],[5,5],[-4,5],[4,-5]], n_features=2, random_state=2, 
                  shuffle=True, cluster_std=1.8)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

plt.figure()
plt.scatter(X_train.T[0], X_train.T[1])
plt.xlim(-10,10)
plt.ylim(-10,10)

<IPython.core.display.Javascript object>

(-10.0, 10.0)

# Code

In [26]:
class MeanShift:
    def __init__(self, λ):
        self.λ = λ # Bandwidth
    
    def fit(self, X, n, kernel, max_iter=50):
        '''
        X = training data
        n = number of modes/clusters
        kernel = 'flat' or 'gaussian'
        '''
        
        # Selects n random rows from the training data
        np.random.shuffle(X)
        m = X[:n] # This will be iteratively updated in the next for loop
        
        for iteration in range(max_iter):
            # Updates the modes
            for i in range(n):
                m[i] = (self.__Kernel(X - m[i], self.λ, kernel) @ X) / np.sum(self.__Kernel(X - m[i], self.λ, kernel), axis=0)
        self.m = m
        self.labels = self.predict(X) # Assigns each training example its corresponding class
        
    def predict(self, x):
        return np.array([np.argmin(self.__predict_each(x_i)) for x_i in x])
    
    # Private methods
    def __Kernel(self, x, λ, kernel_type):
        if kernel_type=='flat':
            return np.array([1 if np.linalg.norm(x_i) <= λ else 0 for x_i in x])
            
        if kernel_type=='gaussian':
            # We remove the constant at the front because it suppresses the value of the exponential
            # and thus potentially contributing to slower convergence
            return np.array([np.exp(-np.linalg.norm(x_i)/(2*λ**2)) for x_i in x])
        
    def __predict_each(self, x):
        # To classify, one only computes the distance of a new point x to all the modes.
        # Its corresponding classification is then the mode nearest to it.
        # Alternatively, we can use the Gaussian and simply select the one with the highest probability
        return np.array([np.linalg.norm(m - x) for m in self.m])

# Test our code

### Flat kernel

In [66]:
λ = 2.5
n = 4
meanshift_test = MeanShift(λ)
meanshift_test.fit(X_train, n, 'flat', 100)
meanshift_test.m

plt.figure()
plt.scatter(X_train.T[0], X_train.T[1], c=meanshift_test.labels)
plt.scatter(X_test.T[0], X_test.T[1], c=meanshift_test.predict(X_test), marker='+')
plt.scatter(meanshift_test.m.T[0], meanshift_test.m.T[1], marker='x', c='red', s=50)
plt.title('Clustering of Multiple Classes Using Mean Shift\nwith a Flat Kernel')
plt.xlabel(r"$x_1$")
plt.ylabel(r"$x_2$")

<IPython.core.display.Javascript object>

Text(0, 0.5, '$x_2$')

# Mean shift with a Gaussian kernel

In [31]:
λ = 0.5 # Bandwidth
n = 4
meanshift_test = MeanShift(λ)
meanshift_test.fit(X_train, n, 'gaussian')
meanshift_test.m # gives the centers of the clusters

array([[ 4.09008551, -5.15086821],
       [-5.06647277, -5.11132491],
       [ 4.86924973,  4.76485439],
       [-3.78794751,  4.71081543]])

In [32]:
plt.figure()
plt.scatter(X_train.T[0], X_train.T[1], c=meanshift_test.labels)
plt.scatter(X_test.T[0], X_test.T[1], c=meanshift_test.predict(X_test), marker='+')
plt.scatter(meanshift_test.m.T[0], meanshift_test.m.T[1], marker='x', c='red', s=50)
plt.title('Clustering of Multiple Classes Using Mean Shift\nwith a Gaussian kernel')
plt.xlabel(r"$x_1$")
plt.ylabel(r"$x_2$")

<IPython.core.display.Javascript object>

Text(0, 0.5, '$x_2$')