# Exercise 4: Mean Shift Algorithm for Face Recognition

This is an example implementation of the Mean-Shift algorithm for image segmentation. The algorithm aims at separating faces (light pixels) from the background (darker pixels).
The Mean-Shift algorithm determines the number of clusters by itself and forms the clusters.

## 1. Developing Environment

In [None]:
import numpy as np
import pandas as pd

from sklearn.cluster import KMeans, MeanShift

from sklearn import metrics
import matplotlib.pyplot as plt

# Python Imaging Library
from PIL import Image

from scipy.spatial.distance import cdist


## 2. Loading Data

In [None]:
# Read the image file.
# OUR_IMAGE is reused later when the per-cluster images are generated.
OUR_IMAGE = '../images/Destructuring.jpg'
# Alternative sample image:
#OUR_IMAGE = '../images/TechBBQ.jpg'
image = Image.open(OUR_IMAGE)
# Last expression of the cell: the notebook renders the image inline
image

The algorithm expects the data to be stored in a data frame. Each pixel is represented by two coordinates and three colors in the RGB scheme: red, green, and blue, each measured with a value between 0 and 255.

In [None]:
image.size

In [None]:
# Convert image pixels to a numpy array.
# The first axis is the row (y) and the second the column (x).
pix_array = np.array(image)
pix_array

In [None]:
# For test, get the values of the pixel at row 10, column 20.
# The numpy array is indexed [row][column], i.e. [y][x].
pix_array[10][20]

In [None]:
# Alternative way to get the pixel color values.
# The PIL pixel-access object is indexed [x, y], so [20, 10] is the same
# pixel as pix_array[10][20].
pixels = image.load()
pixels[20,10]

In [None]:
# Convert the pixels into a pandas dataframe: one row per pixel holding its
# (x, y) coordinates followed by its red, green and blue values.
# Rows are generated column by column (x outer loop, y inner loop).
# Each pixel is looked up only once and its first three channels unpacked,
# instead of indexing the pixel-access object three times per pixel.
df = pd.DataFrame(
    [
        [x, y, *pixels[x, y][:3]]
        for x in range(image.size[0])
        for y in range(image.size[1])
    ],
    columns=['x', 'y', 'r', 'g', 'b']
)

In [None]:
df.head()

## 3. Training a Model

In [None]:
# Train mean-shift model with the data frame.
# All five columns (x, y, r, g, b) are passed as features, so the clustering
# depends on pixel position as well as colour.
# MeanShift() is created with its default parameters.
ms_model = MeanShift()
ms_model.fit(df)

In [None]:
# See the cluster centres: one row per cluster found, with the values in the
# same column order as df (x, y, r, g, b)
ms_model.cluster_centers_

In [None]:
len(ms_model.labels_)

In [None]:
# Create a function to iterate through the data and write one image per cluster.
# It relies on the fitted model's cluster_centers_ and labels_ attributes.
def image_clusters(model, output, MY_IMAGE, data=None):
    """Save one copy of the image per cluster, with all other pixels painted white.

    Parameters
    ----------
    model
        Fitted clustering model exposing ``cluster_centers_`` and ``labels_``.
    output : str
        Filename prefix; cluster ``i`` is saved as ``output + str(i) + '.jpg'``.
    MY_IMAGE : str
        Path of the source image, reopened fresh for every cluster.
    data : DataFrame, optional
        Frame with ``'x'`` and ``'y'`` columns whose rows are in the same
        order as ``model.labels_``. Defaults to the notebook-level ``df``.
    """
    if data is None:
        data = df

    # Extract coordinates and labels once, instead of doing a pandas scalar
    # lookup for every pixel of every cluster.
    xs = np.asarray(data['x'], dtype=int)
    ys = np.asarray(data['y'], dtype=int)
    labels = np.asarray(model.labels_)[:len(xs)]
    white = (255, 255, 255)

    # for each cluster
    for cluster in range(len(model.cluster_centers_)):
        outside = labels != cluster
        # the context manager releases the image once it has been saved
        with Image.open(MY_IMAGE) as image:
            pixels = image.load()
            # make every pixel that is not in this cluster white;
            # tolist() hands plain Python ints to the pixel-access object
            for x, y in zip(xs[outside].tolist(), ys[outside].tolist()):
                pixels[x, y] = white
            image.save(output + str(cluster) + '.jpg')

In [None]:
image_clusters(ms_model, 'meanshift_cluster', OUR_IMAGE)

In [None]:
clustered_image0 = Image.open('meanshift_cluster0.jpg')
clustered_image0

In [None]:
clustered_image1 = Image.open('meanshift_cluster1.jpg')
clustered_image1

## 4. Validating the Model with K-Means

In [None]:
# Determine k with the elbow method: for every candidate k record the
# distortion - here the mean Euclidean distance between each observation
# vector and its nearest centroid.
distortions = []
K = range(2,10)
for k in K:
    # fit() returns the fitted estimator, so one fit per k is enough
    # (the model was previously fitted twice on the same data)
    model = KMeans(n_clusters=k).fit(df)
    nearest = cdist(df, model.cluster_centers_, 'euclidean').min(axis=1)
    distortions.append(nearest.sum() / df.shape[0])
print("Distortion: ", distortions)

In [None]:
# Plot the elbow: distortion against the candidate values of K.
plt.title('Elbow Method for optimal K')
# 'bx-' draws blue x markers joined by a solid line
plt.plot(K, distortions, 'bx-')
plt.xlabel('K')
plt.ylabel('Distortion')
plt.show()

In [None]:
# K-means model fitting with specified number of clusters.
# NOTE(review): k = 7 is hard-coded; presumably read off the elbow plot - confirm.
# With 7 clusters the labels run from 0 to 6.
k_means_model = KMeans(n_clusters=7)
k_means_model.fit(df)

In [None]:
# Create image clusters
image_clusters(k_means_model, 'kmeans_cluster', OUR_IMAGE)

In [None]:
clustered_image0 = Image.open('kmeans_cluster0.jpg')
clustered_image0

In [None]:
clustered_image1 = Image.open('kmeans_cluster1.jpg')
clustered_image1

In [None]:
clustered_image2 = Image.open('kmeans_cluster2.jpg')
clustered_image2

In [None]:
clustered_image3 = Image.open('kmeans_cluster3.jpg')
clustered_image3

In [None]:
clustered_image4 = Image.open('kmeans_cluster4.jpg')
clustered_image4

In [None]:
clustered_image5 = Image.open('kmeans_cluster5.jpg')
clustered_image5

In [None]:
clustered_image6 = Image.open('kmeans_cluster6.jpg')
clustered_image6

In [None]:
# The K-means model above uses n_clusters=7, so image_clusters() writes the
# files kmeans_cluster0.jpg .. kmeans_cluster6.jpg only. An eighth image
# exists only if the model is refitted with a larger k, so a missing file is
# tolerated here instead of aborting the notebook with FileNotFoundError.
try:
    clustered_image7 = Image.open('kmeans_cluster7.jpg')
except FileNotFoundError:
    clustered_image7 = None
clustered_image7