In [1]:
from PIL import Image
from matplotlib.pyplot import imshow
import numpy as np
%matplotlib inline


# Functions for handling the images
def show_image(path):
    '''
    Open the image file at `path`.
    Note: despite the name, nothing is displayed here; the opened
    image object is simply handed back to the caller.
    Input: path of the image file
    Output: the image object returned by Image.open
    '''
    return Image.open(path)

def read_image(image):
    '''
    Store the image as an array and release the image handle.
    Input: an opened image object, i.e. anything np.array can convert
           that also offers a close() method
    Output: the pixel data as an int64 array (3 dimensional for an
            image with several colour layers)
    The image is closed in every case, also when the conversion fails.
    '''
    try:
        # int64 is a wide signed type; presumably chosen so that pixel
        # differences computed later cannot wrap around
        img_array = np.array(image, dtype='int64')
    finally:
        # always release the handle, even if np.array raised
        image.close()
    return img_array

    
def reshape_array_2D(img_array):
    '''
    Flatten a 3D image array into a 2D array with one row per pixel.
    Input: 3 dimensional array of shape (rows, columns, layers)
    Output: 2 dimensional array of shape (rows*columns, layers)
    '''
    n_rows, n_cols, n_layers = img_array.shape
    # C order: the pixels are laid out row after row
    flat_shape = (n_rows * n_cols, n_layers)
    return np.reshape(img_array, flat_shape, order='C')

def preparation_k_methods(path):
    '''
    Load the image at `path` and prepare it for k-means/medoids.
    Input: path of the image file
    Output: the result of reshape_array_2D applied to the array that
            read_image builds from the opened image
    '''
    pixels_3d = read_image(show_image(path))
    return reshape_array_2D(pixels_3d)

### K-means and K-medoids algorithms

In [2]:
#Initialize the cluster centers and choose k number of clusters
def init_clusters(arr_reshaped, k):
    '''
    Pick k rows of the data at random to serve as the initial centers.
    k is the number of clusters.
    arr_reshaped is a 2 dimensional array of the image
    Output: randomized initial clusters (array with k rows)

    The row indices are drawn without replacement, so the same data
    point is never picked twice; picking it twice would create two
    identical centers and leave one cluster without members.
    Only when k is larger than the number of rows are repeats allowed,
    because k distinct indices do not exist in that case.
    '''
    n_points = arr_reshaped.shape[0]
    allow_repeats = k > n_points
    chosen = np.random.choice(n_points, size=k, replace=allow_repeats)
    return arr_reshaped[chosen]

In [3]:
def dist_matrix(matrix, centers, order=2):
    '''
    Calculating the distance matrix
    Input: image matrix array (n x d), centers (k x d, or one single
           center given as a 1D array of length d) and order of norm
           distance
    Output: Distance matrix nxk; entry (i, j) is the `order`-norm of
            (data point i - center j) raised to the power `order`
            (order=2 therefore gives squared Euclidean distances)
    '''
    matrix = np.asarray(matrix)
    centers = np.asarray(centers)
    # A single center may arrive as a flat vector: turn it into one row
    # BEFORE counting the centers, so the result gets exactly one column.
    if centers.ndim == 1:
        centers = centers.reshape(1, -1)

    n = len(matrix)
    k = len(centers)
    Dmatrix = np.empty((n, k))

    # Loop over the k centers instead of the n data points: k is the
    # small dimension, each pass is one vectorised norm over all rows.
    for j in range(k):
        d_j = np.linalg.norm(matrix - centers[j], ord=order, axis=1)
        Dmatrix[:, j] = np.power(d_j, order)
    return Dmatrix

In [4]:
def cluster_assignment(Dmatrix):
    '''
    Assign every data point to the cluster it is closest to.
    Input: Distance matrix (one row per data point, one column per cluster)
    Output: for each row, the column index of its smallest entry
    '''
    distances = np.asarray(Dmatrix)
    return distances.argmin(axis=1)

In [5]:
def J_sum(Dmatrix):
    '''
    Value of the loss function for a given distance matrix.
    Input: Distance matrix (one row per data point, one column per cluster)
    Output: a scalar, the sum over all rows of the smallest entry in the row
    '''
    closest = np.min(Dmatrix, axis=1)
    return np.sum(closest)

In [6]:
def cluster_update(matrix, labels, k):
    '''
    Finding the new center means for k-means
    input: Image matrix 2D, labels, number of clusters k
    Output: k x d array; row i is the mean of the data points labelled i

    A cluster without any member has no mean (np.mean of an empty
    selection is NaN). Such a cluster is reseeded with a randomly
    chosen data point instead, because a NaN center makes every later
    distance to it NaN and never compares equal to itself in an
    exact convergence check.
    '''
    labels = np.asarray(labels)
    n, d = matrix.shape
    new_centers = np.empty((k, d))

    for i in range(k):
        members = matrix[labels == i, :]
        if len(members) > 0:
            new_centers[i, :] = np.mean(members, axis=0)
        elif n > 0:
            # empty cluster: restart it from a random data point
            new_centers[i, :] = matrix[np.random.randint(n)]
        else:
            # no data at all, nothing to reseed from
            new_centers[i, :] = np.nan

    return new_centers
        

In [7]:
def center_convergence(old_centers, new_centers):
    '''
    Check whether the centers stayed the same between two iterations.
    Input: the previous centers and the updated centers (sequences of rows)
    Output: True when both hold exactly the same rows in the same order,
            False otherwise (exact comparison, no tolerance)
    '''
    previous_rows = list(map(list, old_centers))
    updated_rows = list(map(list, new_centers))
    return previous_rows == updated_rows

In [None]:
def kmeans(matrix, k):
    '''
    Run k-means on the data.
    Input: Image matrix 2D, number of clusters k
    Output: tuple (centers, labels, loss) holding the centers from the
            last update step, the labels from the last assignment step
            and J_sum of the last distance matrix
    Repeats assignment and update until center_convergence reports that
    an update left the centers unchanged; there is no iteration limit.
    '''
    centers = init_clusters(matrix, k)

    while True:
        # assignment step against the current centers
        distance_matrix = dist_matrix(matrix, centers)
        labels = cluster_assignment(distance_matrix)
        # update step
        updated = cluster_update(matrix, labels, k)
        finished = center_convergence(centers, updated)
        centers = updated
        if finished:
            break

    return centers, labels, J_sum(distance_matrix)
        
        
        

In [11]:
def medoids_update(matrix, labels, centers, k):
    '''
    Finding new center points for k-medoids
    input: Image matrix 2D, labels, current centers (k x d), number of
           clusters k
    Output: array shaped like `centers`; row i is the member of cluster i
            with the smallest total Euclidean distance to all members of
            that cluster (the first such member if several tie)

    Identical data points are collapsed with np.unique to keep the
    pairwise distance matrix small, but every distance is weighted by
    how often the point occurs, so the result is the medoid of the
    whole cluster and not just of its distinct values.
    A cluster without members keeps its previous center.
    '''
    labels = np.asarray(labels)
    new_centers = np.empty(centers.shape)
    for i in range(k):
        members = matrix[labels == i, :]
        if len(members) == 0:
            # nothing to choose from: leave this center where it was
            new_centers[i, :] = centers[i]
            continue

        candidates, counts = np.unique(members, axis=0, return_counts=True)
        # pairwise[a, b] = distance between distinct points a and b
        pairwise = np.linalg.norm(candidates - candidates[:, None], axis=-1)
        # total distance from each candidate to ALL members of the cluster
        total_distance = pairwise @ counts
        new_centers[i, :] = candidates[np.argmin(total_distance)]
    return new_centers
        
        

In [20]:
def kmedoids(matrix, k, order):
    '''
    Run k-medoids on the data.
    Input: Image matrix 2D, number of clusters k, order of the norm
           passed on to dist_matrix for the assignment step
    Output: tuple (centers, labels, loss) holding the centers from the
            last update step, the labels from the last assignment step
            and J_sum of the last distance matrix
    Repeats assignment and update until center_convergence reports that
    an update left the centers unchanged; there is no iteration limit.
    '''
    centers = init_clusters(matrix, k)

    while True:
        # assignment step against the current medoids
        distance_matrix = dist_matrix(matrix, centers, order)
        labels = cluster_assignment(distance_matrix)
        # update step
        updated = medoids_update(matrix, labels, centers, k)
        finished = center_convergence(centers, updated)
        centers = updated
        if finished:
            break

    return centers, labels, J_sum(distance_matrix)
    

In [21]:
#matrix = preparation_k_methods('beach.bmp')
