In [44]:
import numpy as np
import sys
from PIL import Image
from sklearn import preprocessing
from sklearn.metrics.pairwise import euclidean_distances

## input parameters

In [116]:
# Tunable inputs for the whole notebook: edit here, then Restart & Run All.
iter_size = 5 # number of iterations
K = 3 # number of clusters
inputName = 'input/8.jpg'
resultName = 'result/8-seg.jpg'

# Seed NumPy's global RNG so the random draws made further down (cluster
# centre initialisation) give the same result on every fresh run.
SEED = 0
np.random.seed(SEED)

## open image

In [117]:
# Load the input and force 3-channel RGB so that every pixel is an (r, g, b)
# triple whatever the file's native mode is (greyscale, palette, RGBA, ...).
# convert() returns a loaded in-memory image, so the file can be closed at once.
with Image.open(inputName) as source:
    image = source.convert("RGB")
W, H = image.size  # width and height of the image, in pixels
W,H

(280, 180)

## vectorize image data

In [118]:
# Build one sample per pixel with the five features (r, g, b, x, y).
# x and y are included so that spatially close pixels count as more similar.
# The sample for pixel (x, y) sits at row x + y * W (row-major order).
rgb = np.asarray(image.convert("RGB"), dtype=float).reshape(W * H, 3)
ys, xs = np.mgrid[0:H, 0:W]  # per-pixel row (y) and column (x) coordinates
vector = np.column_stack((rgb, xs.ravel(), ys.ravel()))

# Cluster index each pixel is currently assigned to; -1 means "not assigned yet"
# (np.ndarray(shape=...) would leave arbitrary uninitialised values here).
labels = np.full(W * H, -1, dtype=int)

# Rescale the features. NOTE: preprocessing.normalize with its default
# arguments scales each ROW (pixel) to unit L2 norm; it is not a per-feature
# standardisation, so the scaled r, g, b values also depend on x and y.
vector_scaled = preprocessing.normalize(vector)

## set centers with random initialization

In [119]:
# Draw the K starting centres uniformly at random from the overall value range
# of the scaled data (a single [min, max] interval shared by all five features).
minValue, maxValue = np.amin(vector_scaled), np.amax(vector_scaled)

# One (K, 5) draw: each row is one centre, each column one feature.
centers = np.random.uniform(minValue, maxValue, size=(K, 5))



## run algorithm

In [120]:
for iteration in range(iter_size):
    # Assign every pixel to its nearest centre.
    # Broadcasting gives distances of shape (n_clusters, n_pixels).
    distances = np.sqrt(((vector_scaled - centers[:, np.newaxis])**2).sum(axis=2))
    closest = np.argmin(distances, axis=0)

    n_clusters = centers.shape[0]
    counts = np.bincount(closest, minlength=n_clusters)  # pixels per cluster

    # Repair empty clusters: hand each one a random pixel taken from a cluster
    # that has more than one member. This has to act on `closest`, because that
    # is what the centre update below reads; an empty cluster would otherwise
    # produce a NaN centre (mean of an empty selection).
    for k in np.flatnonzero(counts == 0):
        donors = np.flatnonzero(counts[closest] > 1)
        if donors.size == 0:
            break  # fewer pixels than clusters: nothing left to hand over
        moved = donors[np.random.randint(donors.size)]
        counts[closest[moved]] -= 1
        closest[moved] = k
        counts[k] = 1

    # Keep the per-pixel label vector in sync with the current assignment.
    labels[:] = closest

    # Move each centre to the mean of its members; a cluster that is still
    # empty keeps its previous centre instead of turning into NaN.
    centers = np.array([
        vector_scaled[closest == k].mean(axis=0) if counts[k] else centers[k]
        for k in range(n_clusters)
    ])

    print("Centers Iteration num", iteration, ": \n", centers)

Centers Iteration num 0 : 
 [[0.54440627 0.54458416 0.54383687 0.18108033 0.20692677]
 [0.48954623 0.29207569 0.16002083 0.54312436 0.36353725]
 [0.45919715 0.47687059 0.54698137 0.47507462 0.10802162]]
Centers Iteration num 1 : 
 [[0.56381263 0.5610025  0.51117441 0.14247062 0.20763164]
 [0.46251914 0.18650119 0.09002415 0.61081905 0.40138526]
 [0.47005156 0.48195199 0.52816268 0.46416673 0.15978554]]
Centers Iteration num 2 : 
 [[0.57424621 0.56856149 0.47537892 0.14594747 0.21046701]
 [0.42236855 0.08943713 0.08336023 0.67673074 0.42413621]
 [0.47254806 0.48353217 0.52500996 0.45759138 0.17011844]]
Centers Iteration num 3 : 
 [[0.57484252 0.56898718 0.47385802 0.145013   0.21075146]
 [0.42045325 0.08250413 0.08185019 0.68026362 0.42447637]
 [0.47315129 0.48392161 0.5237619  0.45627525 0.1717928 ]]
Centers Iteration num 4 : 
 [[0.57494879 0.56908077 0.47376855 0.14467047 0.21064788]
 [0.42043767 0.08196622 0.08157594 0.68045766 0.42445965]
 [0.47323521 0.4839669  0.52355782 0.4560166

## set the pixels on original image

In [121]:
# Paint every pixel with its cluster's colour: the mean of the ORIGINAL r, g, b
# values of the pixels assigned to that cluster.
# `centers` is not used for this because it lives in the scaled feature space
# (each row of vector_scaled is normalised together with x and y), so
# centre * 255 is not an RGB value.
for k in np.unique(closest):
    members = closest == k  # boolean mask of the pixels in cluster k
    vector[members, :3] = np.rint(vector[members, :3].mean(axis=0))

## save image

In [122]:
# Rebuild an RGB image from the first three columns of `vector` in one step
# instead of calling putpixel once per pixel.
# Row x + y * W of `vector` is pixel (x, y), so reshaping to (H, W, 3) restores
# the image layout. Values are clipped to the valid 0..255 range and truncated
# to 8-bit integers, matching the int() truncation of a per-pixel write.
pixels = np.clip(vector[:, :3], 0, 255).astype(np.uint8).reshape(H, W, 3)
image = Image.fromarray(pixels)
image.save(resultName)