In [1]:
import numpy as np
import sys
from PIL import Image
from sklearn import preprocessing
from sklearn.metrics.pairwise import euclidean_distances

## input parameters

In [2]:
iter_size = 5 # number of iterations
K = 3 # number of clusters
inputName = 'input/burma.jpg'
resultName = 'result/burma-seg.jpg'
SEED = 0 # seed for NumPy's global random generator

# Seed the global generator so the random draws made later in the notebook
# (np.random.uniform / np.random.randint) repeat on every Restart & Run All.
np.random.seed(SEED)

## open image

In [3]:
# Load the source picture and unpack its dimensions in pixels.
image = Image.open(inputName)
W, H = image.size  # (width, height)
(W, H)

(300, 199)

## vectorize image data

In [4]:
# Build one sample per pixel with the attributes r, g, b, x, y.
# The x / y coordinates are included so that pixels close to each other
# look more alike to the clustering than distant ones.
# Converting to RGB first gives three well-defined colour channels even
# when the input is a grayscale, palette, RGBA or CMYK image.
rgb = np.asarray(image.convert("RGB"), dtype=float)  # shape (H, W, 3)

# Coordinate grids flattened in row-major order, so the sample for pixel
# (x, y) sits at row index x + y * W, exactly as the colour rows do.
ys, xs = np.mgrid[0:H, 0:W]
vector = np.column_stack((rgb.reshape(-1, 3), xs.ravel(), ys.ravel()))

# Cluster index currently assigned to each pixel; -1 means "not assigned yet"
labels = np.full(W * H, -1, dtype=int)

# Scale every sample (row) to unit L2 norm. Note: this is a per-row
# normalisation, not a per-feature standardisation.
vector_scaled = preprocessing.normalize(vector)

## set centers with random initialization

In [5]:
# Draw all K starting centres in one call: each of the 5 coordinates is
# sampled uniformly between the smallest and the largest entry of the
# scaled data.
minValue = vector_scaled.min()
maxValue = vector_scaled.max()

centers = np.random.uniform(minValue, maxValue, size=(K, 5))


In [6]:
# Scratch expression: displays a zero vector of length K; the value is not
# assigned to any name.
np.zeros(K)

array([0., 0., 0.])

## run algorithm

In [7]:
for iteration in range(iter_size):
    # Assignment step: distance from every centre to every sample, giving an
    # array of shape (n_clusters, n_samples); then pick the nearest centre
    # for each sample.
    distances = np.sqrt(((vector_scaled - centers[:, np.newaxis])**2).sum(axis=2))
    closest = np.argmin(distances, axis=0)

    # Keep the per-pixel cluster bookkeeping in sync with this assignment
    labels = closest.copy()

    # Update step: move each centre to the mean of its members. A cluster
    # that received no sample has no mean (it would become NaN and corrupt
    # every later distance), so its centre is re-seeded with a randomly
    # chosen data point instead.
    updated = np.empty_like(centers)
    for k in range(centers.shape[0]):
        members = vector_scaled[closest == k]
        if len(members):
            updated[k] = members.mean(axis=0)
        else:
            updated[k] = vector_scaled[np.random.randint(len(vector_scaled))]
    centers = updated

    print("Centers Iteration num", iteration, ": \n", centers)

Centers Iteration num 0 : 
 [[0.34234772 0.31888813 0.0917427  0.74904202 0.26981676]
 [0.24453443 0.33176581 0.32827141 0.48665363 0.4503987 ]
 [0.2158215  0.41603366 0.59007741 0.63959099 0.06265251]]
Centers Iteration num 1 : 
 [[0.18728527 0.21652441 0.06687558 0.83095896 0.3953934 ]
 [0.27838869 0.30105651 0.24828567 0.40647834 0.64566384]
 [0.25246399 0.48676183 0.6762672  0.40667286 0.09029532]]
Centers Iteration num 2 : 
 [[0.15306786 0.19390145 0.07958516 0.82152718 0.4359952 ]
 [0.28330758 0.30175661 0.22365777 0.37317236 0.68524383]
 [0.28932374 0.48795628 0.65047247 0.3978906  0.1257928 ]]
Centers Iteration num 3 : 
 [[0.15145832 0.19425883 0.0886118  0.81045423 0.44939863]
 [0.27516157 0.29379023 0.20320463 0.35050379 0.716177  ]
 [0.30313258 0.48563717 0.63495406 0.39910364 0.14546686]]
Centers Iteration num 4 : 
 [[0.1580877  0.19974819 0.09508757 0.80272737 0.45269062]
 [0.25943625 0.28264886 0.1892551  0.32852721 0.7466041 ]
 [0.31111851 0.48355579 0.62396156 0.4019270

## set the pixels on original image

In [8]:
# Set the colour of every pixel to the mean colour of its cluster.
# The cluster centres were computed on the scaled samples, so their first
# three components are not colour values divided by 255; the colours are
# therefore averaged from the original r, g, b columns of `vector`.
for k in np.unique(closest):
    members = closest == k
    # Round to whole channel values, as an image stores integers
    vector[members, :3] = np.rint(vector[members, :3].mean(axis=0))

## save image

In [9]:
# Rebuild an RGB picture from the first three columns of `vector`
# (row index y * W + x holds pixel (x, y)) and write it to disk.
pixels = vector[:, :3].reshape(H, W, 3).astype(np.uint8)
image = Image.fromarray(pixels)
image.save(resultName)