# Read image, flatten image, input image matrix into dataVector

In [2]:
import numpy as np
import sys
import cv2
import matplotlib.pyplot as plt 
from PIL import Image
from sklearn import preprocessing
from sklearn.metrics.pairwise import euclidean_distances

# Set parameters
iterations = 4
C = 5

img = Image.open('burma.jpg')
img_w = img.size[0]
img_h = img.size[1]

print(img_w, img_h)

X = np.ndarray(shape=(img_w * img_h, 5), dtype=float)

for y in range(0, img_h):
      for x in range(0, img_w):
      	xy = (x, y)
      	rgb = img.getpixel(xy)
      	X[x + y * img_h, 0] = rgb[0]
      	X[x + y * img_h, 1] = rgb[1]
      	X[x + y * img_h, 2] = rgb[2]
      	X[x + y * img_h, 3] = x
      	X[x + y * img_h, 4] = y
        
print(X)

300 199
[[  1. 140. 223.   0.   0.]
 [  1. 142. 224.   1.   0.]
 [  0. 141. 223.   2.   0.]
 ...
 [  0.   0.   0.   0.   0.]
 [  0.   0.   0.   0.   0.]
 [  0.   0.   0.   0.   0.]]


# Rescale image into range [0,1]

In [3]:
X_scaled = preprocessing.normalize(X)
print(X_scaled)

[[0.00379786 0.53170095 0.84692365 0.         0.        ]
 [0.00377045 0.53540323 0.84457975 0.00377045 0.        ]
 [0.         0.53440538 0.84519432 0.00758022 0.        ]
 ...
 [0.         0.         0.         0.         0.        ]
 [0.         0.         0.         0.         0.        ]
 [0.         0.         0.         0.         0.        ]]


# Randomly take centers of clusters

In [4]:
minValue = np.amin(X_scaled)
maxValue = np.amax(X_scaled)

print(minValue, maxValue)

V = np.ndarray(shape=(C,5))
for index, v in enumerate(V):
    V[index] = np.random.uniform(minValue, maxValue, 5)

print(V)


0.0 0.997931139592263
[[0.8319717  0.93094431 0.04044363 0.44774744 0.49027798]
 [0.40739197 0.17822722 0.98765448 0.30705303 0.85504653]
 [0.37802277 0.00268153 0.35024555 0.22985995 0.77352313]
 [0.66170206 0.9223737  0.00651264 0.61389586 0.40712225]
 [0.37918222 0.8000322  0.98189093 0.92190538 0.19668888]]


# Calculate distance between each datapoint and center of cluster

In [10]:
distXToV = np.ndarray(shape=(C))
pixelClusterAppartenance = np.ndarray(shape=(img_w * img_h))

for iteration in range(iterations):
    for index, x in enumerate(X_scaled):
        for sub_index, v in enumerate(V):
            distXToV[sub_index] = euclidean_distances(x.reshape(1,-1), v.reshape(1, -1))
#             print(distXToV[sub_index], '')
        pixelClusterAppartenance[index] = np.argmin(distXToV)
#         print(pixelClusterAppartenance[index])

    clusterToCheck = np.arange(C)
    clustersEmpty = np.in1d(clusterToCheck, pixelClusterAppartenance)
    
    for index, item in enumerate(clustersEmpty):
        if item == False:
            pixelClusterAppartenance[np.random.randint(len(pixelClusterAppartenance))] = index
    
    for index in range(C):
        dataInCenter = []
        
        for sub_index, item in enumerate(pixelClusterAppartenance):
            if item == index:
                dataInCenter.append(X_scaled[sub_index])
        dataInCenter = np.array(dataInCenter)
        V[index] = np.mean(dataInCenter, axis = 0)
    
    print('Centers Iteration Num:', iteration, "\n", V)

Centers Iteration Num: 0 
 [[0.49731297 0.446022   0.30477839 0.36685771 0.48462837]
 [0.2424998  0.53995641 0.76809211 0.15130338 0.0857641 ]
 [0.         0.         0.         0.         0.        ]
 [0.11132857 0.18477221 0.15565723 0.45665641 0.77443486]
 [0.27876077 0.49583038 0.67120278 0.4249823  0.11264817]]
Centers Iteration Num: 1 
 [[0.48614312 0.44025867 0.30012225 0.39228386 0.48031866]
 [0.24031018 0.53931248 0.76772066 0.15653499 0.08510791]
 [0.         0.         0.         0.         0.        ]
 [0.10195198 0.17727837 0.15349982 0.43978871 0.79130635]
 [0.27984844 0.49584124 0.67173351 0.42559052 0.11134544]]
Centers Iteration Num: 2 
 [[0.47801999 0.43610028 0.29736296 0.40576604 0.47972773]
 [0.23952954 0.53910272 0.76765503 0.15815621 0.08482776]
 [0.         0.         0.         0.         0.        ]
 [0.0973216  0.17315216 0.15217934 0.42916873 0.80096403]
 [0.28062553 0.49612398 0.67231454 0.42513106 0.11032254]]
Centers Iteration Num: 3 
 [[0.47275617 0.4333