This code segments an image into 20 clusters of pixels, producing one segmented image for each of several different k-means starting points.

In [5]:
# Read small sunset image into 2d array of pixel values

from scipy import misc
from scipy.misc import toimage
import numpy as np 
from scipy.spatial import distance
from scipy.misc import logsumexp

# Load the sunset photo; the printed shape is (rows, columns, colour channels),
# which is 330 x 600 x 3 for this file.
image = misc.imread('smallsunset.jpg')
print(image.shape)
# Bring the channel axis to the front, then flatten each channel plane so that
# every column of the result is one pixel (3 x 198000 for this file).
channels_first = np.transpose(image, (2, 0, 1))
arr = channels_first.reshape(3, -1)
arr.shape

(330, 600, 3)


(3, 198000)

In [6]:
# Number of clusters (mixture components) used for the segmentation.
k = 20
# One row per pixel, one column per colour channel.
data = arr.transpose()
# Take the pixel count from the data itself rather than hard-coding 198000, so
# the later reshapes stay consistent if a different image is loaded.
pixels = data.shape[0]

In [7]:
# Generate a segmented image using 20 clusters, varying the KMeans argument
# random_state from 100 to 500 in steps of 100. One JPEG is written per value.

from sklearn.cluster import KMeans
for start in [100,200,300,400,500]:
    print('start=',start)

    # Train k-means; its result seeds the EM iterations below.
    kmeans = KMeans(n_clusters=k,random_state=start)
    kmeans.fit(data)

    # Initialize pi (fraction of pixels assigned to each cluster) and the
    # means mu_j (the k-means cluster centers, one row per cluster).
    clusterids = kmeans.predict(data)
    mu_j = kmeans.cluster_centers_.reshape(k,3)
    pi = [np.count_nonzero(clusterids == j) / pixels for j in range(k)]
    print(sum(pi))
    logpi = np.log(pi).reshape(1,k)
    prev_wij = np.zeros((pixels,k))

    # Run EM until the largest change in any weight w_ij between two
    # consecutive iterations is no longer above 1e-7.
    while True:
        # E-step: w_ij is proportional to pi_j * exp(-0.5 * ||x_i - mu_j||^2).
        # Work in log space and normalise each row with logsumexp so that the
        # exponentials of large squared distances do not underflow.
        dist_matrix = distance.cdist(data,mu_j,'sqeuclidean')
        log_wij_num = logpi - 0.5*dist_matrix
        log_wij_den = logsumexp(log_wij_num,axis=1).reshape(pixels,1)
        wij = np.exp(log_wij_num - log_wij_den)
        wij_diff = np.amax(np.absolute(wij - prev_wij))
        prev_wij = wij

        # M-step: weighted mean of the pixels for every cluster, and the new
        # log mixing weights. The column sums of wij are shared by both.
        weight_totals = wij.sum(axis=0)
        mu_j = (np.dot(data.transpose(),wij) / weight_totals).transpose()
        logpi = np.log(weight_totals / pixels)

        # Checked after the M-step so the parameters are always refreshed at
        # least once; written as "not >" so a NaN difference also stops.
        if not (wij_diff > 1e-7):
            break

    # Replace pixel values with their cluster centers and save image.
    # Each pixel takes the (integer-truncated) mean of its most likely cluster;
    # the result is kept as floats, one row per pixel.
    new_clusterids = wij.argmax(axis=1)
    segmented_img = mu_j.astype(int)[new_clusterids].astype(float)

    # Reshape back to channels x rows x columns; 330 and 600 are the dimensions
    # of smallsunset.jpg and must be changed if another image is used.
    # NOTE(review): toimage appears to rescale non-uint8 input to the full
    # 0-255 range by default, which would stretch the colours - confirm
    # whether cmin=0, cmax=255 should be passed.
    newimg = segmented_img.transpose().reshape(3,330,600)
    toimage(newimg).save('randomstart'+str(start)+'smallsunset20.jpg')

start= 100
0.9999999999999999
start= 200
0.9999999999999999
start= 300
1.0
start= 400
1.0
start= 500
1.0
