In [25]:
import numpy as np
import cv2
import os
import matplotlib.pyplot as plt
import numpy as np
from scipy.spatial.distance import cdist 

In [26]:
def padImageForPatches(img, patch_shape):
    return cv2.copyMakeBorder(src=img, left=0, right=patch_shape[1]*((img.shape[1]+31)//patch_shape[1])-img.shape[1], top=0, bottom=patch_shape[0]*((img.shape[0]+31)//patch_shape[0])-img.shape[0], borderType=cv2.BORDER_REFLECT)

def divideIntoPatches(img, patch_shape):
    img = padImageForPatches(img, patch_shape)
    img_shape = img.shape
    patch_array = np.empty((int((img_shape[0]*img_shape[1])/(patch_shape[0]*patch_shape[1])), patch_shape[0], patch_shape[1], 3), dtype=np.int32)
    for i in range(0, int(img_shape[0]/patch_shape[0])):
        for j in range(0, int(img_shape[1]/patch_shape[1])):
            patch_array[i*int(img_shape[1]/patch_shape[1]) + j] = img[i*patch_shape[0]:i*patch_shape[0]+patch_shape[0], j*patch_shape[1]:j*patch_shape[1]+patch_shape[1], :]

    return patch_array

def getColorHistFeatures(img, n_bins=8):
    img = img//int(256/n_bins)
    hist = np.bincount(img[:, :, 0].ravel(), minlength=8)
    hist = np.append(hist, np.bincount(img[:, :, 1].ravel(), minlength=8))
    hist = np.append(hist, np.bincount(img[:, :, 2].ravel(), minlength=8))
    return hist


def getHistForPatches(img, patch_shape):
    patch_array = divideIntoPatches(img, patch_shape)
    n, h, w, c = patch_array.shape
    hist_array = np.empty((n, 24), dtype=np.int32)
    for i in range(0, n):
        hist_array[i] = getColorHistFeatures(patch_array[i])
    return hist_array

def loadImagesFromDir(dir_path):
    images = {}
    
    for folder in os.listdir(dir_path):
        img_arr = []
        folder_path = dir_path + "/" + folder
        for file in os.listdir(folder_path):
            img_path = folder_path + "/" + file
            img = cv2.imread(img_path)
            if img is not None:
                img_arr.append(img)
        images[folder] = img_arr
    
    return images

def getHistForAllImages(images, patch_shape):
    images_hist_features = {}

    for key, img_arr in images.items():
        hist24_arr = []
        for img in img_arr:
            hist24_arr.append(getHistForPatches(img, patch_shape))
        images_hist_features[key] = hist24_arr
    
    return images_hist_features

In [27]:
train_images = loadImagesFromDir("Group21/Classification/Image_Group21/train")
test_images = loadImagesFromDir("Group21/Classification/Image_Group21/test")

In [28]:
train_imgs_hist = getHistForAllImages(train_images, patch_shape=(32, 32))
test_imgs_hist = getHistForAllImages(test_images, patch_shape=(32, 32))

In [29]:
train_imgs_hist_flattened = np.concatenate([y for x in train_imgs_hist.values() for y in x])

In [30]:
train_imgs_hist_flattened

array([[  0,   0,   0, ...,  24,  61, 858],
       [  0,   0,   0, ...,  43,  36, 893],
       [  0,   0,   1, ...,  47,  26, 862],
       ...,
       [  0,   0,   0, ..., 542, 240,  44],
       [  0,   0,   8, ..., 370, 224, 108],
       [  0,   2,   6, ..., 332, 342, 170]])

In [31]:
def kmeans(x, k, no_of_iterations):
    idx = np.random.choice(len(x), k, replace=False)
    print(idx)
    #Randomly choosing Centroids 
    centroids = x[idx, :] #Step 1
     
    #finding the distance between centroids and all the data points
    distances = cdist(x, centroids ,'euclidean') #Step 2
     
    #Centroid with the minimum Distance
    points = np.array([np.argmin(i) for i in distances]) #Step 3
    
    for i in range(k):
        points[idx[i]] = i
        
    
    #Repeating the above steps for a defined number of iterations
    for itr in range(no_of_iterations): 
        centroids = []
        print(f"*** Iteration {itr} ***")
        for idx in range(k):
            #Updating Centroids by taking mean of Cluster it belongs to
            print(x[points==idx])
            temp_cent = x[points==idx].mean(axis=0) 
            centroids.append(temp_cent)
 
        centroids = np.vstack(centroids) #Updated Centroids 
         
        distances = cdist(x, centroids ,'euclidean')
        points = np.array([np.argmin(i) for i in distances])
         
    return centroids

In [32]:
centroids = kmeans(train_imgs_hist_flattened, 32, 100)

172 472]
 [  0   0  30 ... 174 228 444]]
[[  1 634 172 ...  81   0   0]
 [  0 560 184 ... 136   0   0]
 [  0 511 235 ... 162   2   0]
 ...
 [ 14 296 457 ...   0   0   0]
 [ 97 335 314 ... 104  67  27]
 [104 288 255 ... 105  53  11]]
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
[[  0  18 464 ...  35  72 106]
 [  1 270 398 ...  11  19   0]
 [  0 161 480 ...  76  26   0]
 ...
 [  0   5 441 ...   0   0   0]
 [  0   8 543 ...   2   0   0]
 [  0  20 434 ...  96   0   0]]
[[1000   21    3 ...    0    0    0]
 [1024    0    0 ...    0    0    0]
 [ 496  246  168 ...  133   96   11]
 ...
 [ 733   93   64 ...   55    6   18]
 [ 831   99   25 ...   26   15    1]
 [ 678  190   60 ...   45   20   10]]
[[ 38 531 207 ...  83  22   0]
 [322 545 137 ...   0   0   0]
 [  0 561 247 ...  93  80  35]
 ...
 [132 632 158 ...  34  19   7]
 [ 44 691 278 ...   0   0   0]
 [171 631 206 ...   3   2   0]]
*** Iteration 97 ***
[[ 869   25   

In [33]:
def BoVW_image_feature_vector(img_col_hist, centroids):
    distances = cdist(img_col_hist, centroids ,'euclidean')
    points = np.array([np.argmin(i) for i in distances])
    feature = np.bincount(points, minlength=len(centroids))
    return feature

In [34]:
list(train_imgs_hist.values())[0][0]

array([[  0,   0,   0, ...,  24,  61, 858],
       [  0,   0,   0, ...,  43,  36, 893],
       [  0,   0,   1, ...,  47,  26, 862],
       ...,
       [  0,   0, 256, ..., 442,  67,   6],
       [  0,   0, 263, ..., 239,  92,  36],
       [  0,   0, 324, ..., 268,  16,   0]])

In [35]:
BoVW_image_feature_vector(list(train_imgs_hist.values())[0][0], centroids)

array([ 0,  0,  7, 15,  0,  6,  2,  0,  0,  0,  6,  2,  0,  0,  0,  0,  0,
       11, 13,  5,  1,  3,  0,  9,  4,  0,  6,  9,  0,  4,  0,  1],
      dtype=int64)

In [36]:
img_BoVW_all = {}
for img_type, img_hist_array in train_imgs_hist.items():
    img_BoVW = np.empty((len(img_hist_array), 32), dtype=np.int32)
    for i in range(len(img_hist_array)):
        img_BoVW[i] = BoVW_image_feature_vector(img_hist_array[i], centroids)
    img_BoVW_all[img_type] = img_BoVW

In [38]:
img_BoVW_all["batters_box"][0]

array([ 0,  0,  7, 15,  0,  6,  2,  0,  0,  0,  6,  2,  0,  0,  0,  0,  0,
       11, 13,  5,  1,  3,  0,  9,  4,  0,  6,  9,  0,  4,  0,  1])