## Import libraries

In [11]:
import numpy as np
import cv2
import os
import math
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans
from sklearn.svm import LinearSVC
from tqdm.notebook import tqdm

## Load Dataset

In [12]:
cat_path = os.getcwd() + '/101_ObjectCategories'
cat_names = {}
count = 0
for name in os.listdir(cat_path):
    cat_names[count] = cat_path +  '/' + str(name)
    count += 1
print("Number of categories: ", len(cat_names))

# Initialise dataset
data, labels = [], []
for cat_id, cat_name in cat_names.items():
    cat_img_paths = os.listdir(cat_name)[:80]
    labels += (len(cat_img_paths)*[cat_id])
    
    for one_path in cat_img_paths:
        data.append(cv2.imread(cat_name + '/' + one_path, 0))

Number of categories:  102


## Split train, test

In [13]:
x_train, x_test, y_train, y_test = train_test_split(data, labels, 
    test_size = 5/8, random_state = 42)
print("X shape: " + str(len(x_train)) + ", Y shape: " + str(len(x_test)))

X shape: 2270, Y shape: 3785


##  getFeatures

In [23]:
def getFeatures(img, kmeans, k, levels):
    width = img.shape[1]
    height = img.shape[0]
    hist_array = []
    
    dsift_step = 2
    
    for lvl in range(levels+1):
        width_step = math.floor(width/(2**lvl))
        height_step = math.floor(height/(2**lvl))
        row, col = 0, 0
        for i in range(1, 2**lvl+1):
            row = 0
            for j in range(1, 2**lvl+1):
                keypoints = []
                for y in range(0, height, dsift_step):
                    for x in range(0, width, dsift_step):
                        sift = cv2.xfeatures2d.SIFT_create()
                        keypoints.append(cv2.KeyPoint(x, y, dsift_step))
                descriptors = sift.compute(img[col:col+height_step,
                            row:row+width_step], keypoints)[1]
                
                predict = kmeans.predict(descriptors)
                histogram = np.bincount(predict, minlength = k).reshape(
                    1, -1).ravel()
                hist_array.append(2**(lvl-levels)*histogram)
                row = row + width_step
            col = col + height_step
    hist = np.array(hist_array).ravel()
    
    deviation = np.std(hist)
    hist = hist-np.mean(hist) - deviation

## getHistogram

In [15]:
def getHistogram(data, kmeans, k, levels):
    hist_array = []
    for i in range(len(data)):
        hist_array.append(getFeatures(data[i], kmeans, k, levels))
    return np.array(hist_array)

## getSIFT

In [25]:
def getSIFT(data):
    result = []
    step = 15
    for i in range(len(data)):
        sift = cv2.xfeatures2d.SIFT_create()
        keypoints = []
        for x in range(0, data[i].shape[0], step):
            for y in range(0, data[i].shape[1], step):
                keypoints.append(cv2.KeyPoint(x, y, step))
        result.append(sift.compute(data[i],  keypoints)[1])
    return result

x_train_sift = getSIFT(x_train)

In [17]:
train_descriptors = []
for i in tqdm(range(len(x_train_sift))):
    for j in range(x_train_sift[i].shape[0]):
        train_descriptors.append(x_train_sift[i][j,:])
train_descriptors = np.array(train_descriptors)                                    

  0%|          | 0/2270 [00:00<?, ?it/s]

## KMeans and Training

In [18]:
def kmeanFeatures(train_descriptors, k):
    return KMeans(n_clusters = k, random_state = 0).fit(train_descriptors)

In [33]:
def trainSPM(kmean, k, lvl):
    train_hist = getHistogram(x_train, kmean, k, lvl)
    test_hist = getHistogram(x_test, kmean, k, lvl)
    clf = LinearSVC(random_state = 0, C = 5E-4)
    clf.fit(train_hist, y_train)
    predict = clf.predict(test_hist)
    accuracy = np.mean(predict == y_test)*100
    print("Level: " + str(lvl) + ", Accuracy: " + str(accuracy))

## Weak: K = 16

In [36]:
kmean_16 = kmeanFeatures(train_descriptors, 16)
for one_level in tqdm(range(4)):
    trainSPM(kmean_16, 16, one_level)

Level: 0, Accuracy: 5.161644771622
Level: 1, Accuracy: 21.19274024223
Level: 2, Accuracy: 35.84759274932
Level: 3, Accuracy: 38.72337728189


## Strong: K = 200

In [37]:
kmean_200 = kmeanFeatures(train_descriptors, 200)
for one_level in tqdm(range(4)):
    trainSPM(kmean_200, 200, one_level)

Level: 0, Accuracy: 23.65532729112
Level: 1, Accuracy: 38.12022318293
Level: 2, Accuracy: 45.02830338195
Level: 3, Accuracy: 47.91739375926


## Analysis
Level 0 signifies Bag-of-Words (BoW) method, and we can observe that using Spatial Pyramid Matching (SPM) will yield much higher accuracy.