### A.	[LO 2] Build Visual Vocabulary from the dataset using the Visual Bag of Words algorithm. Explain comprehensively how the algorithms work! 


In [1]:
import cv2
import os
import numpy as np 
from scipy.spatial import distance
from sklearn.cluster import KMeans

In [2]:
def load_images(path):
    image_dict = {}
    for animal_type in os.listdir(path):
        animal_type_image = []
        type_path = f"{path}/{animal_type}"
        for image in os.listdir(type_path): 
            img = cv2.imread(f"{type_path}/{image}", 0)
            if img is not None: 
                img = cv2.resize(img, (100,100))
                animal_type_image.append(img)
        image_dict[animal_type] = animal_type_image
    return image_dict

In [3]:
train_set = load_images('cat-and-dog/training_set/training_set/')
# train_cats = load_images('cat-and-dog/training_set/training_set/cats/')
# train_dogs = load_images('cat-and-dog/training_set/training_set/dogs/')

In [29]:
test_set = load_images('cat-and-dog/test_set/test_set/')
# test_cats = load_images('cat-and-dog/test_set/test_set/cats/')
# test_dogs = load_images('cat-and-dog/test_set/test_set/dogs/')

Below is the function to get local features of the image using SIFT. In one dataset consists of two categories: cat and dog. These categories will be iterated separatedly, by getting the keypoints and descriptors for every image in both categories (cat and dog). The descriptor for all images will be combined in the descriptors list, while the descriptors for specific class will be stored in the sift_vectors dictionary. This function will return a list that has 2 values; the first value is the combined descriptors of all images and the second value is the visual dictionary of our descriptors from specific classes. 

In [5]:
def sift_features(dataset):
    sift_vectors = {}
    descriptors = []
    # surf = cv2.xfeatures2d.SURF_create(800)
    sift = cv2.SIFT_create()
    # cv2.features2d.SIFT_create()

    for category, animal_img in dataset.items():
        features = []
        for img in animal_img:
            kp, desc = sift.detectAndCompute(img, None)
            descriptors.extend(desc)
            features.append(desc)
        sift_vectors[category] = features

    return [descriptors, sift_vectors]

In [6]:
compute_sift = sift_features(train_set)
descriptor_list = compute_sift[0]
bovw_dictionary = compute_sift[1]

Next, after getting all the descriptors, we can pass it to the K-Means clustering algorithm to find the visual words, which are the center points for each cluster.

In [7]:
def kmeans(k, desc):
    kmeans = KMeans(n_clusters=k, n_init=10)
    kmeans.fit(desc)
    visual_bow = kmeans.cluster_centers_

    return visual_bow

In [8]:
visual_bow = kmeans(150, descriptor_list)

### B.	[LO 3] Use K-NN to predict the object (i.e., dog or cat) and explain the results!

In [30]:
test_sift = sift_features(test_set)
test_dictionary = test_sift[1]

Compute the histogram for train and test dataset so we may compare the accuracy of the prediction.

In [14]:
def data_hist(bovw, centroid):
    feature_dict = {}
    for category, value in bovw.items():
        histograms = []
        for img in value: 
            hist = cv2.calcHist([img], [0], None, [256], [0,256])
            histograms.append(hist)
        feature_dict[category] = hist
    return feature_dict

In [31]:
bovw_hist_train = data_hist(bovw_dictionary, visual_bow)
bovw_hist_test = data_hist(test_dictionary, visual_bow)

In [34]:
def knn(images, test):
    test_count = 0
    correct = 0
    cat_or_dog = {}

    for test_category, test_value in test.items():
        cat_or_dog[test_category] = [0,0]

        # iterating the images in test dataset
        for test_img in test_value:
            predict = 0
            min_dist = 0
            pred_category = "animal"
            # iterating the images in train dataset 
            for train_category, train_value in images.items():
                for train_img in train_value: 
                    if (predict==0): # setting the first data 
                        min_dist = distance.euclidean(test_img, train_img)
                        pred_category = train_category
                        predict += 1
                    else: # after the first data 
                        dist = distance.euclidean(test_img, train_img)

                        if (dist < min_dist): 
                            min_dist = dist
                            pred_category = train_category

            if (test_category == pred_category):
                correct += 1
                cat_or_dog[test_category][0] += 1
            test_count += 1
            cat_or_dog[test_category][1] += 1

    return [test_count, correct, cat_or_dog]


In [35]:
result = knn(bovw_hist_train, bovw_hist_test)

In [36]:
def accuracy(result):
    acc_avg = (result[1] / result[0]) * 100 #correct prediction / number of test made 
    print(f"Average accuracy: {acc_avg}%")
    print("Class based on accuracy: ")
    for category, value in result[2].items():
        print(f"Class {category}: {value[0]/value[1] * 100}%")

In [37]:
accuracy(result)

Average accuracy: 50.9765625%
Class based on accuracy: 
Class cats: 84.765625%
Class dogs: 17.1875%


In [38]:
print(f"Test counts: {result[0]} || Correct Prediction: {result[1]}")

Test counts: 512 || Correct Prediction: 261
