# Bag of Visual Words implementation for Dog and Cat detection

## Library import

In [12]:
import numpy as np
import cv2
import os
import pandas as pd
import matplotlib.pyplot as plt
from scipy import ndimage
from scipy.spatial import distance
from sklearn.cluster import KMeans
from sklearn.utils import shuffle
from tqdm.notebook import tqdm

In [8]:
# Show the working directory: the relative './data/...' and 'output/bovw/...' paths
# used further down are resolved against it.
print(os.getcwd())

/mnt/c/Users/Federico Arenas/Documents/Federico/UoE/MSC_AI/2021-1/IVC/Coursework/IVC_project


## Helper functions

In [23]:
# Get dataset of data split
def get_splits(split_n):
    '''
    Load the train and validation tables of one data split.

    The files are read from './data/split_<n>/split_<n>_train.csv' and
    './data/split_<n>/split_<n>_val.csv', where <n> is str(split_n).

    Parameters
    ----------
    split_n : identifier of the split, inserted into the path via str().

    Returns
    -------
    (train_df, val_df) : tuple of pandas.DataFrame
    '''
    prefix = f'./data/split_{split_n!s}/split_{split_n!s}_'
    train_df, val_df = (pd.read_csv(prefix + suffix) for suffix in ('train.csv', 'val.csv'))

    return train_df, val_df

# Read and store images
def image_reader(dataframe):
    '''
    Read every image listed in a split table and group the images by label.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must provide an 'image_id' column (paths handed to cv2.imread) and a
        'label' column.

    Returns
    -------
    dict
        {0: [images whose label == 0], 1: [images with any other label]}.
        Every image is returned as a 3-channel array: colour images have their
        channel order swapped from OpenCV's BGR to RGB, single-channel images
        are expanded with COLOR_GRAY2RGB.

    Raises
    ------
    OSError
        If cv2.imread cannot read a path (it returns None in that case).
    '''
    image_dict = {0: [], 1: []}

    for file_location, label in zip(dataframe['image_id'], dataframe['label']):
        # The second argument of imread is a read flag, not a colour-conversion
        # code. IMREAD_ANYCOLOR has the same numeric value as the
        # COLOR_RGB2BGR constant that used to be passed here, so the decoded
        # data is unchanged.
        image = cv2.imread(file_location, cv2.IMREAD_ANYCOLOR)
        if image is None:
            raise OSError(f'cv2.imread could not read image: {file_location!r}')

        if image.ndim == 2:
            # Grayscale file: replicate the single channel into three.
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
        else:
            # imread yields BGR; swap to RGB.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        image_dict[0 if label == 0 else 1].append(image)

    return image_dict

# Get SIFT features and descriptors
def sift_features(images):
    '''
    Compute SIFT descriptors for every image of every class.

    Parameters
    ----------
    images : dict
        Maps a class key to a list of images (as produced by image_reader).

    Returns
    -------
    (descriptor_list, sift_vectors) : tuple
        descriptor_list is a flat list holding every descriptor row of every
        image, in no class-specific order.
        sift_vectors maps each class key to a list with one descriptor array
        per image, in the same order as the input images. An image without
        keypoints contributes an empty (0, descriptor_size) array and nothing
        to descriptor_list.
    '''
    sift_vectors = {}
    descriptor_list = []
    # SIFT lives in the main cv2 module in recent OpenCV releases; fall back to
    # the contrib location used by older builds.
    sift = cv2.SIFT_create() if hasattr(cv2, 'SIFT_create') else cv2.xfeatures2d.SIFT_create()
    for key, value in tqdm(images.items()):
        features = []
        for img in tqdm(value):
            _, des = sift.detectAndCompute(img, None)
            if des is None:
                # No keypoints detected: keep the per-image alignment with an
                # empty descriptor array instead of failing on extend(None).
                des = np.empty((0, sift.descriptorSize()), dtype=np.float32)
            descriptor_list.extend(des)
            features.append(des)
        sift_vectors[key] = features

    return descriptor_list, sift_vectors

# Perform kmeans clustering on descriptors
def kmeans(k, descriptor_list, random_state=None):
    '''
    Fit a k-means model on the pooled descriptors to build the visual vocabulary.

    Parameters
    ----------
    k : int
        Number of clusters (visual words).
    descriptor_list : sequence of descriptor vectors
        The samples passed to KMeans.fit.
    random_state : int, RandomState instance or None, optional
        Forwarded to KMeans; pass an int to make the clustering reproducible.
        The default (None) keeps the previous, unseeded behaviour.

    Returns
    -------
    sklearn.cluster.KMeans
        The fitted model (not just the centres); the centres are available as
        its cluster_centers_ attribute.
    '''
    # A single initialisation (n_init=1) with verbose progress output.
    model = KMeans(n_clusters=k, n_init=1, verbose=1, random_state=random_state)
    model.fit(descriptor_list)

    return model

# Extract visual words from descriptors  
def get_histograms(images, k, kmeans):
    '''
    Build one normalised visual-word histogram per image.

    Parameters
    ----------
    images : dict
        Maps a class key to a list of images.
    k : int
        Length of each histogram; should equal the number of clusters of
        `kmeans` (a predicted index >= k raises IndexError).
    kmeans : fitted clustering model
        Its predict() assigns each SIFT descriptor to a visual word index.

    Returns
    -------
    (hists, classes) : tuple of numpy.ndarray
        hists has one row of length k per image; each bin holds the fraction
        of the image's keypoints assigned to that word. An image without
        keypoints yields an all-zero row.
        classes holds int(key) for each image, aligned with hists.
    '''
    hists = []
    classes = []
    # SIFT lives in the main cv2 module in recent OpenCV releases; fall back to
    # the contrib location used by older builds.
    sift = cv2.SIFT_create() if hasattr(cv2, 'SIFT_create') else cv2.xfeatures2d.SIFT_create()
    for key, value in tqdm(images.items()):
        for img in tqdm(value):
            kp, des = sift.detectAndCompute(img, None)

            hist = np.zeros(k)
            # des is None when no keypoints were found; leave the histogram at
            # zero rather than iterating over None / dividing by zero.
            if des is not None and len(des) > 0:
                # One batched prediction per image instead of one call per descriptor.
                words = kmeans.predict(des)
                # Unbuffered accumulation so repeated indices each add their share.
                np.add.at(hist, words, 1 / len(kp))  # Normalization of histograms

            hists.append(hist)
            classes.append(int(key))

    return np.array(hists), np.array(classes)

## Load the train/validation split tables

In [14]:
# NOTE(review): the printed message mentions image conversion, but this cell only
# loads the train/validation CSV tables of the chosen split via get_splits.
print('Convert dataframe to dictionary of images')
split_n = 1  # which data split to use
train_splits, val_splits = get_splits(split_n)

Convert dataframe to dictionary of images


## Read the split images into per-class dictionaries

In [15]:
# Read the images referenced by the split tables into {label: [images]} dictionaries
# NOTE(review): the message hard-codes "split 1" while the split is chosen by split_n.
print('Get split 1 data')
train_dict = image_reader(train_splits)
val_dict = image_reader(val_splits)

Get split 1 data


## Get full sift features for training data

In [16]:
print('Get full sift features for training data')
descriptor_list, sift_vectors = sift_features(train_dict) 
# Make sure the output directory exists so the save works on a fresh checkout.
os.makedirs('output/bovw', exist_ok=True)
# sift_vectors is a dict, so np.save pickles it; reload with
# np.load(path, allow_pickle=True).item().
np.save('output/bovw/sift_vectors.npy', sift_vectors)

Get full sift features for training data


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/792 [00:00<?, ?it/s]

  0%|          | 0/800 [00:00<?, ?it/s]

## Perform kmeans training to get visual words

In [17]:
# Cluster all training descriptors into 20 visual words; the same vocabulary size
# is passed to get_histograms when the histograms are built.
print('Takes the central points which is visual words')
k_means = kmeans(20, descriptor_list) 

Takes the central points which is visual words
Initialization complete
Iteration 0, inertia 69941430553.0
Iteration 1, inertia 45022804254.32941
Iteration 2, inertia 43993751736.92146
Iteration 3, inertia 43631456828.238144
Iteration 4, inertia 43443809473.122375
Iteration 5, inertia 43324308360.419624
Iteration 6, inertia 43241291731.39898
Iteration 7, inertia 43179010736.99799
Iteration 8, inertia 43126673679.55967
Iteration 9, inertia 43080536366.821205
Iteration 10, inertia 43041898410.71034
Iteration 11, inertia 43010715221.123314
Iteration 12, inertia 42985313234.73533
Iteration 13, inertia 42965641250.72398
Iteration 14, inertia 42950279757.44258
Iteration 15, inertia 42937798529.440094
Iteration 16, inertia 42927637812.49553
Iteration 17, inertia 42918953697.60425
Iteration 18, inertia 42910865908.812904
Iteration 19, inertia 42903510906.49575
Iteration 20, inertia 42897207450.185616
Iteration 21, inertia 42891901811.14063
Iteration 22, inertia 42887619289.824394
Iteration 23, 

## Get histograms from kmeans clustering

In [24]:
print("Get histograms from kmeans clustering")
# Take the histogram length from the fitted model so it cannot drift from the
# number of clusters used when the vocabulary was built.
histograms, classes = get_histograms(train_dict, k_means.n_clusters, k_means)
# Make sure the output directory exists so the saves work on a fresh checkout.
os.makedirs('output/bovw', exist_ok=True)
np.save('output/bovw/train_visual_words.npy', histograms)
np.save('output/bovw/train_classes.npy', classes)

Get histograms from kmeans clustering


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/792 [00:00<?, ?it/s]

  0%|          | 0/800 [00:00<?, ?it/s]

In [19]:
# Reload the training histograms from the file written by the histogram cell above
# ('visual_words.npy' is never written by this notebook, so a fresh run would fail).
histograms = np.load('output/bovw/train_visual_words.npy')

## Prepare SVM Classifier