<h1> Objective:- Assessment Task 4.1P: Object detection </h1>

<div style="text-align: right"> Done by: <b>Karan Murjani </b> </div>
<div style="text-align: right"> StudentId: <b> 2210470832 </b></div>

### 1. Bag-of-Words (BoW) model

In [2]:
#Importing all library
import pandas as pd
import numpy as np
import cv2 as cv
from sklearn.cluster import KMeans
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm
from sklearn.metrics import classification_report,confusion_matrix
import os
import pickle

In [3]:
#Defining class

class Dictionary(object):
    def __init__(self, name, img_filenames, num_words):
        self.name = name #name of your dictionary
        self.img_filenames = img_filenames #list of image filenames
        self.num_words = num_words #the number of words

        self.training_data = [] #this is the training data required by the K-Means algorithm
        self.words = [] #list of words, which are the centroids of clusters

    def learn(self):
        sift = cv.SIFT_create()
        num_keypoints = [] #this is used to store the number of keypoints in each image

     #load training images and compute SIFT descriptors
        for filename in self.img_filenames:
            img = cv.imread(filename)
            img_gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
            list_des = sift.detectAndCompute(img_gray, None)[1]
            if list_des is None:
                num_keypoints.append(0)
            else:
                num_keypoints.append(len(list_des))
                for des in list_des:
                    self.training_data.append(des)

        #cluster SIFT descriptors using K-means algorithm
        kmeans = KMeans(self.num_words)
        kmeans.fit(self.training_data)
        self.words = kmeans.cluster_centers_

        #create word histograms for training images
        training_word_histograms = [] #list of word histograms of all training images
        index = 0
        for i in range(0, len(self.img_filenames)):
            #for each file, create a histogram
            histogram = np.zeros(self.num_words, np.float32)
            #if some keypoints exist
            if num_keypoints[i] > 0:
                for j in range(0, num_keypoints[i]):
                    histogram[kmeans.labels_[j + index]] += 1
                index += num_keypoints[i]
                histogram /= num_keypoints[i]
                training_word_histograms.append(histogram)

        return training_word_histograms

    def create_word_histograms(self, img_filenames):
        sift = cv.SIFT_create()
        histograms = []

        for filename in img_filenames:
            img = cv.imread(filename)
            img_gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
            descriptors = sift.detectAndCompute(img_gray, None)[1]

            histogram = np.zeros(self.num_words, np.float32) #word histogram for the input image

            if descriptors is not None:
                for des in descriptors:
                #find the best matching word
                    min_distance = 1111111 #this can be any large number
                    matching_word_ID = -1 #initial matching_word_ID=-1 means no matching

                    for i in range(0, self.num_words): #search for the best matching word
                        distance = np.linalg.norm(des - self.words[i])
                        if distance < min_distance:
                            min_distance = distance
                            matching_word_ID = i

                    histogram[matching_word_ID] += 1

                histogram /= len(descriptors) #normalise histogram to frequencies

            histograms.append(histogram)

        return histograms

In [4]:
#Preparing training data
foods = ['Cakes', 'Pasta', 'Pizza']
path = 'FoodImages/'
training_file_names = []
training_food_labels = []

for i in range(0, len(foods)):
    sub_path = path + 'Train/' + foods[i] + '/'
    sub_file_names = [os.path.join(sub_path, f) for f in os.listdir(sub_path)]
    sub_food_labels = [i] * len(sub_file_names) #create a list of N elements, all are i
    training_file_names += sub_file_names
    training_food_labels += sub_food_labels

print(training_file_names)
print(training_food_labels)

['FoodImages/Train/Cakes/cake20.jpg', 'FoodImages/Train/Cakes/cake21.jpg', 'FoodImages/Train/Cakes/cake23.jpg', 'FoodImages/Train/Cakes/cake22.jpg', 'FoodImages/Train/Cakes/cake26.jpg', 'FoodImages/Train/Cakes/cake27.jpg', 'FoodImages/Train/Cakes/cake9.jpg', 'FoodImages/Train/Cakes/cake25.jpg', 'FoodImages/Train/Cakes/cake19.jpg', 'FoodImages/Train/Cakes/cake18.jpg', 'FoodImages/Train/Cakes/cake30.jpg', 'FoodImages/Train/Cakes/cake24.jpg', 'FoodImages/Train/Cakes/cake8.jpg', 'FoodImages/Train/Cakes/cake5.jpg', 'FoodImages/Train/Cakes/cake29.jpg', 'FoodImages/Train/Cakes/cake15.jpg', 'FoodImages/Train/Cakes/cake14.jpg', 'FoodImages/Train/Cakes/cake28.jpg', 'FoodImages/Train/Cakes/cake4.jpg', 'FoodImages/Train/Cakes/cake6.jpg', 'FoodImages/Train/Cakes/cake16.jpg', 'FoodImages/Train/Cakes/cake17.jpg', 'FoodImages/Train/Cakes/cake7.jpg', 'FoodImages/Train/Cakes/cake13.jpg', 'FoodImages/Train/Cakes/cake3.png', 'FoodImages/Train/Cakes/cake2.png', 'FoodImages/Train/Cakes/cake12.jpg', 'FoodIma

In [4]:
num_words = 50
dictionary_name = 'food'
dictionary = Dictionary(dictionary_name, training_file_names, num_words) #Creating an instance

In [5]:
#Calling function for an object
training_word_histograms = dictionary.learn()

In [6]:
#save dictionary
with open('food_dictionary.dic', 'wb') as f: #'wb' is for binary write
    pickle.dump(dictionary, f)

In [7]:
with open('food_dictionary.dic', 'rb') as f: #'rb' is for binary read
    dictionary = pickle.load(f)

### 2) KNN

In [8]:
num_nearest_neighbours = 5 #number of neighbours

knn = KNeighborsClassifier(n_neighbors = num_nearest_neighbours)
knn.fit(training_word_histograms, training_food_labels)

KNeighborsClassifier()

In [9]:
#Prediction of one image
test_file_name = ['FoodImages/Test/Pasta/pasta35.jpg']
word_histogram_img = dictionary.create_word_histograms(test_file_name)
predicted_food_label = knn.predict(word_histogram_img)
print('Food label: ', predicted_food_label)

Food label:  [1]


In [10]:
#Preparing testing data
testing_file_names = []
testing_food_labels = []

for i in range(0, len(foods)):
    sub_path = path + 'Test/' + foods[i] + '/'
    sub_file_names = [os.path.join(sub_path, f) for f in os.listdir(sub_path)]
    
    sub_food_labels = [i] * len(sub_file_names) #create a list of N elements, all are i
    
    testing_file_names += sub_file_names
    testing_food_labels += sub_food_labels

print(testing_file_names)
print(testing_food_labels)

['FoodImages/Test/Cakes/cake43.jpg', 'FoodImages/Test/Cakes/cake57.jpg', 'FoodImages/Test/Cakes/cake56.jpg', 'FoodImages/Test/Cakes/cake42.jpg', 'FoodImages/Test/Cakes/cake54.jpg', 'FoodImages/Test/Cakes/cake40.jpg', 'FoodImages/Test/Cakes/cake41.jpg', 'FoodImages/Test/Cakes/cake55.jpg', 'FoodImages/Test/Cakes/cake51.jpg', 'FoodImages/Test/Cakes/cake45.jpg', 'FoodImages/Test/Cakes/cake44.jpg', 'FoodImages/Test/Cakes/cake50.jpg', 'FoodImages/Test/Cakes/cake46.jpg', 'FoodImages/Test/Cakes/cake52.jpg', 'FoodImages/Test/Cakes/cake53.jpg', 'FoodImages/Test/Cakes/cake47.jpg', 'FoodImages/Test/Cakes/cake34.jpg', 'FoodImages/Test/Cakes/cake35.jpg', 'FoodImages/Test/Cakes/cake37.jpg', 'FoodImages/Test/Cakes/cake36.jpg', 'FoodImages/Test/Cakes/cake32.jpg', 'FoodImages/Test/Cakes/cake33.jpg', 'FoodImages/Test/Cakes/cake31.jpg', 'FoodImages/Test/Cakes/cake38.jpg', 'FoodImages/Test/Cakes/cake39.jpg', 'FoodImages/Test/Cakes/cake49.jpg', 'FoodImages/Test/Cakes/cake60.jpg', 'FoodImages/Test/Cakes/cake

In [11]:
#Prediction for entire folder
word_histograms = dictionary.create_word_histograms(testing_file_names)
predicted_food_labels = knn.predict(word_histograms)

#Evaluation
num_correct_predictions = np.sum(predicted_food_labels == testing_food_labels)
recognition_acc = num_correct_predictions/len(testing_food_labels)
print('Recognition Accuracy:', recognition_acc * 100)

Recognition Accuracy: 74.44444444444444


In [12]:
#Confusion matrix
cm = confusion_matrix(testing_food_labels, predicted_food_labels)
print(cm)

[[18  4  8]
 [ 0 26  4]
 [ 1  6 23]]


In [13]:
#Measuring accuracy of different number of nearest neighbours
num_nearest_neighbours = [5, 10,15,20,25,30]

for k in num_nearest_neighbours:
    #Train a model
    knn = KNeighborsClassifier(n_neighbors = k)
    knn.fit(training_word_histograms, training_food_labels)

    #Prediction
    predicted_food_labels = knn.predict(word_histograms)

    #Evaluation
    num_correct_predictions = np.sum(predicted_food_labels == testing_food_labels)
    recognition_acc_k = num_correct_predictions/len(testing_food_labels)
    print('Neighbours:', k)
    print('Recognition Accuracy:', recognition_acc_k * 100)
    
    #Confusion matrix
    cm = confusion_matrix(testing_food_labels, predicted_food_labels)
    print('Confusion matrix:')
    print(cm)
    print('----------------------------------------')

Neighbours: 5
Recognition Accuracy: 74.44444444444444
Confusion matrix:
[[18  4  8]
 [ 0 26  4]
 [ 1  6 23]]
----------------------------------------
Neighbours: 10
Recognition Accuracy: 71.11111111111111
Confusion matrix:
[[16  6  8]
 [ 0 27  3]
 [ 0  9 21]]
----------------------------------------
Neighbours: 15
Recognition Accuracy: 67.77777777777779
Confusion matrix:
[[15  6  9]
 [ 0 26  4]
 [ 0 10 20]]
----------------------------------------
Neighbours: 20
Recognition Accuracy: 66.66666666666666
Confusion matrix:
[[13  8  9]
 [ 0 26  4]
 [ 0  9 21]]
----------------------------------------
Neighbours: 25
Recognition Accuracy: 65.55555555555556
Confusion matrix:
[[10 10 10]
 [ 0 27  3]
 [ 0  8 22]]
----------------------------------------
Neighbours: 30
Recognition Accuracy: 63.33333333333333
Confusion matrix:
[[ 9  9 12]
 [ 0 27  3]
 [ 0  9 21]]
----------------------------------------


### 3) SVM

In [14]:
#Train a model with C=50
svm_classifier = svm.SVC(C = 50, kernel = 'linear')
svm_classifier.fit(training_word_histograms, training_food_labels)

SVC(C=50, kernel='linear')

In [15]:
predicted_food_labels = svm_classifier.predict(word_histograms)

#Evaluation
num_correct_predictions = np.sum(predicted_food_labels == testing_food_labels)
recognition_acc = num_correct_predictions/len(testing_food_labels)
print('Recognition Accuracy:', recognition_acc * 100)

Recognition Accuracy: 85.55555555555556


In [16]:
#Confusion matrix
cm = confusion_matrix(testing_food_labels, predicted_food_labels)
print(cm)

[[27  2  1]
 [ 0 25  5]
 [ 1  4 25]]


In [17]:
#Vary C in the range [10, 20, 30, 40, 50] and measure the corresponding accuracies. What is the best value for C?

C = [10,20,30,40,50]

for i in C:
    #Train a model
    svm_classifier = svm.SVC(C = i, kernel='linear')
    svm_classifier.fit(training_word_histograms, training_food_labels)

    #Prediction
    predicted_food_labels = svm_classifier.predict(word_histograms)

    #Evaluation
    num_correct_predictions = np.sum(predicted_food_labels == testing_food_labels)
    recognition_acc_i = num_correct_predictions/len(testing_food_labels)
    print('C:', i)
    print('Recognition Accuracy:', recognition_acc_i * 100)
    
    #Confusion matrix
    cm = confusion_matrix(testing_food_labels, predicted_food_labels)
    print('Confusion matrix:')
    print(cm)
    print('----------------------------------------')

C: 10
Recognition Accuracy: 81.11111111111111
Confusion matrix:
[[24  3  3]
 [ 0 22  8]
 [ 1  2 27]]
----------------------------------------
C: 20
Recognition Accuracy: 83.33333333333334
Confusion matrix:
[[26  2  2]
 [ 0 22  8]
 [ 1  2 27]]
----------------------------------------
C: 30
Recognition Accuracy: 82.22222222222221
Confusion matrix:
[[26  2  2]
 [ 0 23  7]
 [ 1  4 25]]
----------------------------------------
C: 40
Recognition Accuracy: 84.44444444444444
Confusion matrix:
[[26  2  2]
 [ 0 25  5]
 [ 1  4 25]]
----------------------------------------
C: 50
Recognition Accuracy: 85.55555555555556
Confusion matrix:
[[27  2  1]
 [ 0 25  5]
 [ 1  4 25]]
----------------------------------------


### 4) AdaBoost

In [18]:
from sklearn.ensemble import AdaBoostClassifier

adb_classifier = AdaBoostClassifier(n_estimators = 150, random_state = 123)
adb_classifier.fit(training_word_histograms, training_food_labels)

AdaBoostClassifier(n_estimators=150, random_state=123)

In [19]:
predicted_food_labels = adb_classifier.predict(word_histograms)

#Evaluation
num_correct_predictions = np.sum(predicted_food_labels == testing_food_labels)
recognition_acc = num_correct_predictions/len(testing_food_labels)
print('Recognition Accuracy:', recognition_acc * 100)

Recognition Accuracy: 71.11111111111111


In [20]:
#Confusion matrix
cm = confusion_matrix(testing_food_labels, predicted_food_labels)
print(cm)

[[19  2  9]
 [ 0 21  9]
 [ 0  6 24]]


In [21]:
#Vary n_estimators in the range [50, 100, 150, 200, 250] and measure accuracies. What is the best value for n_estimators?

n_estimators = [50, 100, 150, 200, 250]

for n in n_estimators:
    #Train a model
    adb_classifier = AdaBoostClassifier(n_estimators = n, random_state = 123)
    adb_classifier.fit(training_word_histograms, training_food_labels)

    #Prediction
    predicted_food_labels = adb_classifier.predict(word_histograms)

    #Evaluation
    num_correct_predictions = np.sum(predicted_food_labels == testing_food_labels)
    recognition_acc_n = num_correct_predictions/len(testing_food_labels)
    print('N-Estimators:', n)
    print('Recognition Accuracy:', recognition_acc_n * 100)
    
    #Confusion matrix
    cm = confusion_matrix(testing_food_labels, predicted_food_labels)
    print('Confusion matrix:')
    print(cm)
    print('----------------------------------------')

N-Estimators: 50
Recognition Accuracy: 71.11111111111111
Confusion matrix:
[[22  0  8]
 [ 0 20 10]
 [ 2  6 22]]
----------------------------------------
N-Estimators: 100
Recognition Accuracy: 72.22222222222221
Confusion matrix:
[[20  1  9]
 [ 0 21  9]
 [ 0  6 24]]
----------------------------------------
N-Estimators: 150
Recognition Accuracy: 71.11111111111111
Confusion matrix:
[[19  2  9]
 [ 0 21  9]
 [ 0  6 24]]
----------------------------------------
N-Estimators: 200
Recognition Accuracy: 71.11111111111111
Confusion matrix:
[[20  2  8]
 [ 0 22  8]
 [ 1  7 22]]
----------------------------------------
N-Estimators: 250
Recognition Accuracy: 75.55555555555556
Confusion matrix:
[[22  3  5]
 [ 0 24  6]
 [ 0  8 22]]
----------------------------------------
