In [53]:
import cv2
import numpy as np
import cPickle
from sklearn.cluster import MiniBatchKMeans
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import time
from matplotlib import pyplot as plt

Let us first read the train and test files

In [54]:
def _load_pickle(path):
    """Return the object unpickled from `path`, closing the file afterwards."""
    # NOTE(review): unpickling can execute arbitrary code; only load .dat
    # files that come from a trusted source.
    # Mode 'r' is kept from the original code. NOTE(review): 'rb' is the usual
    # mode for pickles -- confirm how these files were written before changing.
    with open(path, 'r') as handle:
        return cPickle.load(handle)


# Image paths and their labels for the train and test splits.
train_images_filenames = _load_pickle('train_images_filenames.dat')
test_images_filenames = _load_pickle('test_images_filenames.dat')
train_labels = _load_pickle('train_labels.dat')
test_labels = _load_pickle('test_labels.dat')

In [55]:
# Display one entry of the training filename list as a quick sanity check.
train_images_filenames[12]

'../../Databases/MIT_split/train/Opencountry/fie26.jpg'

We create a SIFT object detector and descriptor

In [56]:
# Upper bound on the number of SIFT features retained per image.
N_SIFT_FEATURES = 900
SIFTdetector = cv2.xfeatures2d.SIFT_create(nfeatures=N_SIFT_FEATURES)

# Wall-clock reference; the elapsed time is printed after the test accuracy.
start = time.time()

We compute the SIFT descriptors for all the train images and subsequently build a numpy array with all the descriptors stacked together

In [None]:
# Per-image SIFT descriptors (one entry per train image, in the same order as
# train_images_filenames) and the label of each of those images.
Train_descriptors = []
Train_label_per_descriptor = []

for filename, labels in zip(train_images_filenames, train_labels):
    ima = cv2.imread(filename)
    if ima is None:
        # cv2.imread returns None instead of raising when the file cannot be
        # read; fail here with the offending path rather than inside cvtColor.
        raise IOError('Could not read image: ' + filename)
    gray = cv2.cvtColor(ima, cv2.COLOR_BGR2GRAY)

    # Dense-SIFT alternative (disabled): sample keypoints on a regular grid.
    # cv2.KeyPoint expects (x, y, size), i.e. (column, row, size).
#     step = 40
#     kpt = []
#
#     for row in xrange(step // 2, ima.shape[0], step):
#         for col in xrange(step // 2, ima.shape[1], step):
#             kpt.append(cv2.KeyPoint(float(col), float(row), float(step)))
#
#     kpt,des=SIFTdetector.compute(gray,kpt)

    kpt, des = SIFTdetector.detectAndCompute(gray, None)

    # Debug visualisation of the keypoints (disabled).
#     outImage = ima
#     outImage = cv2.drawKeypoints(ima, kpt, outImage)
#     cv2.imshow("Image", outImage)
#     cv2.waitKey(0)
#     cv2.destroyAllWindows()

    # `des` is appended even when it is None (no keypoints found) so that the
    # list stays aligned, index by index, with train_labels.
    Train_descriptors.append(des)
    Train_label_per_descriptor.append(labels)

# Stack every descriptor into one array for clustering; images without
# descriptors contribute nothing and would otherwise make np.vstack fail.
D = np.vstack([d for d in Train_descriptors if d is not None])

We now compute a k-means clustering on the descriptor space

In [None]:
# Codebook size: number of visual words (k-means clusters).
k = 170

# Mini-batch k-means over the stacked train descriptors; the fixed
# random_state keeps the codebook reproducible between runs.
codebook = MiniBatchKMeans(
    n_clusters=k,
    batch_size=k * 20,
    reassignment_ratio=10**-4,
    compute_labels=False,
    verbose=False,
    random_state=42
)
codebook.fit(D)

And, for each train image, we project each keypoint descriptor to its closest visual word. We represent each of the images with the frequency of each visual word.

In [None]:
# One row per train image: histogram of visual-word occurrences.
visual_words = np.zeros((len(Train_descriptors), k), dtype=np.float32)
for i, descriptors in enumerate(Train_descriptors):
    # An entry can be None (or empty) when no keypoints were found in that
    # image; codebook.predict would fail on it, so its histogram stays zero.
    if descriptors is None or len(descriptors) == 0:
        continue
    words = codebook.predict(descriptors)
    visual_words[i, :] = np.bincount(words, minlength=k)

We build a k-NN classifier and train it with the visual-word histograms of the train images

In [None]:
# 5-nearest-neighbour classifier with Euclidean distance, using all CPU cores,
# fitted on the train visual-word histograms.
knn = KNeighborsClassifier(
    metric='euclidean',
    n_neighbors=5,
    n_jobs=-1
)
knn.fit(visual_words, train_labels)

Finally, we compute the test descriptors, build their visual-word histograms, and measure the accuracy of the model

In [None]:
# One row per test image: histogram of visual-word occurrences.
visual_words_test = np.zeros((len(test_images_filenames), k), dtype=np.float32)
for i, filename in enumerate(test_images_filenames):
    ima = cv2.imread(filename)
    if ima is None:
        # cv2.imread returns None instead of raising when the file cannot be
        # read; fail here with the offending path rather than inside cvtColor.
        raise IOError('Could not read image: ' + filename)
    gray = cv2.cvtColor(ima, cv2.COLOR_BGR2GRAY)

    # Dense-SIFT alternative (disabled): sample keypoints on a regular grid.
    # The grid loops use row/col so they cannot overwrite the image index `i`;
    # cv2.KeyPoint expects (x, y, size), i.e. (column, row, size).
#     step = 40
#     kpt = []
#
#     for row in xrange(step // 2, ima.shape[0], step):
#         for col in xrange(step // 2, ima.shape[1], step):
#             kpt.append(cv2.KeyPoint(float(col), float(row), float(step)))
#
#     kpt,des=SIFTdetector.compute(gray,kpt)

    kpt, des = SIFTdetector.detectAndCompute(gray, None)
    # No keypoints found: `des` is None and predict would fail, so the
    # histogram of this image is left as all zeros.
    if des is None or len(des) == 0:
        continue
    words = codebook.predict(des)
    visual_words_test[i, :] = np.bincount(words, minlength=k)

In [None]:
# Mean accuracy of the k-NN on the test histograms, expressed as a percentage.
accuracy = knn.score(visual_words_test, test_labels) * 100
print(accuracy)

# Seconds elapsed since `start` was recorded.
end = time.time()
print(end - start)

Dimensionality reduction, with PCA and LDA

In [None]:
# Project the histograms onto 64 principal components learnt from the train set.
pca = PCA(n_components=64)
VWpca = pca.fit_transform(visual_words)

# Same k-NN configuration as before, now fitted in the reduced space.
knnpca = KNeighborsClassifier(
    metric='euclidean',
    n_neighbors=5,
    n_jobs=-1
)
knnpca.fit(VWpca, train_labels)

# The test histograms go through the projection fitted on the train data.
vwtestpca = pca.transform(visual_words_test)
accuracy = knnpca.score(vwtestpca, test_labels) * 100
print(accuracy)

In [None]:
# LDA can produce at most min(n_classes - 1, n_features) components; asking
# for more raises a ValueError in recent scikit-learn releases, so the
# requested 64 is clamped to what the data allows.
n_classes = len(set(train_labels))
lda_components = min(64, n_classes - 1, visual_words.shape[1])

lda = LinearDiscriminantAnalysis(n_components=lda_components)
VWlda = lda.fit_transform(visual_words, train_labels)

# Same k-NN configuration as before, now fitted in the LDA space.
knnlda = KNeighborsClassifier(n_neighbors=5, n_jobs=-1, metric='euclidean')
knnlda.fit(VWlda, train_labels)

# The test histograms go through the projection fitted on the train data.
vwtestlda = lda.transform(visual_words_test)
accuracy = 100 * knnlda.score(vwtestlda, test_labels)
print(accuracy)

**RESULTS AND DISCUSSION**

We test different numbers of local features.
We keep the rest of the parameters at their initial values (SIFT, codebook size k = 128, neighbours k = 5, Euclidean distance, no reduction).

In [None]:
# Recorded results: number of SIFT features per image and the test accuracy
# (%) obtained with each setting.
number = [10, 100, 150, 200, 250, 300, 350, 400, 500, 600, 700, 800, 850, 900, 950, 1000, 1200, 1400]
accuracy = [22.18, 39.03, 47.21, 52.42, 53.53, 52.42, 55.64, 57.5, 54.77, 58.24, 56.88, 58.11, 57.74, 61.34, 57.49, 58.99, 58.36, 58.49]

# Locate the best run from the data instead of hard-coding its position, so
# the highlighted point stays correct if the lists are edited.
best = int(np.argmax(accuracy))

plt.scatter(number, accuracy)
plt.plot(number[best], accuracy[best], 'g*')  # green star marks the best run
plt.ylim((0, 100))
plt.xlabel("Number of features")
plt.ylabel("Accuracy (%)")
plt.show()

In [None]:
# Recorded computation time (s) for each setting in `number`.
# Named `computation_time` rather than `time` so that the `time` module
# imported at the top of the notebook is not shadowed; with the list bound to
# `time`, re-running any cell that calls time.time() fails.
computation_time = [53, 53, 53, 54, 57, 57, 59, 61, 63, 63, 63, 64, 64, 64, 65, 65, 63, 63]
plt.scatter(number, computation_time)
plt.ylim((0, 70))
plt.xlabel("Number of features")
plt.ylabel("Computation time (s)")
plt.show()

We got the maximum accuracy with 900 features. The computation time is very similar across the different numbers of features. Therefore, we conclude that, using the SIFT feature detector, codebook size 128, 5 k-NN neighbours, Euclidean distance and no reduction, the best performance is obtained with approximately 900 features.

We now use dense SIFT instead of detected keypoints. This way, the keypoints are placed on a regular grid and are therefore evenly distributed over the image, not only at the interest points. There is a parameter (step size) we can vary to create different distributions. We again keep the rest of the parameters as before.

In [None]:
print("Example of dense SIFT")
filename = train_images_filenames[1]
ima = cv2.imread(filename)
gray = cv2.cvtColor(ima, cv2.COLOR_BGR2GRAY)
step = 10

# Regular grid of keypoints, one every `step` pixels, starting half a step in
# from the border. cv2.KeyPoint takes (x, y, size): x is the column index
# (bounded by shape[1]) and y is the row index (bounded by shape[0]).
# `//` keeps the offset an integer regardless of the division semantics in use.
kpt = [cv2.KeyPoint(float(x), float(y), float(step))
       for y in xrange(step // 2, ima.shape[0], step)
       for x in xrange(step // 2, ima.shape[1], step)]

kpt, des = SIFTdetector.compute(gray, kpt)

# Draw into a fresh output image (None) so the loaded image is not aliased.
keypointsIma = cv2.drawKeypoints(ima, kpt, None)
# OpenCV stores colour images as BGR while matplotlib expects RGB.
plt.imshow(cv2.cvtColor(keypointsIma, cv2.COLOR_BGR2RGB))
plt.title('Image with dense keypoints')
plt.show()

In [None]:
# Recorded dense-SIFT results: grid step size and the test accuracy (%)
# obtained with each one.
step = [5, 10, 15, 20, 40, 60, 100]
accuracy = [9.91, 9.91, 9.91, 9.91, 14.62, 14.62, 11.64]

fig, ax = plt.subplots()
ax.scatter(step, accuracy)
ax.set_ylim((0, 100))
ax.set_xlabel("Step size")
ax.set_ylabel("Accuracy (%)")
plt.show()

The dense SIFT keypoint sampling produces much worse results than the standard SIFT keypoint detector because the keypoints are no longer restricted to the interesting regions.