# Dogs vs Cats - features

[Kaggle](https://www.kaggle.com/c/dogs-vs-cats)

1 = dog

0 = cat

Notes for report:
    analyse how varying nr_features affects the results
    try different detectors

In [3]:
import cv2
from matplotlib import pyplot as plt
import sklearn
import numpy as np
import pickle as pk
from os import listdir

# Use matplotlib's 'ggplot' style sheet for figures.
plt.style.use('ggplot')
# IPython magic (not plain Python): draw figures inline in the notebook.
%matplotlib inline

In [None]:
# NOTE(review): presumably the number of visual words in the bag-of-words
# vocabulary; no cell visible here reads this constant -- confirm.
NR_WORDS = 500

In [1]:
from os import listdir

def load_images(imgs_paths, gray=False):
    """Lazily read images from disk, one per path.

    Args:
        imgs_paths: Iterable of image file paths.
        gray: If True, read every image with ``cv2.IMREAD_GRAYSCALE``;
            otherwise use ``cv2.imread``'s default read mode.

    Yields:
        Whatever ``cv2.imread`` returns for each path, in input order.
        NOTE: ``cv2.imread`` reports an unreadable file by returning ``None``
        instead of raising, so callers may want to check for it.
    """
    for path in imgs_paths:
        # Decode each file exactly once; the earlier version also read the
        # image into an unused local before reading it again for the yield.
        if gray:
            yield cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        else:
            yield cv2.imread(path)

## Feature Extraction

#### Feature detectors, descriptors and matcher

In [15]:
# SIFT feature detector and extractor.
# OpenCV >= 4.4 ships SIFT in the main module (cv2.SIFT_create); older builds
# only provide it through the contrib module cv2.xfeatures2d. Support both so
# the notebook does not fail on installs without the contrib package.
if hasattr(cv2, 'SIFT_create'):
    sift = cv2.SIFT_create()
else:
    sift = cv2.xfeatures2d.SIFT_create()

In [19]:
# FLANN matcher
# In FLANN's algorithm enum 0 is the linear (brute-force) index and 1 is the
# KD-tree index. The constant was previously 0, so the matcher ran a linear
# search and the `trees` parameter had no effect.
# NOTE(review): if the saved vocabulary/classifier were produced with the old
# value, re-check prediction quality after this change.
FLANN_INDEX_KDTREE = 1
index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
search_params = dict(checks=50)   # or pass empty dictionary

flann = cv2.FlannBasedMatcher(index_params, search_params)

#### Bag of Words

In [1]:
def train_bow(imgs, detector, matcher, extractor=None):
    """Build a bag-of-words image descriptor extractor from a saved vocabulary.

    Despite the name, nothing is trained here: the vocabulary is unpickled
    from the file ``'vocabulary'`` in the current working directory.

    Args:
        imgs: Not used by this function; kept so existing calls still work.
        detector: Used as the descriptor extractor when ``extractor`` is None.
        matcher: Descriptor matcher handed to ``cv2.BOWImgDescriptorExtractor``.
        extractor: Descriptor extractor; defaults to ``detector``.

    Returns:
        A ``cv2.BOWImgDescriptorExtractor`` with the loaded vocabulary set.
    """
    if extractor is None:
        extractor = detector

    bow_extractor = cv2.BOWImgDescriptorExtractor(extractor, matcher)

    # The with-block closes the file handle (it used to be left open).
    # SECURITY NOTE: unpickling runs arbitrary code from the file; only load
    # a vocabulary file you created yourself.
    with open('vocabulary', 'rb') as vocabulary_file:
        vocabulary = pk.load(vocabulary_file)

    bow_extractor.setVocabulary(vocabulary)

    return bow_extractor

In [38]:
# The same SIFT object serves as both keypoint detector and descriptor extractor.
detector = extractor = sift

In [39]:
# Build the bag-of-words extractor from the SIFT extractor and FLANN matcher.
# NOTE(review): imgs_paths is not defined in any cell visible here, and
# train_bow never reads its first argument -- confirm what should be passed.
sift_bow_extractor = train_bow(imgs_paths, detector, flann, extractor=extractor)

In [43]:
# NOTE(review): the notebook header states 1 = dog, 0 = cat. If this list is
# indexed by class label, the order would need to be ['cat', 'dog'] -- confirm
# against the code that consumes it (not visible here).
target_names = ['dog', 'cat']

## Prediction

In [92]:
# Load the previously pickled classifier into best_clf. pickle.load takes the
# open file as its only positional argument and returns the object; the old
# call passed the still-undefined best_clf as the file and never assigned it.
# SECURITY NOTE: unpickling runs arbitrary code; only load a trusted file.
with open('best_clf.p', 'rb') as clf_file:
    best_clf = pk.load(clf_file)

## Testing

In [125]:
def save_labels_csv(labels):
    """Write labels to ``result.csv`` with 1-based row ids.

    The file is written to the current working directory with the header
    ``id,label`` followed by one ``<id>,<label>`` row per entry, formatted
    as integers.

    Args:
        labels: Sequence of labels. Accepts a flat sequence of scalars as
            well as a sequence of one-element arrays. An empty sequence
            produces a file with the header only.
    """
    # reshape(-1, 1) turns both a flat list (n,) and a list of one-element
    # arrays (n, 1) into a single column; np.asmatrix made a 1 x n row out of
    # a flat list, which could not be joined with the id column.
    label_column = np.asarray(labels).reshape(-1, 1)
    id_column = np.arange(1, label_column.shape[0] + 1).reshape(-1, 1)
    indexed_labels = np.concatenate((id_column, label_column), axis=1)

    np.savetxt('result.csv',
               indexed_labels,
               fmt='%d',
               delimiter=',',
               header='id,label',
               comments='')

In [111]:
# Folder with the test images, relative to the working directory. Keep the
# trailing slash: file paths are built from it by plain string concatenation.
test_folder = 'data/test1/'

In [130]:
# os.listdir returns names in arbitrary order, but save_labels_csv numbers its
# rows by position, so the order must be fixed. Sorting by (length, name) is
# numeric order for names like "1.jpg", "2.jpg", ..., "10.jpg".
# NOTE(review): assumes the test files are named "<id>.<ext>" so that row i
# corresponds to image id i -- confirm against the contents of the folder.
test_imgs_paths = [test_folder + filename
                   for filename in sorted(listdir(test_folder),
                                          key=lambda name: (len(name), name))]

In [131]:
# Keep only the first 10 paths, so only 10 images get classified below.
# NOTE(review): looks like a debugging shortcut -- remove this cell to
# process the whole test folder.
test_imgs_paths = test_imgs_paths[:10]

In [132]:
# Classify every test image and collect the classifier outputs in `pred`.
test_imgs = load_images(test_imgs_paths, gray=True)  # lazy generator, grayscale reads

pred = []
for img in test_imgs:
    # detect keypoints -> bag-of-words descriptor -> classifier prediction
    pred.append(
        best_clf.predict(
            sift_bow_extractor.compute(img, detector.detect(img))
        )
    )

In [133]:
# Write the collected predictions to result.csv (header "id,label", ids from 1).
save_labels_csv(pred)

[[ 1  1]
 [ 2  1]
 [ 3  1]
 [ 4  1]
 [ 5  1]
 [ 6  1]
 [ 7  1]
 [ 8  1]
 [ 9  1]
 [10  1]]
