# Dogs vs Cats - features

[Kaggle](https://www.kaggle.com/c/dogs-vs-cats)

1 = dog

0 = cat

In [71]:
import pickle as pk
from os import listdir
from os.path import basename

import cv2
import numpy as np
import sklearn
from matplotlib import pyplot as plt

# Apply the 'ggplot' style sheet to the matplotlib figures drawn in this notebook.
plt.style.use('ggplot')
# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline

### Load training dataset

In [11]:
# Folder holding the training images; a relative path, so it is resolved
# against the notebook's working directory.
train_folder = 'data/train'

In [47]:
# One path per entry of the training folder, in the order listdir returns them.
imgs_paths = ['{}/{}'.format(train_folder, entry) for entry in listdir(train_folder)]

In [48]:
# Select a subset: keep only the first 100 paths (presumably to keep
# experimentation fast -- raise or remove the limit for a full run).
imgs_paths = imgs_paths[:100]

In [76]:
from os import listdir

def load_images(imgs_paths, gray=False):
    """Lazily read images from disk, one per path.

    Parameters
    ----------
    imgs_paths : iterable of str
        Paths of the image files to read.
    gray : bool, optional
        If True, decode each image as single-channel grayscale; otherwise
        use ``cv2.imread``'s default (colour) mode.

    Yields
    ------
    The value returned by ``cv2.imread`` for each path, in input order.
    ``cv2.imread`` signals an unreadable file by returning ``None`` instead
    of raising, so consumers must be prepared for ``None`` items.
    """
    for path in imgs_paths:
        # Each file is decoded exactly once, directly in the requested mode.
        if gray:
            # IMREAD_GRAYSCALE is the OpenCV 3+ name of the old
            # CV_LOAD_IMAGE_GRAYSCALE flag; it is passed positionally as
            # imread's `flags` argument.
            yield cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        else:
            yield cv2.imread(path)

In [77]:
# 1 = dog, 0 = cat. Only the file name is inspected, so a "dog" substring in
# a directory name cannot mislabel every image as a dog.
labels = [1 if "dog" in basename(path) else 0 for path in imgs_paths]

In [78]:
# Class balance check: number of selected images labelled 1 (dog).
print('Nr dogs:', labels.count(1))

Nr dogs: 52


In [79]:
# Class balance check: number of selected images labelled 0 (cat).
print('Nr cats:', labels.count(0))

Nr cats: 48


### Feature Extraction

In [80]:
# SIFT features detector and extractor
# NOTE(review): cv2.xfeatures2d is provided by the opencv-contrib build --
# confirm that build is the one installed.
sift = cv2.xfeatures2d.SIFT_create()

In [81]:
# SURF features detector and extractor
# NOTE(review): cv2.xfeatures2d is provided by the opencv-contrib build --
# confirm that build is the one installed.
surf = cv2.xfeatures2d.SURF_create()

In [82]:
# FAST features detector
# Used below only for keypoint detection; descriptors for its keypoints are
# computed by a separate extractor (BRISK).
fast = cv2.FastFeatureDetector_create()

In [83]:
# BRISK descriptors extractor
# Its compute method is paired below with the FAST detector's keypoints.
br = cv2.BRISK_create()

In [84]:
# FLANN matcher
# Algorithm ids follow FLANN's enum: 0 is the linear (brute-force) index and
# 1 is the randomized KD-tree index, so the KD-tree constant must be 1.
FLANN_INDEX_KDTREE = 1
index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
search_params = dict(checks=50)   # or pass empty dictionary

# NOTE(review): the KD-tree index is meant for float descriptors such as
# SIFT/SURF; binary descriptors such as BRISK are normally matched with an
# LSH index instead -- confirm before using this matcher on the BRISK output.
flann = cv2.FlannBasedMatcher(index_params, search_params)

In [91]:
def get_descriptors(detector, extractor=None):
    """Lazily run feature extraction over the images in ``imgs_paths``.

    The images are loaded in grayscale through ``load_images`` from the
    module-level ``imgs_paths`` list. That list is read when iteration
    starts, not when this function is called.

    Parameters
    ----------
    detector : callable
        Called as ``detector(img, None)``. With no ``extractor`` its return
        value is yielded as is (e.g. ``sift.detectAndCompute``); otherwise
        its return value is treated as the keypoints for ``extractor``.
    extractor : callable, optional
        Called as ``extractor(img, keypoints)``; its return value is yielded
        (e.g. ``br.compute``).

    Yields
    ------
    One result per image, in the order of ``imgs_paths``. For OpenCV's
    ``detectAndCompute`` / ``compute`` this is presumably a
    ``(keypoints, descriptors)`` pair -- confirm against the OpenCV version
    in use.
    """
    for img in load_images(imgs_paths, gray=True):
        # Identity test: None is a singleton, and `is` cannot be affected by
        # a custom __eq__ on the callable.
        if extractor is None:
            yield detector(img, None)
        else:
            keypoints = detector(img, None)
            yield extractor(img, keypoints)

In [92]:
# get_descriptors is a generator function: nothing is computed here, images
# are processed on iteration, and the result can be consumed only once.
imgs_sift_des = get_descriptors(sift.detectAndCompute)

In [93]:
# Lazy, single-use generator of SURF results (one item per image).
imgs_surf_des =  get_descriptors(surf.detectAndCompute)

In [95]:
# Lazy, single-use generator: FAST detects the keypoints, then BRISK computes
# descriptors for them.
imgs_fast_des = get_descriptors(detector=fast.detect, extractor=br.compute)