In [3]:
import matplotlib.pyplot as plt
import numpy as np
import glob

In [4]:
#imports for the Gabor filter
from scipy import ndimage as ndi
from skimage.filters import gabor_kernel
from scipy.stats import kurtosis, skew

Steps
1. load Santa and Nosanta images
2. use Gabor filters to keep the number of features constant
3. train test split
4. train  model
5. test model

In [27]:
# first we will define a function that will use Gabor filters to reduce the images to a constant set of features
#define Gabor features
def compute_feats(image, kernels):
    feats = np.zeros((len(kernels), 2), dtype=np.double)
    for k, kernel in enumerate(kernels):
        filtered = ndi.convolve(image, kernel, mode='wrap')
        #feats[k, 0] = filtered.mean()
        #feats[k, 1] = filtered.var()
        feats[k, 0] = kurtosis(np.reshape(filtered,-1))
        feats[k, 1] = skew(np.reshape(filtered,-1))
    return feats

In [28]:
# prepare Gabor filter bank kernels
kernels = []
for sigma in (1,4):
    theta = np.pi
    for frequency in (0.05, 0.25):
        print('theta = {}, sigma = {} frequency = {}'.format(theta, sigma, frequency) )
        kernel = np.real(gabor_kernel(frequency,theta=theta,sigma_x=sigma, sigma_y=sigma))
        kernels.append(kernel)
                         
np.shape(kernels)

theta = 3.141592653589793, sigma = 1 frequency = 0.05
theta = 3.141592653589793, sigma = 1 frequency = 0.25
theta = 3.141592653589793, sigma = 4 frequency = 0.05
theta = 3.141592653589793, sigma = 4 frequency = 0.25


(4,)

In [20]:
#load Santa images into an array
santafolder = './Santa_Claus/'
santa_images = glob.glob('{}*.jpg'.format(santafolder))

In [21]:
#load Santa images into an array
nosantafolder = './nosanta/'
nosanta_images = glob.glob('{}*.jpg'.format(nosantafolder))

Before staring the ML part, there is some preprocessing that needs to be done. The main issue with this dataset is that all the images are of random sizes. To use this as a train/test dataset, we can do two things:

1. Use Convolutional Neural Networks 
2. Use an image feature reduction technique.

We will use 2 in this tutorial (as I do not know how to implement a CNN!). The image reduction technique we will use is Gabor Filters to reduce the images to 8 features. See this post for details: http://scikit-image.org/docs/dev/auto_examples/features_detection/plot_gabor.html

**Note this will take some time!**

In [38]:
santa_feats = np.zeros((len(santa_images),9))
for i, image in enumerate(santa_images):
    im = plt.imread(image,format='jpeg')
    if len(im.shape) > 2:
        imean = im.mean(axis=2)
    else:
        imean = im
    imfeats = compute_feats(imean,kernels).reshape(-1)
    santa_feats[i,:-1] = imfeats 
    santa_feats[i,-1] = 1

In [40]:
nosanta_feats = np.zeros((len(nosanta_images),9))
for i, image in enumerate(nosanta_images):
    im = plt.imread(image,format='jpeg')
    imfeats = compute_feats(im.mean(axis=2),kernels).reshape(-1)
    nosanta_feats[i,:-1] = imfeats 
    nosanta_feats[i,-1] = 0

In [54]:
#combine the datasets
ds = np.concatenate((nosanta_feats,santa_feats), axis=0)

In [105]:
features = ds[:,:-1]

In [106]:
from sklearn.preprocessing import MaxAbsScaler
features = MaxAbsScaler().fit_transform(features)

In [107]:
target = ds[:,-1]

In [108]:
from sklearn.model_selection import train_test_split

In [109]:
X_train, X_test, y_train, y_test = train_test_split(features,target)

In [110]:
print('Training data and target sizes: \n{}, {}'.format(X_train.shape,y_train.shape))
print('Test data and target sizes: \n{}, {}'.format(X_test.shape,y_test.shape))

Training data and target sizes: 
(491, 8), (491,)
Test data and target sizes: 
(164, 8), (164,)


In [111]:
# Import datasets, classifiers and performance metrics
from sklearn import datasets, svm, metrics

In [146]:
# Create a classifier: a support vector classifier
classifier = svm.SVC(C=1,kernel='rbf',gamma=1)
#fit to the training data
classifier.fit(X_train,y_train)

SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=1, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [147]:
# now to Now predict the value of the digit on the test data
y_pred = classifier.predict(X_test)

In [148]:
print("Confusion matrix:\n%s" % metrics.confusion_matrix(y_test, y_pred))

Confusion matrix:
[[86  5]
 [49 24]]


In [149]:
print("Classification report for classifier %s:\n%s\n"
      % (classifier, metrics.classification_report(y_test, y_pred)))

Classification report for classifier SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=1, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False):
             precision    recall  f1-score   support

        0.0       0.64      0.95      0.76        91
        1.0       0.83      0.33      0.47        73

avg / total       0.72      0.67      0.63       164


