In [1]:
import matplotlib.pyplot as plt
import numpy as np
import glob

In [2]:
#imports for the Gabor filter
from scipy import ndimage as ndi
from skimage.filters import gabor_kernel
from scipy.stats import kurtosis, skew

Steps
1. load "zebra" and "not zebra" images
2. use Gabor filters to keep the number of features constant
3. train test split
4. train  model
5. test model

In [3]:
# first we will define a function that will use Gabor filters to reduce the images to a constant set of features
#define Gabor features
def compute_feats(image, kernels):
    feats = np.zeros((len(kernels), 2), dtype=np.double)
    for k, kernel in enumerate(kernels):
        filtered = ndi.convolve(image, kernel, mode='wrap')
        #feats[k, 0] = filtered.mean()
        #feats[k, 1] = filtered.var()
        feats[k, 0] = kurtosis(np.reshape(filtered,-1))
        feats[k, 1] = skew(np.reshape(filtered,-1))
    return feats

In [4]:
# prepare Gabor filter bank kernels
kernels = []
for sigma in (1,4):
    theta = np.pi
    for frequency in (0.05, 0.25):
        print('theta = {}, sigma = {} frequency = {}'.format(theta, sigma, frequency) )
        kernel = np.real(gabor_kernel(frequency,theta=theta,sigma_x=sigma, sigma_y=sigma))
        kernels.append(kernel)
                         
np.shape(kernels)

theta = 3.141592653589793, sigma = 1 frequency = 0.05
theta = 3.141592653589793, sigma = 1 frequency = 0.25
theta = 3.141592653589793, sigma = 4 frequency = 0.05
theta = 3.141592653589793, sigma = 4 frequency = 0.25


(4,)

In [5]:
#load Santa images into an array
zebrafolder = './zebra/'
zebra_images = glob.glob('{}*.jpg'.format(zebrafolder))

In [6]:
#load Santa images into an array
nozebrafolder = './nozebra/'
nozebra_images = glob.glob('{}*.jpg'.format(nozebrafolder))

Before staring the ML part, there is some preprocessing that needs to be done. The main issue with this dataset is that all the images are of random sizes. To use this as a train/test dataset, we can do two things:

1. Use Convolutional Neural Networks 
2. Use an image feature reduction technique.

Here we're going to use Method 2. The image reduction technique we will use is Gabor Filters to reduce the images to 8 features. 

**Note this next cell will take some time to run!**

In [7]:
zebra_feats = np.zeros((len(zebra_images),9))
for i, image in enumerate(zebra_images):
    im = plt.imread(image,format='jpeg')
    if len(im.shape) > 2:
        imean = im.mean(axis=2)
    else:
        imean = im
    imfeats = compute_feats(imean,kernels).reshape(-1)
    zebra_feats[i,:-1] = imfeats 
    zebra_feats[i,-1] = 1

In [8]:
nozebra_feats = np.zeros((len(nozebra_images),9))
for i, image in enumerate(nozebra_images):
    im = plt.imread(image,format='jpeg')
    imfeats = compute_feats(im.mean(axis=2),kernels).reshape(-1)
    nozebra_feats[i,:-1] = imfeats 
    nozebra_feats[i,-1] = 0

In [9]:
#combine the datasets
ds = np.concatenate((nozebra_feats,zebra_feats), axis=0)

In [10]:
features = ds[:,:-1]

In [11]:
from sklearn.preprocessing import MaxAbsScaler
features = MaxAbsScaler().fit_transform(features)

In [12]:
target = ds[:,-1]

In [13]:
from sklearn.model_selection import train_test_split

In [14]:
X_train, X_test, y_train, y_test = train_test_split(features,target)

In [15]:
print('Training data and target sizes: \n{}, {}'.format(X_train.shape,y_train.shape))
print('Test data and target sizes: \n{}, {}'.format(X_test.shape,y_test.shape))

Training data and target sizes: 
(552, 8), (552,)
Test data and target sizes: 
(185, 8), (185,)


In [16]:
# Import datasets, classifiers and performance metrics
from sklearn import datasets, svm, metrics

In [17]:
# Create a classifier: a support vector classifier
classifier = svm.SVC(C=1,kernel='rbf',gamma=1)
#fit to the training data
classifier.fit(X_train,y_train)

SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=1, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [18]:
# now to Now predict the value of the digit on the test data
y_pred = classifier.predict(X_test)

In [19]:
print("Confusion matrix:\n%s" % metrics.confusion_matrix(y_test, y_pred))

Confusion matrix:
[[64 37]
 [29 55]]


In [20]:
print("Classification report for classifier %s:\n%s\n"
      % (classifier, metrics.classification_report(y_test, y_pred)))

Classification report for classifier SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=1, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False):
              precision    recall  f1-score   support

         0.0       0.69      0.63      0.66       101
         1.0       0.60      0.65      0.62        84

   micro avg       0.64      0.64      0.64       185
   macro avg       0.64      0.64      0.64       185
weighted avg       0.65      0.64      0.64       185


