In [17]:
import numpy as np
import scipy.misc
import cv2
import matplotlib.pyplot as plt
from scipy.ndimage import zoom
import scipy.misc
import os
import warnings
warnings.filterwarnings("ignore")

In [18]:
cascPath = "haarcascade_frontalface_default.xml"
# Create the haar cascade
faceCascade = cv2.CascadeClassifier(cascPath)


In [19]:
## read the dataset
full_face = []
full_labels = []
all_images = os.listdir('yalefaces')
for j in range(1,11):    # there are 15 different people in the dataset
    img_list = [filename for filename in all_images if filename.startswith('subject%.2d'%j)]
#     img_list = glob.glob('yalefaces/subject%.2d*'%j)
    face1 = np.zeros((len(img_list),64,64))
    for k,img in enumerate(img_list):
        I = scipy.misc.imread(os.path.join('yalefaces',img))
        I = cv2.fastNlMeansDenoising(I, None, 9, 13)
        I = cv2.GaussianBlur(I, (5,5), 0)
        I = cv2.equalizeHist(I)
        faces = faceCascade.detectMultiScale(
            I,
            scaleFactor=1.1,
            minNeighbors=5,
            minSize=(30, 30))
        x,y,h,w = faces[0]
        face1[k,...] = zoom(I[y:y+h,x:x+w],[64./h,64./w])
        
    labels = (j-1)*np.ones((11,))
    full_face.append(face1)
    full_labels.append(labels)
full_face = np.stack(full_face)    # contains the images of all faces
full_labels = np.stack(full_labels)   # contains the ID of all the faces
full_face = np.stack(full_face)    # contains the images of all faces
full_labels = np.stack(full_labels)   # contains the ID of all the faces

In [20]:
full_face = full_face/255 #normalise data

In [21]:
# divide the data into train and test
# pick 2 images from each dataset and put it into test set
train_set = np.zeros((full_face.shape[0],9,64,64))
test_set = np.zeros((full_face.shape[0],2,64,64))
train_labels = np.zeros((full_face.shape[0],9))
test_labels = np.zeros((full_face.shape[0],2))
for k in range(full_face.shape[0]):
    g = np.random.permutation(11)   # randomly select the 9 faces
    train_set[k,...] = full_face[k,g[:9],...]
    train_labels[k,...] = full_labels[k,g[:9]]
    test_set = full_face[:,g[9:],...]
    test_labels[k,...] = full_labels[k,g[9:]]

In [22]:
from skimage.feature import local_binary_pattern
train_set = np.reshape(train_set,[-1,64,64])
train_labels = np.reshape(train_labels,[-1,])
test_set = np.reshape(test_set,[-1,64,64])
test_labels = np.reshape(test_labels,[-1,])
train_feat = np.zeros_like(train_set)
test_feat = np.zeros_like(test_set)

for k in range(train_feat.shape[0]):
        train_feat[k,...] = local_binary_pattern(train_set[k,...],8,3)#,method='uniform')
for k in range(test_feat.shape[0]):
        test_feat[k,...] = local_binary_pattern(test_set[k,...],8,3)#,method='uniform')
print(np.amax(test_feat))

255.0


In [23]:
# set the number of grids in x and y direction
# divide the image into the specified number of grids
# for each grid compute the histogram with 64 bins
# concatenate the histogram from all grid regions to form a single feature
# now we have one feature vector per image
# Note that grid size and histogram bin sizes are variable
x_grid = 7
y_grid = 7
grid_size_x = train_feat.shape[1]//x_grid
grid_size_y = train_feat.shape[2]//y_grid
for k in range(train_feat.shape[0]):
    for x in range(x_grid):
        for y in range(y_grid):
            patch = train_feat[k,x*grid_size_x:(x+1)*grid_size_x,y*grid_size_y:(y+1)*grid_size_y]
            hist,_ = np.histogram(patch.reshape(-1),bins=64)
            if x==0 and y==0:
                patch_hist_list = hist
            else:
                patch_hist_list = np.concatenate([patch_hist_list,hist],0)
    if k==0:
        data_hist = patch_hist_list[np.newaxis,...]
    else:
        data_hist = np.concatenate([data_hist,patch_hist_list[np.newaxis,...]],0)
print(data_hist.shape)

grid_size_x = test_feat.shape[1]//x_grid
grid_size_y = test_feat.shape[2]//y_grid

for k in range(test_feat.shape[0]):
    for x in range(x_grid):
        for y in range(y_grid):
            patch = test_feat[k,x*grid_size_x:(x+1)*grid_size_x,y*grid_size_y:(y+1)*grid_size_y]
            hist,_ = np.histogram(patch.reshape(-1),bins=64)
            if x==0 and y==0:
                patch_hist_list = hist
            else:
                patch_hist_list = np.concatenate([patch_hist_list,hist],0)
    if k==0:
        test_data_hist = patch_hist_list[np.newaxis,...]
    else:
        test_data_hist = np.concatenate([test_data_hist,patch_hist_list[np.newaxis,...]],0)
print(test_data_hist.shape)

(90, 3136)
(20, 3136)


In [24]:
from sklearn.decomposition import PCA
pca = PCA(whiten=True)
train_feat = data_hist
test_feat = test_data_hist
print(train_feat.shape,test_feat.shape)
pca.fit(train_feat)

(90, 3136) (20, 3136)


PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
  svd_solver='auto', tol=0.0, whiten=True)

In [25]:
# select number of components which explain 99% of the data variance
n_components = np.argmin(np.cumsum(pca.explained_variance_ratio_)<=0.99)
print(n_components)
train_feat_pca = pca.transform(train_feat)
train_feat_pca = train_feat_pca[:,:n_components]
test_feat_pca = pca.transform(test_feat)
test_feat_pca = test_feat_pca[:,:n_components]
print(train_feat_pca.shape)

79
(90, 79)


In [26]:
import itertools
img_pair = []
img_full_pair = []
label_pair = []
for pair in itertools.combinations_with_replacement(range(train_feat_pca.shape[0]),2):
    img_pair.append(train_feat_pca[pair,:])
    img_full_pair.append(train_set[pair,:])
    label_pair.append(train_labels[pair,])
img_pair = np.stack(img_pair)
img_full_pair = np.stack(img_full_pair)
label_pair = np.stack(label_pair)
label_pair = (label_pair[:,0] == label_pair[:,1])
label_pair = label_pair*1

In [27]:
test_img_pair = []
test_label_pair = []
for pair in itertools.combinations_with_replacement(range(test_feat.shape[0]),2):
    test_img_pair.append(test_feat_pca[pair,...])
    test_label_pair.append(test_labels[pair,])
test_img_pair = np.stack(test_img_pair)
test_label_pair = np.stack(test_label_pair)
test_label_pair = (test_label_pair[:,0] == test_label_pair[:,1])
test_label_pair = test_label_pair*1

In [28]:
img_pair = np.abs(img_pair[:,0,...] - img_pair[:,1,...])
test_img_pair = np.abs(test_img_pair[:,0,...] - test_img_pair[:,1,...])

In [29]:
# shuffle the training data
g = np.random.permutation(img_pair.shape[0])
img_pair = img_pair[g,:]
label_pair = label_pair[g,]




In [43]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
clf = MLPClassifier(solver='lbfgs', alpha=1e-5,hidden_layer_sizes=(8, 10), random_state=1)
clf.fit(img_pair,label_pair)

MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(8, 10), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True,
       solver='lbfgs', tol=0.0001, validation_fraction=0.1, verbose=False,
       warm_start=False)

In [44]:
print(clf.score(test_img_pair,test_label_pair))

0.9333333333333333


In [45]:
clf=DecisionTreeClassifier()
clf.fit(img_pair,label_pair)
print(clf.score(test_img_pair,test_label_pair))

0.9619047619047619


In [46]:
from sklearn.linear_model import LogisticRegression
clf=LogisticRegression()
clf.fit(img_pair,label_pair)
print(clf.score(test_img_pair,test_label_pair))

0.9476190476190476


In [47]:
from sklearn.svm import SVC
clf=SVC(kernel='rbf')
clf.fit(img_pair,label_pair)
print(clf.score(test_img_pair,test_label_pair))

0.8904761904761904


In [48]:
from sklearn.ensemble import RandomForestClassifier
clf=RandomForestClassifier()
clf.fit(img_pair,label_pair)
print(clf.score(test_img_pair,test_label_pair))

0.9761904761904762
