In [1]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import glob
from sklearn.decomposition import PCA
from scipy.spatial import distance
from PIL import Image
import os

The pictures that I used were of the band The xx. I chose them because photographs of the band and its individual members are widely available on the internet. These images show a diverse set of backgrounds and facial expressions, which gives the training data enough variety to be useful. In addition to collecting the images, I cropped them so that the faces were oriented the same way in the frame, with the eyes and mouth situated in the same region, so that it would be easier to perform facial recognition on them.

In [2]:
def computeNearestNeighborNormalImages(images):
    """Leave-one-out nearest-neighbour check on raw pixel data.

    For every entry, the closest *other* entry is found using the Euclidean
    (L2) distance between the pixel arrays. The entry counts as correct when
    that neighbour carries the same label. The tally is printed.

    Parameters
    ----------
    images : sequence of [pixels, label] pairs
        ``pixels`` are array-likes that can be subtracted from one another,
        ``label`` is compared by its string form.
    """
    #the total number of correct we find in the end
    totalCorrect = 0
    #peform for each of the images
    for index, image in enumerate(images):
        #start with an infinite distance so that any real neighbor is closer
        nearestDistance = float("inf")
        nearestFace = None
        pixels = np.asarray(image[0], dtype=float)

        #for each of the images that is not the current image itself, peform a euclidean distance calculation
        #store the closest neighbor
        for otherIndex, currentImage in enumerate(images):
            #skip by position: comparing the [pixels, label] pairs with == would
            #compare ndarrays element-wise and has no single truth value
            if otherIndex == index:
                continue
            #the float conversion avoids wrap-around when subtracting uint8 images
            dist = np.linalg.norm(pixels - np.asarray(currentImage[0], dtype=float))
            if dist < nearestDistance:
                nearestDistance = dist
                nearestFace = currentImage[1]
        #if the name we recieve as the closest neighbor is the same as the image we are testing on we are correct
        if nearestFace is not None and str(image[1]) == str(nearestFace):
            totalCorrect = totalCorrect + 1
    #report against the real number of images instead of a hard-coded 30
    print(totalCorrect, "out of", len(images), "are correct")

In [3]:
def computeNearestNeighborEigenvectors(vector,eigenVectors):
    """Check whether the nearest neighbour of ``vector`` shares its label.

    Parameters
    ----------
    vector : [values, label] pair
        The entry being tested.
    eigenVectors : iterable of [values, label] pairs
        Candidate neighbours; entries equal to ``vector`` (same label and
        same values) are skipped.

    Returns
    -------
    int
        1 when the closest remaining entry (Euclidean distance) has the same
        label as ``vector``, otherwise 0 (also when there is no other entry).
    """
    #start with an infinite distance so that any real neighbor is closer
    nearestDistance = float("inf")
    nearestFace = None
    ownLabel = str(vector[1])
    ownValues = np.asarray(vector[0])

    #for each of the vectors that is not the current vector itself, peform a euclidean distance calculation
    #store the closest neighbor
    for currentVector in eigenVectors:
        #a plain == on the [values, label] pairs compares ndarrays element-wise and
        #has no single truth value, so test the label and the values explicitly
        if str(currentVector[1]) == ownLabel and np.array_equal(ownValues, currentVector[0]):
            continue
        dist = distance.euclidean(vector[0], currentVector[0])
        if dist < nearestDistance:
            nearestDistance = dist
            nearestFace = currentVector[1]
    #if the name we recieve as the closest neighbor is the same as the vector we are testing on we are correct
    if nearestFace is not None and ownLabel == str(nearestFace):
        return 1
    return 0
    
    

In [4]:
#generator that walks a fixed-size window over an image so every position can be tested for a face
def slidingWindow(image, sizeOfStep, sizeOfWindow):
    """Yield ``(x, y, patch)`` for every window origin on a regular grid.

    ``sizeOfWindow`` is indexed as ``(width, height)``. Origins advance by
    ``sizeOfStep`` along both axes, rows first. Patches that run past the
    image border are simply cut short by the slice, so callers have to filter
    out undersized patches themselves.
    """
    yield from (
        (
            left,
            top,
            image[top:top + sizeOfWindow[1], left:left + sizeOfWindow[0]],
        )
        for top in range(0, image.shape[0], sizeOfStep)
        for left in range(0, image.shape[1], sizeOfStep)
    )

In [5]:
def projectOntoEigenspace(image, mean, eigenvectors):
    """Project an image onto the space spanned by ``eigenvectors``.

    Parameters
    ----------
    image : array-like
        Image whose number of pixels equals the length of ``mean``; it is
        flattened before use.
    mean : array-like
        Average face; any shape, flattened to one value per pixel.
    eigenvectors : ndarray or np.matrix
        One eigenvector per column, with one row per pixel.

    Returns
    -------
    Column of projection coefficients with one row per eigenvector (an
    ``np.matrix`` when ``eigenvectors`` is one, otherwise an ndarray).
    """
    # Flatten the mean too, so a (1, N) row vector works as well as a 1-D array
    mean = np.asarray(mean).ravel()

    # Normalize by substracting the average face
    image = np.asarray(image).ravel() - mean
    image = np.reshape(image, (mean.shape[0], 1))

    # Projecting the image into the eigenspace. np.dot is a true matrix product
    # for ndarrays and np.matrix alike; `*` is only a matrix product for
    # np.matrix and is element-wise for plain ndarrays.
    projectedImage = np.dot(eigenvectors.transpose(), image)

    return projectedImage

In [6]:
def distance_eigenspace(window, mean, weights, eigenvectors):
    """Measure how far a window lies from the stored face weights.

    The window is projected with ``projectOntoEigenspace`` and subtracted from
    ``weights``. The norm along axis 0 is then taken twice: once over the
    difference, and once more over the resulting vector of norms.
    """
    difference = weights - projectOntoEigenspace(window, mean, eigenvectors)

    # first pass: one norm per column of the difference
    perColumn = np.linalg.norm(difference, axis=0)

    # second pass: collapse the per-column norms into the returned value
    return np.linalg.norm(perColumn, axis=0)

In [7]:
def cropWindow(image, startY, startX, sizeY, sizeX):
    """Return the ``sizeY`` x ``sizeX`` region of ``image`` starting at row ``startY``, column ``startX``.

    Only the first two axes are sliced, so both single-channel (2-D) and
    multi-channel (3-D) images are accepted. The result is a slice of
    ``image``; a region that runs past the border is cut short.
    """
    return image[startY:startY+sizeY,startX:startX+sizeX]

In [8]:
def classifyImage(image,mean, eigenvectors, weights):
    """Find the training face whose weights are closest to ``image``.

    The image is projected with ``projectOntoEigenspace``; the distance to each
    column of ``weights`` is the norm of the difference along axis 0. The
    indices of the (up to) 20 closest columns are printed, nearest first, and
    the nearest index is returned.
    """
    #find the image projected onto the eigenspace
    projectedImage = projectOntoEigenspace(image, mean, eigenvectors)

    #find the difference between the importance of the eigenvector and the newly found projected image
    diff = weights - projectedImage

    #one distance per column of weights
    norms = np.asarray(np.linalg.norm(diff, axis=0)).ravel()

    #rank the columns by distance; a stable sort keeps the lower index first on ties.
    #Slicing (rather than 20 argmin passes with a sentinel) never repeats an index
    #when there are fewer than 20 columns and does not depend on a magic upper bound.
    closestFaces = list(np.argsort(norms, kind="stable")[:20])

    print(closestFaces)
    return(closestFaces[0])

In [9]:
def _loadFlattenedImages(pattern):
    """Read every file matching ``pattern`` as grayscale and return one flattened float64 row per image."""
    #sort so that the image order does not depend on the file system
    paths = sorted(glob.glob(pattern))
    if not paths:
        raise IOError("no images found for pattern: " + pattern)

    faces = []
    for path in paths:
        pixels = cv2.imread(path, 0)
        #cv2.imread returns None instead of raising when a file cannot be decoded
        if pixels is None:
            raise IOError("could not read image: " + path)
        faces.append(np.array(pixels, dtype='float64'))

    stacked = np.array(faces)
    return stacked.reshape([stacked.shape[0], stacked.shape[1]*stacked.shape[2]])


def obtainData():
    """Load the three training folders and return all images as columns of one array.

    The images of ``training_data/images1``, ``images2`` and ``images3`` are
    read in that order, flattened, stacked as rows and then transposed, so the
    result has one row per pixel and one column per image.
    """
    trainData1 = _loadFlattenedImages('training_data/images1/*.*')
    trainData2 = _loadFlattenedImages('training_data/images2/*.*')
    trainData3 = _loadFlattenedImages('training_data/images3/*.*')

    #create an array with all of the flattened images together in one
    totalData = np.swapaxes(np.concatenate((trainData1, trainData2, trainData3)),0,1)
    return(totalData)

In [12]:
#obtain the file list for each of the sets of images
#create a numpy array of each of the sets of images then create one which has the images flattened
filelist = glob.glob('training_data/images1/*.*')

x = np.array([np.array(cv2.imread(fname,0), dtype='float64') for fname in filelist])

numImages1 = x.shape[0]
#x.shape has to be (numImages, height, width); if only one entry is printed here
#the images could not be stacked into one array and x.shape[1] below raises an IndexError
print(x.shape)
sizeImages1 = x.shape[1]*x.shape[2]

trainData1 = x.reshape([numImages1,sizeImages1])


filelist2 = glob.glob('training_data/images2/*.*')
y = np.array([np.array(cv2.imread(fname,0), dtype='float64') for fname in filelist2])

numImages2 = y.shape[0]
sizeImages2 = y.shape[1]*y.shape[2]

trainData2 = y.reshape([numImages2,sizeImages2])


filelist3 = glob.glob('training_data/images3/*.*')
z = np.array([np.array(cv2.imread(fname,0), dtype='float64') for fname in filelist3])

numImages3 = z.shape[0]
sizeImages3 = z.shape[1]*z.shape[2]

trainData3 = z.reshape([numImages3,sizeImages3])

#create an array with all of the flattened images together in one
totalData = np.swapaxes(np.concatenate((trainData1, trainData2, trainData3)),0,1)


#fit a PCA on each of the three sets of training data
#random_state makes the randomized solver reproducible between runs
pca1 = PCA(n_components=10, svd_solver='randomized',whiten=True, random_state=0).fit(trainData1)
pca2 = PCA(n_components=10, svd_solver='randomized',whiten=True, random_state=0).fit(trainData2)
#the third model has to be fitted on the third data set (it was fitted on trainData2 before)
pca3 = PCA(n_components=10, svd_solver='randomized',whiten=True, random_state=0).fit(trainData3)


#plot each of the training data's explained variance ratios to show how many components we will need for accurate eigenfaces
for fittedPca in (pca1, pca2, pca3):
    plt.plot(np.cumsum(fittedPca.explained_variance_ratio_))
    plt.xlabel('number of components')
    plt.ylabel('cumulative explained variance')
    plt.show()

(10,)


IndexError: tuple index out of range

Clearly, as can be seen by the plot above, you do not need all of the vectors to represent the data as the cumulative variance reached 1 much sooner than the total number of vectors of the data. In addition, it can be seen that a very large percentage of the information is found within the first couple of vectors, with each additional vector providing less and less information. This is shown in the graphs for all three of the sets of images.

## Question 2.2 Nearest Neighbors

In [None]:
#create a list of all of the images together and then add labels to each of them to say which set they belong to
images = []

for face in x:
    images.append([face, "Jamie"])
for face in y:
    images.append([face, "Oliver"])
#append the loop variable itself: the earlier version iterated over z but appended
#the left-over `image` from the previous loop, so every "Romy" entry was the last Oliver image
for face in z:
    images.append([face, "Romy"])
#compute the l2 distance of all the images and output the ratio of images whose nearest neighbor is the same person
#dtype=object keeps each [pixels, label] pair intact instead of asking numpy to build a regular array from ragged rows
computeNearestNeighborNormalImages(np.array(images, dtype=object))

In [None]:
#compute the means and eigenvectors off all of the images
mean1, eigenvectors1 = cv2.PCACompute(trainData1, mean=None)
mean2, eigenvectors2 = cv2.PCACompute(trainData2, mean=None)
mean3, eigenvectors3 = cv2.PCACompute(trainData3, mean=None)

#create a list of all of the eigenvectors together and then add labels to each of them to say which set they belong to
eigenvectors = []
for vector in eigenvectors1:
    eigenvectors.append([vector, "Jamie"])
for vector in eigenvectors2:
    eigenvectors.append([vector, "Oliver"])
for vector in eigenvectors3:
    eigenvectors.append([vector, "Romy"])


#convert eigenvectors to numpy array
#dtype=object keeps each [vector, label] pair intact instead of asking numpy to build a regular array from ragged rows
eigenvectorArray = np.array(eigenvectors, dtype=object)

#compute the l2 distance of all the eigenvectors and count the eigenvectors whose nearest neighbor is the same person
totalCorrect = sum(
    1 for vector in eigenvectorArray
    if computeNearestNeighborEigenvectors(vector, eigenvectors) == 1
)

#report against the real number of eigenvectors instead of a hard-coded 30
print(totalCorrect, "out of", len(eigenvectors), "are correct")

#### Discussion
One might expect an improvement in the number of images that are correctly mapped to their given set. This is because the eigenvector representation mitigates the effect that things like the background of an image have on the difference between two images. For example, when two images from different sets have the exact same background, in the normal image representation they are likely to have each other as nearest neighbors. However, in the eigenspace, these kinds of differences impact the nearest-neighbor calculation far less than in the normal space, leading to a higher likelihood of correct nearest neighbors, as is shown above: in the original space only 20/30 were correct, while in the eigenspace all 30 out of 30 were correct.

## Question 3 Face Detector and Classifier

In [None]:
#reobtain the data - I was having some issues with this so I'm just doing it again here
totalData = obtainData()

#one column per training face, so the column count is the number of faces
numFaces = totalData.shape[1]

# Calculate the average face
meanFace = np.sum(totalData, axis=1)
meanFace = meanFace / numFaces

# Normalize the dataset's images by substraction the average face
totalData -= meanFace[:, np.newaxis]

# Find the normalized covariance matrix of the data
# np.matrix is kept on purpose: the `*` operators below are matrix products
dataTrans= np.matrix(totalData.transpose())
cov_matrix = dataTrans * np.matrix(totalData)
cov_matrix = cov_matrix / numFaces

# Find the eigenvalues and eigenvectors
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)

# Sort the eigenvalues and eigenvectors in order of the size of the eigenvalue
# eigenvectors should be sorted to correspond with eigenvalues
sort_indices = eigenvalues.argsort()[::-1]
eigenvalues = eigenvalues[sort_indices]
eigenvectors = eigenvectors[:,sort_indices]

#find the total value of all of the eigenvalues
eigenvalues_sum = 0
for i in range(0,len(eigenvalues)):
    eigenvalues_sum = eigenvalues_sum + eigenvalues[i]


enumEigenvalues = 0
totalVariance = 0.0

#find the number of eigenvalues needed to obtain 85% of the variance
for i in range (0, len(eigenvalues)):
    totalVariance = totalVariance + eigenvalues[i]/ eigenvalues_sum
    if totalVariance >= 0.85:
        enumEigenvalues = i+1
        break

#take only the eigenvectors which are pertinant
importantEigenvectors = eigenvectors[:,0:enumEigenvalues]

#map the eigenvectors of the small (faces x faces) matrix back to pixel space and scale each to unit length
importantEigenvectors = totalData * importantEigenvectors
norms = np.linalg.norm(importantEigenvectors, axis=0)
importantEigenvectors = importantEigenvectors / norms

eigenTranspose = importantEigenvectors.transpose()
weights =  eigenTranspose * totalData

#winH, winW and imageShape were never defined in this notebook. A window has to hold
#exactly as many pixels as a training face, so the size is read from one training image.
#NOTE(review): assumes every training image has this same height and width - confirm
trainingFiles = sorted(glob.glob('training_data/images1/*.*'))
winH, winW = cv2.imread(trainingFiles[0], 0).shape
assert winH * winW == totalData.shape[0], "window size does not match the training images"
#slidingWindow indexes its size argument as (width, height)
imageShape = (winW, winH)

testPicture = cv2.imread("sherlock_cast.png",0)
if testPicture is None:
    raise FileNotFoundError("sherlock_cast.png could not be read")
image = np.array(testPicture, dtype='float64')
faceDetectImage = image.copy()
imageList = []

#winX / winY instead of x / y so the training arrays x and y from the earlier cell are not overwritten
for (winX, winY, window) in slidingWindow(image, sizeOfStep=32, sizeOfWindow=(imageShape)):
    # if the window does not meet our desired window size, ignore it
    if window.shape[0] != winH or window.shape[1] != winW:
        continue

    # find the similarity between the current window and our eigenspace for faces
    faceLikelihood = distance_eigenspace(window, meanFace, weights, importantEigenvectors)

    #set the threshold so that only faces come through from the data
    #NOTE: 56500 was tuned by hand for this particular picture
    if (faceLikelihood < 56500):
        cv2.rectangle(faceDetectImage, (winX, winY), (winX + winW, winY + winH), (0,0,255), 2)
        window = cropWindow(image,winY,winX,winH,winW)
        imageList.append(np.array(window.tolist()))

for i in range(0,len(imageList)):
    plt.figure(figsize = (5,5))
    plt.subplot(111)
    plt.xticks([]), plt.yticks([])
    plt.imshow(imageList[i], cmap="gray")
    imageClassification = classifyImage(imageList[i],meanFace,importantEigenvectors,weights)
    #NOTE(review): the index ranges below assume 10 training images per person, loaded in folder order - confirm
    if(imageClassification < 10):
        print('image classified as jamie')
    elif(imageClassification < 20):
        print('image classified as oliver')
    else:
        print('image classified as romy')

plt.figure(figsize = (10,10))
plt.title("Detected Faces"), plt.xticks([]), plt.yticks([])
plt.imshow(faceDetectImage, cmap="gray")

The method as a whole works somewhat well. By and large, it provides the correct person as the closest neighbour about 1/2 of the time (this ratio also holds when you set the threshold to a higher number and thus have more faces to work with), which is a higher rate than one would expect from random guessing among three people. However, this is also not exactly great face classification, as the faces that are being provided are generally well centered and thus one might expect a higher success rate than 1/2.

With regard to false positives in the face detection part, there are no "false positives", as all of the images under the threshold are in fact faces. However, none of the images are perfectly centered and there are two detections for one of the three people in the photo. In addition, as will be touched on again later, the threshold is set based on the image itself and thus must be recalibrated for the specific image that is being worked on. For this reason, it is somewhat hard to gauge "false positives" and simpler to say that the algorithm will generally detect faces in the frame but will not necessarily provide the best version of the faces and may provide each face more than once.

There are two misidentified faces when working with the current threshold. The first and fourth detected faces are in fact correct. The second face takes four tries to find the correct nearest neighbor and the third takes 13, which is very far off.

The largest problem that I see with this algorithm is that it does not account for the different sizes of people's heads in the frame. If one were to more accurately cut out only the faces of the people in the frame, it would likely remove much of the error, as was seen from the nearest neighbor calculations done on the test data. One would expect this to cause a large part of the error, as the well-centered faces seem to be well mapped using the nearest neighbor calculation.

## Question 4

In [None]:
# Get OpenCV datapath
data_path = cv2.data.haarcascades
face_cascade = cv2.CascadeClassifier( os.path.join(data_path, 'haarcascade_frontalface_default.xml'))
eye_cascade = cv2.CascadeClassifier( os.path.join(data_path, 'haarcascade_eye.xml'))
# CascadeClassifier does not raise on a bad path; it just stays empty
if face_cascade.empty():
    raise IOError("could not load haarcascade_frontalface_default.xml from " + data_path)

img = cv2.imread('TestImage.jpeg')
# cv2.imread returns None instead of raising when the file is missing or unreadable
if img is None:
    raise FileNotFoundError("TestImage.jpeg could not be read")
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)


#uses the Viola-Jones implementation to detect faces in the test image, as shown in the tutorial
#should only run on faces rather than also executing for eyes
faces = face_cascade.detectMultiScale(img_gray, 1.1,2)

#should draw a rectangle around all of the faces that were found by the face detector
#faceX / faceY instead of x / y so the training arrays from the earlier cells are not overwritten
for (faceX, faceY, faceW, faceH) in faces:
    cv2.rectangle(img, (faceX, faceY), (faceX + faceW, faceY + faceH), (255,0,0), 2)

# display images
plt.figure(figsize=(15,15))
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.title("Input Image"), plt.xticks([]), plt.yticks([])
plt.show()

As can be seen based on the two images, the Viola-Jones method does a better job of finding the correct matches in the data than the one that has been created from scratch. While my detector does not have a lot of false positives, this is in large part because of the threshold which has been set that is somewhat arbitrary. Given a different image set, one would need to use a different threshold for the image in order to ensure that detection was performed correctly while with Viola-Jones this does not need to be done which allows it to be more successful in detecting faces than my algorithm.