In [1]:
import numpy as np
import cv2 as cv
import matplotlib
from matplotlib import pyplot as plt
%matplotlib inline

In [2]:
# cv.imread does not raise on a bad path; it silently returns None. Fail loudly
# here instead of with an AttributeError on `.shape`.
image_path = './testImages/lionel_messi.jpg'
img = cv.imread(image_path)
if img is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
img.shape

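The shape has 3 dimensions: height (rows), width (columns), and colour channels. Note that OpenCV stores the channels in BGR order, not RGB.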

In [3]:
# OpenCV loads images as BGR while matplotlib expects RGB; convert for display
# so red and blue are not swapped.
plt.imshow(cv.cvtColor(img, cv.COLOR_BGR2RGB))

In [4]:
# Convert the BGR image to grayscale; the cascade detection below runs on this
# grayscale version.
grayImg = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
grayImg.shape

In [5]:
grayImg

In [6]:
plt.imshow(grayImg, cmap = 'gray')

## Face detection using Haar Cascades

<a href='https://docs.opencv.org/3.4/d2/d99/tutorial_js_face_detection.html'>OpenCV documentation on object detection using Haar feature-based cascade classifiers</a>

In [7]:
face_cascade = cv.CascadeClassifier('./opencv/haarcascades/haarcascade_frontalface_default.xml')
eye_cascade = cv.CascadeClassifier('./opencv/haarcascades/haarcascade_eye.xml')

# CascadeClassifier does not raise when the XML file is missing; it simply stays
# empty and detectMultiScale later fails with an opaque assertion error.
if face_cascade.empty() or eye_cascade.empty():
    raise FileNotFoundError("Could not load the Haar cascade XML files from ./opencv/haarcascades/")

# scaleFactor: image shrink ratio between pyramid levels;
# minNeighbors: overlapping detections required to keep a candidate.
faces = face_cascade.detectMultiScale(grayImg, scaleFactor=1.3, minNeighbors=5)
faces

In [8]:
# Unpack the first detection; it is drawn below as the rectangle from (x, y)
# to (x + w, y + h). Raises IndexError when no face was detected.
( x, y, w, h) = faces[0]
x, y, w, h

### Detecting face

In [9]:
# cv.rectangle draws in place, so work on a copy; `img` itself is not modified
# by this cell. The colour tuple is in BGR order, i.e. (255, 0, 0) is blue.
face_img = cv.rectangle(img.copy(), (x, y), (x + w, y + h), (255, 0, 0), 4)
# Convert BGR -> RGB so matplotlib shows the true colours.
plt.imshow(cv.cvtColor(face_img, cv.COLOR_BGR2RGB))

### Plotting eyes next

In [10]:
# No HighGUI windows are opened in a notebook, so there is nothing to destroy
# (and headless OpenCV builds raise on cv.destroyAllWindows()).
# Draw on a copy because cv.rectangle modifies its input in place.
face_img = img.copy()
for (x, y, w, h) in faces:
    cv.rectangle(face_img, (x, y), (x + w, y + h), (255, 0, 0), 4)
    roi_gray = grayImg[y:y+h, x:x+w]
    # roi_color is a view into face_img, so rectangles drawn on it show up
    # in face_img as well.
    roi_color = face_img[y:y+h, x:x+w]
    eyes = eye_cascade.detectMultiScale(roi_gray)
    for (ex, ey, ew, eh) in eyes:
        # Eye coordinates are relative to the face region.
        cv.rectangle(roi_color, (ex, ey), (ex + ew, ey + eh), (0, 255, 0), 4)

plt.figure()
# Convert BGR -> RGB for matplotlib; a colormap is ignored for 3-channel data.
plt.imshow(cv.cvtColor(face_img, cv.COLOR_BGR2RGB))
plt.show()

### Cropping the facial region of the image

In [11]:
%matplotlib inline
plt.imshow(roi_color, cmap='gray')

## Creating a function to carry out all the above steps on any image

In [12]:
def getCroppedImageIf2Eyes(image_path):
    """Return the colour crop of the first detected face with at least two eyes.

    Parameters
    ----------
    image_path : str
        Path of the image file to read with ``cv.imread``.

    Returns
    -------
    numpy.ndarray or None
        The BGR face region (a view into the loaded image) for the first
        detected face in which the eye cascade finds two or more eyes.
        ``None`` when the file cannot be read as an image, when no face is
        detected, or when no detected face has two visible eyes.
    """
    img = cv.imread(image_path)
    # cv.imread returns None (it does not raise) for missing or non-image
    # files; without this guard cvtColor would crash on such inputs.
    if img is None:
        return None

    grayImg = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(grayImg, 1.3, 5)
    for (x, y, w, h) in faces:
        roi_gray = grayImg[y:y+h, x:x+w]
        eyes = eye_cascade.detectMultiScale(roi_gray)
        if len(eyes) >= 2:
            return img[y:y+h, x:x+w]
    return None

In [13]:
originalImg = cv.imread('./testImages/lionel_messi.jpg')
# Convert BGR -> RGB so matplotlib shows the true colours.
plt.imshow(cv.cvtColor(originalImg, cv.COLOR_BGR2RGB))

In [14]:
croppedImage = getCroppedImageIf2Eyes('./testImages/lionel_messi.jpg')
# croppedImage stays in BGR order for later processing; only the displayed
# copy is converted to RGB.
plt.imshow(cv.cvtColor(croppedImage, cv.COLOR_BGR2RGB))

### Testing with image where eyes cannot be detected

In [15]:
originalImg = cv.imread('./testImages/lionel_messi_3.jpg')
# Convert BGR -> RGB so matplotlib shows the true colours.
plt.imshow(cv.cvtColor(originalImg, cv.COLOR_BGR2RGB))

In [16]:
# The helper only returns a crop when at least two eyes are found in a face;
# otherwise it falls through and yields None, so this cell shows no output.
croppedImage2 = getCroppedImageIf2Eyes('./testImages/lionel_messi_3.jpg')
croppedImage2

## Cropping every image from all folders

In [17]:
# Root folder that is scanned for one sub-folder per person.
path_to_data = "./dataset/"
# Output folder for the cropped faces; it is deleted and recreated below.
# Note that it lives inside path_to_data.
path_to_person_data = "./dataset/cropped/"

In [18]:
import os

# The cropped-output folder lives inside the dataset folder. Skip it, otherwise
# a re-run of the notebook would treat it as another person's image folder.
cropped_root = os.path.abspath(path_to_person_data)
img_dirs = [
    entry.path
    for entry in os.scandir(path_to_data)
    if entry.is_dir() and os.path.abspath(entry.path) != cropped_root
]

In [19]:
img_dirs

In [20]:
import shutil
# Start from an empty output folder: remove any previous cropped images and
# recreate the directory.
if os.path.exists(path_to_person_data):
    shutil.rmtree(path_to_person_data)
os.mkdir(path_to_person_data)

In [21]:
cropped_image_dirs = []
person_file_names_dict = {}


for img_dir in img_dirs:
    count = 1
    # The folder name is used as the person's name.
    person_name = img_dir.split('/')[-1]
    person_file_names_dict[person_name] = []
    cropped_folder = path_to_person_data + person_name
    # os.scandir yields entries in arbitrary order; sort by name so the
    # numbering of the cropped files is reproducible between runs.
    for entry in sorted(os.scandir(img_dir), key=lambda e: e.name):
        # Only regular files can be images; skip nested folders and the like.
        if not entry.is_file():
            continue
        roi_color = getCroppedImageIf2Eyes(entry.path)
        if roi_color is None:
            continue
        # Create the person's output folder lazily, on the first usable face.
        if not os.path.exists(cropped_folder):
            os.makedirs(cropped_folder)
            cropped_image_dirs.append(cropped_folder)
            print("Generating cropped images in folder: ", cropped_folder)
        cropped_file_name = person_name + str(count) + ".png"
        cropped_file_path = cropped_folder + "/" + cropped_file_name
        cv.imwrite(cropped_file_path, roi_color)
        person_file_names_dict[person_name].append(cropped_file_path)
        count += 1

### Dictionary of image paths

In [22]:
# Rebuild the name -> file list mapping from what is actually on disk in each
# cropped folder.
person_file_names_dict = {
    folder.split('/')[-1]: [item.path for item in os.scandir(folder)]
    for folder in cropped_image_dirs
}
person_file_names_dict

### Using the wavelet transform as a feature for training our model

In [23]:
import numpy as np
import pywt

def waveletTransform(img, mode='haar', level=1):
    """Return the detail-only wavelet reconstruction of an image as uint8.

    The image is converted to grayscale, scaled by 1/255, decomposed with
    ``pywt.wavedec2``, the approximation coefficients are zeroed, and the
    remaining detail coefficients are reconstructed with ``pywt.waverec2``.

    Parameters
    ----------
    img : numpy.ndarray
        Colour image in OpenCV's BGR channel order (as returned by
        ``cv.imread``), or an already-grayscale 2-D array.
    mode : str
        Wavelet name handed to PyWavelets as its ``wavelet`` argument
        (e.g. ``'haar'``, ``'db1'``). It is not PyWavelets' border mode.
    level : int
        Decomposition level.

    Returns
    -------
    numpy.ndarray
        Reconstructed detail image, scaled by 255 and cast to ``uint8``.
    """
    # Callers pass images loaded with cv.imread, which are BGR; RGB2GRAY would
    # swap the red and blue weights. Keep any inference-side copy of this
    # function in sync with this conversion.
    if img.ndim == 3:
        gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    else:
        gray = img
    gray = np.float32(gray) / 255

    coeffs = list(pywt.wavedec2(gray, mode, level=level))
    # Drop the approximation (low-frequency) band so only details remain.
    coeffs[0] = np.zeros_like(coeffs[0])

    detail = pywt.waverec2(coeffs, mode) * 255
    # NOTE(review): detail values can be negative and the uint8 cast is not
    # range-checked; left as in the original so the features stay comparable.
    return np.uint8(detail)

In [24]:
# Preview the detail-only wavelet image of the cropped face ('db1', 5 levels).
im_har = waveletTransform(croppedImage, mode='db1', level=5)
plt.imshow(im_har, cmap='gray')

In [25]:
# Assign each person a consecutive integer label, in dictionary order.
class_dict = {name: label for label, name in enumerate(person_file_names_dict)}
count = len(class_dict)
class_dict

In [26]:
# Each sample is a column vector: the 32x32x3 raw pixels stacked on top of the
# 32x32 wavelet image.
X, y = [], []
for person_name, training_files in person_file_names_dict.items():
    for training_image in training_files:
        img = cv.imread(training_image)
        if img is None:
            continue
        raw_column = cv.resize(img, (32, 32)).reshape(32 * 32 * 3, 1)
        wavelet_img = waveletTransform(img, 'db1', 5)
        wavelet_column = cv.resize(wavelet_img, (32, 32)).reshape(32 * 32, 1)
        X.append(np.vstack((raw_column, wavelet_column)))
        y.append(class_dict[person_name])

In [27]:
len(X) #Each element in X is an Image

In [28]:
len(X[0]) #Size of image

In [29]:
X[0]

In [30]:
# One row per image, 4096 float features per row (32*32*3 raw + 32*32 wavelet).
X = np.array(X, dtype=float).reshape(len(X), 4096)
X.shape

In [31]:
X[0]

# Model Training

### Using SVM to train our model

In [32]:
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report

In [33]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Baseline: standardise the features, then fit an RBF-kernel SVM.
pipe = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('svc', SVC(kernel='rbf', C=10)),
])
pipe.fit(X_train, y_train)
pipe.score(X_test, y_test)

### Classification report on SVM
<a href='https://en.wikipedia.org/wiki/F-score' > f1 score </a>

In [34]:
print(classification_report(y_test, pipe.predict(X_test)))

Using GridSearchCV to try out different models with different hyperparameters.

In [35]:
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV

In [36]:
# Candidate models and the hyperparameter grids to search. The parameter keys
# are prefixed with the step names that make_pipeline derives from the
# lower-cased class names.
model_params = {
    'svm': {
        'model': svm.SVC(gamma='auto', probability=True),
        'params': {
            'svc__C': [1, 10, 100, 1000],
            'svc__kernel': ['rbf', 'linear']
        }
    },
    'random_forest': {
        # Fixed seed so the grid-search scores are reproducible between runs.
        'model': RandomForestClassifier(random_state=0),
        'params': {
            'randomforestclassifier__n_estimators': [1, 5, 10]
        }
    },
    'logistic_regression': {
        # multi_class='auto' is the default and the parameter is deprecated in
        # recent scikit-learn releases, so it is no longer passed explicitly.
        'model': LogisticRegression(solver='liblinear'),
        'params': {
            'logisticregression__C': [1, 5, 10]
        }
    }
}

In [37]:
import pandas as pd

scores = []
best_estimators = {}
for algo, mp in model_params.items():
    # Use a dedicated name: rebinding `pipe` here would clobber the fitted SVM
    # pipeline from the earlier cell and break re-running the cells that use it.
    candidate_pipe = make_pipeline(StandardScaler(), mp['model'])
    clf = GridSearchCV(candidate_pipe, mp['params'], cv=5, return_train_score=False)
    clf.fit(X_train, y_train)
    scores.append({
        'model': algo,
        'best_score': clf.best_score_,
        'best_params': clf.best_params_
    })
    best_estimators[algo] = clf.best_estimator_

df = pd.DataFrame(scores, columns=['model', 'best_score', 'best_params'])
df  # cross-validation scores on the training split, not test-set scores

In [38]:
best_estimators

### These are the results for test data

In [39]:
best_estimators['svm'].score(X_test,y_test)

In [40]:
best_estimators['random_forest'].score(X_test,y_test)

In [41]:
best_estimators['logistic_regression'].score(X_test,y_test)

In [42]:
# NOTE(review): the SVM pipeline is hard-coded as the final model; confirm
# against the validation and test scores above that it really is the best.
best_classf = best_estimators['svm']

In [43]:
from sklearn.metrics import confusion_matrix
# Confusion matrix of the chosen model on the held-out test split.
y_pred_best = best_classf.predict(X_test)
cm = confusion_matrix(y_test, y_pred_best)
cm

In [44]:
import seaborn as sn
# Draw the confusion matrix as an annotated heatmap.
plt.figure(figsize = (10,7))
sn.heatmap(cm, annot=True)
plt.xlabel('Predicted')
plt.ylabel('Truth')

In [45]:
class_dict

In [46]:
import joblib 
# Persist the chosen estimator to disk. best_classf comes from GridSearchCV's
# best_estimator_ over make_pipeline(StandardScaler(), model), so the saved
# object includes the scaler as well as the classifier.
joblib.dump(best_classf, 'saved_model.pkl') 

In [47]:
import json
# Store the person-name -> integer-label mapping.
with open("class_dictionary.json", "w") as f:
    json.dump(class_dict, f)