In [109]:
!pip install keras-tuner --upgrade

from time import time
import logging
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import pandas as pd
import keras_tuner 
import seaborn as sbn

from sklearn.datasets import fetch_lfw_people
from sklearn.model_selection import train_test_split
from tensorflow import keras
from keras import layers
from keras.layers import Conv2D, Flatten, Dense, Dropout 
from keras.models import Sequential
from kerastuner import RandomSearch
from kerastuner.engine.hyperparameters import HyperParameters
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

# Print the docstring of the current module.
# NOTE(review): presumably carried over from a script template; in a notebook
# session this is not an authored docstring -- confirm it is still wanted.
print(__doc__)

In [110]:
# #############################################################################
# Fetch the LFW faces (downloaded only if not already on disk) and expose them
# as numpy arrays.

lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)

# The image stack unpacks to (n_samples, h, w); h and w are reused later for
# reshaping and plotting.
n_samples, h, w = lfw_people.images.shape

# Feature matrix as provided by the dataset; its second axis is the number of
# features per sample.
X = lfw_people.data
n_features = X.shape[1]

# The label to predict is the id of the person; target_names maps an id to the
# person's name.
y = lfw_people.target
target_names = lfw_people.target_names
n_classes = target_names.shape[0]

print("Total dataset size:")
print("Sample size: %d" % n_samples, h, w)
print("Features: %d" % n_features)
print("Total Labels: %d" % n_classes)
print("")
print("Target Name and no of sample images:")
for person_id, person_name in enumerate(lfw_people.target_names):
    sample_count = (y == person_id).sum()
    print("{} has {} samples".format(person_name, sample_count))

# #############################################################################
# Preview the first 16 faces of the dataset, each labelled with the person's name
fig, ax = plt.subplots(4, 4)
plt.subplots_adjust(wspace=0.8, hspace=0.5)

for i, axi in enumerate(ax.flat):
    # The images are single-channel (the stack is n_samples x h x w), so draw
    # them with a grayscale colormap instead of imshow's default false-colour
    # map; this also matches how plot_gallery renders the test portraits.
    axi.imshow(lfw_people.images[i], cmap=plt.cm.gray)
    axi.set(xticks=[], yticks=[], xlabel=lfw_people.target_names[lfw_people.target[i]])
plt.show()

In [111]:
# Count the missing values in each feature column
df = pd.DataFrame(lfw_people.data)
df.isna().sum()

In [112]:
# Show the data type of every feature column
column_dtypes = df.dtypes
print(column_dtypes)

In [113]:
#############################################################################
# Hold out 10% of the samples as the test set, then take 10% of the remainder
# as the validation set; what is left is the training set.

X_cv, X_test, y_cv, y_test = train_test_split(
    X, y, test_size=0.10, train_size=0.90, random_state=42
)
X_train, X_val, y_train, y_val = train_test_split(
    X_cv, y_cv, test_size=0.10, train_size=0.90, random_state=15
)



In [114]:
# #############################################################################
# Scaling and reshaping the data

def _prepare_images(pixels, height, width):
    """Return ``pixels`` scaled to [0, 1] and shaped (n, height, width, 1).

    A new array is produced instead of dividing in place, and the division is
    skipped when the values already lie within [0, 1]. This keeps the cell
    idempotent: running it a second time cannot scale the data twice.

    NOTE(review): depending on the scikit-learn version, the loader may
    already return pixel values in [0, 1] -- confirm; the range check covers
    both cases.
    """
    if pixels.size and pixels.max() > 1.0:
        pixels = pixels / 255
    # Trailing axis of size 1 is the single (grayscale) channel expected by
    # the Conv2D input_shape=(h, w, 1).
    return pixels.reshape(pixels.shape[0], height, width, 1)


X_train = _prepare_images(X_train, h, w)
X_val = _prepare_images(X_val, h, w)
X_test = _prepare_images(X_test, h, w)

In [115]:
# Model-building function for hyperparameter tuning

def build_model(hp):
    """Build and compile a CNN whose layer sizes are drawn from ``hp``.

    Hyperparameters requested from ``hp`` (in this order):
      - conv_1_filter: int, 32..128 in steps of 16
      - conv_1_kernel: 3 or 5
      - conv_2_filter: int, 32..64 in steps of 16
      - conv_2_kernel: 3 or 5
      - dense_1_units: int, 32..128 in steps of 16
      - learning_rate: 1e-2 or 1e-3

    The network is Conv2D -> Conv2D -> Flatten -> Dense -> Dense(softmax) and
    reads the notebook-level ``h``, ``w`` and ``n_classes``.

    Returns the model compiled with Adam, sparse categorical cross-entropy
    loss and the accuracy metric.
    """
    first_conv = keras.layers.Conv2D(
        filters=hp.Int('conv_1_filter', min_value=32, max_value=128, step=16),
        kernel_size=hp.Choice('conv_1_kernel', values=[3, 5]),
        strides=(1, 1),
        activation='relu',
        input_shape=(h, w, 1),
    )
    second_conv = keras.layers.Conv2D(
        filters=hp.Int('conv_2_filter', min_value=32, max_value=64, step=16),
        kernel_size=hp.Choice('conv_2_kernel', values=[3, 5]),
        strides=(1, 1),
        activation='relu',
    )
    flatten = keras.layers.Flatten()
    hidden = keras.layers.Dense(
        units=hp.Int('dense_1_units', min_value=32, max_value=128, step=16),
        activation='relu',
    )
    # One output unit per person; softmax yields class probabilities.
    classifier = keras.layers.Dense(n_classes, activation='softmax')

    cnnmodel = keras.Sequential([first_conv, second_conv, flatten, hidden, classifier])

    step_size = hp.Choice('learning_rate', values=[1e-2, 1e-3])
    cnnmodel.compile(
        optimizer=keras.optimizers.Adam(step_size),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )

    return cnnmodel

In [116]:
# Using random search to identify the best-fitting hyperparameters.
# The seed fixes which hyperparameter combinations are sampled, so a fresh run
# of the notebook explores the same trials.
# NOTE(review): results are stored under output/CourseWork_FaceRec; presumably
# an existing directory is reused on re-run -- confirm, and clear it if the
# search space in build_model changes.
tuner = keras_tuner.RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=5,
    seed=42,
    directory='output',
    project_name="CourseWork_FaceRec",
)

In [117]:
# Run the search (3 epochs per trial, with the validation split supplied for
# scoring), then fetch the top-ranked model and print its architecture.
tuner.search(
    X_train,
    y_train,
    epochs=3,
    validation_data=(X_val, y_val),
)
top_models = tuner.get_best_models(num_models=1)
bestmodel = top_models[0]
bestmodel.summary()

In [118]:
# Continue training the selected model on the training split, monitoring the
# validation split. The epoch counter starts from initial_epoch=3 and training
# runs up to epochs=10.
bestmodel.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    initial_epoch=3,
    epochs=10,
)

In [119]:
# Evaluating the model on the validation dataset.
# The data passed in is the validation split, so the result is named and
# reported as validation (not test) loss/accuracy.
val_loss, val_acc = bestmodel.evaluate(X_val, y_val, verbose=2)
# Former names kept as aliases so any later cell that still reads them works.
test_loss, test_acc = val_loss, val_acc
print('\n Validation accuracy:', val_acc)

In [120]:
# Predicting the model with test data
cnn_y_pred=bestmodel.predict(X_test)
cnn_y_pred=np.argmax(cnn_y_pred, axis=1)
cnn_y_pred

## Confusion Matrix 
cnn_con_matrix = tf.math.confusion_matrix(y_test,cnn_y_pred)

%matplotlib inline
plt.figure(figsize=(10,10))
plt.title('Confusion Matrix for CNN')
sbn.heatmap(cnn_con_matrix, cmap="OrRd", annot=True,
            cbar_kws={"label":"Color Bar"}, fmt='d',
            xticklabels=target_names, yticklabels=target_names)
plt.xlabel('Predicted Value')
plt.ylabel('True Value')
plt.show()

print(classification_report(y_test, cnn_y_pred, target_names=target_names))

# #############################################################################
# Qualitative evaluation of the predictions using matplotlib

def plot_gallery(images, titles, h, w, n_row=3, n_col=4):
    """Helper function to plot a gallery of portraits.

    Parameters
    ----------
    images : indexable of arrays
        Each entry is reshaped to (h, w) before being drawn.
    titles : indexable of str
        Caption for the image at the same index.
    h, w : int
        Height and width each image is reshaped to.
    n_row, n_col : int
        Grid size; at most ``n_row * n_col`` portraits are drawn.

    If fewer images or titles than grid cells are supplied, only the
    available ones are drawn and the remaining cells are left empty.
    """
    plt.figure(figsize=(1.8 * n_col, 2.4 * n_row))
    plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.35)
    # Cap the panel count so a short input cannot be indexed out of range.
    n_panels = min(n_row * n_col, len(images), len(titles))
    for i in range(n_panels):
        plt.subplot(n_row, n_col, i + 1)
        plt.imshow(images[i].reshape((h, w)), cmap=plt.cm.gray)
        plt.title(titles[i], size=12)
        plt.xticks(())
        plt.yticks(())


# plot the result of the prediction on a portion of the test set

def title(cnn_y_pred, y_test, target_names, i):
    """Build the two-line caption for sample ``i``.

    The predicted and true label ids at position ``i`` are looked up in
    ``target_names``; only the text after the last space of each name is
    shown (the whole name if it contains no space).
    """
    def last_word(full_name):
        # Everything after the final space, or the whole string if none.
        return full_name.rpartition(' ')[2]

    predicted = last_word(target_names[cnn_y_pred[i]])
    actual = last_word(target_names[y_test[i]])
    return 'predicted: %s\ntrue:      %s' % (predicted, actual)

# Caption every test prediction, then show the leading test portraits
prediction_titles = []
for sample_idx in range(cnn_y_pred.shape[0]):
    prediction_titles.append(title(cnn_y_pred, y_test, target_names, sample_idx))

plot_gallery(X_test, prediction_titles, h, w)

