In [None]:
# Silence Python warnings for the rest of the notebook session.
import warnings
import logging
# Redirect warnings issued through the `warnings` module to the logging system.
logging.captureWarnings(True)
# Blanket filter: ignore every warning from here on.
warnings.filterwarnings('ignore')
# Smoke test for the filter above: this warning is expected to produce no output.
warnings.warn("This is a DeprecationWarning",category=DeprecationWarning)

In [None]:
# Register one 'ignore' filter per message pattern; `message` is a regular
# expression matched case-insensitively against the start of the warning text.
for message_pattern in ('out of', 'triggered', 'Tracing'):
    warnings.filterwarnings('ignore', message=message_pattern)

In [None]:
pip install mtcnn

In [None]:
# Mount Google Drive at /content/drive; every dataset, model and weight path
# used below lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')

# Data Preprocessing Using FaceNet

In [None]:
import logging
import os

# Quiet TensorFlow before it is imported in the next cell: the environment
# variable is set first, then the Python-side 'tensorflow' logger is raised to FATAL.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # FATAL
logging.getLogger('tensorflow').setLevel(logging.FATAL)
# logging.getLogger("tensorflow").setLevel(logging.ERROR)

In [None]:
import tensorflow as tf
# tf.logging.set_verbosity(tf.logging.ERROR)
# Restrict the compat.v1 TensorFlow logger to ERROR and above.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

In [None]:
from os import listdir
from os.path import isdir
from PIL import Image
from matplotlib import pyplot
from numpy import load
from numpy import savez_compressed
from numpy import asarray
from mtcnn.mtcnn import MTCNN


# extract a single face from a given photograph
_SHARED_DETECTOR = None  # lazily built MTCNN instance reused across extract_face calls


def _shared_detector():
  """Return the notebook-wide MTCNN detector, constructing it on first use."""
  global _SHARED_DETECTOR
  if _SHARED_DETECTOR is None:
    _SHARED_DETECTOR = MTCNN()
  return _SHARED_DETECTOR


def extract_face(filename, required_size=(160, 160), detector=None):
  """Crop the first detected face from an image file and resize it.

  Args:
    filename: path of the image to read.
    required_size: size passed to PIL's resize for the returned crop.
    detector: optional object exposing detect_faces(pixels); when omitted a
      single shared MTCNN instance is reused instead of building a new
      detector for every image.

  Returns:
    Array holding the resized RGB face crop.

  Raises:
    IndexError: if the detector reports no face in the image.
  """
  # the context manager closes the file handle once the pixels are copied out
  with Image.open(filename) as image:
    pixels = asarray(image.convert('RGB'))
  if detector is None:
    detector = _shared_detector()
  results = detector.detect_faces(pixels)
  if not results:
    # same exception type as the previous bare results[0] lookup, with a usable message
    raise IndexError('no face detected in %s' % filename)
  x1, y1, width, height = results[0]['box']
  # compute the far corner from the reported box first, then clamp a negative
  # start to the image edge; taking abs() would shift the whole crop window
  x2, y2 = x1 + width, y1 + height
  x1, y1 = max(x1, 0), max(y1, 0)
  face = pixels[y1:y2, x1:x2]
  image = Image.fromarray(face)
  image = image.resize(required_size)
  return asarray(image)
 
# load images and extract faces for all images in a directory
def load_faces(directory, required_size=(160, 160)):
  """Extract one face from every usable image file in a directory.

  Args:
    directory: folder containing the image files; a trailing path separator
      is optional.
    required_size: size forwarded to extract_face.

  Returns:
    List with one face array per file for which extraction succeeded.
  """
  from os.path import join  # local import keeps this cell self-contained

  faces = list()
  for filename in listdir(directory):
    path = join(directory, filename)
    try:
      face = extract_face(path, required_size)
    except Exception as err:
      # best effort: skip files that cannot be processed, but say so, and let
      # KeyboardInterrupt / SystemExit propagate instead of swallowing them
      print('>skipped %s: %s' % (path, err))
      continue
    faces.append(face)
  return faces
 
# load a dataset 
def load_dataset(directory, required_size=(160, 160)):
  """Collect faces and labels from a directory holding one sub-folder per class.

  Each sub-directory name becomes the label of every face loaded from it;
  entries that are not directories are ignored.
  Returns a pair (faces, labels) of numpy arrays.
  """
  all_faces, all_labels = [], []
  for class_name in listdir(directory):
    class_dir = directory + class_name + '/'
    if isdir(class_dir):
      class_faces = load_faces(class_dir, required_size)
      n_faces = len(class_faces)
      print('>loaded %d examples for class: %s' % (n_faces, class_name))
      all_faces += class_faces
      all_labels += [class_name] * n_faces
  return asarray(all_faces), asarray(all_labels)

In [None]:
from numpy import concatenate
#trainX, trainy = load_dataset('/content/drive/Shared drives/504 proj/OUR_DATASET/lfw_mixed_dataset/train_with_mask_masked_cloth_high/')
#print(trainX.shape, trainy.shape)
#train_data = load('/content/drive/Shared drives/504 proj/lfw_pure_data_train.npz')
#trainX = concatenate((trainX, train_data['arr_0']))
#trainy = concatenate((trainy, train_data['arr_1']))
#savez_compressed('/content/drive/Shared drives/504 proj/lfw_mixed_data_train_cloth_high.npz', trainX, trainy)

In [None]:
#testX, testy = load_dataset('/content/drive/Shared drives/504 proj/OUR_DATASET/lfw_mixed_dataset/test_with_mask_masked_cloth_high/')
#print(testX.shape, testy.shape)
#savez_compressed('/content/drive/Shared drives/504 proj/lfw_mixed_data_test_cloth_high.npz', testX, testy)

In [None]:
from numpy import load
from numpy import expand_dims
from numpy import asarray
from numpy import savez_compressed
from keras.models import load_model
 
# get the face embedding for one face
def get_embedding(model, face_pixels):
	"""Return the embedding `model` predicts for a single face image.

	The pixels are standardized per image (zero mean, unit variance) and passed
	to model.predict as a batch of one.

	Args:
		model: object exposing predict(batch) and returning one row per sample.
		face_pixels: numeric array holding one face image.

	Returns:
		The first (and only) row of the prediction.
	"""
	face_pixels = face_pixels.astype('float32')
	mean, std = face_pixels.mean(), face_pixels.std()
	# floor the divisor so a uniform image (std == 0) yields zeros instead of NaN;
	# 1/sqrt(size) is the floor used by the reference FaceNet prewhitening
	std_adj = max(std, 1.0 / max(face_pixels.size, 1) ** 0.5)
	face_pixels = (face_pixels - mean) / std_adj
	samples = expand_dims(face_pixels, axis=0)
	yhat = model.predict(samples)
	return yhat[0]
 
# read the pre-extracted face crops: arr_0 holds the faces, arr_1 the labels
#data = load('/content/drive/Shared drives/504 proj/pure_dataset_data.npz')
train_data = load('/content/drive/Shared drives/504 proj/lfw_mixed_data_train_cloth_high.npz')
test_data = load('/content/drive/Shared drives/504 proj/lfw_mixed_data_test_cloth_high.npz')
trainX, trainy = (train_data[key] for key in ('arr_0', 'arr_1'))
testX, testy = (test_data[key] for key in ('arr_0', 'arr_1'))
#trainX, trainy, testX, testy = data['arr_0'], data['arr_1'], data['arr_2'], data['arr_3']
print('Loaded: ', trainX.shape, trainy.shape, testX.shape, testy.shape)

# FaceNet model that turns each face crop into an embedding vector
model = load_model('/content/drive/Shared drives/504 proj/facenet_keras.h5')
print('Loaded Model')

# embed every training face (one predict call per image)
newTrainX = asarray([get_embedding(model, face) for face in trainX])
print(newTrainX.shape)

# same conversion for the test faces
newTestX = asarray([get_embedding(model, face) for face in testX])
print(newTestX.shape)

In [None]:
# model.summary()

In [None]:
#savez_compressed('/content/drive/Shared drives/504 proj/faces-embeddings_lfw_mixed_dataset_cloth_high.npz', newTrainX, trainy, newTestX, testy)

In [None]:
from numpy import load
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import Normalizer
from sklearn.svm import SVC
# read the saved embeddings and labels for both splits
data = load('/content/drive/Shared drives/504 proj/faces-embeddings_lfw_mixed_dataset_cloth_high.npz')
trainX, trainy, testX, testy = (data[key] for key in ('arr_0', 'arr_1', 'arr_2', 'arr_3'))
#import numpy
#with numpy.printoptions(threshold=numpy.inf):
#    print(trainy)

# scale every embedding vector to unit L2 length
in_encoder = Normalizer(norm='l2')
trainX, testX = in_encoder.transform(trainX), in_encoder.transform(testX)
# encode the labels as integer ids, using the classes present in the training set
out_encoder = LabelEncoder().fit(trainy)
trainy, testy = out_encoder.transform(trainy), out_encoder.transform(testy)


In [None]:
# SVM linear
model = SVC(kernel='linear', probability=True).fit(trainX, trainy)
# class predictions for both splits
yhat_train, yhat_test = model.predict(trainX), model.predict(testX)
# accuracy per split, scaled to a percentage only for display
score_train, score_test = accuracy_score(trainy, yhat_train), accuracy_score(testy, yhat_test)
print('Accuracy: train=%.3f, test=%.3f' % (score_train*100, score_test*100))

In [None]:
# SVM kernel
# degree-2 polynomial kernel on the same normalized embeddings
model = SVC(kernel='poly', probability=True, degree=2).fit(trainX, trainy)
yhat_train, yhat_test = model.predict(trainX), model.predict(testX)
score_train, score_test = accuracy_score(trainy, yhat_train), accuracy_score(testy, yhat_test)
print('Accuracy: train=%.3f, test=%.3f' % (score_train*100, score_test*100))

In [None]:
# Neural network
from sklearn.neural_network import MLPClassifier
# one hidden layer of 256 units trained with Adam; the seed is fixed for repeatability
NN = MLPClassifier(solver='adam', alpha=1e-5, hidden_layer_sizes=256, random_state=2)
NN.fit(trainX, trainy)
yhat_train, yhat_test = NN.predict(trainX), NN.predict(testX)
score_train, score_test = accuracy_score(trainy, yhat_train), accuracy_score(testy, yhat_test)
print('Accuracy: train=%.3f, test=%.3f' % (score_train*100, score_test*100))

In [None]:
# KNN
from sklearn.neighbors import KNeighborsClassifier
# 4 nearest neighbours, votes weighted by inverse distance
neigh = KNeighborsClassifier(n_neighbors=4,weights='distance')
neigh.fit(trainX, trainy)
yhat_train, yhat_test = neigh.predict(trainX), neigh.predict(testX)
score_train, score_test = accuracy_score(trainy, yhat_train), accuracy_score(testy, yhat_test)
print('Accuracy: train=%.3f, test=%.3f' % (score_train*100, score_test*100))

In [None]:
# Random Forest
from sklearn.ensemble import RandomForestClassifier
# 256 trees split on entropy; the seed is fixed for repeatability
rf = RandomForestClassifier(n_estimators=256,criterion='entropy',random_state=3)
rf.fit(trainX, trainy)
yhat_train, yhat_test = rf.predict(trainX), rf.predict(testX)
score_train, score_test = accuracy_score(trainy, yhat_train), accuracy_score(testy, yhat_test)
print('Accuracy: train=%.3f, test=%.3f' % (score_train*100, score_test*100))

# Test

In [None]:

from random import choice
from numpy import load
from numpy import expand_dims
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import Normalizer
from sklearn.svm import SVC
from matplotlib import pyplot
# load faces
# The face crops must come from the same files the embeddings below were
# computed from (the *_cloth_high sets); otherwise testX_faces[i] would not be
# the image behind testX[i] when a random example is displayed.
#data = load('/content/drive/Shared drives/504 proj/mixed_dataset_data.npz')
train_data = load('/content/drive/Shared drives/504 proj/lfw_mixed_data_train_cloth_high.npz')
test_data = load('/content/drive/Shared drives/504 proj/lfw_mixed_data_test_cloth_high.npz')
testX_faces = test_data['arr_0']
# load face embeddings
data = load('/content/drive/Shared drives/504 proj/faces-embeddings_lfw_mixed_dataset_cloth_high.npz')
trainX, trainy, testX, testy = data['arr_0'], data['arr_1'], data['arr_2'], data['arr_3']
# the display cell indexes faces and embeddings with the same position
assert testX_faces.shape[0] == testX.shape[0], 'face crops and embeddings are not aligned'
# normalize input vectors
in_encoder = Normalizer(norm='l2')
trainX = in_encoder.transform(trainX)
testX = in_encoder.transform(testX)
# label encode targets
out_encoder = LabelEncoder()
out_encoder.fit(trainy)
trainy = out_encoder.transform(trainy)
testy = out_encoder.transform(testy)
# fit model
model = SVC(kernel='linear', probability=True)
model.fit(trainX, trainy)

In [None]:
# pick one random test example and gather its pixels, embedding and label
selection = choice(range(testX.shape[0]))
random_face_pixels, random_face_emb, random_face_class = testX_faces[selection], testX[selection], testy[selection]
random_face_name = out_encoder.inverse_transform([random_face_class])
# classify the embedding; predict expects a batch, hence the added leading axis
samples = expand_dims(random_face_emb, axis=0)
yhat_class = model.predict(samples)

# decode the predicted class id back into a name
predict_names = out_encoder.inverse_transform(yhat_class)
print('Predicted: %s' % (predict_names[0]))
print('Expected: %s' % random_face_name[0])
# show the face with the predicted and expected names as the title
title = 'Predicted: %s\n' % (predict_names[0]) + 'Expected: %s' % (random_face_name[0])
pyplot.imshow(random_face_pixels)
pyplot.title(title)
pyplot.show()

## VGG-Face


In [None]:
# VGG-Face
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import ZeroPadding2D
from tensorflow.keras.layers import Convolution2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Activation
from keras.models import model_from_json
from keras.models import Model

# Rebuild the VGG-Face network layer by layer so the pre-trained weights can be loaded.
# Each entry is (filters, number of 3x3 convolutions) for one convolutional stage;
# every convolution is preceded by 1-pixel zero padding and every stage ends
# with 2x2 max pooling.
VGG_CONV_STAGES = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

model = keras.Sequential()
is_input_layer = True
for stage_filters, stage_convs in VGG_CONV_STAGES:
  for conv_index in range(stage_convs):
    if is_input_layer:
      # only the very first layer declares the 224x224 RGB input shape
      model.add(ZeroPadding2D((1,1),input_shape=(224, 224, 3)))
      is_input_layer = False
    else:
      model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(stage_filters, (3, 3), activation='relu'))
  model.add(MaxPooling2D((2,2), strides=(2,2)))

# classifier head expressed as convolutions, ending in a 2622-way softmax
model.add(Convolution2D(4096, (7, 7), activation='relu'))
model.add(Dropout(0.5))
model.add(Convolution2D(4096, (1, 1), activation='relu'))
model.add(Dropout(0.5))
model.add(Convolution2D(2622, (1, 1)))
model.add(Flatten())
model.add(Activation('softmax'))

model.load_weights('/content/drive/Shared drives/504 proj/vgg_face_weights.h5')
#model.summary()

# Descriptor network: same input, output taken at layers[-2] (the Flatten
# layer), i.e. the 2622 values that feed the softmax. It is built with
# tf.keras' Model because the layers above come from tensorflow.keras; mixing
# in the standalone `keras` Model class can fail when the two packages differ.
vgg_face_descriptor = keras.Model(inputs=model.layers[0].input,outputs=model.layers[-2].output)

In [None]:
#print('Loaded Data')
#trainX, trainy = load_dataset('/content/drive/Shared drives/504 proj/data/train/', (224,224))
#print(trainX.shape, trainy.shape)
#testX, testy = load_dataset('/content/drive/Shared drives/504 proj/data/val/', (224,224))
#print(testX.shape, testy.shape)

#savez_compressed('/content/drive/Shared drives/504 proj/data-VGGface.npz', trainX, trainy, testX, testy)

In [None]:
# face crops and labels prepared for VGG-Face: train faces/labels, then test faces/labels
data = load('/content/drive/Shared drives/504 proj/data-VGGface.npz')
trainX, trainy, testX, testy = data['arr_0'], data['arr_1'], data['arr_2'], data['arr_3']
print('Loaded: ', trainX.shape, trainy.shape, testX.shape, testy.shape)

# get the face embedding for one face
def get_embedding_VGG(model, face_pixels):
	"""Return the descriptor `model` predicts for a single face image.

	The pixels are standardized per image (zero mean, unit variance) and passed
	to model.predict as a batch of one.

	Args:
		model: network used for the prediction. The parameter is now honoured;
			previously it was ignored in favour of the global vgg_face_descriptor.
		face_pixels: numeric array holding one face image.

	Returns:
		The first (and only) row of the prediction.
	"""
	face_pixels = face_pixels.astype('float32')
	mean, std = face_pixels.mean(), face_pixels.std()
	# floor the divisor so a uniform image (std == 0) yields zeros instead of NaN
	std_adj = max(std, 1.0 / max(face_pixels.size, 1) ** 0.5)
	face_pixels = (face_pixels - mean) / std_adj
	samples = expand_dims(face_pixels, axis=0)
	yhat = model.predict(samples)
	return yhat[0]

# convert each face in the train set to an embedding; the descriptor network is
# passed explicitly so the output is the pre-softmax vector, as before
newTrainX = list()
for face_pixels in trainX:
	embedding = get_embedding_VGG(vgg_face_descriptor, face_pixels)
	newTrainX.append(embedding)
newTrainX = asarray(newTrainX)
print(newTrainX.shape)

# convert each face in the test set to an embedding
newTestX = list()
for face_pixels in testX:
	embedding = get_embedding_VGG(vgg_face_descriptor, face_pixels)
	newTestX.append(embedding)
newTestX = asarray(newTestX)
print(newTestX.shape)

# persist the embeddings together with their labels for the classifier cells
savez_compressed('/content/drive/Shared drives/504 proj/faces-embeddings-VGGface.npz', newTrainX, trainy, newTestX, testy)

In [None]:
from numpy import load
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import Normalizer
from sklearn.svm import SVC
# read the saved VGG-Face embeddings and labels for both splits
data = load('/content/drive/Shared drives/504 proj/faces-embeddings-VGGface.npz')
trainX, trainy, testX, testy = (data[key] for key in ('arr_0', 'arr_1', 'arr_2', 'arr_3'))
# scale every embedding vector to unit L2 length
in_encoder = Normalizer(norm='l2')
trainX, testX = in_encoder.transform(trainX), in_encoder.transform(testX)
# encode the labels as integer ids, using the classes present in the training set
out_encoder = LabelEncoder().fit(trainy)
trainy, testy = out_encoder.transform(trainy), out_encoder.transform(testy)

In [None]:
# SVM linear
model = SVC(kernel='linear', probability=True).fit(trainX, trainy)
# class predictions for both splits
yhat_train, yhat_test = model.predict(trainX), model.predict(testX)
# accuracy per split, scaled to a percentage only for display
score_train, score_test = accuracy_score(trainy, yhat_train), accuracy_score(testy, yhat_test)
print('Accuracy: train=%.3f, test=%.3f' % (score_train*100, score_test*100))

In [None]:
# SVM kernel
# degree-3 polynomial kernel on the same normalized embeddings
model = SVC(kernel='poly', probability=True, degree=3).fit(trainX, trainy)
yhat_train, yhat_test = model.predict(trainX), model.predict(testX)
score_train, score_test = accuracy_score(trainy, yhat_train), accuracy_score(testy, yhat_test)
print('Accuracy: train=%.3f, test=%.3f' % (score_train*100, score_test*100))

In [None]:
# Neural network
from sklearn.neural_network import MLPClassifier
# one hidden layer of 2262 units trained with Adam; the seed is fixed for repeatability
NN = MLPClassifier(solver='adam', alpha=1e-5, hidden_layer_sizes=2262, random_state=4)
NN.fit(trainX, trainy)
yhat_train, yhat_test = NN.predict(trainX), NN.predict(testX)
score_train, score_test = accuracy_score(trainy, yhat_train), accuracy_score(testy, yhat_test)
print('Accuracy: train=%.3f, test=%.3f' % (score_train*100, score_test*100))

In [None]:
# KNN
from sklearn.neighbors import KNeighborsClassifier
# 3 nearest neighbours, votes weighted by inverse distance
neigh = KNeighborsClassifier(n_neighbors=3,weights='distance').fit(trainX, trainy)
# predict with the KNN model fitted above; the previous version called
# NN.predict here and so re-reported the neural network's accuracy
yhat_train = neigh.predict(trainX)
yhat_test = neigh.predict(testX)
score_train = accuracy_score(trainy, yhat_train)
score_test = accuracy_score(testy, yhat_test)
print('Accuracy: train=%.3f, test=%.3f' % (score_train*100, score_test*100))

In [None]:
# Random Forest
from sklearn.ensemble import RandomForestClassifier
# 128 trees split on entropy; the seed is fixed for repeatability
rf = RandomForestClassifier(n_estimators=128,criterion='entropy',random_state=2)
rf.fit(trainX, trainy)
yhat_train, yhat_test = rf.predict(trainX), rf.predict(testX)
score_train, score_test = accuracy_score(trainy, yhat_train), accuracy_score(testy, yhat_test)
print('Accuracy: train=%.3f, test=%.3f' % (score_train*100, score_test*100))