# ws 02 Face embedding

Download the pretrained FaceNet Keras model (`facenet_keras.h5`) from https://www.kaggle.com/suicaokhoailang/facenet-keras

In [None]:
# Install the MTCNN package used below for face detection (-q keeps pip quiet).
!pip install mtcnn -q

In [None]:
# Mount Google Drive: the datasets and the model file are read from it.
from google.colab import drive
drive.mount('/content/drive/')

# List the contents of the datasets folder on the mounted drive.
!ls '/content/drive/MyDrive/Colab Notebooks/datasets'

In [None]:
import numpy as np

from PIL import Image 
from mtcnn.mtcnn import MTCNN
from tqdm import tqdm


from os import listdir
from tensorflow.keras.models import load_model


In [None]:
# Root folder of the face dataset on the mounted drive (note the trailing slash:
# paths below are built by plain string concatenation).
rootdir = "/content/drive/MyDrive/Colab Notebooks/datasets/face/"

# Sample image used by the detection and embedding cells that follow.
filename = rootdir + 'train/Kobkiat/kobkiat1.jpg' 

im = Image.open(filename).convert("RGB")
print('img size:', im.size)

# Shrink factor applied to both width and height of the preview only;
# extract_face() re-opens the file from disk rather than using `im`.
scale = 0.4

im = im.resize((round(im.size[0]*scale), round(im.size[1]*scale)))
print('img size:', im.size)
im  # last expression of the cell -> rendered inline

In [None]:
detector = MTCNN()

def extract_face(filename, required_size=(160, 160), detector=detector):
    """Detect a face in an image file and return it as a fixed-size RGB array.

    Parameters
    ----------
    filename : str
        Path of the image file to open.
    required_size : tuple of int
        ``(width, height)`` the cropped face is resized to.
    detector : object
        Face detector exposing ``detect_faces(array)`` that returns a list of
        dicts with a ``'box'`` entry laid out as ``[x, y, width, height]``.
        Defaults to the notebook-level MTCNN instance so it is built only once.

    Returns
    -------
    numpy.ndarray
        The resized face crop as an array.

    Raises
    ------
    ValueError
        If the detector finds no face in the image.

    Notes
    -----
    Only the first detection returned by the detector is used.
    """
    # Close the file handle deterministically; convert() returns an
    # independent in-memory copy, so it stays valid after the `with` block.
    with Image.open(filename) as source:
        image = source.convert('RGB')
    im_arr = np.array(image)
    faces = detector.detect_faces(im_arr)
    if not faces:
        # The original indexed faces[0] blindly and died with a bare IndexError.
        raise ValueError(f'no face detected in {filename!r}')

    x1, y1, width, height = faces[0]['box']
    # Compute the far corner from the raw box first, then clamp the origin:
    # MTCNN may report slightly negative x/y for faces touching the image
    # border, and a negative slice start would wrap around and yield an empty
    # (or wrong) crop.
    x2, y2 = x1 + width, y1 + height
    x1, y1 = max(x1, 0), max(y1, 0)
    face = im_arr[y1:y2, x1:x2]

    image = Image.fromarray(face)
    image = image.resize(required_size)
    face_array = np.asarray(image)
    return face_array

In [None]:
# Crop the face from the sample image and display it inline.
face_pixels = extract_face(filename)
print(face_pixels.shape)
Image.fromarray(face_pixels)

## Embedding

In [None]:
# Folder on the mounted drive that holds the downloaded facenet_keras.h5 file.
modelpath = '/content/drive/MyDrive/Colab Notebooks/'

facenet_model = load_model(modelpath + 'facenet_keras.h5')

# Show the model's input and output tensors.
print(facenet_model.inputs)
print(facenet_model.outputs)

In [None]:
# Value range of the cropped face pixels at this point in the notebook.
print('min max:', face_pixels.min(), face_pixels.max())

In [None]:
# Standardise a float copy of the face with its own mean / standard deviation.
# The result is kept under a separate name so `face_pixels` still holds the
# raw crop: the original cell overwrote it, which made this cell
# non-idempotent and broke re-running the earlier display / min-max cells.
face_std = face_pixels.astype('float32')
mean = face_std.mean()
std = face_std.std()
face_std = (face_std - mean) / std
# The model expects a batch, so add a leading axis of length 1.
sample = np.expand_dims(face_std, axis=0)
emb_face = facenet_model.predict(sample)
emb_face.shape

In [None]:
# First ten components of the embedding of the single face in the batch.
emb_face[0][:10]

In [None]:
# Smallest and largest component of the embedding.
np.min(emb_face), np.max(emb_face) 

## ws03 similarity

In [None]:
def get_embedding(facenet_model, face):
    """Return the embedding vector produced by ``facenet_model`` for one face.

    The face array is cast to float32 and standardised with its own mean and
    standard deviation (taken over every pixel and channel), a leading batch
    axis is added, and the first row of ``facenet_model.predict`` is returned.
    """
    pixels = face.astype('float32')
    pixels = (pixels - pixels.mean()) / pixels.std()
    batch = pixels[np.newaxis, ...]
    return facenet_model.predict(batch)[0]

def euclideanDistance(source_represent, test_represent):
    """Return the Euclidean (L2) distance between two embedding arrays."""
    delta = source_represent - test_represent
    return np.sqrt(np.sum(delta * delta))

def dist_face(model, file1, file2):
    """Return the Euclidean distance between the faces found in two image files.

    Each file is passed through ``extract_face`` and then ``get_embedding``
    (first ``file1``, then ``file2``) before the two embeddings are compared.
    """
    emd1, emd2 = (
        get_embedding(model, extract_face(path)) for path in (file1, file2)
    )
    return euclideanDistance(emd1, emd2)

In [None]:
# Two image paths built directly under rootdir, compared by embedding distance.
kob1 = rootdir + 'kobkiat7.jpg'
kob2 = rootdir + 'kobkiat9.jpg'

# A smaller distance means the two embeddings are closer together.
print(dist_face(facenet_model, kob1, kob2))

In [None]:
def cosineSimilarity(source_represent, test_represent):
    """Return the cosine similarity of two embedding vectors.

    Computed as the dot product of the vectors divided by the product of
    their Euclidean norms.
    """
    dot = np.matmul(np.transpose(source_represent), test_represent)
    norm_source = np.sqrt(np.sum(np.square(source_represent)))
    norm_test = np.sqrt(np.sum(np.square(test_represent)))
    return dot / (norm_source * norm_test)
 
def cosine_face(model, file1, file2):
    """Return the cosine similarity between the faces found in two image files.

    Each file is passed through ``extract_face`` and then ``get_embedding``
    (first ``file1``, then ``file2``) before the two embeddings are compared.
    """
    emd1, emd2 = (
        get_embedding(model, extract_face(path)) for path in (file1, file2)
    )
    return cosineSimilarity(emd1, emd2)

# `ben1` was used here without ever being defined, so on a fresh kernel this
# cell stopped with NameError.
# TODO(review): the file name below is a placeholder - point it at an image
# of a different person than the one in kob1 / kob2.
ben1 = rootdir + 'ben1.jpg'

# Score the same-person pair once and reuse it for the verdict below; each
# cosine_face() call runs face detection and the embedding model twice.
cosine_similar = cosine_face(facenet_model, kob1, kob2)
print(cosine_similar)
print(cosine_face(facenet_model, kob1, ben1))

print("cosine similarity: ", cosine_similar)
# Decision threshold: similarities at or above it count as the same person.
threshold = 0.5
if cosine_similar >= threshold:
    print("verified...  same person")
else:
    print("unverified!  Different person!")

# ws 04 Face recognition

Download the face images from https://www.kaggle.com/dansbecker/5-celebrity-faces-dataset

In [None]:
# Upload face_celebs_110.zip to the Drive datasets folder first, then extract
# it in place (-q: quiet, -d: destination directory).

!unzip -q "/content/drive/MyDrive/Colab Notebooks/datasets/face_celebs_110.zip" -d "/content/drive/MyDrive/Colab Notebooks/datasets/"

In [None]:
# List the entries of the extracted train/ folder.
!ls "/content/drive/MyDrive/Colab Notebooks/datasets/face_celebs_110/train/"

In [None]:
# Re-point rootdir at the extracted face_celebs_110 dataset.
# NOTE: this rebinds the name used by the earlier cells; re-running those
# afterwards would build their paths under this folder instead.
rootdir = "/content/drive/MyDrive/Colab Notebooks/datasets/face_celebs_110/"

Loading the training set takes about 30 minutes (roughly 1 s per face).

In [None]:
def load_face(dir):
    """Run ``extract_face`` on every entry listed in ``dir``.

    ``dir`` must end with a path separator, because each entry name is
    appended to it by plain string concatenation. Progress is shown with
    tqdm. Returns the extracted faces as a list, in ``listdir`` order.
    """
    return [extract_face(dir + name) for name in tqdm(listdir(dir))]

def load_dataset(dir):
    """Load every face under ``dir``, using each sub-folder name as the label.

    Entries of ``dir`` are visited in sorted order; each is treated as a class
    folder and handed to ``load_face``. ``dir`` must end with a path
    separator. Returns ``(X, y)`` as numpy arrays: the faces and, aligned with
    them, the folder name each face came from.
    """
    X, y = [], []
    for class_name in sorted(listdir(dir)):
        class_faces = load_face(dir + class_name + '/')
        print("loaded %d sample(s) for class: %s" % (len(class_faces), class_name))
        X.extend(class_faces)
        # One label per face that was loaded for this class.
        y.extend([class_name] * len(class_faces))
    return np.asarray(X), np.asarray(y)

# Extract the faces of the training split (one sub-folder per class).
trainX, trainy = load_dataset(rootdir + 'train/')
print('X shape, y shape:', trainX.shape, trainy.shape)
# Extract the faces of the validation split, used as the test set below.
testX, testy = load_dataset(rootdir + 'val/')
print(testX.shape, testy.shape)

# Cache the extracted faces so the slow detection step need not be repeated.
# The arrays are passed positionally, so they are read back as arr_0..arr_3.
# The path is relative to the current working directory.
np.savez_compressed('5_cele_faces_dataset.npz', trainX, trainy, testX, testy)

In [None]:
# Reload the cached face arrays written by the previous cell.
data = np.load('5_cele_faces_dataset.npz')

# arr_0..arr_3 follow the positional order used when saving:
# train faces, train labels, test faces, test labels.
trainX, trainy, testX, testy = data['arr_0'], data['arr_1'], data['arr_2'], data['arr_3'] 
print('Loaded: ', trainX.shape, trainy.shape, testX.shape, testy.shape)

## Embedding

In [None]:
# Load the FaceNet model again (same file as in the embedding section above),
# so this section can be run without the earlier cell.
modelpath = '/content/drive/MyDrive/Colab Notebooks/'

facenet_model = load_model(modelpath + 'facenet_keras.h5')
print('Loaded Model')

In [None]:
# Print the input shape of the first layer and the size of the last layer's
# output. facenet_model is referenced directly instead of through an alias
# named `model`, because that name is bound to the SVC classifier later in the
# notebook and reusing it for two different kinds of object is error-prone.
print(facenet_model.layers[0].input_shape[0])
print(facenet_model.layers[-1].output_shape[-1])

In [None]:
# Embed every training face, one predict() call per face.
emdTrainX = list()
for face in tqdm(trainX):
    emd = get_embedding(facenet_model, face)
    emdTrainX.append(emd)
    
emdTrainX = np.asarray(emdTrainX)
print('Train Embedding shape', emdTrainX.shape)

# Embed every test face the same way.
# NOTE: the loop variable `emd` stays bound after this cell and is read by a
# later shape-check cell, so keep the explicit loops (or update that cell).
emdTestX = list()
for face in tqdm(testX):
    emd = get_embedding(facenet_model, face)
    emdTestX.append(emd)
emdTestX = np.asarray(emdTestX)
print('\nTest Embedding shape',emdTestX.shape)

# Cache embeddings and labels; arrays are passed positionally (arr_0..arr_3).
np.savez_compressed('5_celeb_faces_embeddings.npz', 
                    emdTrainX, trainy, emdTestX, testy)

In [None]:
# Embedding matrices and their label arrays should agree on the first axis.
print(emdTrainX.shape, trainy.shape)
print(emdTestX.shape, testy.shape)

In [None]:
# First five distinct class names (np.unique returns them sorted).
np.unique(trainy)[:5]

In [None]:
# Number of distinct classes in the training labels.
len(np.unique(trainy))  # classes

## Train

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import Normalizer
from sklearn.svm import SVC

# Rescale every embedding row with sklearn's Normalizer (default settings).
in_encoder = Normalizer()
emdTrainX_norm = in_encoder.transform(emdTrainX)
emdTestX_norm = in_encoder.transform(emdTestX)

# Map class-name strings to integer ids; the encoder is fitted on the training
# labels only and then applied to both splits.
out_encoder = LabelEncoder()
out_encoder.fit(trainy)
trainy_enc = out_encoder.transform(trainy)
testy_enc = out_encoder.transform(testy)

# Fit a linear SVM on the normalised embeddings. probability=True enables
# predict_proba(), which later cells use. From here on `model` is the SVC.
model = SVC(kernel='linear', probability=True)
model.fit(emdTrainX_norm, trainy_enc)

# Predict class ids for both splits.
yhat_train = model.predict(emdTrainX_norm)
yhat_test = model.predict(emdTestX_norm)

# Accuracy on each split, as a fraction in [0, 1].
score_train = accuracy_score(trainy_enc, yhat_train)
score_test = accuracy_score(testy_enc, yhat_test)

# Multiplied by 100, so the printed values are percentages.
print('Accuracy: train=%.3f, test=%.3f' % (score_train*100, score_test*100))

In [None]:
from random import choice
import matplotlib.pyplot as plt

# Pick one test sample at random (no seed is set in this cell, so the pick
# differs between runs) and gather its image, embedding and true label.
selection = choice([i for i in range(testX.shape[0])])
random_face = testX[selection]
random_face_emd = emdTestX_norm[selection]
random_face_class = testy_enc[selection]
random_face_name = out_encoder.inverse_transform([random_face_class])

# Classify the single embedding; add a leading batch axis first.
samples = np.expand_dims(random_face_emd, axis=0)
yhat_class = model.predict(samples)
yhat_prob = model.predict_proba(samples)

# Predicted class id, its probability in percent, and its name.
class_index = yhat_class[0]
class_probability = yhat_prob[0,class_index] * 100
predict_names = out_encoder.inverse_transform(yhat_class)

# Names of all training classes, ordered by encoded id; reused by a later cell.
all_names = out_encoder.inverse_transform(np.unique(trainy_enc))

print('Predicted: \n%s \n%s' % (all_names, yhat_prob[0]*100))
print('Expected: %s' % random_face_name[0])

# Show the face with the predicted name and probability as the title.
plt.imshow(random_face)

title = f'{predict_names[0]} {class_probability:.2f}'
plt.title(title)
plt.show()

In [None]:
# Image to classify with the trained SVM; the path is built under the current
# rootdir (the face_celebs_110 dataset).
filename = rootdir + 'val/name/kobkiat.jpg'

im = Image.open(filename).convert("RGB")
print('img size:', im.size)
im  # last expression of the cell -> rendered inline

In [None]:
# Crop the face from the new image and display it inline.
im_arr = extract_face(filename)
print(im_arr.shape)
Image.fromarray(im_arr)

In [None]:
# Compare the embedding matrices with the shape of a single embedding.
# The single-embedding shape is read from emdTestX itself: the original
# printed `emd`, a loop variable leaked from the embedding cell, whose value
# (and even existence) depends on which cells were run last.
print(emdTestX.shape, emdTestX_norm.shape)
print(emdTestX[-1].shape)

In [None]:
# Embed the new face and add a leading axis so it forms a one-row batch.
emd = get_embedding(facenet_model, im_arr)
emd = np.expand_dims(emd, axis=0)  # add batch axis
emd_norm = in_encoder.transform(emd)

# Classify with the trained SVC (`model`) and get per-class probabilities.
yhat_class = model.predict(emd_norm)
yhat_prob = model.predict_proba(emd_norm)

# Predicted class id, its probability in percent, and its name.
class_index = yhat_class[0]
class_probability = yhat_prob[0,class_index] * 100
predict_names = out_encoder.inverse_transform(yhat_class)
print(f'predicted: {predict_names[0]} {class_probability:.2f}%')  # name and probability

In [None]:
# Every class name next to its probability in percent, rounded to 2 decimals.
# Uses all_names from the random-test-face cell and the latest yhat_prob.
print('Predicted: \n%s \n%s' % (all_names, (yhat_prob[0]*100).round(2)))

In [None]:
# Same prediction steps as the earlier cell for the new face, repeated here.
emd = get_embedding(facenet_model, im_arr)
emd = np.expand_dims(emd, axis=0)  # add batch axis: (128,) -> (1, 128)
emd_norm = in_encoder.transform(emd)

# Classify with the trained SVC (`model`) and get per-class probabilities.
yhat_class = model.predict(emd_norm)
yhat_prob = model.predict_proba(emd_norm)

# Predicted class id, its probability in percent, and its name.
class_index = yhat_class[0]
class_probability = yhat_prob[0,class_index] * 100
predict_names = out_encoder.inverse_transform(yhat_class)
print(f'predicted: {predict_names[0]} {class_probability:.2f}%')  # name and probability

## t-SNE

In [None]:
from sklearn.preprocessing import LabelEncoder

# Re-create the label encoder from the training labels (same steps as in the
# training section) so this section can run on its own.
out_encoder = LabelEncoder()
out_encoder.fit(trainy)
trainy_enc = out_encoder.transform(trainy)

# Shape of the encoded labels and the distinct class ids.
trainy_enc.shape, np.unique(trainy_enc)

In [None]:
from sklearn.preprocessing import Normalizer

# Re-create the normalised embeddings (same steps as in the training section).
in_encoder = Normalizer()
emdTrainX_norm = in_encoder.transform(emdTrainX)
emdTestX_norm = in_encoder.transform(emdTestX)

In [None]:
from sklearn.manifold import TSNE

import seaborn as sns
import pandas as pd

# One palette colour per class.
n_classes = len(np.unique(trainy_enc))

# `plt` comes from the matplotlib import in the random-test-face cell above.
plt.figure(figsize=(6.5,4.5))

y_train = trainy_enc

# Despite the name, this holds the normalised face embeddings, not MNIST data.
x_mnist = emdTrainX_norm  

print('X shape', x_mnist.shape)

# Project the embeddings to 2-D; random_state fixes the seed passed to TSNE.
tsne = TSNE(n_components=2, verbose=1, random_state=123)
z = tsne.fit_transform(x_mnist)
 
# Collect encoded labels, class names and the two t-SNE components.
df = pd.DataFrame()
df["y"] = y_train
df['y_cls'] = trainy
df["comp-1"] = z[:,0]
df["comp-2"] = z[:,1]


sns.scatterplot(x="comp-1", y="comp-2", s=30, hue=df.y_cls.tolist(),  # colour points by class name
                palette=sns.color_palette("hls", n_classes),
                # legend=False,
                data=df).set(title="t-SNE projection")

# Put the legend out of the figure
plt.legend(bbox_to_anchor=(1.02, 1), loc=2, borderaxespad=0.)
