In [None]:
import os
import cv2
import numpy as np
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import seaborn as sns
import insightface

In [None]:
# Dataset locations. `image_dir_path` selects the split that the rest of the
# notebook scans; it currently points at the training split.
image_train_path = '/DATA/datasets/celebrity_identification/train/'
image_test_path = '/DATA/datasets/celebrity_identification/test/'
image_dir_path = image_train_path

# Keep only sub-directories of the scanned split; their names are used as the
# labels further down. The directory test is done against the same path that
# is listed (previously it was checked against `image_train_path`, which would
# silently break if `image_dir_path` were pointed at another split).
# Sorted so the label order does not depend on filesystem listing order.
names = sorted(
    name
    for name in os.listdir(image_dir_path)
    if os.path.isdir(os.path.join(image_dir_path, name))
)
print(names)

In [None]:
# Resolve the local directory of the "buffalo_l" model pack, then load the
# face detector shipped inside it.
model_dir_path = insightface.utils.download("models", "buffalo_l")
detector = insightface.model_zoo.RetinaFace(
    model_file=f"{model_dir_path}/det_10g.onnx"
)
# NOTE(review): ctx_id -1 presumably selects CPU execution — confirm against
# the installed insightface version.
detector.prepare(-1)

In [None]:
# Load the recognition model from the same model pack directory; it is used
# below to turn each detected face into an embedding vector.
embedding_extractor = insightface.model_zoo.ArcFaceONNX(
    model_file=f"{model_dir_path}/w600k_r50.onnx"
)
# NOTE(review): ctx_id -1 presumably selects CPU execution — confirm.
embedding_extractor.prepare(-1)

In [None]:
# Build one face embedding per usable image.
# `faces_emb` and `label` are parallel lists: faces_emb[i] is the embedding
# extracted from an image stored in the directory named label[i].
faces_emb = []
label = []
for name in names:
    # os.path.join works whether or not image_dir_path ends with a separator.
    name_dir = os.path.join(image_dir_path, name)
    # Sorted so the sample order is the same on every run.
    image_filepaths = [os.path.join(name_dir, f)
                       for f in sorted(os.listdir(name_dir))]

    for image_file_path in image_filepaths:
        print(image_file_path)
        image = cv2.imread(image_file_path)
        # cv2.imread signals failure by returning None instead of raising
        # (unreadable file, sub-directory, non-image file); skip those
        # entries rather than crashing inside the detector.
        if image is None:
            print(f"Skipping unreadable image: {image_file_path}")
            continue

        bboxes, kpss = detector.detect(image, (640, 640))
        # Take only pictures with exactly one face detected, so the label
        # of the directory unambiguously belongs to that face.
        if bboxes.shape[0] != 1:
            continue

        # Each detection row holds the box in columns 0-3 and its score in column 4.
        bbox = bboxes[0, 0:4]
        det_score = bboxes[0, 4]
        kps = kpss[0]
        face = insightface.app.common.Face(bbox=bbox, kps=kps, det_score=det_score)
        embedding = embedding_extractor.get(image, face)
        faces_emb.append(embedding)
        label.append(name)

In [None]:
# Number of embeddings collected (one per image with exactly one detected face).
len(faces_emb)

In [None]:
# Number of labels collected; a label is appended together with every
# embedding, so this should equal len(faces_emb).
len(label)

In [None]:
# Turn the parallel Python lists into NumPy arrays:
# the embeddings become the t-SNE input, the labels drive the plot colouring.
faces_emb_train_np = np.asarray(faces_emb)
label_emb_train_np = np.asarray(label)

In [None]:
# Shape of the stacked embedding array
# (presumably (n_images, embedding_dim) — confirm from the output).
faces_emb_train_np.shape

In [None]:
# Instantiate t-SNE for a 2-D projection of the embeddings.
# Background on choosing perplexity / iteration count:
# https://towardsdatascience.com/why-you-are-using-t-sne-wrong-502412aab0c0
#
# t-SNE is stochastic; a fixed random_state makes the projection (and the
# plot below) reproducible across kernel restarts.
# NOTE(review): recent scikit-learn releases rename `n_iter` to `max_iter` —
# confirm against the installed version before upgrading.
tsne = TSNE(n_iter=1500, n_components=2, perplexity=10, random_state=42)

In [None]:
# Fit t-SNE on the face embeddings and project them onto the 2 components
# configured on `tsne`; row i of the result corresponds to label_emb_train_np[i].
embs_tsne = tsne.fit_transform(faces_emb_train_np)

## Visualisation of the embedding space

In [None]:
# Route matplotlib figures through mpld3 for display in the notebook.
import mpld3
mpld3.enable_notebook()

# The two t-SNE components become the scatter-plot axes.
x = embs_tsne[:, 0]
y = embs_tsne[:, 1]

fig, ax = plt.subplots(figsize=(10, 10))
# Draw explicitly on the axes created above instead of relying on pyplot's
# implicit "current axes"; one colour per label.
sns.scatterplot(x=x, y=y, hue=label_emb_train_np, ax=ax)
ax.set_title("t-SNE projection of face embeddings")
ax.set_xlabel("t-SNE component 1")
ax.set_ylabel("t-SNE component 2")
plt.show()