In [1]:
from mtcnn import MTCNN
import numpy as np
from insightface.app import FaceAnalysis
import dlib
import hdbscan
import cv2
import os 
from collections import defaultdict
from sklearn.metrics.pairwise import cosine_similarity
from imutils import face_utils

In [20]:
class FaceClassifier:
    def __init__(self, path: str):
        self.base_dir = path
        self.detect = MTCNN()
        self.app = FaceAnalysis('buffalo_l')
        self.app.prepare(ctx_id=0, det_size=(256, 256))
        self.predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')

    def detect_face(self, img_path: str):
        embedding_faces = []
        aligned_faces = []
        for obj in os.listdir(img_path):
            img = cv2.imread(os.path.join(img_path, obj))
            img, s_h, s_w = self.resize(img)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            faces = self.detect.detect_faces(img)
            face_val = self.landmarks(img, faces, s_h, s_w)
            if face_val:
                aligned_faces.extend(face_val)
                embedding_faces.extend(self.embedding(face_val))
        return aligned_faces, embedding_faces

    def resize(self, img):
        h, w = img.shape[:2]
        scale = 1024 / max(h, w)
        new_w, new_h = int(w * scale), int(h * scale)
        img = cv2.resize(img, (new_w, new_h))
        return img, scale, scale

    def landmarks(self, img, faces, s_h, s_w):
        face_list = []
        for face in faces:
            x, y, w, h = face['box']
            orig_w, orig_h = int(w * (1 / s_w)), int(h * (1 / s_h))
            x, y = max(0, x - 10), max(0, y - 10)
            w, h = min(x + w + 20, img.shape[1]) - x, min(y + h + 20, img.shape[0]) - y
            crop_img = img[y:y + h, x:x + w]
            resized_img = cv2.resize(crop_img, (orig_w, orig_h))

            shape = self.predictor(resized_img, dlib.rectangle(0, 0, resized_img.shape[1], resized_img.shape[0]))
            landmark = face_utils.shape_to_np(shape)
            aligned_img = self.alignment(resized_img, landmark)

            if aligned_img is not None:
                face_list.append(aligned_img)
        return face_list

    def alignment(self, img, landmark):
        left_eye = landmark[36:42].mean(axis=0)
        right_eye = landmark[42:48].mean(axis=0)
        dx, dy = right_eye[0] - left_eye[0], right_eye[1] - left_eye[1]
        angle = np.degrees(np.arctan2(dy, dx))
        centre = ((left_eye[0] + right_eye[0]) / 2, (left_eye[1] + right_eye[1]) / 2)
        dist = np.sqrt((dx ** 2) + (dy ** 2))
        actual_dist = 0.3 * img.shape[1]
        scale = actual_dist / dist

        M = cv2.getRotationMatrix2D(centre, angle, scale)
        M[0, 2] += 0.5 * img.shape[1] - centre[0]
        M[1, 2] += 0.35 * img.shape[0] - centre[1]

        img = cv2.warpAffine(img, M, (img.shape[1], img.shape[0]), flags=cv2.INTER_CUBIC)
        return self.check_alignment(img)

    def check_alignment(self, img):
        faces = self.detect.detect_faces(img)
        if not faces:
            return None
        x, y, w, h = faces[0]['box']
        cropped_img = img[y:y + h, x:x + w]

        shape = self.predictor(cropped_img, dlib.rectangle(0, 0, cropped_img.shape[1], cropped_img.shape[0]))
        landmark = face_utils.shape_to_np(shape)

        left_eye = landmark[36:42].mean(axis=0)
        right_eye = landmark[42:48].mean(axis=0)
        eye_alignment_error = abs(left_eye[1] - right_eye[1])

        return cropped_img if eye_alignment_error < h * 0.04 else None

    def embedding(self, face_list):
        embeddings = []
        for face in face_list:
            faces = self.app.get(face)
            if faces:
                embeddings.append(faces[0].embedding)
            else:
                print("Error: Face Not Detected By InsightFace")
        return embeddings

    def clustering(self, embedding,faces):
        if not embedding:
            return {}
        scan = hdbscan.HDBSCAN(min_cluster_size=3, cluster_selection_epsilon=0.3)
        embedding = np.array(embedding)
        labels = scan.fit_predict(embedding)
        clusters = defaultdict(list)
        for i, label in enumerate(labels):
            if label != -1:
                clusters[label].append((embedding[i],faces[i]))
        return clusters

    def find_best_cluster(self, embedding_vector, clusters):
        embedding_vector = np.array(embedding_vector).reshape(1, -1)
        max_similarities = []

        for cluster_id, embeddings_faces in clusters.items():
            embeddings = np.array([e for e, _ in embeddings_faces]) 
            if embeddings.size == 0:
                continue
            centroid = np.mean(embeddings, axis=0).reshape(1, -1)
            similarity = cosine_similarity(embedding_vector, centroid)[0][0]
            max_similarities.append((cluster_id, similarity))

        max_similarities.sort(key=lambda x: x[1], reverse=True)
        return max_similarities[:3]

    def find_best_match(self, embedding_vector, clusters):
        top_clusters = self.find_best_cluster(embedding_vector, clusters)
        best_similarity, best_cluster = -1, None

        for cluster_id, _ in top_clusters:
            embedding=clusters[cluster_id]
            for embedding_face in embedding :
                emb=embedding_face[0]
                sim = cosine_similarity(embedding_vector.reshape(1, -1), emb.reshape(1, -1))[0][0]
                if sim > best_similarity:
                    best_similarity = sim
                    best_cluster = cluster_id
                if best_similarity > 0.8:
                    return best_similarity, best_cluster

        return best_similarity, best_cluster

def main():
    base_dir = "D:/TensorFlow 2.0/Main Project"
    system = FaceClassifier(base_dir)
    
    train_path = os.path.join(base_dir, "test")
    train_faces, train_embeddings = [], []
    
    for obj in os.listdir(train_path):
        train_face, train_embedding = system.detect_face(os.path.join(train_path, obj))
        train_faces.extend(train_face)
        train_embeddings.extend(train_embedding)
    
    clusters = system.clustering(train_embeddings,train_faces)
    test_path = os.path.join(base_dir, "New folder")
    test_faces, test_embeddings = system.detect_face(test_path)

    for embedding in test_embeddings:
        similarity, cluster_id = system.find_best_match(embedding, clusters)
        if cluster_id is not None:
            print_img(cluster_id, clusters)

def print_img(cluster_id, clusters):
    if cluster_id not in clusters:
        print("No matching cluster found")
        return
    for i, (_,img) in enumerate(clusters[cluster_id]):
        if img is None or not isinstance(img, np.ndarray):
            print("Error: Invalid image format")
            continue
        cv2.imshow("Image", cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
        cv2.waitKey(0)
        if i == 10:
            break

if __name__ == '__main__':
    main()

Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\Vyom/.insightface\models\buffalo_l\1k3d68.onnx landmark_3d_68 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\Vyom/.insightface\models\buffalo_l\2d106det.onnx landmark_2d_106 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\Vyom/.insightface\models\buffalo_l\det_10g.onnx detection [1, 3, '?', '?'] 127.5 128.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\Vyom/.insightface\models\buffalo_l\genderage.onnx genderage ['None', 3, 96, 96] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\Vyom/.insightface\models\buffalo_l\w600k_r50.onnx recognition ['None', 3, 112, 112] 127.5 127

  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


Error: Face Not Detected By InsightFace
Error: Face Not Detected By InsightFace


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


In [2]:
from sklearn.model_selection import train_test_split
import pandas as pd

In [3]:
base_dir="D:/TensorFlow 2.0/Main Project"
i=0
train=pd.DataFrame(columns=["Image","Label"])
dir_path=os.path.join(base_dir,"train")
for img_path in os.listdir(dir_path):
    img1_path=os.path.join(dir_path,img_path)
    for img in os.listdir(img1_path):
        train=pd.concat([train,pd.DataFrame({"Image":[cv2.imread(os.path.join(img1_path,img))],"Label":[i]})],axis=0)
    i+=1
train_test,test=train_test_split(train,test_size=0.2,random_state=365)
train_test=train_test.reset_index(drop=True)
test=test.reset_index(drop=True)

In [4]:
print(test)

                                                  Image Label
0     [[[49, 60, 92], [60, 75, 108], [80, 97, 136], ...    68
1     [[[34, 36, 36], [31, 33, 33], [35, 37, 37], [1...    19
2     [[[244, 254, 254], [244, 254, 254], [244, 254,...    62
3     [[[135, 137, 137], [135, 137, 137], [136, 138,...    72
4     [[[215, 221, 220], [215, 221, 220], [215, 221,...    52
...                                                 ...   ...
6769  [[[111, 98, 250], [111, 98, 250], [111, 97, 25...    53
6770  [[[93, 72, 70], [96, 77, 74], [97, 77, 76], [9...    68
6771  [[[129, 152, 160], [126, 149, 157], [127, 149,...    25
6772  [[[250, 240, 240], [250, 240, 240], [250, 240,...    20
6773  [[[114, 135, 166], [104, 125, 156], [91, 112, ...    71

[6774 rows x 2 columns]


In [7]:
print(clusters)

NameError: name 'clusters' is not defined