In [4]:
import os
import cv2
import dlib
import numpy as np
from pathlib import Path
from sklearn.metrics.pairwise import cosine_similarity

# All data folders are resolved relative to the current working directory.
BASE_FOLDER = os.getcwd()

# Input tree: raw face crops and their per-image score files.
INFLUENCER_FACE_DATA = f"{BASE_FOLDER}/influencer_face_data"
FACE_IMG_FOLDER = os.path.join(INFLUENCER_FACE_DATA, "face_img")
FACE_SCORE_FOLDER = os.path.join(INFLUENCER_FACE_DATA, "face_score")

# Output tree: one image and one score file per de-duplicated face.
UNIQUE_INFLUENCER_FACE_DATA = f"{BASE_FOLDER}/unique_influencer_face_data"
UNIQUE_FACE_IMG_FOLDER = os.path.join(UNIQUE_INFLUENCER_FACE_DATA, "face_img")
UNIQUE_FACE_SCORE_FOLDER = os.path.join(UNIQUE_INFLUENCER_FACE_DATA, "face_score")

# Make sure both output folders exist before anything is written to them.
for _output_dir in (UNIQUE_FACE_IMG_FOLDER, UNIQUE_FACE_SCORE_FOLDER):
    os.makedirs(_output_dir, exist_ok=True)

# dlib models, loaded once at import time. The two .dat files are looked up
# relative to the current working directory.
face_detector = dlib.get_frontal_face_detector()
# NOTE: despite its name, this holds the loaded shape-predictor object (it is
# called like a function further down), not a filesystem path.
shape_predictor_path = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
face_rec_model = dlib.face_recognition_model_v1("dlib_face_recognition_resnet_model_v1.dat")

def get_face_embedding(image_path):
    """Compute a dlib face descriptor for the first face found in an image.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A NumPy array built from the descriptor returned by
        ``face_rec_model.compute_face_descriptor``, or ``None`` when the
        image cannot be read or the detector finds no face in it.
    """
    img = cv2.imread(image_path)
    if img is None:
        # Unreadable or non-image file.
        return None

    # Detection runs on a grayscale copy; the second argument asks the
    # detector to upsample the image once before searching.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_detector(gray, 1)

    if len(faces) == 0:
        return None

    # OpenCV decodes images in BGR channel order, whereas dlib's landmark
    # predictor and recognition network are meant to be fed RGB images.
    # Convert before handing the pixels to dlib so the red and blue channels
    # are not swapped when the descriptor is computed.
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Only the first detection is used, even if several faces were found.
    shape = shape_predictor_path(rgb, faces[0])
    return np.array(face_rec_model.compute_face_descriptor(rgb, shape))

def process_faces(similarity_threshold=0.93):
    """Group near-duplicate face images and write one entry per unique face.

    Every file in ``FACE_IMG_FOLDER`` is paired with the score file of the
    same stem in ``FACE_SCORE_FOLDER``. Images are visited in sorted filename
    order and clustered greedily: an image joins the first already-seen face
    whose embedding has a cosine similarity above ``similarity_threshold``,
    otherwise it starts a new unique face.

    For each unique face ``idx`` the function writes:
      * ``unique_face_{idx}.jpg`` in ``UNIQUE_FACE_IMG_FOLDER`` (the first
        image that created the group, re-encoded through OpenCV), and
      * ``unique_face_{idx}.txt`` in ``UNIQUE_FACE_SCORE_FOLDER`` holding the
        average score and the list of source filenames.

    Images are skipped, with a message, when their score file is missing or
    does not contain a number, or when no face embedding can be extracted.

    Args:
        similarity_threshold: Cosine similarity above which two embeddings
            are treated as the same face. Defaults to 0.93.

    Returns:
        None.
    """
    processed_embeddings = []  # representative embedding of each unique face
    unique_faces = []          # metadata, index-aligned with the list above

    # os.listdir returns entries in arbitrary order; sorting makes both the
    # greedy grouping and the unique_face_{idx} numbering reproducible.
    for img_file in sorted(os.listdir(FACE_IMG_FOLDER)):
        img_path = os.path.join(FACE_IMG_FOLDER, img_file)
        score_file = os.path.join(FACE_SCORE_FOLDER, f"{Path(img_file).stem}.txt")

        if not os.path.exists(score_file):
            print(f"Missing score file for {img_file}, skipping.")
            continue

        try:
            with open(score_file, "r") as sf:
                score = float(sf.read().strip())
        except ValueError:
            # One malformed score file should not abort the whole run.
            print(f"Invalid score file for {img_file}, skipping.")
            continue

        face_embedding = get_face_embedding(img_path)
        if face_embedding is None:
            print(f"Failed to extract face embedding for {img_file}, skipping.")
            continue

        # Compare against every known face in one batched call and keep the
        # first (lowest-index) face that exceeds the threshold.
        match_idx = None
        if processed_embeddings:
            similarities = cosine_similarity([face_embedding], processed_embeddings)[0]
            match_idx = next(
                (
                    idx
                    for idx, similarity in enumerate(similarities)
                    if similarity > similarity_threshold
                ),
                None,
            )

        if match_idx is None:
            processed_embeddings.append(face_embedding)
            unique_faces.append(
                {
                    "image_path": img_path,
                    "scores": [score],
                    "sources": [img_file],
                }
            )
        else:
            unique_faces[match_idx]["scores"].append(score)
            unique_faces[match_idx]["sources"].append(img_file)

    for idx, face_data in enumerate(unique_faces):
        avg_score = sum(face_data["scores"]) / len(face_data["scores"])
        unique_img_name = f"unique_face_{idx}.jpg"
        unique_img_path = os.path.join(UNIQUE_FACE_IMG_FOLDER, unique_img_name)
        unique_score_path = os.path.join(UNIQUE_FACE_SCORE_FOLDER, f"unique_face_{idx}.txt")

        # Re-read and re-write the representative image so the output is
        # encoded according to its .jpg extension.
        img = cv2.imread(face_data["image_path"])
        cv2.imwrite(unique_img_path, img)

        with open(unique_score_path, "w") as sf:
            sf.write(f"Average Score: {avg_score}\n")
            sf.write("Sources:\n")
            sf.write("\n".join(face_data["sources"]))

    print("Unique face processing completed!")

# Run the process: the de-duplication pass starts as soon as this code is
# executed, reading from the input folders and writing the unique-face output.
process_faces()


Failed to extract face embedding for hd-992418235673669_frame310_face12.jpg, skipping.
Failed to extract face embedding for hd-960884234555681_frame160_face23.jpg, skipping.
Failed to extract face embedding for hd-992418235673669_frame290_face10.jpg, skipping.
Failed to extract face embedding for hd-888062106715252_frame310_face11.jpg, skipping.
Failed to extract face embedding for hd-876121221160846_frame320_face16.jpg, skipping.
Failed to extract face embedding for hd-905739711170399_frame320_face7.jpg, skipping.
Failed to extract face embedding for hd-876239523982490_frame620_face12.jpg, skipping.
Failed to extract face embedding for hd-954832972830686_frame110_face18.jpg, skipping.
Failed to extract face embedding for hd-992418235673669_frame580_face14.jpg, skipping.
Failed to extract face embedding for hd-6720438181324436_frame180_face3.jpg, skipping.
Failed to extract face embedding for hd-6764280623620361_frame360_face31.jpg, skipping.
Failed to extract face embedding for hd-876