In [1]:
from deepface import DeepFace
import cv2 as cv
import numpy as np
from tqdm.notebook import tqdm
import os
import shutil
import json
import uuid
import traceback
from sklearn.cluster import KMeans

In [2]:
# Short random tag: the first 4 hex characters of a fresh UUID4, regenerated
# every time this cell runs. process_frame prefixes every file it writes with it.
session_id = uuid.uuid4().hex[:4]

# Reset the output folders under data/ (frames, faces, embeddings, demography)

In [3]:
def setup_experiment():
    """Reset the experiment's output folders to an empty state.

    For each of ``data/frames``, ``data/faces``, ``data/embeddings`` and
    ``data/demography``: remove the folder together with everything in it if it
    already exists, then create it again empty. A missing ``data`` parent folder
    is created as well, so the function also works on a fresh checkout.

    Warning: this is destructive -- results of earlier runs are deleted.
    """
    for folder in ("frames", "faces", "embeddings", "demography"):
        target = os.path.join("data", folder)
        if os.path.exists(target):
            shutil.rmtree(target)
        # makedirs rather than mkdir: a missing "data" parent must not be an error
        os.makedirs(target)

# Read frames from the video and process every 15th one

In [4]:
def run_experiment(path, frame_stride=15):
    """Read a video and hand every ``frame_stride``-th frame to ``process_frame``.

    Parameters
    ----------
    path
        Video source passed straight to ``cv.VideoCapture``.
    frame_stride : int, default 15
        Only frames whose running index is a multiple of this value are
        processed; the index of the frame is passed along as its id.

    Raises
    ------
    ValueError
        If ``frame_stride`` is smaller than 1.

    Notes
    -----
    The loop ends when a frame cannot be read (this is also how the end of the
    video shows up, so "Cannot read frame." is printed then too) or when ``q``
    is pressed while an OpenCV window has focus. The capture is released and
    all OpenCV windows are closed even if the loop is interrupted or raises.
    """
    if frame_stride < 1:
        raise ValueError("frame_stride must be >= 1")

    cap = cv.VideoCapture(path)
    if not cap.isOpened():
        # Without this, a wrong path just makes the function return silently.
        print(f"Cannot open video: {path}")

    ctr = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            # ret is only true when the frame was read properly
            if not ret:
                print("Cannot read frame.")
                break
            if frame is None:
                continue
            if ctr % frame_stride == 0:
                process_frame(frame, ctr)
            # waitKey also gives the OpenCV windows a chance to redraw
            if cv.waitKey(1) == ord('q'):
                break
            ctr += 1
    finally:
        # Always free the capture and close windows, e.g. after a KeyboardInterrupt
        cap.release()
        cv.destroyAllWindows()

In [5]:
# Detector backend name passed as `detector_backend` to both DeepFace.analyze
# and DeepFace.represent in process_frame.
FACE_DETECTION_BACKEND = "ssd"
# Model name passed as `model_name` to DeepFace.represent in process_frame.
FACE_EMBEDDINGS_BACKEND = "Facenet"

In [6]:
def _to_json_safe(value):
    """``json.dump`` fallback: turn NumPy scalars/arrays into plain Python values."""
    if isinstance(value, np.generic):
        return value.item()
    if isinstance(value, np.ndarray):
        return value.tolist()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


def process_frame(frame, frame_id):
    """Analyse one video frame and persist the results for every detected face.

    Runs ``DeepFace.analyze`` (demography) and ``DeepFace.represent``
    (embeddings) on ``frame`` and pairs their results positionally. For each
    pair it writes, under the name ``{session_id}_{frame_id}_{idx}``:

    * ``data/frames/<name>.png``     -- the full frame,
    * ``data/faces/<name>.png``      -- the cropped face (also shown in the
      ``"face"`` window),
    * ``data/embeddings/<name>.npy`` -- the embedding,
    * ``data/demography/<name>.json`` -- the analysis dict.

    Parameters
    ----------
    frame
        Image array; indexed as ``frame[rows, cols, channels]`` for cropping.
    frame_id
        Identifier of the frame, used in the output file names.

    Notes
    -----
    * A face whose region differs between the two DeepFace results, or whose
      crop is empty, is skipped with a printed message and nothing is written
      for it.
    * ``ValueError`` is swallowed silently -- presumably this is how DeepFace
      reports a frame without a detectable face (TODO confirm against the
      installed deepface version). Any other exception is printed with its
      traceback and the frame is abandoned; it never propagates to the caller.
    """
    try:
        # Generate demography data
        frame_data = DeepFace.analyze(
            img_path=frame, detector_backend=FACE_DETECTION_BACKEND
        )
        # Generate embeddings using deepface
        embedding_data = DeepFace.represent(
            img_path=frame,
            model_name=FACE_EMBEDDINGS_BACKEND,
            detector_backend=FACE_DETECTION_BACKEND,
        )

        for idx, (f_data, e_data) in enumerate(zip(frame_data, embedding_data)):
            file_name = f"{session_id}_{frame_id}_{idx}"
            region = f_data["region"]
            facial_area = e_data["facial_area"]

            # Both results must describe the same face before they are stored together.
            if any(region[key] != facial_area[key] for key in ("x", "y", "w", "h")):
                print(f"{file_name}: analyze/represent regions differ; skipping face.")
                continue

            # Clamp to non-negative integers so the values are usable as slice bounds
            x = max(int(region["x"]), 0)
            y = max(int(region["y"]), 0)
            w = max(int(region["w"]), 0)
            h = max(int(region["h"]), 0)
            face_region = frame[y:y + h, x:x + w, :]
            if face_region.size == 0:
                # An empty crop cannot be written or shown by OpenCV
                print(f"{file_name}: empty face region; skipping face.")
                continue

            # Store the image and the cropped face
            cv.imwrite(f"data/frames/{file_name}.png", frame)
            cv.imwrite(f"data/faces/{file_name}.png", face_region)
            cv.imshow("face", face_region)

            # Store the embeddings
            np.save(f"data/embeddings/{file_name}.npy", e_data["embedding"])

            # Dump the dict as a json; the fallback only kicks in for values
            # the json module cannot encode itself (e.g. NumPy floats).
            with open(f"data/demography/{file_name}.json", 'w') as f:
                json.dump(f_data, f, default=_to_json_safe)

    except ValueError:
        # Deliberate best-effort: skip this frame without any output.
        pass
    except Exception as e:
        print(str(e))
        print(traceback.format_exc())
    
    
    

In [7]:
def cluster_embeddings(n_clusters=15, embeddings_dir="data/embeddings"):
    """Cluster the stored face embeddings with K-Means.

    Parameters
    ----------
    n_clusters : int, default 15
        Requested number of clusters. If fewer embeddings than this are
        available, the number of embeddings is used instead (a note is printed).
    embeddings_dir : str, default "data/embeddings"
        Folder holding the ``.npy`` embedding files. Entries without the
        ``.npy`` extension are ignored.

    Returns
    -------
    dict
        Maps each file name without its extension to its cluster label as a
        plain ``int``. Empty when the folder contains no ``.npy`` files.

    Notes
    -----
    Files are loaded in sorted name order so that, together with the fixed
    ``random_state``, repeated runs over the same files give the same labels.
    """
    # List all embeddings (sorted: os.listdir order is arbitrary)
    embed_files = sorted(
        f_name for f_name in os.listdir(embeddings_dir) if f_name.endswith(".npy")
    )
    if not embed_files:
        return {}

    # Load all embeddings keyed by their file name without the extension
    embeddings = {
        os.path.splitext(f_name)[0]: np.load(os.path.join(embeddings_dir, f_name))
        for f_name in embed_files
    }
    embed_vals = np.array(list(embeddings.values()))

    # K-Means cannot form more clusters than there are samples
    if n_clusters > len(embed_vals):
        print(f"Only {len(embed_vals)} embeddings available; using that many clusters.")
        n_clusters = len(embed_vals)

    k_means_results = KMeans(n_clusters=n_clusters, random_state=42, n_init=5).fit(embed_vals)

    # Return file prefix - cluster number pairs
    return {
        name: int(label)
        for name, label in zip(embeddings.keys(), k_means_results.labels_)
    }

In [8]:
def ensemble_demography_assignment(centroids):
    """Placeholder for a per-cluster demography vote (not implemented yet).

    Planned behaviour, as sketched by the author:

    * for each clustered face, read its demography data and assign the race
      and gender that appear most often;
    * for the assigned race and gender, calculate the median confidence for
      those classes;
    * return the cluster labels together with the race, the gender and the
      median confidence.

    At present the argument is ignored and ``None`` is returned.
    """
    # TODO: implement the steps listed in the docstring
    return None

In [9]:
# The two extraction steps below are commented out -- presumably because the
# data/ folders were already filled by an earlier run (TODO confirm). Uncomment
# both to regenerate everything from the video; note that setup_experiment()
# deletes the existing outputs first.
# setup_experiment()
# run_experiment('data/movie/sample.mp4')
# Cluster whatever embeddings are currently on disk into 8 groups.
assignments = cluster_embeddings(n_clusters=8)

In [11]:
# View all face crops that were assigned to one cluster
import time

# Cluster label to browse. The filter and the window title are both derived
# from this value so they cannot drift apart.
CLUSTER_TO_VIEW = 1
window_title = f"Person-{CLUSTER_TO_VIEW}"

relevant_files = [f"data/faces/{k}.png" for k, v in assignments.items() if v == CLUSTER_TO_VIEW]
try:
    for f in relevant_files:
        frame = cv.imread(f)
        if frame is None:
            # imread signals an unreadable/missing file by returning None
            print(f"Could not read {f}; skipping.")
            continue
        cv.imshow(window_title, frame)
        # waits for user to press any key
        # (this is necessary to avoid the Python kernel from crashing)
        cv.waitKey(0)
finally:
    # closing all open windows, also when the loop is interrupted
    cv.destroyAllWindows()
    