# DEEPFACE EMBEDDING AND SAVE

## Move to the correct folder

In [11]:
# NOTE: relative path, so this cell is not re-runnable. The recorded output shows
# Errno 2 because the working directory was already .../data_generation.
%cd data_generation

[Errno 2] No such file or directory: 'data_generation'
/mnt/d/deepface-video-recognition/data_generation


## IMPORT LIB

In [16]:
from deepface import DeepFace
from concurrent.futures import ThreadPoolExecutor

import os
import pickle
import numpy as np
import faiss
import queue
import threading

## Initial parameters and helper functions

In [17]:
# Vector width passed to faiss.IndexFlatL2 when a fresh index is created.
# NOTE(review): the same width is used for every embedding model listed in
# det_emb — confirm that all saved embeddings really are 2622-wide.
fixed_dim = 2622
workers = 10

# Folder names, resolved relative to the parent of the working directory.
faiss_index_dir = "faiss_index"
faces_emb_dir = "faces_emb"

# Known movies, and the one whose embeddings are indexed in this run.
movies = ['Calloused_Hands', 'Liberty_Kid', 'like_me', 'losing_ground', 'Memphis']
movie = "Liberty_Kid"
source_emb_faces = "../" + faces_emb_dir + "/" + movie

# Every (face detector, embedding model) combination, grouped by detector.
det_emb = tuple(
    (detector, embedder)
    for detector in ("fastmtcnn", "opencv", "retinaface")
    for embedder in ("ArcFace", "VggFace", "FaceNet", "GhostFaceNet")
)

In [18]:
def push_task_to_queue(list_of_tasks):
    """Run a batch of jobs sequentially on one background thread and wait for it.

    Args:
        list_of_tasks: Iterable of ``(callable, args)`` pairs. Each job is
            invoked as ``callable(*args)`` in the order given.

    Returns:
        None. The call blocks until the worker thread has exited.

    A job that raises (or a malformed task entry) is reported with ``print``
    and the remaining jobs still run. Without that isolation the worker thread
    would die before ``task_done()`` was called and a ``job_queue.join()``
    would wait forever.
    """
    job_queue = queue.Queue()

    # Add every task to the queue before the worker starts draining it.
    for task in list_of_tasks:
        job_queue.put(task)

    def process_queue():
        while True:
            try:
                task = job_queue.get_nowait()
            except queue.Empty:
                return  # nothing left to do
            try:
                job, args = task
                job(*args)
            except Exception as e:
                print(f"Error running queued task: {e}")
            finally:
                # Always balance the get() so the queue's bookkeeping stays consistent.
                job_queue.task_done()

    worker_thread = threading.Thread(target=process_queue)
    worker_thread.start()

    # Wait on the thread itself rather than the queue: this returns even if the
    # worker stops early, so the caller can never be left blocked.
    worker_thread.join()

## Create a Faiss index for each face detector + embedding model pair

### Helper functions

In [19]:
def save_read_unread_files(read_files: list, read_files_path: str, unread_files: list, unread_files_path: str):
    """Pickle the processed and pending file lists to their checkpoint paths.

    Args:
        read_files: Paths that have already been added to the index.
        read_files_path: Destination pickle file for ``read_files``.
        unread_files: Paths still waiting to be added.
        unread_files_path: Destination pickle file for ``unread_files``.

    Returns:
        None. Any ``Exception`` raised while writing is caught and printed, so
        a failed save never propagates to the caller.
    """
    # The read list is written first, then the unread list.
    checkpoints = (
        (read_files_path, read_files),
        (unread_files_path, unread_files),
    )
    try:
        for destination, entries in checkpoints:
            with open(destination, "wb") as sink:
                pickle.dump(entries, sink)

        # Only reached when both files were written.
        print(f"Read files saved to: {read_files_path}")
        print(f"Unread files saved to: {unread_files_path}")
    except Exception as err:
        print(f"Error saving read/unread files list: {err}")

In [20]:
def save_index_files(index, index_file_path: str):
    """Write ``index`` to ``index_file_path`` and then reset the in-memory index.

    Args:
        index: Object handed to ``faiss.write_index``; it must also provide ``reset()``.
        index_file_path: Destination path for the serialized index.

    Returns:
        None. An ``Exception`` raised while writing is caught and printed
        rather than propagated.
    """
    try:
        # Save the index to disk
        faiss.write_index(index, index_file_path)
        print(f"Successfully saved index to {index_file_path}")
    except Exception as e:
        print(f"Error saving index to {index_file_path}: {e}")
    # Runs whether or not the write above succeeded.
    # NOTE(review): Faiss `reset()` presumably empties the index (drops all stored
    # vectors) rather than closing a handle — confirm; after a failed write this
    # would discard the only copy.
    index.reset()

In [21]:
def add_pickle_to_index(pickle_file_path: str, index, read_files: list, read_files_path: str, unread_files: list, unread_files_path: str):
    """Load one pickled embedding and append it to ``index``.

    Args:
        pickle_file_path: Pickle file holding the embedding to add.
        index: Object exposing ``add(embedding)``.
        read_files: Paths already indexed; ``pickle_file_path`` is appended on success.
        read_files_path: Checkpoint file for ``read_files`` (written only on error).
        unread_files: Paths still pending; ``pickle_file_path`` is removed on success.
        unread_files_path: Checkpoint file for ``unread_files`` (written only on error).

    Returns:
        None. Paths already present in ``read_files`` are skipped. On any
        ``Exception`` both lists are checkpointed via ``save_read_unread_files``
        and the error is printed instead of raised.
    """
    try:
        if pickle_file_path not in read_files:
            with open(pickle_file_path, "rb") as handle:
                # NOTE: pickle.load can execute code embedded in the file — only
                # point this at embedding files you generated yourself.
                vectors = pickle.load(handle)
            # The unpickled object is passed to the index unchanged.
            index.add(vectors)
            print(f"Added to index: {pickle_file_path}")
            # Bookkeeping: move the path from the pending list to the done list.
            read_files.append(pickle_file_path)
            unread_files.remove(pickle_file_path)
    except Exception as err:
        save_read_unread_files(read_files, read_files_path, unread_files, unread_files_path)
        print(f"Error processing file {pickle_file_path}: {err}")

In [22]:
def traverse_to_create_index(det_face, emb_model):
    """Build, or resume building, the Faiss index for one detector / embedding-model pair.

    Reads the module-level settings ``fixed_dim``, ``faiss_index_dir``,
    ``movie`` and ``source_emb_faces``.

    Args:
        det_face: Detector tag; a file qualifies when this string occurs in its name.
        emb_model: Embedding-model tag; a file qualifies when ``emb_<emb_model>``
            occurs in its name. Also names the output sub-folder.

    Returns:
        None. The index and the read/unread checkpoint lists are written under
        ``../<faiss_index_dir>/<movie>/<emb_model>/``.

    Behaviour:
        * When the index file and both checkpoint lists already exist, work
          resumes from them; otherwise the source tree is scanned and a fresh
          ``IndexFlatL2`` is built.
        * ``add_pickle_to_index`` removes each processed path from
          ``unread_files``, so the loop walks a snapshot of that list.
          Iterating the live list while it shrinks skips every other file.
    """
    des_index_folder = f"../{faiss_index_dir}/{movie}/{emb_model}"

    # Ensure the destination directory exists
    os.makedirs(des_index_folder, exist_ok=True)
    index_file_path = os.path.join(des_index_folder, f"movie_{movie}-det_{det_face}-emb_{emb_model}.pkl")
    read_files_path = os.path.join(des_index_folder, f"read-movie_{movie}-det_{det_face}-emb_{emb_model}.pkl")
    unread_files_path = os.path.join(des_index_folder, f"unread-movie_{movie}-det_{det_face}-emb_{emb_model}.pkl")

    checkpoint_paths = (unread_files_path, read_files_path, index_file_path)
    has_any_checkpoint = os.path.exists(unread_files_path)
    has_full_checkpoint = all(os.path.exists(path) for path in checkpoint_paths)

    if has_full_checkpoint:
        print(f"Reading from existing index: {index_file_path}")
        with open(unread_files_path, "rb") as f:
            unread_files = pickle.load(f)
        with open(read_files_path, "rb") as f:
            read_files = pickle.load(f)
        index = faiss.read_index(index_file_path)
    else:
        if has_any_checkpoint:
            # The lists cannot be trusted without the index they describe.
            print(f"Incomplete checkpoint in {des_index_folder}; rebuilding index from scratch")
        index = faiss.IndexFlatL2(fixed_dim)  # L2 distance for similarity search
        # Collect all matching embedding files
        unread_files = [
            os.path.join(root, file)
            for root, _, files in os.walk(source_emb_faces)
            for file in files
            if file.lower().endswith('.pkl') and f"emb_{emb_model}" in file and det_face in file
        ]
        read_files = []

    # Snapshot: unread_files is mutated by add_pickle_to_index during the loop.
    files_to_read = list(unread_files)

    print(f"Files to read: {len(files_to_read)}")
    for file in files_to_read:
        add_pickle_to_index(file, index, read_files, read_files_path, unread_files, unread_files_path)

    save_index_files(index, index_file_path)
    save_read_unread_files(read_files, read_files_path, unread_files, unread_files_path)

In [29]:
# Queue one index-building job per (detector, embedding model) pair in det_emb.
# push_task_to_queue drains its queue on a single worker thread, so the pairs
# are processed one after another rather than in parallel.
push_task_to_queue([(traverse_to_create_index, (det_face, emb_model)) for det_face, emb_model in det_emb])

Files to read: 23764
Added to index: ../faces_emb/Liberty_Kid/Liberty_Kid-1/Liberty_Kid-1-shot_1/Liberty_Kid-1-shot_1-frame_555-det_fastmtcnn-emb_ArcFace-face_1.pkl
Added to index: ../faces_emb/Liberty_Kid/Liberty_Kid-1/Liberty_Kid-1-shot_1/Liberty_Kid-1-shot_1-frame_565-det_fastmtcnn-emb_ArcFace-face_1.pkl
Added to index: ../faces_emb/Liberty_Kid/Liberty_Kid-1/Liberty_Kid-1-shot_1/Liberty_Kid-1-shot_1-frame_575-det_fastmtcnn-emb_ArcFace-face_1.pkl
Added to index: ../faces_emb/Liberty_Kid/Liberty_Kid-1/Liberty_Kid-1-shot_1/Liberty_Kid-1-shot_1-frame_585-det_fastmtcnn-emb_ArcFace-face_1.pkl
Added to index: ../faces_emb/Liberty_Kid/Liberty_Kid-1/Liberty_Kid-1-shot_1/Liberty_Kid-1-shot_1-frame_595-det_fastmtcnn-emb_ArcFace-face_1.pkl
Added to index: ../faces_emb/Liberty_Kid/Liberty_Kid-1/Liberty_Kid-1-shot_1/Liberty_Kid-1-shot_1-frame_605-det_fastmtcnn-emb_ArcFace-face_1.pkl
Added to index: ../faces_emb/Liberty_Kid/Liberty_Kid-1/Liberty_Kid-1-shot_1/Liberty_Kid-1-shot_1-frame_615-det_fast

KeyboardInterrupt: 

Added to index: ../faces_emb/Liberty_Kid/Liberty_Kid-13/Liberty_Kid-13-shot_8/Liberty_Kid-13-shot_8-frame_15-det_fastmtcnn-emb_ArcFace-face_1.pkl


Added to index: ../faces_emb/Liberty_Kid/Liberty_Kid-13/Liberty_Kid-13-shot_8/Liberty_Kid-13-shot_8-frame_25-det_fastmtcnn-emb_ArcFace-face_1.pkl
Added to index: ../faces_emb/Liberty_Kid/Liberty_Kid-13/Liberty_Kid-13-shot_8/Liberty_Kid-13-shot_8-frame_35-det_fastmtcnn-emb_ArcFace-face_1.pkl
Added to index: ../faces_emb/Liberty_Kid/Liberty_Kid-13/Liberty_Kid-13-shot_8/Liberty_Kid-13-shot_8-frame_45-det_fastmtcnn-emb_ArcFace-face_1.pkl
Added to index: ../faces_emb/Liberty_Kid/Liberty_Kid-13/Liberty_Kid-13-shot_8/Liberty_Kid-13-shot_8-frame_55-det_fastmtcnn-emb_ArcFace-face_1.pkl
Added to index: ../faces_emb/Liberty_Kid/Liberty_Kid-13/Liberty_Kid-13-shot_8/Liberty_Kid-13-shot_8-frame_65-det_fastmtcnn-emb_ArcFace-face_1.pkl
Added to index: ../faces_emb/Liberty_Kid/Liberty_Kid-13/Liberty_Kid-13-shot_8/Liberty_Kid-13-shot_8-frame_75-det_fastmtcnn-emb_ArcFace-face_1.pkl
Added to index: ../faces_emb/Liberty_Kid/Liberty_Kid-13/Liberty_Kid-13-shot_8/Liberty_Kid-13-shot_8-frame_85-det_fastmtcnn-e