In [None]:
import concurrent.futures
import os
import sys
import threading
import time

import cv2
import matplotlib.pyplot as plt
from joblib import Parallel, delayed
from tqdm import tqdm

In [None]:
# Face detection using Haar cascades (Viola-Jones).
# Read the images, crop each detected face, and store the crops in a new
# output directory.
nimh_path = './NIMH-CHEFS'  # input root; each sub-folder is scanned for .jpg images
out_path = './out'  # output root; process_folder creates one sub-folder per input folder

# One cascade per thread: parsing the XML for every image is wasted work, and
# a per-thread instance stays safe if the threaded driver is ever enabled.
_face_classifier_cache = threading.local()


def _get_face_classifier():
    """Return this thread's frontal-face Haar cascade, loading it on first use."""
    classifier = getattr(_face_classifier_cache, "face_classifier", None)
    if classifier is None:
        classifier = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
        if classifier.empty():
            # Fail with a clear message instead of an opaque OpenCV assertion
            # later inside detectMultiScale.
            raise RuntimeError("Failed to load haarcascade_frontalface_default.xml")
        _face_classifier_cache.face_classifier = classifier
    return classifier


def process_image(image_path):
    """Detect frontal faces in the image stored at ``image_path``.

    The image is converted to grayscale and scanned with OpenCV's default
    frontal-face Haar cascade (scaleFactor=1.1, minNeighbors=5, minimum face
    size 40x40 pixels).

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        The detections produced by ``detectMultiScale``; callers iterate it
        and unpack each entry as ``(x, y, w, h)``. An empty tuple is returned
        when the image cannot be read.

    Raises:
        RuntimeError: If the Haar cascade file cannot be loaded.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals an unreadable/corrupt file with None rather than
        # raising; skip it so a single bad file does not abort the whole run.
        print(f"Could not read image {image_path}; skipping")
        return ()
    gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return _get_face_classifier().detectMultiScale(
        gray_image, scaleFactor=1.1, minNeighbors=5, minSize=(40, 40)
    )

def save_processed(face, img_path, file_name, folder):
    """Crop one detected face out of an image and write it as a JPEG.

    The crop is written to ``<out_path>/<folder>/<stem>_cropped.jpg`` where
    ``stem`` is ``file_name`` without its extension. The target directory is
    not created here; it must already exist.

    Args:
        face: Bounding box of the face as ``(x, y, w, h)`` in pixels.
        img_path: Path of the source image the box refers to.
        file_name: Name used to derive the output file name.
        folder: Name of the sub-folder of ``out_path`` to write into.

    Raises:
        ValueError: If the source image cannot be read.
        OSError: If OpenCV reports that the crop could not be written.
    """
    img = cv2.imread(img_path)
    if img is None:
        raise ValueError(f"Could not read image {img_path}")
    x, y, w, h = face
    # No rectangle is drawn on the image before cropping: a 4-pixel outline on
    # the box edge would bleed green border pixels into the saved face crop.
    cropped = img[y:y + h, x:x + w]
    stem = os.path.splitext(file_name)[0]
    # Build the path from out_path so it always matches the directory that
    # process_folder creates, instead of a separately hard-coded 'out/'.
    target = os.path.join(out_path, folder, f'{stem}_cropped.jpg')
    if not cv2.imwrite(target, cropped):
        # imwrite reports failure (e.g. missing directory) by returning False.
        raise OSError(f"Could not write {target}")

def process_face_and_save(face_coords, image_path, file_name, folder):
    """Save one face crop, reporting any failure instead of raising.

    Delegates to ``save_processed`` with the same arguments. Any exception it
    raises is caught and printed together with ``file_name``, so one bad face
    does not stop the caller's loop.

    Args:
        face_coords: Face bounding box, forwarded unchanged.
        image_path: Path of the source image, forwarded unchanged.
        file_name: Source file name; also used in the error message.
        folder: Output sub-folder name, forwarded unchanged.
    """
    try:
        save_processed(face_coords, image_path, file_name, folder)
    except Exception as e:
        message = f"Error processing image {file_name}: {e}"
        print(message)
    return None

def process_folder(folder):
    """Detect faces in every ``.jpg`` image of ``folder`` and save the crops.

    An output directory named after ``folder`` is created under ``out_path``
    if needed. Each image is passed to ``process_image`` and every detection
    is saved through ``process_face_and_save``.

    The first face found in an image is saved under the image's own name.
    Further detections in the same image get a ``_face<N>`` suffix, so they
    no longer overwrite the first crop.

    Args:
        folder: Path of the input folder to scan (non-recursive).
    """
    display(folder)
    folder_name = os.path.basename(folder)
    # exist_ok replaces the exists()/makedirs() pair, which can race when
    # several workers start at once.
    os.makedirs(os.path.join(out_path, folder_name), exist_ok=True)
    image_files = [os.path.join(folder, file_name) for file_name in os.listdir(folder) if file_name.endswith('.jpg')]
    for image_path in image_files:
        base_name = os.path.basename(image_path)
        stem, ext = os.path.splitext(base_name)
        for index, face_coords in enumerate(process_image(image_path)):
            # The output name is derived from file_name downstream, so a
            # distinct name per detection keeps every crop on disk.
            file_name = base_name if index == 0 else f'{stem}_face{index}{ext}'
            process_face_and_save(face_coords, image_path, file_name, folder_name)

# Time the whole run with a monotonic clock, which is unaffected by system
# clock adjustments.
t1 = time.perf_counter()
# Keep only real directories: stray files in the dataset root (e.g. .DS_Store
# or a README) would make os.listdir() fail inside process_folder.
folders = [
    os.path.join(nimh_path, folder)
    for folder in os.listdir(nimh_path)
    if folder != '.ipynb_checkpoints' and os.path.isdir(os.path.join(nimh_path, folder))
]
# display(folders)

# multithreading
# with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
#    executor.map(process_folder, folders)

# single-threading
for folder in folders:
    process_folder(folder)

t2 = time.perf_counter()
# Elapsed wall-clock time in seconds.
display(t2-t1)