In [None]:
import cv2
import sys
import matplotlib.pyplot as plt
import os 
from tqdm import tqdm
from joblib import Parallel, delayed
import time
import concurrent.futures

In [None]:
# Directory whose subfolders are scanned for input images (relative to the working directory).
nimh_path = './NIMH-CHEFS'
# Output root. NOTE(review): no visible cell reads this variable; the resize step writes to a
# hard-coded 'out/resized/...' path instead. Confirm whether it should be wired in.
out_path = './out'

### Folder Utilities

In [None]:
def get_subfolders(path, ignored=('.ipynb_checkpoints', '.DS_Store')):
    """Return the paths of the sub-directories located directly inside ``path``.

    Args:
        path: Directory to scan.
        ignored: Entry names to skip even if they are directories.

    Returns:
        A list of ``os.path.join(path, name)`` strings, sorted by name so that
        repeated runs process the folders in the same order.

    Raises:
        OSError: If ``path`` cannot be listed (propagated from ``os.listdir``).
    """
    subfolders = []
    for name in sorted(os.listdir(path)):
        if name in ignored:
            continue
        candidate = os.path.join(path, name)
        # Plain files (README, stray images, ...) are not folders; returning them
        # would make a later os.listdir() on the entry fail.
        if os.path.isdir(candidate):
            subfolders.append(candidate)
    return subfolders

def get_folder_names(folder_list):
    """Return only the last path component (the folder name) of every path in ``folder_list``."""
    names = []
    for folder_path in folder_list:
        names.append(os.path.basename(folder_path))
    return names

def get_image_files_from_folder(folder, extensions=('.jpg', '.jpeg')):
    """Return the paths of all image files located directly inside ``folder``.

    Args:
        folder: Directory to scan (not recursive).
        extensions: File extension, or iterable of extensions, that count as
            images. Matching is case-insensitive, so ``photo.JPG`` is found too.

    Returns:
        A list of ``os.path.join(folder, file_name)`` strings, sorted by file
        name so that repeated runs see the files in the same order.

    Raises:
        OSError: If ``folder`` cannot be listed (propagated from ``os.listdir``).
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(extension.lower() for extension in extensions)
    return [os.path.join(folder, file_name)
            for file_name in sorted(os.listdir(folder))
            if file_name.lower().endswith(suffixes)]

### Image Utilities

In [None]:
def save_image(image, folder_path_out, image_name, changes):
    """Write ``image`` as a JPEG named after the processing step applied to it.

    The file name is the stem of ``image_name`` followed by an underscore, the
    ``changes`` tag (e.g. 'resized', 'cropped') and the '.jpg' extension.

    Args:
        image: Image in the format accepted by ``cv2.imwrite``.
        folder_path_out: Destination directory; created if it does not exist.
        image_name: Original file name; its extension is dropped.
        changes: Tag describing the processing step.

    Returns:
        The path of the file that was written.

    Raises:
        OSError: If the directory cannot be created or the image is not written.
    """
    stem = os.path.splitext(image_name)[0]
    # exist_ok removes the gap between "check" and "create" in which another
    # worker could create the directory and make makedirs fail.
    os.makedirs(folder_path_out, exist_ok=True)
    target_path = os.path.join(folder_path_out, f'{stem}_{changes}.jpg')
    # cv2.imwrite signals failure through its return value, not an exception;
    # ignoring it would silently drop images.
    if not cv2.imwrite(target_path, image):
        raise OSError(f'Could not write image to {target_path}')
    return target_path

In [None]:
# Paths of the entries found directly under the dataset directory `nimh_path`.
folders = get_subfolders(nimh_path)
# Last path component of each entry in `folders`.
folder_names = get_folder_names(folders)

### Grayscale

#### Grayscale utilities

In [None]:
def convert_to_grayscale(image_path):
    """Load the image at ``image_path`` and return it converted to grayscale.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The result of ``cv2.cvtColor`` with ``cv2.COLOR_BGR2GRAY``.

    Raises:
        OSError: If OpenCV cannot read the file.
    """
    img = cv2.imread(image_path)
    # cv2.imread returns None for a missing or undecodable file instead of
    # raising; fail here with the offending path rather than inside cvtColor.
    if img is None:
        raise OSError(f'Could not read image: {image_path}')
    return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

### Crop faces

#### Face detection utilities

In [None]:
def find_face(image_path, face_classifier=None):
    """Detect frontal faces in an image with OpenCV's Haar cascade (Viola-Jones) detector.

    Args:
        image_path: Path of the image file to analyse.
        face_classifier: Optional, already loaded ``cv2.CascadeClassifier``.
            Passing one avoids re-reading the cascade XML for every image; when
            omitted, the default frontal-face cascade is loaded on each call.

    Returns:
        The value returned by ``detectMultiScale``: one ``(x, y, w, h)`` box per
        detected face, empty when no face is found.

    Raises:
        OSError: If OpenCV cannot read the image file.
    """
    img = cv2.imread(image_path)
    # cv2.imread returns None for a missing or undecodable file instead of raising.
    if img is None:
        raise OSError(f'Could not read image: {image_path}')
    gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    if face_classifier is None:
        face_classifier = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
    return face_classifier.detectMultiScale(gray_image, scaleFactor=1.1, minNeighbors=5, minSize=(40, 40))

def crop_face(face, img_path):
    """Return the region of the image at ``img_path`` covered by one face box.

    Args:
        face: A single ``(x, y, w, h)`` box, e.g. one row of ``find_face``'s result.
        img_path: Path of the image file the box refers to.

    Returns:
        The cropped image in cv2 format (rows ``y:y+h``, columns ``x:x+w``).

    Raises:
        OSError: If OpenCV cannot read the image file.
    """
    img = cv2.imread(img_path)
    # cv2.imread returns None for a missing or undecodable file instead of raising.
    if img is None:
        raise OSError(f'Could not read image: {img_path}')
    x, y, w, h = face
    # No rectangle is drawn on the image: an outline painted on the box border
    # would end up inside the crop and contaminate the saved face.
    return img[y:y + h, x:x + w]

def process_face_and_save(face_coords, image_path, file_name, folder):
    """Crop one detected face and save it, reporting failures instead of raising.

    Args:
        face_coords: A single ``(x, y, w, h)`` face box.
        image_path: Path of the source image the box refers to.
        file_name: Name used to build the output file name.
        folder: Directory the crop is written to.
            NOTE(review): assumed to be the OUTPUT directory - confirm against callers.

    Returns:
        None. Any exception is caught and printed so that one bad image does
        not abort a batch.
    """
    try:
        # The helper previously called here was never defined, so every call
        # ended in the except branch; crop and save explicitly instead.
        cropped = crop_face(face_coords, image_path)
        save_image(cropped, folder, file_name, 'cropped')
    except Exception as e:
        print(f"Error processing image {file_name}: {e}")

def process_folder_crop(folder_list):
    """Detect and crop the faces of every image in the given folders.

    For each folder the crops are written to ``out/cropped/{folder name}``.
    The first face of an image is tagged 'cropped'; further faces of the same
    image are tagged 'cropped_1', 'cropped_2', ... so they do not overwrite
    each other.

    Args:
        folder_list: Iterable of folder paths. A single path is accepted as
            well and treated as a one-element list.
    """
    # A bare path would otherwise be iterated character by character.
    if isinstance(folder_list, (str, os.PathLike)):
        folder_list = [folder_list]
    for folder in folder_list:
        # normpath drops a trailing separator, which would make basename empty.
        folder_path_out = f'out/cropped/{os.path.basename(os.path.normpath(folder))}'
        display(folder_path_out)
        image_files_in_folder = get_image_files_from_folder(folder)
        for image_path in tqdm(image_files_in_folder):
            faces = find_face(image_path)
            for index, face_coords in enumerate(faces):
                # Crop with the single box, not the whole detection array.
                img = crop_face(face_coords, image_path)
                changes = 'cropped' if index == 0 else f'cropped_{index}'
                save_image(img, folder_path_out, os.path.basename(image_path), changes)

#### Detect face and crop

In [None]:
# Run face detection and cropping over the dataset folders and report the elapsed seconds.
t1 = time.time()
folders = get_subfolders(nimh_path)

# Optional multithreaded variant (disabled): one task per folder.
# with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
#     list(executor.map(lambda folder: process_folder_crop([folder]), folders))

# single-threading
# process_folder_crop expects the LIST of folders; handing it one path string
# per call would make it iterate over the characters of that string.
process_folder_crop(folders)

t2 = time.time()
display(t2 - t1)

### Resizing

#### Set parameters for resizing

In [None]:
# Target size handed to cv2.resize as `dsize`, which OpenCV reads as (width, height).
size = (256, 256)
# Interpolation flags for cv2.resize; going by the names, one is meant for shrinking
# and one for enlarging images.
# NOTE(review): `upsampling` is not referenced by any visible cell.
downsampling = cv2.INTER_AREA
upsampling = cv2.INTER_CUBIC

#### Resizing utilities

In [None]:
def resize_image(image_file, size, interpolation):
    """Load an image and resize it to ``size`` with the given interpolation method.

    Args:
        image_file: Path of the image file to read.
        size: Target size, passed to ``cv2.resize`` as ``dsize``.
        interpolation: OpenCV interpolation flag, e.g. ``cv2.INTER_AREA``.

    Returns:
        The resized image as returned by ``cv2.resize``.

    Raises:
        OSError: If OpenCV cannot read the image file.
    """
    img = cv2.imread(image_file)
    # cv2.imread returns None for a missing or undecodable file instead of raising.
    if img is None:
        raise OSError(f'Could not read image: {image_file}')
    return cv2.resize(img, dsize=size, interpolation=interpolation)

def process_images_resize(folder_list, size, interpolation):
    """Resize every image of the given folders and save the results.

    For each folder the resized images are written to
    ``out/resized/{folder name}`` with the tag 'resized' in the file name.

    Args:
        folder_list: Iterable of folder paths. A single path is accepted as
            well and treated as a one-element list.
        size: Target size forwarded to ``resize_image``.
        interpolation: OpenCV interpolation flag forwarded to ``resize_image``.
    """
    # A bare path would otherwise be iterated character by character.
    if isinstance(folder_list, (str, os.PathLike)):
        folder_list = [folder_list]
    for folder in folder_list:
        display(folder)
        # Same for every image of the folder, so build it once per folder.
        folder_path_out = f'out/resized/{os.path.basename(os.path.normpath(folder))}'
        image_files_in_folder = get_image_files_from_folder(folder)
        for image_file in tqdm(image_files_in_folder):
            # Honour the caller's interpolation argument rather than a global.
            res_img = resize_image(image_file, size, interpolation)
            img_name = os.path.basename(image_file)
            save_image(res_img, folder_path_out, img_name, 'resized')

#### Resize

In [None]:
# Resize all images in `folders` to `size`. `downsampling` (cv2.INTER_AREA) is passed
# explicitly because no variable named `interpolation` exists at notebook level.
# NOTE(review): `folders` holds the dataset folders, not the cropped output - confirm
# that resizing the uncropped images is intended.
process_images_resize(folders, size, downsampling)