In [12]:
import os

# Folders holding the original, uncropped images
directories = ["./raw_benny"]

# One list of file extensions per folder (an entry without a suffix yields '').
file_exts = [
    [os.path.splitext(entry)[1] for entry in os.listdir(folder)]
    for folder in directories
]

# Show every distinct extension found across all folders.
print(list({ext for exts in file_exts for ext in exts}))


['', '.MP4', '.PNG', '.JPG', '.jpg', '.mov', '.MOV', '.JPEG', '.mp4', '.jpeg']


In [13]:
from PIL import Image
import pillow_heif

# Register the HEIF plugin so that PIL's Image.open() can read .heic files.
pillow_heif.register_heif_opener()

# Lower-case suffixes; file names are lower-cased before comparison so that
# mixed-case names such as "clip.Mov" or "photo.Heic" are handled as well.
VIDEO_EXTS = (".mov", ".mp4")
HEIC_EXTS = (".heic",)

# WARNING: this cell is destructive. Videos are deleted and HEIC originals are
# removed after conversion, directly inside the source directories.
for directory in directories:
    for file in os.listdir(directory):
        src_path = os.path.join(directory, file)
        lowered = file.lower()

        # Delete videos
        if lowered.endswith(VIDEO_EXTS):
            os.remove(src_path)
            print(f"Deleted file: {src_path}")

        # Convert HEIC images to JPG (elif: a file that was just deleted must
        # not be examined again)
        elif lowered.endswith(HEIC_EXTS):
            jpg_file = os.path.splitext(file)[0] + ".jpg"
            jpg_path = os.path.join(directory, jpg_file)
            # NOTE: an existing file at jpg_path is overwritten by save().
            # The context manager closes the HEIC file handle before the
            # original is removed; convert("RGB") is needed because JPEG
            # cannot store modes with an alpha channel.
            with Image.open(src_path) as heic_image:
                heic_image.convert("RGB").save(jpg_path, format="JPEG")
            os.remove(src_path)
            print(f"HEIC Image {src_path} has been converted to {jpg_path}")

Deleted file: ./raw_benny/filtered-2FC68982-EC6D-43B3-942C-3A872505B268.mp4
Deleted file: ./raw_benny/08e2e3f33e8b4958b21adab76430c416.mov
Deleted file: ./raw_benny/2024-02-22-144525-2892472603_L6ioSkeZ_3dc7f93e1ab3cf35f9f21e6d66a2909686651b00.mp4
Deleted file: ./raw_benny/IMG_7712.MOV
Deleted file: ./raw_benny/IMG_8838.MOV
Deleted file: ./raw_benny/IMG_4179.MOV
Deleted file: ./raw_benny/IMG_0135.MOV
Deleted file: ./raw_benny/IMG_4178.MOV
Deleted file: ./raw_benny/IMG_5869.MOV
Deleted file: ./raw_benny/recorded-41897256161138.mp4
Deleted file: ./raw_benny/IMG_3174.MOV
Deleted file: ./raw_benny/IMG_4182.MOV
Deleted file: ./raw_benny/IMG_3980.MOV
Deleted file: ./raw_benny/IMG_4180.MOV
Deleted file: ./raw_benny/recorded-604309983117.mp4
Deleted file: ./raw_benny/IMG_7503.MOV
Deleted file: ./raw_benny/sd1689772320_2.mp4
Deleted file: ./raw_benny/recorded-182102339851.mp4
Deleted file: ./raw_benny/IMG_2580.MOV
Deleted file: ./raw_benny/IMG_4126.MOV
Deleted file: ./raw_benny/recorded-3924747

In [14]:
# Sanity check: list the extensions again now that the clean-up cell has run.
converted_file_exts = [
    [os.path.splitext(entry)[1] for entry in os.listdir(folder)]
    for folder in directories
]
print(list({ext for exts in converted_file_exts for ext in exts}))

['', '.PNG', '.JPG', '.jpg', '.JPEG', '.jpeg']


In [16]:
from mtcnn.mtcnn import MTCNN
import numpy as np

# Resume state. With reached_last_image = False, every file up to and
# including last_cropped_image is skipped, and numbering of the saved faces
# continues from last_face_count.
# NOTE(review): resuming relies on os.listdir() returning files in the same
# order as in the earlier run; that order is not guaranteed - confirm.
reached_last_image = True
last_face_count = 4000
last_cropped_image = "./raw_benny/IMG_1960.JPG"

# Lower-case suffixes; file names are lower-cased before comparison.
IMAGE_EXTS = (".jpg", ".jpeg", ".png")

# Output folder for the cropped faces.
os.makedirs("./faces", exist_ok=True)

# The detector does not depend on the image, so build it once instead of once
# per file.
detector = MTCNN()

for directory in directories:
    for file in os.listdir(directory):
        image_path = os.path.join(directory, file)

        # For continuing from last image cropped
        if not reached_last_image:
            if image_path == last_cropped_image:
                reached_last_image = True
            continue
        if not file.lower().endswith(IMAGE_EXTS):
            continue

        # convert() returns an independent in-memory image, so the file handle
        # can be closed right away.
        with Image.open(image_path) as opened_image:
            image = opened_image.convert("RGB")
        pixel_arr = np.array(image)

        faces = detector.detect_faces(pixel_arr)
        for face in faces:
            x, y, width, height = face['box']
            # Clamp the box to the image: a box reaching past the border
            # (e.g. a negative x or y) would otherwise be padded with black
            # pixels by crop().
            left, top = max(x, 0), max(y, 0)
            right = min(x + width, image.width)
            bottom = min(y + height, image.height)
            if right <= left or bottom <= top:
                # Nothing of the box lies inside the image.
                continue
            cropped_face = image.crop((left, top, right, bottom))
            face_dir = os.path.join("./faces", f"face{last_face_count}.jpg")
            cropped_face.save(face_dir)
            print(f"Cropped face {last_face_count} from {image_path}")
            last_face_count += 1

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 184ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 114ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 