RUN THIS ON KAGGLE

In [1]:
!pip install facenet-pytorch

Collecting facenet-pytorch
  Downloading facenet_pytorch-2.6.0-py3-none-any.whl.metadata (12 kB)
Collecting Pillow<10.3.0,>=10.2.0 (from facenet-pytorch)
  Downloading pillow-10.2.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
Collecting torch<2.3.0,>=2.2.0 (from facenet-pytorch)
  Downloading torch-2.2.2-cp310-cp310-manylinux1_x86_64.whl.metadata (26 kB)
Collecting torchvision<0.18.0,>=0.17.0 (from facenet-pytorch)
  Downloading torchvision-0.17.2-cp310-cp310-manylinux1_x86_64.whl.metadata (6.6 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch<2.3.0,>=2.2.0->facenet-pytorch)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch<2.3.0,>=2.2.0->facenet-pytorch)
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch<2.3.0,>=2.2.0->facenet-pytorch)
  Downloading

In [2]:
import os
import cv2
from tqdm import tqdm
import shutil
import torch

In [3]:
from facenet_pytorch import MTCNN

In [4]:
torch.cuda.is_available()

True

In [5]:
class FaceDetector:
    def __init__(self):
        print("FaceDetector object initializing")
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.detector = MTCNN(keep_all=True, device=self.device)
        print("FaceDetector initialized successfully")

    def detect_all_faces_and_save(self, directory, save_directory):
        '''
        Detects all faces in images in a given directory

        Args:
            directory: input directory of images
            save_directory: directory where faces will be saved
        '''
        images = os.listdir(directory)
        print(f"Found {len(images)} images in {directory}")

        os.makedirs(save_directory, exist_ok=True)

        for image in tqdm(images, desc="Processing images"):
            if not image.lower().endswith('.jpg'):
                  tqdm.write(f"Skipping non-jpg file: {image}")
                  continue
            img_path = os.path.join(directory, image)
            img = cv2.imread(img_path)

            if img is None:
                tqdm.write(f"Failed to read {image}. Skipping.")
                continue
            img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            boxes, probs = self.detector.detect(img_rgb)

            if boxes is None:
                tqdm.write(f"No faces detected in {image}")
                continue

            #tqdm.write(f"Detected {len(boxes)} faces in {image}")

            #save faces with confidence > 0.97
            for i, (box, prob) in enumerate(zip(boxes, probs)):
                if prob > 0.97:
                    x1, y1, x2, y2 = [int(b) for b in box]
                    face_img = img[y1:y2, x1:x2]
                    if face_img.size > 0:
                      face_filename = f"{image}_face{i}_{prob:.2f}.jpg"
                      cv2.imwrite(os.path.join(save_directory, face_filename), face_img)

                      #tqdm.write(f"Face {i} in {image} with confidence {prob:.2f} saved successfully")

def face_detect(images_dir, save_face_dir):
    face_detector = FaceDetector()
    face_detector.detect_all_faces_and_save(images_dir, save_face_dir)

In [6]:
SNAPS_DIR = "/kaggle/input/batchsnaps-day-5"
FACES_SAVE_DIR = "/kaggle/working/facesd5"
face_detect(SNAPS_DIR, FACES_SAVE_DIR)

FaceDetector object initializing
FaceDetector initialized successfully
Found 416 images in /kaggle/input/batchsnaps-day-5


Processing images:  12%|█▎        | 52/416 [01:27<10:05,  1.66s/it]

No faces detected in D75_4167.JPG


Processing images:  34%|███▍      | 143/416 [03:57<06:24,  1.41s/it]

No faces detected in D75_4216.JPG


Processing images:  39%|███▉      | 164/416 [04:31<06:19,  1.51s/it]

No faces detected in D75_4219.JPG


Processing images:  51%|█████▏    | 214/416 [05:53<05:00,  1.49s/it]

No faces detected in D75_4169.JPG


Processing images:  52%|█████▏    | 217/416 [05:59<05:46,  1.74s/it]

No faces detected in D75_4220.JPG


Processing images:  54%|█████▍    | 224/416 [06:11<05:42,  1.78s/it]

No faces detected in D75_4218.JPG


Processing images:  62%|██████▏   | 256/416 [07:05<03:47,  1.42s/it]

No faces detected in D75_4221.JPG


Processing images:  67%|██████▋   | 278/416 [07:41<03:24,  1.48s/it]

No faces detected in D75_4170.JPG


Processing images:  69%|██████▉   | 288/416 [07:57<03:11,  1.49s/it]

No faces detected in D75_4217.JPG


Processing images:  84%|████████▍ | 350/416 [09:57<02:06,  1.91s/it]

No faces detected in D75_4047.JPG


Processing images:  89%|████████▉ | 371/416 [10:29<01:06,  1.48s/it]

No faces detected in 750_7415.JPG


Processing images:  99%|█████████▊| 410/416 [11:37<00:10,  1.73s/it]

No faces detected in D75_4048.JPG


Processing images: 100%|██████████| 416/416 [11:47<00:00,  1.70s/it]


In [7]:
import zipfile

def zip_folder_with_progress(folder_path, output_zip):
    total_size = sum(os.path.getsize(os.path.join(root, file))
                     for root, dirs, files in os.walk(folder_path)
                     for file in files)

    with zipfile.ZipFile(output_zip, 'w', zipfile.ZIP_DEFLATED) as zipf:
        with tqdm(total=total_size, unit='B', unit_scale=True, desc="Zipping") as pbar:
            for root, dirs, files in os.walk(folder_path):
                for file in files:
                    file_path = os.path.join(root, file)
                    zipf.write(file_path, os.path.relpath(file_path, folder_path))
                    pbar.update(os.path.getsize(file_path))

folder_path = '/kaggle/working/facesd5'
output_zip = '/kaggle/working/extractedfaces.zip'
zip_folder_with_progress(folder_path, output_zip)


Zipping: 100%|██████████| 59.8M/59.8M [00:02<00:00, 23.6MB/s]
