NOTE: Run this notebook on Google Colaboratory, signed in with an account that has access to the Batchsnaps Drive folder.

Directory structure:

```
snap-sorter
    |
    |---drive_image_process.ipynb
    |
    |---Batchsnaps Folder Shortcut
    |
    |---imagedata.csv
```

!pip install facenet-pytorch

In [None]:
import os
import cv2
from tqdm import tqdm
import shutil
import torch

In [None]:
from facenet_pytorch import MTCNN

In [None]:
# Root folder of one day's photos; every entry directly inside it is listed as a folder.
rt = "/content/drive/MyDrive/Colab Notebooks/snap-sorter/Batchsnaps’24/Day 2"
folders = os.listdir(rt)

# Add up the number of entries found in each folder under the root.
total = sum(len(os.listdir(os.path.join(rt, folder))) for folder in folders)

print(total)

The next cells build the image metadata: each image's file name and its Google Drive file ID, saved to `imagedata.csv`.

In [None]:
!apt install xattr &>log

In [None]:
import subprocess
from subprocess import getoutput
import pandas as pd

DAYPATH = "/content/drive/MyDrive/Colab Notebooks/snap-sorter/Batchsnaps’24/Day 3"
IMAGE_DATA_PATH = "/content/drive/MyDrive/Colab Notebooks/snap-sorter/imagedata.csv"
# Column layout of the CSV; fixed here so an empty run still writes a header row.
IMAGE_DATA_COLUMNS = ['image_name', 'image_id']

# Collect every .jpg (case-insensitive) anywhere under the day folder.
image_paths = []
for dirpath, dirnames, filenames in os.walk(DAYPATH):
    for filename in filenames:
        if filename.lower().endswith('.jpg'):
            image_paths.append(os.path.join(dirpath, filename))

image_data = []
print("Gathering image file IDs")
for image in tqdm(image_paths, desc="Retrieving Image IDs"):
    # Run xattr without a shell and pass the path as its own argument, so
    # quotes, spaces or other special characters in a file name cannot break
    # (or inject into) the command. Raises FileNotFoundError if xattr is not
    # installed instead of silently recording a shell error message.
    result = subprocess.run(
        ["xattr", "-p", "user.drive.id", image],
        capture_output=True,
        text=True,
    )
    image_id = result.stdout.strip()
    # stderr is kept separate from stdout, so a failed lookup is reported and
    # skipped rather than having its error text stored as the image ID.
    if result.returncode != 0 or not image_id:
        tqdm.write(f"Could not read Drive ID for {image}: {result.stderr.strip()}")
        continue
    image_data.append({'image_name': os.path.basename(image), 'image_id': image_id})

print("\nSaving data to csv")
new_df = pd.DataFrame(image_data, columns=IMAGE_DATA_COLUMNS)

# Append to the existing CSV when there is one, otherwise start a new file.
if os.path.exists(IMAGE_DATA_PATH):
    old_df = pd.read_csv(IMAGE_DATA_PATH)
    combined_df = pd.concat([old_df, new_df], ignore_index=True)
else:
    combined_df = new_df

# Re-running this cell for the same folder must not pile up identical rows.
combined_df = combined_df.drop_duplicates(ignore_index=True)

combined_df.to_csv(IMAGE_DATA_PATH, index=False)

In [None]:
# Local scratch folders on the Colab VM: one for the copied photos, one for the faces.
for scratch_dir in ('/content/images/', '/content/faces/'):
    os.makedirs(scratch_dir, exist_ok=True)

source_dir = '/content/drive/MyDrive/Colab Notebooks/snap-sorter/Batchsnaps’24/Day 2/24-70'
destination_dir = '/content/images/'

files = os.listdir(source_dir)

# Copy only regular files into the local images folder; any other entry is skipped.
for entry in tqdm(files, desc="Copying files", unit="file"):
    entry_path = os.path.join(source_dir, entry)
    if not os.path.isfile(entry_path):
        continue
    shutil.copy(entry_path, destination_dir)

In [None]:
class FaceDetector:
    """Finds faces in .jpg images with MTCNN and saves confident detections as crops."""

    def __init__(self):
        """Create the MTCNN detector on the GPU when CUDA is available, else on the CPU."""
        print("FaceDetector object initializing")
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.detector = MTCNN(keep_all=True, device=self.device)
        print("FaceDetector initialized successfully")

    @staticmethod
    def _clip_box(box, width, height):
        """Truncate box coordinates to ints and clamp them to the image bounds.

        Args:
            box: four numbers (x1, y1, x2, y2)
            width: image width in pixels
            height: image height in pixels

        Returns:
            Tuple (x1, y1, x2, y2) with 0 <= x <= width and 0 <= y <= height.
        """
        x1, y1, x2, y2 = (int(coord) for coord in box)
        x1 = min(max(x1, 0), width)
        x2 = min(max(x2, 0), width)
        y1 = min(max(y1, 0), height)
        y2 = min(max(y2, 0), height)
        return x1, y1, x2, y2

    def detect_all_faces_and_save(self, directory, save_directory, min_confidence=0.97):
        """Detect faces in every .jpg image of a directory and save them as crops.

        Files whose name does not end in .jpg (case-insensitive) and images
        that cannot be read are skipped with a message.

        Args:
            directory: input directory of images
            save_directory: directory where faces will be saved (created if missing)
            min_confidence: a face is saved only when its detection
                probability is strictly greater than this value
        """
        images = os.listdir(directory)
        print(f"Found {len(images)} images in {directory}")

        os.makedirs(save_directory, exist_ok=True)

        for image in tqdm(images, desc="Processing images"):
            if not image.lower().endswith('.jpg'):
                tqdm.write(f"Skipping non-jpg file: {image}")
                continue
            img_path = os.path.join(directory, image)
            img = cv2.imread(img_path)

            if img is None:
                tqdm.write(f"Failed to read {image}. Skipping.")
                continue
            # The image is converted from BGR to RGB for the detector; the
            # crops below are taken from the original BGR array for imwrite.
            img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            boxes, probs = self.detector.detect(img_rgb)

            if boxes is None:
                tqdm.write(f"No faces detected in {image}")
                continue

            tqdm.write(f"Detected {len(boxes)} faces in {image}")

            height, width = img.shape[:2]

            # Save only the faces detected with enough confidence.
            for i, (box, prob) in enumerate(zip(boxes, probs)):
                if not prob > min_confidence:
                    continue
                # A box may extend past the image edge. A negative coordinate
                # would be read as a "from the end" index by the slice below
                # and yield an empty or wrong crop, so clamp it first.
                x1, y1, x2, y2 = self._clip_box(box, width, height)
                face_img = img[y1:y2, x1:x2]
                if face_img.size == 0:
                    continue
                face_filename = f"{image}_face{i}_{prob:.2f}.jpg"
                # imwrite reports failure through its return value, not an exception.
                if cv2.imwrite(os.path.join(save_directory, face_filename), face_img):
                    tqdm.write(f"Face {i} in {image} with confidence {prob:.2f} saved successfully")
                else:
                    tqdm.write(f"Failed to save face {i} in {image}")

def face_detect(images_dir, save_face_dir):
    """Create a FaceDetector and run it over images_dir, saving faces to save_face_dir."""
    FaceDetector().detect_all_faces_and_save(images_dir, save_face_dir)

In [None]:
# Input folder with the photos and output folder for the detected faces.
SNAPS_DIR = "/content/images/"
FACES_SAVE_DIR = "/content/faces"

# Run face detection over the input folder.
face_detect(images_dir=SNAPS_DIR, save_face_dir=FACES_SAVE_DIR)

In [None]:
# Drive folder that receives the face images saved in FACES_SAVE_DIR.
DRIVE_FACES_DIR = '/content/drive/MyDrive/Colab Notebooks/snap-sorter/faces'

# If the destination folder does not exist, shutil.copy treats the path as a
# file name and every face overwrites the same single file, so create it first.
os.makedirs(DRIVE_FACES_DIR, exist_ok=True)

files = os.listdir(FACES_SAVE_DIR)

for file_name in tqdm(files, desc="Copying files", unit="file"):
    full_file_name = os.path.join(FACES_SAVE_DIR, file_name)
    if os.path.isfile(full_file_name):
        shutil.copy(full_file_name, DRIVE_FACES_DIR)