In [1]:
import os
import shutil
import tensorflow as tf
from deepface import DeepFace
from tqdm import tqdm

In [2]:
# Enable on-demand GPU memory allocation ("memory growth") for every visible GPU.
# This does not force GPU execution; it only changes how TensorFlow reserves GPU
# memory on the devices it can see.
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        # Apply the setting to all GPUs, not just the first one, so the
        # configuration is consistent across devices.
        for gpu in physical_devices:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("✅ GPU is enabled for DeepFace!")
    except RuntimeError as e:
        # Raised when the GPUs were already initialized before this cell ran
        # (e.g. the cell was executed late); restart the kernel to apply it.
        print(f"Could not enable GPU memory growth -> {e}")
else:
    print("No GPU detected by TensorFlow; running on CPU.")

✅ GPU is enabled for DeepFace!


In [3]:
# Input images are read from INPUT_FOLDER; images judged distinct are copied
# into OUTPUT_FOLDER.
INPUT_FOLDER = "./temp/input"
OUTPUT_FOLDER = "./temp/output"

# Create the output folder (including missing parents). exist_ok=True makes this
# a no-op when the folder already exists and avoids the check-then-create race.
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

In [4]:
# Collect the image files in the input folder.
# os.listdir() returns entries in arbitrary order; the de-duplication below keeps
# the first image of each matching group, so the names are sorted to make the
# selection reproducible across runs and machines.
image_paths = [
    os.path.join(INPUT_FOLDER, file) for file in sorted(os.listdir(INPUT_FOLDER))
    if file.lower().endswith((".jpg", ".jpeg", ".png"))
    # Skip directories that happen to carry an image extension
    and os.path.isfile(os.path.join(INPUT_FOLDER, file))
]

# Paths of the images kept as distinct (filled by the de-duplication loop)
unique_images = []

print(f"Processing {len(image_paths)} images to find distinct ones...")

Processing 50 images to find distinct ones...


In [5]:
# Greedy de-duplication: an image is kept only if DeepFace.verify does not match
# it against any image kept so far. Kept images are copied to OUTPUT_FOLDER.
# NOTE(review): each pair is verified from scratch, so the number of verify calls
# grows quadratically with the number of kept images; computing one embedding per
# image and comparing those may be cheaper — TODO evaluate.

# Start from an empty list so re-running this cell does not depend on leftover state
unique_images = []

for img_path in tqdm(image_paths, desc="Checking for duplicates"):
    is_duplicate = False
    failed_comparisons = 0

    # Compare the current image with each image already kept as distinct
    for unique_img in unique_images:
        try:
            # enforce_detection=False: do not raise when no face is detected
            result = DeepFace.verify(img1_path=img_path, img2_path=unique_img, enforce_detection=False)

            # A positive verification means the image matches one already kept
            if result["verified"]:
                is_duplicate = True
                break  # No need to check further

        except Exception as e:
            # Best effort: log the failing pair, treat it as "no match", and move
            # on to the next kept image (the current image is NOT skipped).
            failed_comparisons += 1
            print(f"Error comparing {img_path} with {unique_img} -> {e}")
            continue

    # No match found: keep the image and copy it to the output folder
    if not is_duplicate:
        if failed_comparisons:
            # Make it visible that this image was kept without a full comparison
            print(f"Warning: keeping {img_path} although {failed_comparisons} comparison(s) failed")
        unique_images.append(img_path)
        shutil.copy(img_path, os.path.join(OUTPUT_FOLDER, os.path.basename(img_path)))

print(f"Processing complete. Found {len(unique_images)} distinct images.")
print(f"Distinct images saved in: {OUTPUT_FOLDER}")

Checking for duplicates: 100%|██████████| 50/50 [00:49<00:00,  1.02it/s]

Processing complete. Found 15 distinct images.
Distinct images saved in: ./temp/output



