# Face detection Notebook Example

In [None]:
# Import libraries
import os
import face_recognition
import cv2
from PIL import Image, ImageDraw 
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

: 

In [None]:
# For troubleshooting purposes only.
def detect_faces(image, model='hog', number_of_times_to_upsample=2):
    """Return the bounding boxes of the faces found in ``image``.

    Args:
        image: Image pixels as a numpy array, e.g. the value returned by
            ``face_recognition.load_image_file``.
        model: Detector name forwarded to
            ``face_recognition.face_locations``.  The default ``'hog'`` is
            what this notebook has always used; pass ``'cnn'`` to opt in to
            the deep-learning detector instead.
        number_of_times_to_upsample: How many times the image is upsampled
            before searching; forwarded unchanged.

    Returns:
        A list of ``(top, right, bottom, left)`` tuples, one per face.
    """
    return face_recognition.face_locations(
        image,
        model=model,
        number_of_times_to_upsample=number_of_times_to_upsample,
    )
def rotate_image(image, angle):
    """Return a copy of ``image`` rotated by ``angle`` degrees.

    ``image`` must provide a PIL-style ``rotate`` method.  ``expand=True`` is
    always requested so the result is sized to hold the whole rotated picture.
    """
    rotated = image.rotate(angle, expand=True)
    return rotated

def show_face_locations(image_path):
    """Display an image with a red box drawn around every detected face.

    Detection is first attempted on the image as loaded.  If nothing is
    found, the image is rotated by 90, 180 and 270 degrees in turn and
    detection is retried on each rotation.  The boxes are drawn on whichever
    orientation produced the detections, so the coordinates always refer to
    the pixels that are actually displayed.

    Args:
        image_path: Path of the image file to load.
    """
    # Load the image file into a numpy array and keep a PIL view of it for
    # rotating and drawing.
    image = face_recognition.load_image_file(image_path)
    pil_image = Image.fromarray(image)

    # Detect faces at the original orientation.
    face_locations = detect_faces(image)

    # Angle 0 is not retried: it would repeat the detection that just ran.
    if not face_locations:
        for angle in (90, 180, 270):
            rotated_image = rotate_image(pil_image, angle)
            rotated_locations = detect_faces(np.array(rotated_image))
            if rotated_locations:
                # The locations are expressed in the rotated image's
                # coordinate system, so that is the image to draw on.
                pil_image = rotated_image
                face_locations = rotated_locations
                break

    # Draw rectangles around detected faces.
    draw = ImageDraw.Draw(pil_image)
    for top, right, bottom, left in face_locations:
        draw.rectangle([left, top, right, bottom], outline="red", width=3)

    # Display the image.
    pil_image.show()

# Run face detection on every image in a folder and display each result.
# Replace the path below with your actual image directory.
directory = "/Users/bab226/Pictures/test_photos/test_photos"
# Process each image in the directory.  The extension test is
# case-insensitive so files such as "IMG_0001.JPG" are not silently skipped.
for filename in os.listdir(directory):
    if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
        image_path = os.path.join(directory, filename)
        show_face_locations(image_path)

In [None]:
# Loads images from a directory, detects faces, and encodes each face.

def detect_faces(image, model='hog', number_of_times_to_upsample=2):
    """Return the bounding boxes of the faces found in ``image``.

    Args:
        image: Image pixels as a numpy array, e.g. the value returned by
            ``face_recognition.load_image_file``.
        model: Detector name forwarded to
            ``face_recognition.face_locations``.  The default ``'hog'`` is
            what this notebook has always used; pass ``'cnn'`` to opt in to
            the deep-learning detector instead.
        number_of_times_to_upsample: How many times the image is upsampled
            before searching; forwarded unchanged.

    Returns:
        A list of ``(top, right, bottom, left)`` tuples, one per face.
    """
    return face_recognition.face_locations(
        image,
        model=model,
        number_of_times_to_upsample=number_of_times_to_upsample,
    )
def rotate_image(image, angle):
    """Return a copy of ``image`` rotated by ``angle`` degrees.

    ``image`` must provide a PIL-style ``rotate`` method.  ``expand=True`` is
    always requested so the result is sized to hold the whole rotated picture.
    """
    rotated = image.rotate(angle, expand=True)
    return rotated

def tryVar(var):
    """Return ``var`` unchanged.

    The previous body wrapped ``val = var`` in ``try/except NameError``, but
    that handler could never run: an undefined name raises ``NameError`` at
    the call site, while the argument is being evaluated, before this
    function is entered.  To fall back to ``None`` for a name that may not
    exist, the caller has to catch ``NameError`` around the call itself.
    """
    return var

def load_and_process_images(directory):
    """Load the images in ``directory``, detect faces and encode each face.

    Every ``.jpg``/``.jpeg``/``.png`` file (extension matched
    case-insensitively) is loaded and searched for faces.  If none are found
    at the original orientation, the image is retried rotated by 90, 180 and
    270 degrees.  Images in which no face is found at any orientation are
    skipped.

    Args:
        directory: Path of the folder to scan (not recursive).

    Returns:
        A list of ``(image_path, image, face_locations, face_encodings)``
        tuples, one per image containing at least one face.  ``image`` is the
        pixel array in the orientation in which the faces were detected, so
        ``face_locations`` always refers to that array.
    """
    images_with_faces = []
    for filename in os.listdir(directory):
        if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
            continue

        image_path = os.path.join(directory, filename)  # Get file path
        image = face_recognition.load_image_file(image_path)  # Load image
        face_locations = detect_faces(image)  # Detect faces

        if not face_locations:
            # Angle 0 is not retried: it would repeat the detection above.
            pil_image = Image.fromarray(image)
            for angle in (90, 180, 270):
                print("Face NOT detected. Trying again...")
                rotated_array = np.array(rotate_image(pil_image, angle))
                face_locations = detect_faces(rotated_array)
                if face_locations:
                    # Keep the rotated pixels: the locations (and therefore
                    # the encodings below) are only valid for this array.
                    image = rotated_array
                    break

        if not face_locations:
            print("Skipping...")
            continue

        print("Face detected")
        face_encodings = face_recognition.face_encodings(image, face_locations)
        images_with_faces.append((image_path, image, face_locations, face_encodings))
    return images_with_faces

def extract_encodings_with_paths(images_with_faces):
    """Flatten per-image results into one ``(image_path, encoding)`` pair per face.

    Each input entry must be a 4-item sequence whose first item is the image
    path and whose last item is an iterable of face encodings.  Every
    encoding is copied into a plain 1D list.  Entries with no encodings
    contribute nothing to the result.
    """
    return [
        (source_path, list(vector))
        for source_path, _, _, vectors in images_with_faces
        for vector in vectors
    ]

def cluster_faces_with_paths(encodings_with_paths, n_clusters):
    """Group face encodings into ``n_clusters`` clusters with k-means.

    Args:
        encodings_with_paths: Sequence of ``(image_path, encoding)`` pairs.
        n_clusters: Number of clusters to fit.

    Returns:
        ``(clusters, labels)``.  ``clusters`` maps every cluster id in
        ``range(n_clusters)`` to the list of image paths assigned to it (a
        path is listed once per encoding that landed in the cluster).
        ``labels`` is the per-encoding label sequence returned by
        ``KMeans.fit_predict``, in input order.
    """
    vectors = [vector for _path, vector in encodings_with_paths]
    labels = KMeans(n_clusters=n_clusters).fit_predict(vectors)

    # Start every cluster with an empty list so unused ids are still present.
    clusters = {}
    for cluster_id in range(n_clusters):
        clusters[cluster_id] = []

    for label, (source_path, _vector) in zip(labels, encodings_with_paths):
        clusters[label].append(source_path)

    return clusters, labels

def find_optimal_clusters(encodings, max_k):
    """Plot k-means inertia for k = 1..max_k (elbow method).

    The "elbow" of the plotted curve suggests a reasonable number of
    clusters.  Nothing is returned; the result is the displayed figure.

    Args:
        encodings: Sequence of encoding vectors to cluster.
        max_k: Largest number of clusters to try.  Values larger than
            ``len(encodings)`` are capped, because k-means cannot fit more
            clusters than there are samples.

    Raises:
        ValueError: If ``encodings`` is empty.
    """
    n_samples = len(encodings)
    if n_samples == 0:
        raise ValueError("encodings is empty: there are no faces to cluster")

    # KMeans raises when n_clusters exceeds the number of samples, so never
    # ask for more clusters than there are encodings.
    iters = range(1, min(max_k, n_samples) + 1)
    distortions = []

    for k in iters:
        kmeans = KMeans(n_clusters=k)
        kmeans.fit(encodings)
        distortions.append(kmeans.inertia_)

    plt.figure(figsize=(8, 6))
    plt.plot(iters, distortions, marker='o')
    plt.xlabel('Number of clusters')
    plt.ylabel('Inertia')
    plt.title('Elbow method for determining optimal number of clusters')
    plt.show()

def visualize_and_tag_clusters(clusters):
    """Show the images of each cluster and ask the user to label it.

    Args:
        clusters: Mapping of cluster id to a list of image paths.

    Returns:
        A dict mapping cluster id to the label typed by the user.  Clusters
        with no images are not shown and get no entry, since there is nothing
        for the user to identify.
    """
    cluster_tags = {}
    for cluster_id, image_paths in clusters.items():
        print(f"Cluster {cluster_id}:")

        if not image_paths:
            # plt.subplots(1, 0) raises, so an empty cluster is reported
            # instead of plotted.
            print("No images in this cluster.")
            continue

        # squeeze=False always yields a 2D array of axes, so a cluster with a
        # single image needs no special handling.
        _fig, axes = plt.subplots(
            1, len(image_paths), figsize=(20, 5), squeeze=False
        )

        for ax, path in zip(axes[0], image_paths):
            img = mpimg.imread(path)
            ax.imshow(img)
            ax.set_title(os.path.basename(path))
            ax.axis('off')

        plt.show()
        tag = input(f"Provide a label for cluster {cluster_id}: ")
        cluster_tags[cluster_id] = tag

    return cluster_tags

In [None]:
# Folder scanned for images; replace with your own image directory.
directory = "/Users/bab226/Pictures/test_photos/test_photos"
# Each entry is an (image_path, image, face_locations, face_encodings) tuple.
images_with_faces = load_and_process_images(directory)

In [5]:
# Flatten images_with_faces into (image_path, encoding) pairs, one per detected face.
encodings_with_paths = extract_encodings_with_paths(images_with_faces)

In [None]:
# Keep only the encoding vectors; the paths are not needed for the elbow plot.
all_encodings = [encoding for _, encoding in encodings_with_paths]
# Plot inertia against the number of clusters so the elbow can be read off by eye.
find_optimal_clusters(all_encodings, max_k=2)  # Adjust max_k as needed


In [7]:
# Cluster the face encodings into the number of groups chosen from the elbow plot.
optimal_clusters = 2  # Replace with the optimal number read off the elbow plot
# clusters maps cluster id -> image paths; cluster_labels holds one label per encoding.
clusters, cluster_labels = cluster_faces_with_paths(encodings_with_paths, n_clusters=optimal_clusters)

In [None]:
# Visualize and tag clusters (interactive: prompts for one label per cluster).
cluster_tags = visualize_and_tag_clusters(clusters)

# Echo the labels that were entered, one line per cluster.
print("Cluster tags:")
for cluster_id, tag in cluster_tags.items():
    print(f"Cluster {cluster_id}: {tag}")

In [None]:
# Bare expression: the notebook displays the cluster id -> tag mapping.
cluster_tags

In [11]:
import json
from PIL import Image
import piexif

def save_tags_to_file(cluster_tags, file_path='cluster_tags.json'):
    """Write ``cluster_tags`` to ``file_path`` as JSON indented by 4 spaces.

    An existing file at ``file_path`` is overwritten.
    """
    with open(file_path, 'w') as handle:
        handle.write(json.dumps(cluster_tags, indent=4))

def add_tags_to_jpeg_images(clusters, cluster_tags):
    """Store each cluster's tag in the EXIF UserComment of its JPEG images.

    The files are modified in place.  Only the EXIF segment is rewritten;
    the image data itself is not decoded and re-encoded, so tagging does not
    degrade picture quality.  Files without a ``.jpg``/``.jpeg`` extension
    (for example PNGs picked up during clustering) are skipped rather than
    being overwritten with JPEG data.

    Args:
        clusters (dict): Dictionary of cluster IDs to image paths.
        cluster_tags (dict): Dictionary of cluster IDs to tags.  Clusters
            without an entry are tagged ``'untagged'``.
    """
    for cluster_id, image_paths in clusters.items():
        tag = cluster_tags.get(cluster_id, 'untagged')
        # A path is listed once per face assigned to the cluster; writing the
        # same tag to the same file repeatedly is pointless.
        for image_path in dict.fromkeys(image_paths):
            if not image_path.lower().endswith(('.jpg', '.jpeg')):
                print(f"Skipping non-JPEG image {image_path}")
                continue
            # piexif.load reads straight from the file and returns empty IFD
            # dictionaries when the image has no EXIF block yet, so images
            # without metadata can be tagged too.
            exif_dict = piexif.load(image_path)
            exif_dict['Exif'][piexif.ExifIFD.UserComment] = tag.encode('utf-8')
            exif_bytes = piexif.dump(exif_dict)
            # Replace the EXIF segment only, leaving the compressed image
            # data untouched.
            piexif.insert(exif_bytes, image_path)
            print(f"Saved tag '{tag}' to image {image_path}")

def search_images_by_tag(directory, search_tag):
    """Find the JPEG images in ``directory`` whose EXIF UserComment equals a tag.

    Files with a ``.jpg`` or ``.jpeg`` extension (matched case-insensitively)
    are inspected.  Images without EXIF data or without a UserComment simply
    do not match.  The comparison is exact and case-sensitive.

    Args:
        directory (str): Path to the directory containing JPEG images.
        search_tag (str): Tag to search for in the images.

    Returns:
        A list of the paths of the matching images.
    """
    print(f"Searching for images with tag '{search_tag}'...")
    print("Found:")

    matching_images = []
    for filename in os.listdir(directory):
        if not filename.lower().endswith(('.jpg', '.jpeg')):
            continue
        image_path = os.path.join(directory, filename)
        # The context manager closes the file handle once the EXIF bytes
        # have been read.
        with Image.open(image_path) as image:
            exif_blob = image.info.get('exif')
        if not exif_blob:
            # Untagged image with no EXIF block: it cannot match.
            continue
        exif_dict = piexif.load(exif_blob)
        raw_comment = exif_dict['Exif'].get(piexif.ExifIFD.UserComment, b'')
        # Comments written by other tools may not be valid UTF-8; decode
        # leniently so one odd file does not abort the whole search.
        user_comment = raw_comment.decode('utf-8', errors='replace')
        if search_tag == user_comment:
            matching_images.append(image_path)
    return matching_images

In [None]:
# Add tags to images. This modifies the image files on disk in place.
add_tags_to_jpeg_images(clusters, cluster_tags)

# Example verification with ExifTool
# Open Terminal and use ExifTool to verify tags
# exiftool -UserComment /path/to/your/image.jpg

In [None]:
# Example usage: list the images whose stored tag equals search_tag.
directory = "/Users/bab226/Pictures/test_photos/test_photos"
# The tag is compared for exact equality, so it must match the label entered
# during tagging character for character (including case).
search_tag = 'alex'

matching_images = search_images_by_tag(directory, search_tag)
print(f"Images with tag '{search_tag}':")
for image_path in matching_images:
    print(image_path)



In [None]:
import helper_functions as hf

# Main: same pipeline as above, driven through the helper_functions module.
directory = "/Users/bab226/Pictures/test_photos/test_photos"
# NOTE(review): the meaning of the second argument (1.5) is defined in
# helper_functions, which is not visible here -- confirm against that module.
images_with_paths = hf.load_and_process_images(directory, 1.5)

# Options
# NOTE(review): kmax is derived from the number of images, while clustering
# runs on face encodings (one per detected face). The two counts can differ,
# and kmax is 0 or negative when at most one image is returned -- verify that
# hf.find_optimal_cluster copes with that.
kmax = len(images_with_paths)-1  # Maximum number of clusters to try

# Flatten the per-image results into (path, encoding) pairs, then keep only
# the encodings for the cluster-count search.
encodings_with_paths = hf.extract_encodings_with_paths(images_with_paths)
all_encodings = [encoding for _, encoding in encodings_with_paths]

# Find optimal cluster size.
optimal_k = hf.find_optimal_cluster(all_encodings, max_k=kmax)
print(f"Optimal number of clusters: {optimal_k}")