In [1]:
import os
import glob

# Define the correct path based on your folder structure
path = './Faces'  # Change this to the exact path where the image files are located

# Check if the folder exists to avoid errors
if not os.path.exists(path):
    print(f"Path {path} does not exist. Please verify the path.")
else:
    print(f"Path {path} exists and is accessible.")

# Debug: Print all files in the directory to see if images are present
print("Files found in the directory:")
for root, dirs, files in os.walk(path):
    for file in files:
        print(file)  # Print each file name found

# Initialize the lists to store image paths and person names
image_path_names = []
person_names = set()

# Adjust the glob pattern to match your image file names, including subfolders
for file_name in glob.glob(os.path.join(path, '**', '*.jpg'), recursive=True):
    image_path_names.append(file_name)
    # Extract the person name by splitting the filename
    person_names.add(os.path.basename(file_name).split('_')[0])

# Check the results
print(f"Total number of images: {len(image_path_names)}")  # Should print the total number of images
print(f"Unique person names: {person_names}")  # Should print the unique person names


Path ./Faces exists and is accessible.
Files found in the directory:
Jim_Carrey.jpg
Jim_Carrey2.jpg
Jim_Carrey1.jpg
Viktoriia_Drozdovska2.jpg
Viktoriia_Drozdovska1.jpg
Viktoriia_Drozdovska.jpg
Kim_Kardashian.jpg
Kim_Kardashian2.jpg
Kim_Kardashian1.jpg
Kim_Kardashian-checkpoint.jpg
Total number of images: 9
Unique person names: {'Viktoriia', 'Jim', 'Kim'}


In [2]:
import urllib.request

# Step 1: Download the Dlib CNN face detector model using Python's urllib
model_url = "http://dlib.net/files/mmod_human_face_detector.dat.bz2"
model_file = "mmod_human_face_detector.dat.bz2"

# Download the file and save it locally
urllib.request.urlretrieve(model_url, model_file)
print(f"Model file '{model_file}' downloaded successfully.")



Model file 'mmod_human_face_detector.dat.bz2' downloaded successfully.


In [3]:
import bz2
import shutil

# Step 2: Decompress the .bz2 file to get the .dat file
compressed_file = "mmod_human_face_detector.dat.bz2"
extracted_file = "mmod_human_face_detector.dat"

# Decompress the file if it exists
if not os.path.exists(extracted_file) and os.path.exists(compressed_file):
    with bz2.BZ2File(compressed_file, 'rb') as file, open(extracted_file, 'wb') as output:
        shutil.copyfileobj(file, output)
    print(f"Decompressed '{compressed_file}' to '{extracted_file}'.")
else:
    print(f"Either '{compressed_file}' is missing or '{extracted_file}' already exists.")



Either 'mmod_human_face_detector.dat.bz2' is missing or 'mmod_human_face_detector.dat' already exists.


In [4]:
# Step 3: Install necessary libraries using %pip in Jupyter
%pip install dlib opencv-python-headless matplotlib


Collecting dlib
  Using cached dlib-19.24.6-cp312-cp312-macosx_14_0_universal2.whl
Collecting opencv-python-headless
  Using cached opencv_python_headless-4.10.0.84-cp37-abi3-macosx_11_0_arm64.whl.metadata (20 kB)
Using cached opencv_python_headless-4.10.0.84-cp37-abi3-macosx_11_0_arm64.whl (54.8 MB)
Installing collected packages: dlib, opencv-python-headless
Successfully installed dlib-19.24.6 opencv-python-headless-4.10.0.84
Note: you may need to restart the kernel to use updated packages.


In [5]:
import cv2
import dlib
import matplotlib.pyplot as plt
import os

# Load the Dlib CNN face detector model
model_file = "mmod_human_face_detector.dat"

# Check if the model file exists before loading
if os.path.exists(model_file):
    dnnFaceDetector = dlib.cnn_face_detection_model_v1(model_file)
    print("Dlib CNN face detector model loaded successfully.")
else:
    print(f"Model file '{model_file}' not found. Please ensure it was extracted successfully.")


Dlib CNN face detector model loaded successfully.


In [6]:

import glob

# Define the path to the `face_rec_images` folder
path = '/Users/viktoriiadrozdovska/face_rec_images'  # The path to the main directory containing the Faces folder

# Check if the folder exists to avoid errors
if not os.path.exists(path):
    print(f"Path {path} does not exist. Please verify the path.")
else:
    print(f"Path {path} exists and is accessible.")

# Debug: Print all files and folders in the directory to see if images or subfolders are present
print("Files and directories found in 'face_rec_images':")
for root, dirs, files in os.walk(path):
    for name in dirs:
        print(f"Directory: {name}")
    for name in files:
        print(f"File: {name}")


Path /Users/viktoriiadrozdovska/face_rec_images exists and is accessible.
Files and directories found in 'face_rec_images':
Directory: face_rec_images
Directory: .ipynb_checkpoints
Directory: Faces
File: facerec.ipynb
File: mmod_human_face_detector.dat
File: mmod_human_face_detector.dat.bz2
Directory: .ipynb_checkpoints
File: facerec-checkpoint.ipynb
Directory: Jim_Carrey
Directory: Viktoriia Drozdovska
Directory: .ipynb_checkpoints
Directory: Kim_Kardashian
Directory: .ipynb_checkpoints
File: Jim_Carrey.jpg
File: Jim_Carrey2.jpg
File: Jim_Carrey1.jpg
File: Viktoriia_Drozdovska2.jpg
File: Viktoriia_Drozdovska1.jpg
File: Viktoriia_Drozdovska.jpg
Directory: .ipynb_checkpoints
File: Kim_Kardashian.jpg
File: Kim_Kardashian2.jpg
File: Kim_Kardashian1.jpg
File: Kim_Kardashian-checkpoint.jpg


In [7]:
import matplotlib.pyplot as plt



In [8]:
input_images_path = os.path.join(path, 'Faces')  # Folder containing input images (e.g., Person1.jpg, Person2.jpg)
cropped_images_path = os.path.join(path, 'Images_crop')  # Folder to save cropped images

# Create the 'Images_crop' directory if it doesn't exist
os.makedirs(cropped_images_path, exist_ok=True)

# Verify paths
print(f"Input Images Path: {input_images_path}")
print(f"Cropped Images Path: {cropped_images_path}")


Input Images Path: /Users/viktoriiadrozdovska/face_rec_images/Faces
Cropped Images Path: /Users/viktoriiadrozdovska/face_rec_images/Images_crop


In [9]:
# Get a list of all images in the input folder
image_files = [f for f in os.listdir(input_images_path) if f.endswith(('.jpg', '.jpeg', '.png'))]

# Check the number of images to be processed
print(f"Number of images found: {len(image_files)}")

# Display the image file names
print("Images found for processing:", image_files)


Number of images found: 0
Images found for processing: []


In [10]:
!pip install opencv-python numpy requests


Collecting opencv-python
  Using cached opencv_python-4.10.0.84-cp37-abi3-macosx_11_0_arm64.whl.metadata (20 kB)
Using cached opencv_python-4.10.0.84-cp37-abi3-macosx_11_0_arm64.whl (54.8 MB)
Installing collected packages: opencv-python
Successfully installed opencv-python-4.10.0.84


In [11]:
import os
import cv2
import numpy as np
import requests

# Define Paths
input_images_path = '/Users/viktoriiadrozdovska/face_rec_images/Faces'
cropped_images_path = '/Users/viktoriiadrozdovska/face_rec_images/Images_crop'
cascade_filename = 'haarcascade_frontalface_default.xml'
cascade_url = 'https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml'
cascade_path = os.path.join(cropped_images_path, cascade_filename)  # Save cascade in output directory

def download_haar_cascade(url, save_path):
    """
    Downloads the Haar Cascade XML file from the specified URL.
    """
    try:
        print(f"Downloading Haar Cascade classifier from {url}...")
        response = requests.get(url, stream=True)
        if response.status_code == 200:
            os.makedirs(os.path.dirname(save_path), exist_ok=True)  # Ensure the directory exists
            with open(save_path, 'wb') as file:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        file.write(chunk)
            print(f"Downloaded Haar Cascade classifier and saved to '{save_path}'.")
        else:
            print(f"Failed to download Haar Cascade classifier. Status code: {response.status_code}")
            exit(1)
    except Exception as e:
        print(f"An error occurred while downloading Haar Cascade classifier: {e}")
        exit(1)

# Check if Haar Cascade file exists; if not, download it
if not os.path.exists(cascade_path):
    download_haar_cascade(cascade_url, cascade_path)
else:
    print(f"Haar Cascade classifier found at '{cascade_path}'.")

# Initialize Haar Cascade face detector
face_cascade = cv2.CascadeClassifier(cascade_path)
if face_cascade.empty():
    print("Error loading Haar Cascade classifier. Check the cascade_path.")
    exit(1)
else:
    print("Haar Cascade face detector initialized successfully.")

# Supported image extensions
supported_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')

# Iterate over each person's folder in the input directory
for person_name in os.listdir(input_images_path):
    person_dir = os.path.join(input_images_path, person_name)
    if not os.path.isdir(person_dir):
        continue  # Skip files, only process directories

    # Iterate over each image for the person
    for filename in os.listdir(person_dir):
        if not filename.lower().endswith(supported_extensions):
            continue  # Skip unsupported file formats

        file_path = os.path.join(person_dir, filename)
        img = cv2.imread(file_path)

        if img is None:
            print(f"Failed to read '{file_path}'. Skipping.")
            continue

        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Detect faces
        faces = face_cascade.detectMultiScale(
            gray,
            scaleFactor=1.8,
            minNeighbors=6,
            minSize=(60, 60)
        )

        if len(faces) == 1:
            (x, y, w, h) = faces[0]
            cropped_face = img[y:y+h, x:x+w]

            if cropped_face.size == 0:
                print(f"Invalid crop for '{file_path}'. Skipping.")
                continue

            # Create person's directory in cropped_images_path
            person_cropped_dir = os.path.join(cropped_images_path, person_name)
            os.makedirs(person_cropped_dir, exist_ok=True)

            # Save the cropped face
            cropped_filename = f"{os.path.splitext(filename)[0]}_cropped.jpg"
            cropped_path = os.path.join(person_cropped_dir, cropped_filename)
            success = cv2.imwrite(cropped_path, cropped_face)
            if success:
                print(f"Cropped face saved for '{filename}' at '{cropped_path}'.")
            else:
                print(f"Failed to save cropped face for '{filename}'.")
        elif len(faces) > 1:
            print(f"Multiple faces detected in '{file_path}'. Skipping.")
        else:
            print(f"No face detected in '{file_path}'. Skipping.")


Downloading Haar Cascade classifier from https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml...
Downloaded Haar Cascade classifier and saved to '/Users/viktoriiadrozdovska/face_rec_images/Images_crop/haarcascade_frontalface_default.xml'.
Haar Cascade face detector initialized successfully.
Cropped face saved for 'Jim_Carrey.jpg' at '/Users/viktoriiadrozdovska/face_rec_images/Images_crop/Jim_Carrey/Jim_Carrey_cropped.jpg'.
Cropped face saved for 'Jim_Carrey2.jpg' at '/Users/viktoriiadrozdovska/face_rec_images/Images_crop/Jim_Carrey/Jim_Carrey2_cropped.jpg'.
Cropped face saved for 'Jim_Carrey1.jpg' at '/Users/viktoriiadrozdovska/face_rec_images/Images_crop/Jim_Carrey/Jim_Carrey1_cropped.jpg'.
Cropped face saved for 'Viktoriia_Drozdovska2.jpg' at '/Users/viktoriiadrozdovska/face_rec_images/Images_crop/Viktoriia Drozdovska/Viktoriia_Drozdovska2_cropped.jpg'.
Cropped face saved for 'Viktoriia_Drozdovska1.jpg' at '/Users/viktoriiadrozd

In [12]:
import os
import shutil
import random

# Define Paths
cropped_images_path = '/Users/viktoriiadrozdovska/face_rec_images/Images_crop'
train_dir = '/Users/viktoriiadrozdovska/face_rec_images/train'
test_dir = '/Users/viktoriiadrozdovska/face_rec_images/test'
split_ratio = 0.8  # 80% training, 20% testing

# Create train and test directories
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

# Iterate over each person's folder
for person_name in os.listdir(cropped_images_path):
    person_cropped_dir = os.path.join(cropped_images_path, person_name)
    if not os.path.isdir(person_cropped_dir):
        continue  # Skip if not a directory

    images = [f for f in os.listdir(person_cropped_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tiff'))]
    random.shuffle(images)  # Shuffle to ensure randomness

    split_index = int(len(images) * split_ratio)
    train_images = images[:split_index]
    test_images = images[split_index:]

    # Create person's train and test directories
    person_train_dir = os.path.join(train_dir, person_name)
    person_test_dir = os.path.join(test_dir, person_name)
    os.makedirs(person_train_dir, exist_ok=True)
    os.makedirs(person_test_dir, exist_ok=True)

    # Copy training images
    for img in train_images:
        src = os.path.join(person_cropped_dir, img)
        dst = os.path.join(person_train_dir, img)
        shutil.copyfile(src, dst)

    # Copy testing images
    for img in test_images:
        src = os.path.join(person_cropped_dir, img)
        dst = os.path.join(person_test_dir, img)
        shutil.copyfile(src, dst)

    print(f"Person '{person_name}': {len(train_images)} training and {len(test_images)} testing images.")


Person 'Jim_Carrey': 2 training and 1 testing images.
Person 'Viktoriia Drozdovska': 2 training and 1 testing images.
Person 'Kim_Kardashian': 2 training and 1 testing images.


In [13]:
!pip install face_recognition




Collecting face_recognition
  Using cached face_recognition-1.3.0-py2.py3-none-any.whl.metadata (21 kB)
Collecting face-recognition-models>=0.3.0 (from face_recognition)
  Using cached face_recognition_models-0.3.0-py2.py3-none-any.whl
Using cached face_recognition-1.3.0-py2.py3-none-any.whl (15 kB)
Installing collected packages: face-recognition-models, face_recognition
Successfully installed face-recognition-models-0.3.0 face_recognition-1.3.0


In [14]:
import os
import face_recognition
import pickle

# Paths
train_dir = '/Users/viktoriiadrozdovska/face_rec_images/train'
model_save_path = '/Users/viktoriiadrozdovska/face_rec_images/face_encodings.pkl'

# Initialize lists
known_face_encodings = []
known_face_names = []

for person_name in os.listdir(train_dir):
    person_train_dir = os.path.join(train_dir, person_name)
    if not os.path.isdir(person_train_dir):
        continue

    for img_name in os.listdir(person_train_dir):
        img_path = os.path.join(person_train_dir, img_name)
        image = face_recognition.load_image_file(img_path)
        encodings = face_recognition.face_encodings(image)

        if len(encodings) == 0:
            print(f"No faces found in '{img_path}'. Skipping.")
            continue

        known_face_encodings.append(encodings[0])
        known_face_names.append(person_name)

# Save encodings to a file
data = {"encodings": known_face_encodings, "names": known_face_names}
with open(model_save_path, "wb") as f:
    pickle.dump(data, f)

print("Face encodings saved successfully.")


Face encodings saved successfully.


In [16]:
print("per_person_accuracy:", per_person_accuracy)
print("Type of per_person_accuracy:", type(per_person_accuracy))
print("List of items:", list(per_person_accuracy.items()))
print("Type of list(per_person_accuracy.items()):", type(list(per_person_accuracy.items())))
for item in list(per_person_accuracy.items()):
    print("Item:", item, "Type:", type(item))
    for element in item:
        print(" - Element:", element, "Type:", type(element))


per_person_accuracy: {'Jim_Carrey': 100.0, 'Viktoriia Drozdovska': 100.0, 'Kim_Kardashian': 100.0}
Type of per_person_accuracy: <class 'dict'>
List of items: [('Jim_Carrey', 100.0), ('Viktoriia Drozdovska', 100.0), ('Kim_Kardashian', 100.0)]
Type of list(per_person_accuracy.items()): <class 'list'>
Item: ('Jim_Carrey', 100.0) Type: <class 'tuple'>
 - Element: Jim_Carrey Type: <class 'str'>
 - Element: 100.0 Type: <class 'float'>
Item: ('Viktoriia Drozdovska', 100.0) Type: <class 'tuple'>
 - Element: Viktoriia Drozdovska Type: <class 'str'>
 - Element: 100.0 Type: <class 'float'>
Item: ('Kim_Kardashian', 100.0) Type: <class 'tuple'>
 - Element: Kim_Kardashian Type: <class 'str'>
 - Element: 100.0 Type: <class 'float'>


In [36]:
import os
import face_recognition
import pickle
import matplotlib.pyplot as plt
from collections import defaultdict
import numpy as np
import pandas as pd
import cv2  # Import OpenCV

# Paths
test_dir = '/Users/viktoriiadrozdovska/face_rec_images/test'
model_load_path = '/Users/viktoriiadrozdovska/face_rec_images/face_encodings.pkl'
output_dir = '/Users/viktoriiadrozdovska/face_rec_images/test_annotated'

# Create the output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Load known faces and encodings
with open(model_load_path, "rb") as f:
    data = pickle.load(f)

known_face_encodings = data["encodings"]
known_face_names = data["names"]

# Initialize variables for accuracy calculation
correct = 0
total = 0

# Initialize per-person tracking
per_person_correct = defaultdict(int)
per_person_total = defaultdict(int)
misrecognized = []

# Iterate through each person in the test directory
for person_name in os.listdir(test_dir):
    person_test_dir = os.path.join(test_dir, person_name)
    if not os.path.isdir(person_test_dir):
        print(f"Skipping '{person_test_dir}': Not a directory.\n")
        continue  # Skip if not a directory

    # Iterate through each image of the person
    for img_name in os.listdir(person_test_dir):
        img_path = os.path.join(person_test_dir, img_name)
        print(f"Processing image: {img_path}")

        # Load the image using face_recognition
        try:
            image = face_recognition.load_image_file(img_path)
        except Exception as e:
            print(f"Error loading '{img_path}': {e}. Skipping.\n")
            continue

        # Verify that 'image' is a numpy array
        if not isinstance(image, np.ndarray):
            print(f"Loaded image is not a NumPy array for '{img_path}'. Skipping.\n")
            continue

        # Check image dimensions
        if image.ndim != 3 or image.shape[2] != 3:
            print(f"Unexpected image shape {image.shape} for '{img_path}'. Skipping.\n")
            continue

        # Detect face locations and encodings
        face_locations = face_recognition.face_locations(image)
        face_encodings = face_recognition.face_encodings(image, face_locations)

        # If no faces are found, skip the image
        if len(face_encodings) == 0:
            print(f"No faces found in '{img_path}'. Skipping.\n")
            continue

        # Convert the image to BGR color (which OpenCV uses)
        image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        # Iterate through each face found in the image
        for (top, right, bottom, left), encoding in zip(face_locations, face_encodings):
            matches = face_recognition.compare_faces(known_face_encodings, encoding, tolerance=0.6)
            name = "Unknown"

            # Use the first match found
            if True in matches:
                first_match_index = matches.index(True)
                name = known_face_names[first_match_index]

            # Update counts
            per_person_total[person_name] += 1
            total += 1
            if name == person_name:
                correct += 1
                per_person_correct[person_name] += 1
            else:
                misrecognized.append({
                    'actual': person_name,
                    'recognized': name,
                    'image_path': img_path
                })

            # Print detailed per-face recognition result
            print(f"Detected: {name}")
            print(f"Actual: {person_name}")
            recognition_status = "Correct" if name == person_name else "Incorrect"
            print(f"Recognition Status: {recognition_status}\n")

            # Annotate the image with bounding box and label
            # Draw a box around the face
            cv2.rectangle(image_bgr, (left, top), (right, bottom), (0, 255, 0), 2)

            # Prepare the label with name
            label = name
            # Draw a filled rectangle below the face for the label background
            cv2.rectangle(image_bgr, (left, bottom - 35), (right, bottom), (0, 255, 0), cv2.FILLED)
            # Choose a font and put the text
            font = cv2.FONT_HERSHEY_DUPLEX
            cv2.putText(image_bgr, label, (left + 6, bottom - 6), font, 0.8, (255, 255, 255), 1)

        # Save the annotated image
        annotated_img_path = os.path.join(output_dir, f"annotated_{img_name}")
        cv2.imwrite(annotated_img_path, image_bgr)
        print(f"Annotated image saved to '{annotated_img_path}'.\n")

# Calculate overall accuracy
if total > 0:
    overall_accuracy = (correct / total) * 100
    print(f"\n--- Overall Accuracy ---")
    print(f"Total Faces Processed: {total}")
    print(f"Correct Recognitions: {correct}")
    print(f"Accuracy: {overall_accuracy:.2f}%")
else:
    print("No faces were processed.")

# Calculate per-person accuracy
print(f"\n--- Per-Person Accuracy ---")
per_person_accuracy = {}
for person in per_person_total:
    accuracy = (per_person_correct[person] / per_person_total[person]) * 100 if per_person_total[person] > 0 else 0
    per_person_accuracy[person] = accuracy
    print(f"{person}: {accuracy:.2f}% ({per_person_correct[person]}/{per_person_total[person]})")

# Debugging: Print per_person_accuracy, persons, and accuracies
print(f"\nPer-Person Accuracy Data: {per_person_accuracy}")

# Print misrecognized images details
if misrecognized:
    print(f"\n--- Misrecognized Images ---")
    for item in misrecognized:
        print(f"Image: {item['image_path']}")
        print(f"Actual: {item['actual']}, Recognized: {item['recognized']}\n")
else:
    print("\nNo misrecognized images.")


Processing image: /Users/viktoriiadrozdovska/face_rec_images/test/Jim_Carrey/Jim_Carrey_cropped.jpg
Detected: Jim_Carrey
Actual: Jim_Carrey
Recognition Status: Correct

Annotated image saved to '/Users/viktoriiadrozdovska/face_rec_images/test_annotated/annotated_Jim_Carrey_cropped.jpg'.

Processing image: /Users/viktoriiadrozdovska/face_rec_images/test/Viktoriia Drozdovska/Viktoriia_Drozdovska_cropped.jpg
Detected: Viktoriia Drozdovska
Actual: Viktoriia Drozdovska
Recognition Status: Correct

Annotated image saved to '/Users/viktoriiadrozdovska/face_rec_images/test_annotated/annotated_Viktoriia_Drozdovska_cropped.jpg'.

Processing image: /Users/viktoriiadrozdovska/face_rec_images/test/Kim_Kardashian/Kim_Kardashian_cropped.jpg
Detected: Kim_Kardashian
Actual: Kim_Kardashian
Recognition Status: Correct

Annotated image saved to '/Users/viktoriiadrozdovska/face_rec_images/test_annotated/annotated_Kim_Kardashian_cropped.jpg'.


--- Overall Accuracy ---
Total Faces Processed: 3
Correct Reco