In [1]:
import numpy as np
import tensorflow as tf
import os
import cv2
from sklearn.model_selection import train_test_split

In [2]:
img_folder = 'archive/lfw-deepfunneled/lfw-deepfunneled'

X = []
Y = []
labels = []

# Create haar cascade model
haar_cascade = cv2.CascadeClassifier('haar_face.xml')

In [3]:
name = []
def count_single_image(img_folder):
    count = 0
    
    # Loop through each folder in the img_folder
    for subdir in os.listdir(img_folder):
        subdir_path = os.path.join(img_folder, subdir)
        
        # Check if it's a directory
        if os.path.isdir(subdir_path):
            # Count how many image files are in the folder
            image_files = [f for f in os.listdir(subdir_path) if os.path.isfile(os.path.join(subdir_path, f))]
            
            # Check if there's exactly one image file
            if len(image_files) == 1:
                count += 1
                name.append(subdir)
    
    return count
    # Call the function and print the result
count = count_single_image(img_folder)
print(f"Number of folders with exactly one image: {count}")
for name in name[:10]:
    print(f"Name of the first 10 folders with single image: {name}")

Number of folders with exactly one image: 4069
Name of the first 10 folders with single image: Aaron_Eckhart
Name of the first 10 folders with single image: Aaron_Guiel
Name of the first 10 folders with single image: Aaron_Patterson
Name of the first 10 folders with single image: Aaron_Pena
Name of the first 10 folders with single image: Aaron_Tippin
Name of the first 10 folders with single image: Abbas_Kiarostami
Name of the first 10 folders with single image: Abba_Eban
Name of the first 10 folders with single image: Abdel_Aziz_Al-Hakim
Name of the first 10 folders with single image: Abdel_Madi_Shabneh
Name of the first 10 folders with single image: Abdulaziz_Kamilov


In [4]:
def restrained_cpu():
    gpus = tf.config.experimental.list_physical_devices('GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)

In [5]:
def createLabels():
    for subdir in os.listdir(img_folder):
        subdir_path = os.path.join(img_folder, subdir)

        # Only process directories
        if os.path.isdir(subdir_path):
            image_files = [f for f in os.listdir(subdir_path) if os.path.isfile(os.path.join(subdir_path, f))]
            
            # Check if there's exactly one image file
            if len(image_files) > 1:
                labels.append(subdir)

In [6]:
def importImages_Labels():
    for label in labels:
        path = os.path.join(img_folder, label)
        # Loop through each image in the sub-folder
        for image_name in os.listdir(path):
            image_path = os.path.join(path, image_name)

            # Load the image using OpenCV
            image = cv2.imread(image_path)

            if image is not None:
                # # Convert image to grayscale
                # gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
                #
                # # detect face in the image (increase image size 10% and minimal neighbors = 3)
                # faces_rect = haar_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=3)
                #
                # for (x, y, w, h) in faces_rect:
                #     faces_roi = gray[y:y + h, x:x + w]
                #     X.append(faces_roi)
                #     Y.append(labels.index(label))
                X.append(image)
                Y.append(labels.index(label))

            else:
                print(f"Warning: Could not load image {image_path}")

    print("Importing images and labels completed!")

In [7]:
def train_model(X_train, Y_train):
    model = cv2.face.LBPHFaceRecognizer_create()
    model.train(X_train, Y_train)
    print("Model training completed!")
    return model

In [8]:
def test_model(model, X_test, Y_test):
    # Initialize lists to hold the results
    predictions = []
    correct_labels = []
    loop = len(X_test)
    count = 0
    # Loop through each of the first 500 images
    for i in range(loop):
        test_image = X_test[i]
        
        gray = cv2.cvtColor(test_image, cv2.COLOR_BGR2GRAY)
        
        faces_rect = haar_cascade.detectMultiScale(gray, 1.3, 4)
        
        for (x, y, w, h) in faces_rect:
            faces_roi = gray[y:y + h, x:x + w]
            label, confidence = model.predict(faces_roi)
        
        if labels[label] == labels[Y_test[i]]:
            count += 1
            
        # Print the prediction and actual label
        correct_percent = count / loop * 100
        print(f'{i} / {loop} ({correct_percent}) Predicted Label: {labels[label]}, Confidence: {confidence}, Actual Label: {labels[Y_test[i]]}')

        # Check if the prediction is correct
        # if labels[label] == labels[Y_test[i]]:
        #     img = X_test[i]  # The original image in color or grayscale
        # 
        #     # Annotate the image with the predicted label
        #     cv2.putText(gray, str(labels[label]), (20, 20), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), thickness=2)
        #     cv2.rectangle(gray, (0, 0), (250, 250), (0, 255, 0), 2)
        #     cv2.imshow('Detected Face', gray)
        # 
        #     cv2.waitKey(0)
        #     cv2.destroyAllWindows()

        # Append the predicted label and the true label to the lists
        predictions.append(label)
        correct_labels.append(Y_test[i])

    # Calculate accuracy
    correct_predictions = sum([1 for p, c in zip(predictions, correct_labels) if p == c])
    accuracy = correct_predictions / len(correct_labels) * 100

    # Print results
    print(f"Total test images: {len(correct_labels)}")
    print(f"Correct predictions: {correct_predictions}")
    print(f"Accuracy: {accuracy:.2f}%")

In [9]:
restrained_cpu()

In [10]:
createLabels()
importImages_Labels()

Importing images and labels completed!


In [11]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=42)

In [12]:
X_train_gray_rect = []
Y_train_gray_rect = []

for idx, img in enumerate(X_train):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces_rect = haar_cascade.detectMultiScale(gray, 1.3, 4)
    for (x, y, w, h) in faces_rect:
        faces_roi = gray[y:y + h, x:x + w]
        X_train_gray_rect.append(faces_roi)
        Y_train_gray_rect.append(Y_train[idx])
        

In [13]:
X_train_np = np.array(X_train_gray_rect, dtype='object')
Y_train_np = np.array(Y_train_gray_rect)

trained_model = train_model(X_train_np, Y_train_np)

Model training completed!


In [14]:
test_model(trained_model, X_test, Y_test)

0 / 2291 (0.043649061545176775) Predicted Label: Atal_Bihari_Vajpayee, Confidence: 65.41155728222422, Actual Label: Atal_Bihari_Vajpayee
1 / 2291 (0.043649061545176775) Predicted Label: Gerhard_Schroeder, Confidence: 78.73161897120718, Actual Label: David_Trimble
2 / 2291 (0.08729812309035355) Predicted Label: Donald_Rumsfeld, Confidence: 70.0177409905207, Actual Label: Donald_Rumsfeld
3 / 2291 (0.08729812309035355) Predicted Label: Donald_Rumsfeld, Confidence: 70.0177409905207, Actual Label: Cherie_Blair
4 / 2291 (0.08729812309035355) Predicted Label: Pascal_Lamy, Confidence: 67.5839999712264, Actual Label: John_Negroponte
5 / 2291 (0.08729812309035355) Predicted Label: Gerhard_Schroeder, Confidence: 69.45371052144826, Actual Label: Anthony_Hopkins
6 / 2291 (0.13094718463553034) Predicted Label: Jose_Maria_Aznar, Confidence: 68.38419208918782, Actual Label: Jose_Maria_Aznar
7 / 2291 (0.13094718463553034) Predicted Label: Stockard_Channing, Confidence: 82.98669577798573, Actual Label: 