Testing the celeb recognition package

https://pypi.org/project/celeb-detector/


https://www.kaggle.com/code/vinayakshanawad/celebrity-face-recognition-vggface-model/notebook 

In [12]:
import os
import random
import shutil

# Define paths for source, training, and test directories
source_dir = "celebrities"
train_dir = "celebrities_train"
test_dir = "celebrities_test"

# Number of images for training and test sets
train_count = 80
test_count = 20

# Step 1: Split celebrity folders into training and test sets with no overlap
def split_data():
    if not os.path.exists(train_dir):
        os.makedirs(train_dir)
    if not os.path.exists(test_dir):
        os.makedirs(test_dir)
    
    # Iterate through each celebrity folder
    for celeb_folder in os.listdir(source_dir):
        celeb_path = os.path.join(source_dir, celeb_folder)
        
        if os.path.isdir(celeb_path):
            # List all image files
            images = [img for img in os.listdir(celeb_path) if img.endswith(('.jpg', '.jpeg'))]
            random.shuffle(images)
            
            # Ensure we have enough images
            if len(images) >= (train_count + test_count):
                # Select the first 80 images for training and the next 20 for testing
                train_images = images[:train_count]
                test_images = images[train_count:train_count + test_count]
                
                # Create directories in train/test folders
                train_celeb_folder = os.path.join(train_dir, celeb_folder)
                test_celeb_folder = os.path.join(test_dir, celeb_folder)
                os.makedirs(train_celeb_folder, exist_ok=True)
                os.makedirs(test_celeb_folder, exist_ok=True)
                
                # Copy images to train and test folders without overlap
                for img in train_images:
                    shutil.copy(os.path.join(celeb_path, img), train_celeb_folder)
                for img in test_images:
                    shutil.copy(os.path.join(celeb_path, img), test_celeb_folder)
            else:
                print(f"Not enough images in {celeb_folder} for splitting.")

# Step 2: Encode faces in the training set
def encode_training_faces():
    encodings = {}
    
    for celeb_folder in os.listdir(train_dir):
        celeb_path = os.path.join(train_dir, celeb_folder)
        if os.path.isdir(celeb_path):
            celeb_encodings = []
            
            # Process each image
            for img_name in os.listdir(celeb_path):
                img_path = os.path.join(celeb_path, img_name)
                img = face_recognition.load_image_file(img_path)
                
                # Get encodings (assuming one face per image)
                encoding = face_recognition.face_encodings(img)
                if encoding:
                    celeb_encodings.append(encoding[0])
            
            # Save encodings for each celebrity
            if celeb_encodings:
                encodings[celeb_folder] = np.mean(celeb_encodings, axis=0)
    
    return encodings


In [13]:

# Step 3: Identify a random celebrity photo from the test set
def identify_random_test_image(encodings):
    celeb_folder = random.choice(os.listdir(test_dir))
    celeb_path = os.path.join(test_dir, celeb_folder)
    test_image = random.choice(os.listdir(celeb_path))
    test_image_path = os.path.join(celeb_path, test_image)
    
    # Load and encode test image
    test_img = face_recognition.load_image_file(test_image_path)
    test_encoding = face_recognition.face_encodings(test_img)
    
    if test_encoding:
        test_encoding = test_encoding[0]
        
        # Compare with known encodings
        results = face_recognition.compare_faces(
            list(encodings.values()), test_encoding, tolerance=0.6
        )
        
        # Find match
        if True in results:
            match_index = results.index(True)
            matched_celebrity = list(encodings.keys())[match_index]
            print(f"Identified as: {matched_celebrity} from test image: {test_image_path}")
        else:
            print(f"No match found for {test_image_path}.")
    else:
        print(f"No face detected in test image {test_image_path}")

# Run steps
split_data()
celebrity_encodings = encode_training_faces()


In [14]:
def calculate_accuracy(encodings):
    total_images = 0
    correct_identifications = 0
    
    # Iterate through each celebrity folder in the test set
    for celeb_folder in os.listdir(test_dir):
        celeb_path = os.path.join(test_dir, celeb_folder)
        
        # Ensure it's a directory
        if os.path.isdir(celeb_path):
            # Process each image in the folder
            for test_image in os.listdir(celeb_path):
                test_image_path = os.path.join(celeb_path, test_image)
                
                # Load and encode test image
                test_img = face_recognition.load_image_file(test_image_path)
                test_encoding = face_recognition.face_encodings(test_img)
                
                if test_encoding:
                    test_encoding = test_encoding[0]
                    
                    # Compare with known encodings
                    results = face_recognition.compare_faces(
                        list(encodings.values()), test_encoding, tolerance=0.6
                    )
                    
                    # Find match
                    if True in results:
                        match_index = results.index(True)
                        matched_celebrity = list(encodings.keys())[match_index]
                        
                        # Check if the identified celebrity matches the folder name
                        if matched_celebrity == celeb_folder:
                            correct_identifications += 1
                        else:
                            print(f"Incorrect: {test_image_path} identified as {matched_celebrity}")
                    else:
                        print(f"No match found for: {test_image_path}")
                
                # Increment total image count
                total_images += 1
    
    # Calculate accuracy
    accuracy = (correct_identifications / total_images) * 100 if total_images > 0 else 0
    print(f"Accuracy Rate: {accuracy:.2f}%")
    print(f"Total Images: {total_images}, Correct Identifications: {correct_identifications}")

# Run accuracy calculation
calculate_accuracy(celebrity_encodings)


Incorrect: celebrities_test/Pedro-Pascal/73.jpg identified as John-Lennon
Incorrect: celebrities_test/Pedro-Pascal/94.jpg identified as John-Lennon
Incorrect: celebrities_test/Pedro-Pascal/87.jpg identified as John-Lennon
Incorrect: celebrities_test/Margot-Robbie/09.jpeg identified as John-Lennon
Accuracy Rate: 90.61%
Total Images: 330, Correct Identifications: 299
