In [2]:
import cv2
import os
import numpy as np

# Load the Haar cascade classifiers that ship with OpenCV.
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
eye_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_eye.xml')

# CascadeClassifier does not raise when the XML file cannot be read; it just
# yields an empty classifier that fails later inside detectMultiScale.
# Check now so the error points at the real cause.
for _cascade_name, _cascade in (('face', face_cascade), ('eye', eye_cascade)):
    if _cascade.empty():
        raise IOError(f"Could not load the {_cascade_name} Haar cascade from {cv2.data.haarcascades}")

# Directory setup
input_dir = 'training_images'  # Your folder: one sub-folder per person/label
output_dir = 'cropped_training_faces'
os.makedirs(output_dir, exist_ok=True)

def process_image(image_path, output_path):
    """Detect faces in one image and save every eye-validated face crop.

    A face is kept only if the eye cascade finds at least one eye inside the
    detected face rectangle. The first kept face is written to ``output_path``
    itself; any further kept faces from the same image are written next to it
    with a numeric suffix (``name_1.jpg``, ``name_2.jpg``, ...) so that they no
    longer overwrite one another.

    Args:
        image_path: Path of the image to read.
        output_path: Destination path for the (first) cropped face.

    Returns:
        None. Progress and failures are reported via ``print``.
    """
    # Read the image
    img = cv2.imread(image_path)
    if img is None:
        print(f"Failed to load: {image_path}")
        return

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Detect faces
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

    root, ext = os.path.splitext(output_path)
    saved_count = 0

    for (x, y, w, h) in faces:
        # Detect eyes within the face region (validation against false positives)
        gray_face = gray[y:y+h, x:x+w]
        eyes = eye_cascade.detectMultiScale(gray_face)
        if len(eyes) == 0:
            continue

        # Crop face region from the colour image
        face_roi = img[y:y+h, x:x+w]

        # First kept face uses the requested path; later ones get a suffix.
        target_path = output_path if saved_count == 0 else f"{root}_{saved_count}{ext}"

        # imwrite signals failure through its return value, not an exception.
        if cv2.imwrite(target_path, face_roi):
            saved_count += 1
            print(f"Saved: {target_path}")
        else:
            print(f"Failed to save: {target_path}")

# Process all subfolders in training_images (one subfolder per label)
for label in os.listdir(input_dir):
    label_input_dir = os.path.join(input_dir, label)

    # Skip stray files before touching the output tree; otherwise an empty
    # output folder would be created for every non-directory entry.
    if not os.path.isdir(label_input_dir):
        continue

    label_output_dir = os.path.join(output_dir, label)
    os.makedirs(label_output_dir, exist_ok=True)

    for filename in os.listdir(label_input_dir):
        if filename.endswith(('.jpg', '.png')):
            input_path = os.path.join(label_input_dir, filename)
            output_path = os.path.join(label_output_dir, f'face_{filename}')
            process_image(input_path, output_path)

Saved: cropped_training_faces\Barack\face_download (2).jpg
Saved: cropped_training_faces\Barack\face_download (5).jpg
Saved: cropped_training_faces\Barack\face_images.jpg
Saved: cropped_training_faces\Bond\face_download (2).jpg
Saved: cropped_training_faces\Bond\face_images (10).jpg
Saved: cropped_training_faces\Bond\face_images (9).jpg
Saved: cropped_training_faces\Lesnar\face_images (12).jpg
Saved: cropped_training_faces\Lesnar\face_images (4).jpg
Saved: cropped_training_faces\Murphy\face_download (1).jpg
Saved: cropped_training_faces\Murphy\face_download (10).jpg
Saved: cropped_training_faces\Murphy\face_download (12).jpg
Saved: cropped_training_faces\Murphy\face_download (13).jpg
Saved: cropped_training_faces\Murphy\face_download (2).jpg
Saved: cropped_training_faces\Murphy\face_download (3).jpg
Saved: cropped_training_faces\Murphy\face_download (4).jpg
Saved: cropped_training_faces\Murphy\face_download (5).jpg
Saved: cropped_training_faces\Murphy\face_download (6).jpg
Saved: cropp

In [4]:
from skimage.feature import hog
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
import cv2
import os
import joblib

# Feature extraction function
def extract_features(image_path):
    """Return the HOG feature vector of an image, or None if it cannot be read.

    The image is loaded as grayscale and resized to 64x64 so that every image
    yields a feature vector of the same length.

    Args:
        image_path: Path of the image file to describe.

    Returns:
        The 1-D HOG feature array, or ``None`` when ``cv2.imread`` fails.
    """
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        return None
    img = cv2.resize(img, (64, 64))  # Standardize size
    # Only the descriptor is needed; without visualize=True, hog() skips
    # rendering the visualisation image and returns the feature vector alone.
    return hog(img, pixels_per_cell=(8, 8), cells_per_block=(2, 2))

# Load data from cropped_training_faces: one sub-folder per label, each
# containing the face crops for that label.
data = []
labels = []
base_dir = 'cropped_training_faces'

# os.listdir returns entries in arbitrary order; sorting keeps the sample
# order (and therefore the seeded train/test split below) reproducible.
for label in sorted(os.listdir(base_dir)):
    label_dir = os.path.join(base_dir, label)
    if not os.path.isdir(label_dir):
        continue
    for filename in sorted(os.listdir(label_dir)):
        # Accept the same extensions the cropping step writes (.jpg and .png).
        if not filename.endswith(('.jpg', '.png')):
            continue
        features = extract_features(os.path.join(label_dir, filename))
        if features is not None:
            data.append(features)
            labels.append(label)

# Fail with a clear message instead of an obscure error further down.
if not data:
    raise ValueError(f"No usable face images found under '{base_dir}'")

data = np.array(data)
labels = np.array(labels)

# Split dataset (80% train / 20% test, seeded for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

# Train SVM model
# probability=True enables predict_proba (confidence scores). The probability
# calibration uses randomised internal cross-validation, so random_state is
# fixed to make the reported confidences repeatable between runs.
model = SVC(kernel='linear', probability=True, random_state=42)
model.fit(X_train, y_train)

# Test the model on the held-out split
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model accuracy: {accuracy * 100:.2f}%")

# Save the model
joblib.dump(model, 'face_recognition_model.pkl')

Model accuracy: 80.00%


['face_recognition_model.pkl']

In [7]:
def predict_face(image_path, model):
    """Classify a face image and report the model's confidence in that label.

    Args:
        image_path: Path of the image to classify.
        model: A fitted classifier exposing ``predict`` and ``predict_proba``.

    Returns:
        ``"<label> (<pct>% confidence)"``, or ``"Error processing image"``
        when the image cannot be read.
    """
    features = extract_features(image_path)
    if features is None:
        return "Error processing image"

    prediction = model.predict([features])[0]
    probabilities = model.predict_proba([features])[0]

    # For SVC the argmax of predict_proba can differ from predict(), so report
    # the probability of the label actually returned instead of the maximum.
    # predict_proba columns follow the order of model.classes_.
    classes = getattr(model, 'classes_', None)
    if classes is not None and prediction in classes:
        confidence = probabilities[list(classes).index(prediction)] * 100
    else:
        # Model without classes_: fall back to the highest probability.
        confidence = probabilities.max() * 100

    return f"{prediction} ({confidence:.2f}% confidence)"

# Reload the persisted classifier from disk, then classify a single image.
model = joblib.load('face_recognition_model.pkl')

# NOTE: this sample lives in the folder the dataset was built from, so the
# score below is a sanity check rather than an unbiased evaluation.
test_image = './cropped_training_faces/Murphy/face_download (2).jpg'  # Replace with your test image path

result = predict_face(test_image, model)
print(f"Prediction: {result}")

Prediction: Murphy (87.55% confidence)
