In [None]:
# Linear Discriminant Analysis on Face Embeddings (PyTorch/facelib implementation)
# Complete implementation with dataset preparation, embedding extraction, LDA training and evaluation

import numpy as np
import os
import matplotlib.pyplot as plt
import torch
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
import cv2
from tqdm import tqdm
from PIL import Image
from torchvision import transforms

# Import facelib modules
from facelib.detection import RetinaFace
from facelib.recognition import ArcFace

In [None]:

def train_lda_model(X_train, y_train, n_components=None):
    """
    Fit a feature scaler and an LDA classifier on face embeddings.

    The embeddings are standardized with a freshly fitted StandardScaler and
    the LinearDiscriminantAnalysis model is then fitted on the scaled data.

    Args:
        X_train: Training embeddings
        y_train: Training labels
        n_components: Passed through unchanged to LinearDiscriminantAnalysis
            (None leaves the choice to the estimator)

    Returns:
        lda: The fitted LDA model
        scaler: The StandardScaler fitted on X_train; apply it to any data
            before handing that data to ``lda``
    """
    # Build both estimators up front; nothing is fitted yet.
    scaler = StandardScaler()
    lda = LinearDiscriminantAnalysis(n_components=n_components)

    # Fit the scaler and scale the training embeddings in one step.
    scaled_embeddings = scaler.fit_transform(X_train)

    # The discriminant model only ever sees standardized embeddings.
    lda.fit(scaled_embeddings, y_train)

    return (lda, scaler)

def evaluate_model(lda, scaler, X_test, y_test, person_names=None):
    """
    Evaluate LDA model on test data

    Args:
        lda: Trained LDA model
        scaler: Fitted StandardScaler
        X_test: Test embeddings
        y_test: Test labels
        person_names: Optional sequence (list, tuple or array) of person names
            indexed by integer label. It may contain more names than there are
            classes in the test data; only the labels that actually occur in
            ``y_test`` or the predictions are reported.

    Returns:
        accuracy: Classification accuracy
        report: Classification report
    """
    # Standardize test features with the scaler fitted on the training data
    X_test_scaled = scaler.transform(X_test)

    # Predict
    y_pred = lda.predict(X_test_scaled)

    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred)

    # Explicit None/length test: `if person_names` is ambiguous for NumPy arrays.
    if person_names is None or len(person_names) == 0:
        return accuracy, classification_report(y_test, y_pred, target_names=None)

    # classification_report requires len(target_names) to equal the number of
    # reported labels. Persons that never appear in y_test / y_pred (e.g. no
    # face was detected for them) would otherwise make it raise ValueError,
    # so report only the labels that are actually present.
    present = np.unique(np.concatenate([np.ravel(y_test), np.ravel(y_pred)]))

    if not np.issubdtype(present.dtype, np.integer):
        # Non-integer labels cannot index person_names; keep the positional
        # pairing between sorted labels and names.
        report = classification_report(
            y_test, y_pred, target_names=list(person_names)
        )
        return accuracy, report

    target_names = [
        str(person_names[int(label)])
        if 0 <= int(label) < len(person_names)
        else f"Person {label}"
        for label in present
    ]
    report = classification_report(
        y_test, y_pred, labels=present, target_names=target_names
    )

    return accuracy, report

def visualize_lda_projection(lda, scaler, X, y, title="LDA Projection of Face Embeddings",
                             person_names=None, save_path='lda_projection.png'):
    """
    Scatter-plot the first two LDA components, one colour per class.

    Args:
        lda: Trained LDA model
        scaler: Fitted StandardScaler
        X: Face embeddings
        y: Labels (any array-like; converted to a NumPy array internally)
        title: Plot title
        person_names: Optional sequence of person names indexed by integer
            label, used in the legend. Labels without a matching name are
            shown as "Person <label>".
        save_path: File the figure is written to before it is shown.
            Pass None (or an empty string) to skip saving.

    Raises:
        ValueError: If the LDA projection has fewer than 2 components.
    """
    # Standardize features
    X_scaled = scaler.transform(X)

    # Transform data to LDA space
    X_lda = np.asarray(lda.transform(X_scaled))

    # Fail early with a clear message instead of an IndexError on column 1.
    if X_lda.ndim != 2 or X_lda.shape[1] < 2:
        raise ValueError(
            "visualize_lda_projection needs at least 2 LDA components, "
            f"got a projection of shape {X_lda.shape}"
        )

    # The boolean masks below need an array: `list == scalar` is a single
    # False, which would silently select no points at all.
    y = np.asarray(y)

    # Explicit None/length test: `if person_names` is ambiguous for NumPy arrays.
    has_names = person_names is not None and len(person_names) > 0

    plt.figure(figsize=(12, 8))

    # One colour per distinct class
    unique_classes = np.unique(y)
    colors = plt.cm.rainbow(np.linspace(0, 1, len(unique_classes)))

    for color, class_id in zip(colors, unique_classes):
        # Only a non-negative integer label inside the list maps to a name;
        # a negative id must not wrap around to the end of the list.
        if (has_names
                and isinstance(class_id, (int, np.integer))
                and 0 <= class_id < len(person_names)):
            label = str(person_names[class_id])
        else:
            label = f"Person {class_id}"

        mask = y == class_id
        plt.scatter(X_lda[mask, 0],
                    X_lda[mask, 1],
                    alpha=0.7,
                    color=color,
                    label=label)

    plt.xlabel("LD 1")
    plt.ylabel("LD 2")
    plt.title(title)
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.tight_layout()
    if save_path:
        plt.savefig(save_path)  # Save the visualization
    plt.show()

def save_model(lda, scaler, output_dir="models"):
    """
    Persist the LDA model and its scaler as pickle files.

    Creates ``output_dir`` (including missing parents) if needed, writes
    ``lda_model.pkl`` followed by ``scaler.pkl`` into it, and prints a
    confirmation message.

    Args:
        lda: Trained LDA model
        scaler: Fitted StandardScaler
        output_dir: Directory that receives the two pickle files
    """
    import os
    import pickle

    # Make sure the destination exists before writing anything.
    os.makedirs(output_dir, exist_ok=True)

    # (file name, object) pairs, written in this order: model first, scaler second.
    artifacts = (
        ('lda_model.pkl', lda),
        ('scaler.pkl', scaler),
    )
    for file_name, artifact in artifacts:
        destination = os.path.join(output_dir, file_name)
        with open(destination, 'wb') as handle:
            pickle.dump(artifact, handle)

    print(f"Models saved to {output_dir}")

def load_model(model_dir="models"):
    """
    Read the pickled LDA model and scaler back from ``model_dir``.

    The directory is expected to contain ``lda_model.pkl`` and ``scaler.pkl``.

    Args:
        model_dir: Directory where the models are saved

    Returns:
        lda: The unpickled LDA model
        scaler: The unpickled StandardScaler
    """
    import pickle

    def _unpickle(file_name):
        # Deserialize one file that lives inside model_dir.
        with open(os.path.join(model_dir, file_name), 'rb') as handle:
            return pickle.load(handle)

    # NOTE: pickle.load can execute arbitrary code embedded in the file;
    # only point this at model directories you trust.
    lda = _unpickle('lda_model.pkl')
    scaler = _unpickle('scaler.pkl')

    return (lda, scaler)



In [None]:
# Set your dataset path here
# NOTE(review): `dataset_path` is not assigned anywhere in this cell. Define it
# before running (here or in a configuration cell above), for example:
#     dataset_path = "path/to/face_dataset"
device = 'cuda' if torch.cuda.is_available() else 'cpu'


def run_lda_pipeline(dataset_path, device, max_samples_per_class=20):
    """
    Run the full workflow: load images, extract embeddings, train, evaluate,
    save and visualize an LDA classifier.

    The early exits below are the reason this logic lives in a function: a
    bare ``return`` at cell level is a SyntaxError.

    Args:
        dataset_path: Location of the face dataset, passed to load_face_dataset
        device: Device string passed to extract_face_embeddings_facelib
        max_samples_per_class: Per-person image cap passed to load_face_dataset

    Returns:
        (lda, scaler) on success, or None when there is nothing to train on
        (no images, no detected faces, or fewer than two distinct labels).
    """
    # Load dataset (adjust max_samples_per_class as needed)
    images, labels, person_names = load_face_dataset(
        dataset_path, max_samples_per_class=max_samples_per_class
    )

    if len(images) == 0:
        print("No images loaded. Please check your dataset path.")
        return None

    # Extract face embeddings using facelib
    print("Extracting face embeddings with facelib...")
    embeddings, valid_indices = extract_face_embeddings_facelib(images, device=device)

    if len(embeddings) == 0:
        print("No faces detected in the dataset. Please check your images.")
        return None

    # Keep only the labels of the images that produced an embedding
    valid_labels = np.array(labels)[valid_indices]

    print(f"Extracted {len(embeddings)} face embeddings with dimension {embeddings.shape[1]}")

    # LDA needs at least two classes; with one class n_components would be 0.
    n_classes = len(np.unique(valid_labels))
    if n_classes < 2:
        print("Need face embeddings from at least 2 different people to train LDA.")
        return None

    # Split data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(
        embeddings, valid_labels, test_size=0.2, random_state=42, stratify=valid_labels
    )

    print(f"Training set size: {len(X_train)}")
    print(f"Test set size: {len(X_test)}")

    # Train LDA model
    print("Training LDA model...")
    n_components = min(embeddings.shape[1], n_classes - 1)
    lda, scaler = train_lda_model(X_train, y_train, n_components=n_components)

    # Evaluate model
    accuracy, report = evaluate_model(lda, scaler, X_test, y_test, person_names)

    print(f"LDA Model Accuracy: {accuracy:.4f}")
    print("Classification Report:")
    print(report)

    # Print explained variance ratio
    if hasattr(lda, 'explained_variance_ratio_'):
        print("Explained variance ratio by components:")
        print(lda.explained_variance_ratio_)
        print(f"Total variance explained: {np.sum(lda.explained_variance_ratio_):.4f}")

    # Save the trained model
    save_model(lda, scaler)

    # Visualize results (if we have at least 2 components)
    if n_components >= 2:
        print("Visualizing LDA projection...")
        visualize_lda_projection(lda, scaler, embeddings, valid_labels, person_names=person_names)

    return lda, scaler


# load_face_dataset and extract_face_embeddings_facelib are not defined in the
# cells shown here; they are expected to come from an earlier cell.
pipeline_result = run_lda_pipeline(dataset_path, device)
if pipeline_result is not None:
    # Expose the fitted objects at notebook level for further inspection.
    lda, scaler = pipeline_result
