# LDA Ensemble for Face Recognition

Use the provided face data, and the same data partition into training and testing as in Q1.

Try PCA-LDA and its ensemble learning, along with the NN classifier. Compare and discuss face recognition results.

## PCA-LDA

Perform the PCA-LDA based face recognition with the NN classifier. Report and discuss, including:


*   Recognition accuracies by varying the parameter values, M_pca and M_lda
*   Ranks of the scatter matrices
*   Confusion matrix, example of success and failure cases

Explain your observations and reasons, and discuss the results in comparison to those of Q1.



---



1. Import necessary libraries

In [1]:
# Mount Google Drive at /content/drive so that files stored in the Drive
# (the dataset is later read from /content/drive/MyDrive/face.mat) are
# reachable through the local filesystem.
# NOTE(review): `google.colab` is presumably only importable inside a Colab
# runtime -- confirm before running this notebook elsewhere.
from google.colab import drive
drive.mount('/content/drive')

# Import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as spio
import sklearn.model_selection as ms
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import seaborn as sns


ValueError: mount failed

2. Get dataset

In [None]:
# Read the MATLAB file from the mounted Drive and extract the two arrays the
# rest of the notebook works with: 'X' (the face data, indexed column-wise by
# label in the split step) and 'l' (the identity labels).
face_mat = spio.loadmat('/content/drive/MyDrive/face.mat')
face_data, face_label = (face_mat[key] for key in ('X', 'l'))

3. Split data into training set and test set

In [None]:
# Labels are flattened to 1-D so they can be used as boolean masks below
face_label = face_label.flatten()
print("Reshaped face_label shape:", face_label.shape)

# Every distinct identity present in the dataset
unique_identities = np.unique(face_label)

# Split each identity separately (8 training / 2 test images) so that every
# identity is represented in both partitions. Each entry of
# `per_identity_splits` is the [X_train, X_test, y_train, y_test] list
# returned by train_test_split for one identity.
per_identity_splits = []
for identity in unique_identities:
    belongs_to_identity = face_label == identity
    per_identity_splits.append(
        ms.train_test_split(
            face_data[:, belongs_to_identity].T,  # columns -> rows (one image per row)
            face_label[belongs_to_identity],
            train_size=8,
            test_size=2,
            random_state=42,
        )
    )

# Regroup the per-identity pieces by role and stack them into the final arrays
X_train, X_test, y_train, y_test = (
    np.concatenate(pieces) for pieces in zip(*per_identity_splits)
)

# Sanity check on the resulting partition sizes
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)


4. Define the PCA and LDA helper functions, parameterised by M_pca and M_lda



In [None]:
# Step 1: Perform PCA for dimensionality reduction
def apply_pca(X_train, X_test, n_components):
    """Fit PCA on the training data and project both partitions.

    Args:
        X_train: Training samples, one per row; used to fit the PCA basis.
        X_test: Test samples, projected with the basis learnt on X_train.
        n_components: Number of principal components to keep.

    Returns:
        Tuple ``(X_train_pca, X_test_pca, pca)`` with the projected training
        data, the projected test data and the fitted PCA object.
    """
    reducer = PCA(n_components=n_components)
    # The basis is learnt from the training data only; the test data is merely
    # projected onto it.
    train_projected = reducer.fit_transform(X_train)
    return train_projected, reducer.transform(X_test), reducer

# Step 2: Perform LDA for class separability
def apply_lda(X_train, X_test, y_train, n_components):
    """Fit LDA on the labelled training data and project both partitions.

    Args:
        X_train: Training samples, one per row.
        X_test: Test samples, projected with the model fitted on X_train.
        y_train: Class label for each training sample.
        n_components: Number of discriminant directions to keep.

    Returns:
        Tuple ``(X_train_lda, X_test_lda, lda)`` with the projected training
        data, the projected test data and the fitted LDA object.
    """
    discriminant = LDA(n_components=n_components)
    # Only the training partition (with its labels) is used to fit the model
    train_projected = discriminant.fit_transform(X_train, y_train)
    return train_projected, discriminant.transform(X_test), discriminant



In [None]:
# Subspace dimensions of the baseline PCA-LDA pipeline
M_pca = 100  # principal components kept before LDA; tune experimentally
M_lda = 51   # discriminant directions; upper bound is C - 1 for C classes

# Stage 1: PCA, fitted on the training partition only
X_train_pca, X_test_pca, pca = apply_pca(X_train, X_test, n_components=M_pca)
print("PCA-reduced X_train shape:", X_train_pca.shape)
print("PCA-reduced X_test shape:", X_test_pca.shape)

# Stage 2: LDA on top of the PCA coefficients
X_train_lda, X_test_lda, lda = apply_lda(
    X_train_pca, X_test_pca, y_train, n_components=M_lda
)
print("LDA-reduced X_train shape:", X_train_lda.shape)
print("LDA-reduced X_test shape:", X_test_lda.shape)

5. Calculate accuracy using NN classifier

In [None]:
# Step 3: 1-nearest-neighbour classification in the PCA-LDA subspace
knn = KNeighborsClassifier(n_neighbors=1).fit(X_train_lda, y_train)
y_pred = knn.predict(X_test_lda)

# Step 4: Fraction of test samples whose predicted identity is correct
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Recognition accuracy:", accuracy)

6. Calculate confusion matrix

In [None]:
# Step 5: Confusion matrix of the baseline PCA-LDA + NN classifier
# (rows: true labels, columns: predicted labels)
conf_matrix = confusion_matrix(y_test, y_pred)

fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", cbar=False, ax=ax)
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")
ax.set_title("Confusion Matrix")
plt.show()

7. Calculate rank of scatter matrices
8. Examples of success and failure cases

In [None]:
# Step 6: Calculate ranks of scatter matrices in LDA
def calculate_lda_scatter_matrices(X, y):
    """Compute the within-class and between-class scatter matrices of LDA.

    Args:
        X: NumPy array of samples, one per row (shape: n_samples x n_features).
        y: NumPy array with the class label of each row of X.

    Returns:
        Tuple ``(Sw, Sb)`` of two (n_features x n_features) arrays:
        ``Sw`` is the sum over classes of the scatter of the samples around
        their own class mean; ``Sb`` is the sum over classes of the scatter of
        the class mean around the overall mean, weighted by the class size.
    """
    dim = X.shape[1]
    grand_mean = X.mean(axis=0)

    Sw = np.zeros((dim, dim))
    Sb = np.zeros((dim, dim))

    for label in np.unique(y):
        members = X[y == label]
        centroid = members.mean(axis=0)

        # Within-class contribution: spread of the members around their centroid
        deviations = members - centroid
        Sw += deviations.T @ deviations

        # Between-class contribution: centroid offset from the grand mean,
        # weighted by how many samples the class has
        offset = centroid - grand_mean
        Sb += members.shape[0] * np.outer(offset, offset)

    return Sw, Sb

# Scatter matrices of the PCA-reduced training data, and their ranks
Sw, Sb = calculate_lda_scatter_matrices(X_train_pca, y_train)
rank_Sw, rank_Sb = (np.linalg.matrix_rank(scatter) for scatter in (Sw, Sb))
print("Rank of within-class scatter matrix (Sw):", rank_Sw)
print("Rank of between-class scatter matrix (Sb):", rank_Sb)

# Indices (into the test set) of correctly and incorrectly classified samples
is_correct = y_pred == y_test
success_cases = np.flatnonzero(is_correct)
failure_cases = np.flatnonzero(~is_correct)

print("Example of correct classifications:", success_cases)
print("Example of incorrect classifications:", failure_cases)

## PCA-LDA Ensemble

Show, measure and discuss the results, including:

* Randomisation in features space
* Randomisation on data samples (i.e. bagging)
* Number of base models, the randomness parameter
* Error of the committee machine vs. Average error of individual models
* Fusion rules
* Recognition accuracy and confusion matrix

Observe and discuss the above by varying the parameter values/architectures you used. Give insights and reasons behind all your answers.

---

1. Import necessary libraries

In [None]:
# Import required libraries
import numpy as np
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

2. Create randomized models to calculate average error and accuracy of individual models
 - randomized features
 - randomized samples (i.e. bagging)
 - set number of models, randomness parameter

In [None]:
# Ensemble configuration
num_models = 10          # how many base PCA-LDA models the committee contains
randomness_param = 0.7   # fraction of features AND of samples each model draws

# Per-model results, filled in by the training loop
individual_accuracies, individual_predictions = [], []

# Step 1: Define function for randomized feature space (PCA) and bagging (random data samples)
def create_randomized_model(X_train, y_train, X_test, y_test, randomness_param, M_pca, M_lda,
                            random_state=None):
    """Train one randomised PCA-LDA + 1-NN base model and evaluate it.

    The model is randomised in two ways, both controlled by
    ``randomness_param``: a random subset of the input features is selected
    (without replacement), and a bootstrap sample of the training rows is drawn
    (with replacement, i.e. bagging).

    Args:
        X_train: Training samples, one per row (2-D NumPy array).
        y_train: Class label of each training row (1-D NumPy array).
        X_test: Test samples, one per row, with the same columns as X_train.
        y_test: Class label of each test row.
        randomness_param: Fraction of the features, and of the training rows,
            drawn for this model.
        M_pca: Requested number of PCA components.
        M_lda: Requested number of LDA components.
        random_state: Optional integer seed. When given, the feature/sample
            draws and the PCA solver are seeded so the model is reproducible.
            When ``None`` (default) NumPy's global random state is used.

    Returns:
        Tuple ``(y_pred, accuracy)``: the predicted label of every test row and
        the fraction of them that match ``y_test``.

    Raises:
        ValueError: If ``randomness_param`` selects no features or no samples.
    """
    # Fall back to the global NumPy RNG when unseeded so that unseeded callers
    # keep drawing from the same random stream as before.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    total_samples, total_features = X_train.shape

    # Randomly sample features for PCA (feature-space randomisation)
    num_features = int(total_features * randomness_param)
    # Randomly sample data points for bagging (sample-space randomisation)
    num_samples = int(total_samples * randomness_param)
    if num_features < 1 or num_samples < 1:
        raise ValueError(
            "randomness_param selects no features or no samples: "
            f"{num_features} features, {num_samples} samples"
        )

    # Features are drawn before samples; keeping this order means a globally
    # seeded run (np.random.seed) consumes random numbers in a fixed sequence.
    selected_features = rng.choice(total_features, num_features, replace=False)
    sample_indices = rng.choice(total_samples, num_samples, replace=True)

    X_train_sample = X_train[sample_indices][:, selected_features]
    y_train_sample = y_train[sample_indices]
    X_test_sample = X_test[:, selected_features]

    # PCA cannot produce more components than min(n_samples, n_features)
    n_pca = min(M_pca, num_features, num_samples)
    pca = PCA(n_components=n_pca, random_state=random_state)
    X_train_pca = pca.fit_transform(X_train_sample)
    X_test_pca = pca.transform(X_test_sample)

    # LDA cannot produce more components than min(n_features, n_classes - 1),
    # where n_features is now the PCA output dimensionality. The class count is
    # taken from the bootstrap sample because bagging may miss some classes.
    n_classes = len(np.unique(y_train_sample))
    lda = LDA(n_components=min(M_lda, n_classes - 1, n_pca))
    X_train_lda = lda.fit_transform(X_train_pca, y_train_sample)
    X_test_lda = lda.transform(X_test_pca)

    # Train NN classifier in the PCA-LDA subspace
    knn = KNeighborsClassifier(n_neighbors=1)
    knn.fit(X_train_lda, y_train_sample)
    y_pred = knn.predict(X_test_lda)

    accuracy = accuracy_score(y_test, y_pred)
    return y_pred, accuracy

# Step 2: Train ensemble of PCA-LDA models with randomization.
# The loop uses its own variable names on purpose: binding the results to
# `y_pred` / `accuracy` would overwrite the baseline PCA-LDA results held in
# those notebook-level names, so re-running the baseline confusion-matrix or
# success/failure cells afterwards would silently report the last ensemble
# member instead.
for _ in range(num_models):
    model_predictions, model_accuracy = create_randomized_model(
        X_train, y_train, X_test, y_test, randomness_param, M_pca=100, M_lda=51
    )
    individual_predictions.append(model_predictions)
    individual_accuracies.append(model_accuracy)

# Step 3: Evaluate individual model performance (averaged over the base models)
average_individual_accuracy = np.mean(individual_accuracies)
individual_errors = [1 - acc for acc in individual_accuracies]
average_individual_error = np.mean(individual_errors)

print(f"Average individual error: {average_individual_error:.4f}")
print(f"Average accuracy of individual models: {average_individual_accuracy:.4f}")

3. Error and accuracy of committee machine

In [None]:
# Step 4: Implement committee fusion rules
# Majority voting (default fusion rule)
def _most_common_label(votes):
    """Return the most frequent value in ``votes``; ties go to the smallest value."""
    labels, counts = np.unique(votes, return_counts=True)
    # np.unique returns the labels sorted and argmax returns the first maximum,
    # so a tie is resolved in favour of the smallest label.
    return labels[counts.argmax()]


def majority_vote(predictions):
    """Fuse the predictions of several models by majority voting.

    Args:
        predictions: Sequence of per-model prediction vectors, stacked so that
            axis 0 indexes the models and the remaining axis indexes the
            samples. Labels may be any sortable values (non-negative or
            negative integers, floats, strings).

    Returns:
        NumPy array holding, for every sample, the label predicted by the
        largest number of models. Ties are resolved in favour of the smallest
        label.
    """
    predictions = np.asarray(predictions)
    # Counting with np.unique instead of np.bincount removes the restriction to
    # non-negative integer labels.
    majority_vote_predictions = np.apply_along_axis(_most_common_label, axis=0, arr=predictions)
    return majority_vote_predictions

# Fuse the base-model predictions and score the resulting committee
committee_predictions = majority_vote(individual_predictions)
committee_accuracy = accuracy_score(y_true=y_test, y_pred=committee_predictions)
committee_error = 1.0 - committee_accuracy  # error is the complement of accuracy

print(f"Committee error: {committee_error:.4f}")
print(f"Committee (majority vote) accuracy: {committee_accuracy:.4f}")

4. Confusion matrix

In [None]:
# Step 5: Confusion matrix of the fused (committee) predictions
# (rows: true labels, columns: predicted labels)
conf_matrix = confusion_matrix(y_test, committee_predictions)

fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", cbar=False, ax=ax)
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")
ax.set_title("Confusion Matrix - Committee Model")
plt.show()