In [None]:
'''
Used LabelEncoder to encode the labels
Created the training and testing sets
'''

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
import pickle
from sklearn.preprocessing import LabelEncoder

# Data preparation: load the saved embeddings and labels, integer-encode the
# labels, persist the fitted encoder, and create an 80/20 train/test split.

# Input/output locations.
# NOTE(review): these are absolute, machine-specific paths; consider deriving
# them from one configurable data directory.
labels_file = r"C:\Users\user\Desktop\labels.npy"
embeddings_file = r"C:\Users\user\Desktop\embeddings.npy"
label_encoder_file = r"C:\Users\user\Desktop\label_encoder.pkl"

# Load embeddings and labels
# SECURITY: allow_pickle=True lets np.load unpickle arbitrary Python objects,
# which can execute code. Only load .npy files you produced yourself.
labels = np.load(labels_file, allow_pickle=True)
embeddings = np.load(embeddings_file, allow_pickle=True)
n_samples = len(labels)

# Drop singleton axes from the embeddings (presumably an (n, 1, d) stack ->
# (n, d); TODO confirm against the code that wrote the file).
embeddings = np.squeeze(embeddings)
if embeddings.ndim < 2 and n_samples:
    # squeeze() also removes a length-1 sample or feature axis; restore the
    # (n_samples, n_features) layout expected by train_test_split and SVC.
    embeddings = embeddings.reshape(n_samples, -1)

# Fail before anything is written to disk if the two files do not line up.
if len(embeddings) != n_samples:
    raise ValueError(
        f"Found {len(embeddings)} embeddings but {n_samples} labels; "
        "the two files must describe the same samples."
    )

# Encode labels as integers and keep the encoder so that predictions can be
# mapped back to the original label values later.
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

with open(label_encoder_file, "wb") as encoder_file:
    pickle.dump(label_encoder, encoder_file)

# Split data into training and testing sets (fixed seed for reproducibility).
# NOTE(review): the split is not stratified, so a rare label can end up only in
# the test set; stratify=encoded_labels would prevent that, but it requires at
# least two samples per label.
X_train, X_test, y_train, y_test = train_test_split(
    embeddings, encoded_labels, test_size=0.2, random_state=42
)

print("Data prepared:")
print(f"Training samples: {len(X_train)}")
print(f"Testing samples: {len(X_test)}")


In [None]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from itertools import product

# Hyperparameter search for the SVM classifier.
#
# Every combination in the grid below is fitted on (X_train, y_train) and
# scored by accuracy on (X_test, y_test). The first combination that reaches
# the highest accuracy is kept.
#
# NOTE(review): the test split is used both to choose the hyperparameters and
# to report the final score, so "Best Accuracy" is an optimistic estimate.
# Selecting with cross-validation on the training data (or on a separate
# validation split) would keep the test score unbiased.
# NOTE(review): per the scikit-learn docs, gamma only applies to the 'rbf',
# 'poly' and 'sigmoid' kernels, so the 'linear' rows repeat across gamma values.

# Define the hyperparameter grid
kernel_options = ['rbf', 'linear', 'poly', 'sigmoid']
C_values = [0.1, 1, 10, 100]
gamma_values = ['scale', 'auto', 0.01, 0.1, 1]
class_weight_options = [None, 'balanced']

# Initialize variables to track the best model
best_model = None
best_accuracy = 0
best_params = {}
all_results = []

# Test all combinations of hyperparameters
for kernel, C, gamma, class_weight in product(kernel_options, C_values, gamma_values, class_weight_options):
    print(f"Testing SVM with kernel={kernel}, C={C}, gamma={gamma}, class_weight={class_weight}...")

    params = {
        "kernel": kernel,
        "C": C,
        "gamma": gamma,
        "class_weight": class_weight,
    }

    # Train the SVM classifier. Probability calibration is left off during the
    # search: it adds an internal 5-fold cross-validation to every fit, and the
    # selection below only needs predict().
    classifier = SVC(**params)
    classifier.fit(X_train, y_train)

    # Predict on the test set
    y_pred = classifier.predict(X_test)

    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy * 100:.2f}%")

    # Save the results
    all_results.append({**params, "accuracy": accuracy})

    # Check if this is the best model so far. The `best_model is None` test
    # guarantees a winner is recorded even if every candidate scores 0.
    if best_model is None or accuracy > best_accuracy:
        best_accuracy = accuracy
        best_model = classifier
        best_params = params

if best_model is None:
    raise RuntimeError("No SVM configuration was evaluated; the hyperparameter grid is empty.")

# Refit only the winning configuration with probability estimates enabled so
# that best_model still offers predict_proba(). random_state seeds the internal
# calibration shuffling, which makes the probabilities reproducible.
best_model = SVC(**best_params, probability=True, random_state=42)
best_model.fit(X_train, y_train)

# Print the best model and its accuracy
print("\nBest Model:")
print(f"Kernel: {best_params['kernel']}")
print(f"C: {best_params['C']}")
print(f"Gamma: {best_params['gamma']}")
print(f"Class Weight: {best_params['class_weight']}")
print(f"Best Accuracy: {best_accuracy * 100:.2f}%")

# Generate classification report for the best model
y_pred_best = best_model.predict(X_test)
print("\nClassification Report for Best Model:")
print(classification_report(y_test, y_pred_best))

'''
Best Model:
Kernel: rbf
C: 1
Gamma: scale
Class Weight: balanced
Best Accuracy: 99.06%
'''

In [None]:
# Persist the selected classifier (best_model from the search cell) to disk.
classifier_path = r"C:\Users\user\Desktop\face_recognition_classifier.pkl"
with open(classifier_path, mode="wb") as classifier_file:
    pickle.dump(best_model, classifier_file)

print("Classifier saved")