In [None]:
# !cp kaggle.json ~/.kaggle/
# !chmod 600 ~/.kaggle/kaggle.json

# !kaggle datasets download -d kaustubhchaudhari/pubfig-dataset-256x256-jpg
# !unzip pubfig-dataset-256x256-jpg.zip

In [None]:
# import os
# import shutil
# import random

# # Set the path to your original dataset folder
# original_dataset_path = '/content/CelebDataProcessed'

# # Set the path to the folder where you want to create the train and test folders
# base_dir = '/content/Facesfolder1'
# os.makedirs(base_dir, exist_ok=True)

# # Set the percentage of images to use for the test set
# test_split_percentage = 5

# # Create the train and test folders
# train_dir = os.path.join(base_dir, 'train')
# test_dir = os.path.join(base_dir, 'test')
# os.makedirs(train_dir, exist_ok=True)
# os.makedirs(test_dir, exist_ok=True)

# # Iterate through each actor folder in the original dataset folder
# for actor_name in os.listdir(original_dataset_path):
#     actor_dir = os.path.join(original_dataset_path, actor_name)
#     if not os.path.isdir(actor_dir):
#         continue

#     # Create a folder for the actor in the train and test directories
#     train_actor_dir = os.path.join(train_dir, actor_name)
#     test_actor_dir = os.path.join(test_dir, actor_name)
#     os.makedirs(train_actor_dir, exist_ok=True)
#     os.makedirs(test_actor_dir, exist_ok=True)

#     # Iterate through each image in the actor folder
#     actor_images = os.listdir(actor_dir)
#     random.shuffle(actor_images)  # Shuffle the images randomly
#     test_split_index = int(len(actor_images) * (test_split_percentage / 100))
#     train_images = actor_images[test_split_index:]
#     test_images = actor_images[:test_split_index]

#     # Copy the train images to the train actor folder
#     for image_name in train_images:
#         image_path = os.path.join(actor_dir, image_name)
#         target_path = os.path.join(train_actor_dir, image_name)
#         shutil.copy(image_path, target_path)

#     # Copy the test images to the test actor folder
#     for image_name in test_images:
#         image_path = os.path.join(actor_dir, image_name)
#         target_path = os.path.join(test_actor_dir, image_name)
#         shutil.copy(image_path, target_path)


In [None]:
import os

import numpy as np

def train_eigenfaces(X, num_components):
    """Learn an eigenface (PCA) basis from a stack of grayscale images.

    Args:
        X: array of shape (N, H, W) holding N images of identical size.
        num_components: number of leading principal components to keep.

    Returns:
        A tuple ``(mean_face, eigenfaces)``:
        ``mean_face`` has shape (H*W,) and is the pixel-wise mean of the
        flattened images; ``eigenfaces`` has shape (H*W, k) with orthonormal
        columns ordered by decreasing explained variance, where
        ``k = min(num_components, N, H*W)``.

    Raises:
        ValueError: if ``X`` is not 3-dimensional, contains no images, or
            ``num_components`` is smaller than 1.
    """
    N, H, W = X.shape
    if N == 0:
        raise ValueError("X must contain at least one image")
    if num_components < 1:
        raise ValueError("num_components must be a positive integer")

    # Flatten every image into one row; float64 avoids uint8 wrap-around
    # when the mean is subtracted.
    X = X.reshape(N, H * W).astype(np.float64)

    # Compute the mean face and centre the data on it
    mean_face = np.mean(X, axis=0)
    X = X - mean_face

    # The right singular vectors of the centred data are the eigenvectors of
    # its covariance matrix, already sorted by decreasing singular value and
    # orthonormal. This avoids building the (H*W x H*W) covariance matrix.
    _, _, vt = np.linalg.svd(X, full_matrices=False)

    # Keep the leading components as columns: shape (H*W, k)
    eigenfaces = vt[:num_components].T

    # Return the mean face and eigenfaces
    return mean_face, eigenfaces



In [None]:
def extract_features(X, mean_face, eigenfaces):
    """Project images onto an eigenface basis.

    Args:
        X: array of shape (N, H, W) holding N images.
        mean_face: array of shape (H*W,) subtracted from every flattened image.
        eigenfaces: array of shape (H*W, k) whose columns are the basis vectors.

    Returns:
        Array of shape (N, k): one row of projection coefficients per image.

    Raises:
        ValueError: if ``X`` is not 3-dimensional or ``eigenfaces`` does not
            have H*W rows.
    """
    N, H, W = X.shape

    if eigenfaces.ndim != 2 or eigenfaces.shape[0] != H * W:
        raise ValueError(
            "eigenfaces must have shape (H*W, k) = (%d, k), got %s"
            % (H * W, eigenfaces.shape)
        )

    # Flatten each image into a row and subtract the mean face
    X = X.reshape(N, H * W)
    X = X - mean_face

    # (N, H*W) @ (H*W, k) -> (N, k): one feature row per image
    features = np.dot(X, eigenfaces)

    # Return the features
    return features


In [None]:
import cv2

def load_dataset(data_dir, image_size=(100, 100)):
    """Load a folder-per-class image dataset as grayscale arrays.

    Every sub-directory of ``data_dir`` is treated as one class. Class folders
    are visited in sorted name order, so a given folder name maps to the same
    integer label in any directory that contains the same set of class
    folders.

    Args:
        data_dir: directory whose sub-directories each hold one class.
        image_size: ``(width, height)`` every image is resized to.

    Returns:
        A tuple ``(images, labels)`` of numpy arrays: the resized grayscale
        images and the integer class label of each image.
    """
    images = []
    labels = []

    # Only directories are classes; stray files at the top level are ignored.
    class_names = sorted(
        name for name in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, name))
    )

    for label, folder_name in enumerate(class_names):
        folder_path = os.path.join(data_dir, folder_name)
        for filename in sorted(os.listdir(folder_path)):
            image_path = os.path.join(folder_path, filename)
            image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            if image is None:
                # cv2.imread returns None (it does not raise) when the file
                # is missing or cannot be decoded; skip such entries.
                continue
            images.append(cv2.resize(image, image_size))
            labels.append(label)

    return np.array(images), np.array(labels)

In [None]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score


def train(train_dir, num_components):
    """Fit an eigenface model on the images found under ``train_dir``.

    Loads the images with ``load_dataset`` (the labels are not needed for
    fitting) and passes them to ``train_eigenfaces``.

    Returns:
        The ``(mean_face, eigenfaces)`` pair produced by ``train_eigenfaces``.
    """
    images, _labels = load_dataset(train_dir)

    model = train_eigenfaces(images, num_components)
    mean_face, eigenfaces = model

    return mean_face, eigenfaces


def evaluate(train_dir, test_dir, mean_face, eigenfaces):
    """Score a linear SVM trained on eigenface features.

    Both directories are loaded with ``load_dataset`` and converted to
    features with ``extract_features`` using the supplied ``mean_face`` and
    ``eigenfaces``. A linear ``SVC`` (C=1.0) is fitted on the training
    features and used to predict the test set.

    Returns:
        The value of ``accuracy_score`` for the test labels and predictions.
    """
    # Same load order as before: test split first, then the training split.
    test_images, test_targets = load_dataset(test_dir)
    train_images, train_targets = load_dataset(train_dir)

    # Turn raw pixels into eigenface coefficients
    train_features = extract_features(train_images, mean_face, eigenfaces)
    test_features = extract_features(test_images, mean_face, eigenfaces)

    # Fit the classifier on the training features
    classifier = SVC(kernel='linear', C=1.0)
    classifier.fit(train_features, train_targets)

    # Predict the held-out images and report the fraction classified correctly
    predictions = classifier.predict(test_features)
    return accuracy_score(test_targets, predictions)


In [None]:
# Fit the eigenface basis and the classifier on the training split, and score
# on the held-out test split. Using the test folder for both steps would
# report training accuracy rather than generalisation accuracy.
TRAIN_DIR = "/content/Facesfolder1/train"
TEST_DIR = "/content/Facesfolder1/test"
NUM_COMPONENTS = 10  # number of eigenfaces kept

mean_face, eigenfaces = train(TRAIN_DIR, NUM_COMPONENTS)

acc = evaluate(TRAIN_DIR, TEST_DIR, mean_face, eigenfaces)

print("Accuracy of the model: ", acc)

In [None]:
# Step 5: Test the model
# Reads every file under each sub-folder of `test_dir` as a grayscale image,
# labels it with the enumeration index of its sub-folder, projects the images
# with `project_face`, and scores the predictions of `svm` with accuracy_score.
# NOTE(review): `project_face` and `svm` are not defined in the visible part of
# this notebook — confirm the cells that create them run before this one.
# NOTE(review): labels follow the order returned by os.listdir, which is not
# guaranteed to be sorted; presumably this must match the label assignment
# used when `svm` was fitted — verify.
test_dir = '/content/Facesfolder1/test'
test_images = []
test_labels = []
for label, folder_name in enumerate(os.listdir(test_dir)):
    folder_path = os.path.join(test_dir, folder_name)
    for filename in os.listdir(folder_path):
        image_path = os.path.join(folder_path, filename)
        # Flag 0 loads the image as single-channel grayscale.
        image = cv2.imread(image_path, 0)
        # Resizing is disabled, so images keep their on-disk size; np.array
        # below only yields a regular array if all images share one shape.
        # image = cv2.resize(image, (100, 100))
        test_images.append(image)
        test_labels.append(label)
test_images = np.array(test_images)
test_labels = np.array(test_labels)

# One projection per test image, as returned by `project_face`.
test_projections = [project_face(image) for image in test_images]
test_projections = np.squeeze(test_projections)  # Remove extra dimensions
test_predictions = svm.predict(test_projections)
accuracy = accuracy_score(test_labels, test_predictions)
print('Accuracy:', accuracy)


In [None]:
import os

# Count how many entries are stored across the immediate sub-directories of
# the test split and print the grand total.
folder_path = '/content/Facesfolder1/test'
subfolders = [entry.path for entry in os.scandir(folder_path) if entry.is_dir()]

# Each sub-directory contributes the number of entries it lists.
total = sum(len(os.listdir(subfolder)) for subfolder in subfolders)

print(total)


In [None]:
import cv2
import numpy as np

# Step 1: Read the training images as grayscale
# NOTE(review): `num_of_images` is not defined in this cell — it must be set
# before this cell runs.
training_set = []
for i in range(1, num_of_images+1):
    img = cv2.imread(f"image_{i}.jpg", cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread returns None instead of raising when a file is unreadable
        raise FileNotFoundError(f"image_{i}.jpg could not be read")
    training_set.append(img)

# Step 2: Resize every image to NxN and flatten it into a vector of length N^2
# (resizing straight to (N**2, 1) would interpolate the picture into a
# one-pixel-high strip instead of flattening it)
N = 100  # for example
training_set_resized = [
    cv2.resize(img, (N, N)).reshape(-1).astype(np.float64)
    for img in training_set
]

# Step 3: Select training set of N^2xM (dimensions, M: number of sample images)
M = 10  # for example
training_set_selected = np.array(training_set_resized[:M]).T  # (N^2, M)

# Step 4: Find average face, subtract from the faces in the training set, create matrix A
avg_face = np.mean(training_set_selected, axis=1).reshape(-1, 1)  # (N^2, 1)
A = training_set_selected - avg_face  # (N^2, M)

# Step 5: Calculate the small surrogate matrix A'A (M x M); its eigenvectors v
# give the eigenvectors A v of the full N^2 x N^2 matrix AA'
cov_matrix = np.dot(A.T, A)

# Step 6: Calculate eigenvectors of the surrogate matrix. It is symmetric, so
# eigh is used (real output), and the pairs are sorted by decreasing eigenvalue
eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
order = np.argsort(eigenvalues)[::-1]
eigenvalues = eigenvalues[order]
eigenvectors = eigenvectors[:, order]

# Step 7: Calculate eigenfaces (columns, not yet normalised)
eigenfaces = np.dot(A, eigenvectors)  # (N^2, M)

# Step 8: Create reduced eigenface space with unit-length columns
k = 5  # for example
eigenfaces_reduced = eigenfaces[:, :k]
column_norms = np.linalg.norm(eigenfaces_reduced, axis=0)
# Guard against division by zero for directions with no variance
eigenfaces_reduced = eigenfaces_reduced / np.maximum(column_norms, np.finfo(np.float64).eps)

# Weights of every training face in the reduced eigenface space: (k, M)
training_weights = np.dot(eigenfaces_reduced.T, A)

# Step 9: Calculate eigenface weights of image in question
img = cv2.imread("image_to_be_recognized.jpg", cv2.IMREAD_GRAYSCALE)
if img is None:
    raise FileNotFoundError("image_to_be_recognized.jpg could not be read")
img_resized = cv2.resize(img, (N, N)).reshape(-1, 1).astype(np.float64)
img_eigenface = np.dot(eigenfaces_reduced.T, (img_resized - avg_face))  # (k, 1)

# Step 10: Calculate Euclidean distances between the image's weights and the
# weights of each training face (one distance per training image)
distances = np.linalg.norm(training_weights - img_eigenface, axis=0)

# Step 11: Find the minimum Euclidean distance
min_distance_index = np.argmin(distances)

# Step 12: Output image with the minimum Euclidean distance or "image unrecognizable"
# NOTE(review): `threshold` is not defined in this cell — choose it from the
# observed distances before running.
if distances[min_distance_index] < threshold:  # threshold can be set based on the distances
    recognized_image = training_set[min_distance_index]
    # NOTE(review): cv2.imshow needs a GUI backend; confirm it is available
    # in the environment this notebook runs in.
    cv2.imshow("Recognized Image", recognized_image)
else:
    print("Image unrecognizable")
