In [47]:
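# One-time setup on a fresh runtime (uncomment to run): install the Kaggle API
# token, then download and extract the dataset.
# !mkdir -p ~/.kaggle
# !cp kaggle.json ~/.kaggle/
# !chmod 600 ~/.kaggle/kaggle.json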

# !kaggle datasets download -d kaustubhchaudhari/pubfig-dataset-256x256-jpg
# !unzip pubfig-dataset-256x256-jpg.zip

In [48]:
import os
import shutil
import random

# Split every actor folder of the original dataset into train/ and test/ copies.

# Folder that holds one sub-folder of images per actor.
original_dataset_path = '/content/CelebDataProcessed'

# Folder in which the train/ and test/ trees are created.
base_dir = '/content/Facesfolder'
os.makedirs(base_dir, exist_ok=True)

# Percentage of each actor's images that goes to the test set.
test_split_percentage = 20

# Fixed seed and a private generator, so re-running the cell reproduces the
# same split without touching the global `random` state.
split_seed = 42
split_rng = random.Random(split_seed)

# Create the train and test folders.
train_dir = os.path.join(base_dir, 'train')
test_dir = os.path.join(base_dir, 'test')
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

# os.listdir returns entries in arbitrary order; sorting makes the sequence
# consumed by the seeded shuffle identical on every run.
for actor_name in sorted(os.listdir(original_dataset_path)):
    actor_dir = os.path.join(original_dataset_path, actor_name)
    if not os.path.isdir(actor_dir):
        continue

    train_actor_dir = os.path.join(train_dir, actor_name)
    test_actor_dir = os.path.join(test_dir, actor_name)
    # Remove copies left by an earlier run before writing new ones. Without
    # this, an image that previously landed in the other split would stay
    # there and appear in both train and test.
    for split_actor_dir in (train_actor_dir, test_actor_dir):
        if os.path.isdir(split_actor_dir):
            shutil.rmtree(split_actor_dir)
        os.makedirs(split_actor_dir, exist_ok=True)

    # Only regular files can be copied with shutil.copy; skip sub-directories.
    actor_images = sorted(
        entry for entry in os.listdir(actor_dir)
        if os.path.isfile(os.path.join(actor_dir, entry))
    )
    split_rng.shuffle(actor_images)

    # Integer arithmetic: a float product such as n * (p / 100) can land just
    # below a whole number and be truncated one short.
    test_split_index = int(len(actor_images) * test_split_percentage // 100)
    train_images = actor_images[test_split_index:]
    test_images = actor_images[:test_split_index]

    for split_actor_dir, split_images in (
        (train_actor_dir, train_images),
        (test_actor_dir, test_images),
    ):
        for image_name in split_images:
            image_path = os.path.join(actor_dir, image_name)
            target_path = os.path.join(split_actor_dir, image_name)
            shutil.copy(image_path, target_path)


In [None]:
import cv2
import numpy as np
from sklearn.ensemble import AdaBoostClassifier
from sklearn.svm import SVC
import os

def load_dataset(data_dir):
    """Load every image below ``data_dir`` as grayscale, labelled by sub-folder.

    Each entry of ``os.listdir(data_dir)`` is given the integer label equal to
    its position in that listing. ``os.listdir`` order is arbitrary, so labels
    from two different directories only agree when both listings come back in
    the same order.

    Top-level entries that are not directories are skipped. They still occupy
    their position in the numbering, so folder labels match a plain
    ``enumerate(os.listdir(data_dir))``. Files that ``cv2.imread`` cannot
    decode are skipped with a warning instead of being stored as ``None``.

    Args:
        data_dir: Directory containing one sub-folder of images per class.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays with one entry per image
        that could be read.
    """
    import warnings

    images = []
    labels = []
    for label, folder_name in enumerate(os.listdir(data_dir)):
        folder_path = os.path.join(data_dir, folder_name)
        if not os.path.isdir(folder_path):
            # A stray file next to the class folders: nothing to load from it.
            continue
        for filename in os.listdir(folder_path):
            image_path = os.path.join(folder_path, filename)
            image = cv2.imread(image_path, 0)  # flag 0 -> single-channel grayscale
            if image is None:
                # imread signals failure by returning None rather than raising.
                warnings.warn(f"Skipping unreadable image: {image_path}")
                continue
            # image = cv2.resize(image, (100, 100))
            images.append(image)
            labels.append(label)

    return np.array(images), np.array(labels)

## Extract eigen faces:
from sklearn.decomposition import PCA


# Fit the eigenface projection (PCA) and the boosted linear-SVM classifier on
# the training split.
X,y=load_dataset("/content/Facesfolder/train")

# Flatten every image into one feature row.
n_samples = X.shape[0]
X_flat = X.reshape(n_samples, -1)

n_components = 50 # number of eigenfaces to extract
# Fixed seed: PCA may pick a randomized SVD solver for large inputs, and
# AdaBoost forwards its seed to the weak learners.
random_state = 42
pca = PCA(n_components=n_components, whiten=True, random_state=random_state)
X_pca = pca.fit_transform(X_flat)

svm_weak_learner = SVC(kernel='linear', C=1000, gamma='auto')
# scikit-learn renamed `base_estimator` to `estimator` and later removed the
# old keyword. Try the current name first and fall back for older releases.
try:
    adaboost = AdaBoostClassifier(estimator=svm_weak_learner, algorithm='SAMME', random_state=random_state)
except TypeError:
    adaboost = AdaBoostClassifier(base_estimator=svm_weak_learner, algorithm='SAMME', random_state=random_state)

adaboost.fit(X_pca, y)




In [None]:
# Evaluate the fitted PCA + AdaBoost pipeline on the held-out test split.
Xtest,ytest=load_dataset("/content/Facesfolder/test")

total = len(ytest)
if total == 0:
    # Fail with a clear message instead of a ZeroDivisionError further down.
    raise ValueError("No test images found in /content/Facesfolder/test")

# Project and classify the whole test set with one call each, rather than
# invoking transform/predict once per image.
Xtest_pca = pca.transform(Xtest.reshape(total, -1))
predicted_labels = adaboost.predict(Xtest_pca)
correct = int((predicted_labels == ytest).sum())

accuracy = correct / total
print(f"Accuracy: {accuracy}")

In [None]:
import os

# Count how many files the training split contains across all class folders.
folder_path = '/content/Facesfolder/train'
subfolders = [f.path for f in os.scandir(folder_path) if f.is_dir()]

# Stored under its own name: assigning the count to `sum` would shadow the
# builtin for every later cell. If an earlier version of this cell already ran
# in the current kernel, run `del sum` or restart first.
total_train_images = sum(len(os.listdir(subfolder_path)) for subfolder_path in subfolders)

print(total_train_images)

In [None]:
!unzip "/content/Cristiano Ronaldo.zip" -d ./CR

In [None]:
def folder_name_labels(data_dir):
    """Map every entry name in ``data_dir`` to its index in ``os.listdir``.

    The numbering is simply the position of each name in the listing returned
    by ``os.listdir(data_dir)``; that order is whatever the operating system
    reports.

    Args:
        data_dir: Directory whose entries (normally one folder per class) are
            to be numbered.

    Returns:
        Dict from entry name to integer label.
    """
    return {entry: index for index, entry in enumerate(os.listdir(data_dir))}

# Build the folder-name -> label lookup from the test split and display it.
folder2label = folder_name_labels("/content/Facesfolder/test")
print(folder2label)

In [None]:

def load_dataset2(data_dir,folder2label):
    """Load grayscale images below ``data_dir``, labelled through a lookup table.

    Unlike numbering folders by listing position, the label of every image is
    taken from ``folder2label`` using the name of the sub-folder it sits in.

    Top-level entries that are not directories are skipped, and files that
    ``cv2.imread`` cannot decode are skipped with a warning instead of being
    stored as ``None``.

    Args:
        data_dir: Directory containing one sub-folder of images per class.
        folder2label: Mapping from sub-folder name to integer label.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays with one entry per image
        that could be read.

    Raises:
        KeyError: If a sub-folder that contains a readable image is missing
            from ``folder2label``.
    """
    import warnings

    images = []
    labels = []
    for folder_name in os.listdir(data_dir):
        folder_path = os.path.join(data_dir, folder_name)
        if not os.path.isdir(folder_path):
            # A stray file next to the class folders: nothing to load from it.
            continue
        for filename in os.listdir(folder_path):
            image_path = os.path.join(folder_path, filename)
            image = cv2.imread(image_path, 0)  # flag 0 -> single-channel grayscale
            if image is None:
                # imread signals failure by returning None rather than raising.
                warnings.warn(f"Skipping unreadable image: {image_path}")
                continue
            # image = cv2.resize(image, (100, 100))
            images.append(image)
            labels.append(folder2label[folder_name])

    return np.array(images), np.array(labels)



In [None]:
# Evaluate the fitted pipeline on the extra images extracted to /content/CR,
# labelled through the folder2label lookup.
Xtest,ytest=load_dataset2("/content/CR",folder2label)

total = len(ytest)
if total == 0:
    # Fail with a clear message instead of a ZeroDivisionError further down.
    raise ValueError("No test images found in /content/CR")

# Project and classify all images with one call each, rather than invoking
# transform/predict once per image.
Xtest_pca = pca.transform(Xtest.reshape(total, -1))
predicted_labels = adaboost.predict(Xtest_pca)

correct = 0
for predicted_label, test_label in zip(predicted_labels, ytest):
    # Show every prediction next to its expected label.
    print(predicted_label,test_label)
    if predicted_label == test_label:
        correct += 1

accuracy = correct / total
print(f"Accuracy: {accuracy}")