In [4]:
import os
import glob
import cv2
import numpy as np
from PIL import Image
from sklearn.decomposition import PCA
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

# Root folder of the dataset: one sub-folder per person, each holding that person's images.
dataset = "untitled folder/images"

# Name of the output folder (created inside the dataset root) that mirrors the input layout.
processed_name = "processed"

# For every person folder: resize each .jpg/.png to 150x150, convert it to grayscale and
# write it to <dataset>/processed/<person>/<original file name>.
for person_folder in sorted(glob.glob(os.path.join(dataset, "*"))):
    person = os.path.basename(person_folder)

    # Skip stray files, and skip the output folder itself so that re-running this cell
    # does not create "processed/processed".
    if person == processed_name or not os.path.isdir(person_folder):
        continue

    output_folder = os.path.join(dataset, processed_name, person)
    os.makedirs(output_folder, exist_ok=True)

    source_paths = glob.glob(os.path.join(person_folder, "*.jpg")) + glob.glob(os.path.join(person_folder, "*.png"))
    for source_path in source_paths:
        original = cv2.imread(source_path)
        # cv2.imread signals an unreadable/corrupt file by returning None instead of raising.
        if original is None:
            print(f"Skipping unreadable image: {source_path}")
            continue

        resized_image = cv2.resize(original, (150, 150))
        gray_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)

        target_path = os.path.join(output_folder, os.path.basename(source_path))
        # cv2.imwrite reports failure through its boolean return value.
        if not cv2.imwrite(target_path, gray_image):
            print(f"Failed to write processed image: {target_path}")

In [5]:
# Load the processed images of one person; the folder is expected to contain
# image1.jpg ... image10.jpg.

# Load the images and convert them to grayscale
images = []
labels = []
for i in range(10):
    image_path = f"untitled folder/images/processed/bitaniya/image{i+1}.jpg"
    try:
        # The context manager closes the file handle once the pixels are copied out.
        with Image.open(image_path) as image_file:
            pixels = np.array(image_file.convert("L"))  # Convert to grayscale
    except (OSError, ValueError) as e:
        # Missing or undecodable files are reported and skipped (best effort).
        print(f"Error loading image: {image_path}")
        print(str(e))
        continue
    images.append(pixels)
    labels.append("person")
    print(f"Loaded image: {image_path}")

# Fail early with a clear message instead of an opaque shape-unpacking error below.
if not images:
    raise ValueError("No images could be loaded; check the processed image folder.")
image_shapes = {pixels.shape for pixels in images}
if len(image_shapes) != 1:
    raise ValueError(f"All images must have the same size, found shapes: {sorted(image_shapes)}")

# Convert the images and labels to NumPy arrays
images = np.array(images)
labels = np.array(labels)

# Flatten the images: one row of height * width pixel values per image
num_images, height, width = images.shape
flattened_images = images.reshape(num_images, height * width)

# Encode the labels
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

Loaded image: untitled folder/images/processed/bitaniya/image1.jpg
Loaded image: untitled folder/images/processed/bitaniya/image2.jpg
Loaded image: untitled folder/images/processed/bitaniya/image3.jpg
Loaded image: untitled folder/images/processed/bitaniya/image4.jpg
Loaded image: untitled folder/images/processed/bitaniya/image5.jpg
Loaded image: untitled folder/images/processed/bitaniya/image6.jpg
Loaded image: untitled folder/images/processed/bitaniya/image7.jpg
Loaded image: untitled folder/images/processed/bitaniya/image8.jpg
Loaded image: untitled folder/images/processed/bitaniya/image9.jpg
Loaded image: untitled folder/images/processed/bitaniya/image10.jpg


In [6]:
# Prepare the Dataset
# Collect an independent NumPy copy of every entry of `images` into a plain list.
face_images = [np.array(image) for image in images]

In [7]:
# Stack the per-image arrays into a single array and read off its three dimensions.
face_images = np.array(face_images)
num_images, height, width = face_images.shape

# Unroll each image into one row of height * width pixel values.
pixels_per_image = height * width
face_images_flattened = face_images.reshape((num_images, pixels_per_image))

In [8]:

# Centre the data: compute the per-pixel mean over all images (the "mean face")
# and subtract it from every flattened image.
mean_face = face_images_flattened.mean(axis=0)
centered_face_images = np.subtract(face_images_flattened, mean_face)

In [24]:
# Apply Randomized PCA
k = 2  # Number of principal components to keep (can be adjusted)

# The randomized SVD solver is stochastic; a fixed random_state makes the projection
# reproducible across kernel restarts.
rpca = PCA(n_components=k, svd_solver='randomized', random_state=0)
projected_faces = rpca.fit_transform(centered_face_images)
print(projected_faces)

[[  -58.28064321  3263.43631286]
 [-4107.6181577  -3880.9944513 ]
 [-4196.07197296 -2933.07498959]
 [ 6050.36780804  -560.47858209]
 [ 4626.72194149  1933.18840033]
 [-2796.25275475  7523.81796817]
 [-4563.81939737  -358.3855155 ]
 [ 4148.93776162  -626.53508713]
 [ 7818.55941348 -2835.52418451]
 [-6922.54399865 -1525.44987125]]


In [None]:
import numpy as np
from sklearn.decomposition import PCA
from sklearn.utils import shuffle

def mini_batch_pca(X, batch_size, n_components):
    """Project ``X`` onto its leading principal components, reading it in mini-batches.

    The rows of ``X`` are visited ``batch_size`` at a time. For each batch the mean and
    scatter matrix (sum of outer products of the centred rows) are computed and merged
    into running totals with the pairwise update

        scatter += batch_scatter + outer(delta, delta) * n_seen * n_batch / n_total

    where ``delta`` is the difference between the batch mean and the running mean. The
    merged scatter matrix equals the scatter of the whole data set, so the result does
    not depend on ``batch_size``, and a trailing batch with a single row is handled.

    Note that a full ``(n_features, n_features)`` matrix is held in memory.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Data matrix, one sample per row.
    batch_size : int
        Number of rows processed per batch; must be at least 1.
    n_components : int
        Number of principal components to keep; must satisfy
        ``1 <= n_components <= n_features``.

    Returns
    -------
    numpy.ndarray of shape (n_samples, n_components)
        The mean-centred data projected onto the top ``n_components`` eigenvectors,
        ordered by decreasing variance (column 0 is the direction of largest variance).
        The sign of each column is arbitrary.

    Raises
    ------
    ValueError
        If ``X`` is not a non-empty 2-D array, or ``batch_size`` / ``n_components``
        is out of range.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim != 2 or X.shape[0] == 0:
        raise ValueError("X must be a non-empty 2-D array of shape (n_samples, n_features)")
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    n_features = X.shape[1]
    # Guard explicitly: a slice such as [:, -0:] would silently select every column.
    if not 1 <= n_components <= n_features:
        raise ValueError(f"n_components must be between 1 and n_features={n_features}")

    n_seen = 0
    running_mean = np.zeros(n_features)
    running_scatter = np.zeros((n_features, n_features))

    # Iterate over consecutive mini-batches
    for start in range(0, len(X), batch_size):
        X_batch = X[start:start + batch_size]
        n_batch = len(X_batch)

        # Within-batch statistics
        batch_mean = X_batch.mean(axis=0)
        centered_batch = X_batch - batch_mean
        batch_scatter = centered_batch.T @ centered_batch

        # Merge into the running totals; the outer-product term accounts for the shift
        # between the batch mean and the running mean (it vanishes for the first batch).
        delta = batch_mean - running_mean
        n_total = n_seen + n_batch
        running_scatter += batch_scatter + np.outer(delta, delta) * (n_seen * n_batch / n_total)
        running_mean += delta * (n_batch / n_total)
        n_seen = n_total

    # The scatter matrix and the covariance matrix differ only by a positive scale
    # factor, so they share eigenvectors. eigh returns eigenvalues in ascending order.
    _, eigenvectors = np.linalg.eigh(running_scatter)

    # Select the top-k eigenvectors, largest eigenvalue first
    top_k_eigenvectors = eigenvectors[:, ::-1][:, :n_components]

    # Project the mean-centred data onto the selected eigenvectors
    projected_data = np.dot(X - running_mean, top_k_eigenvectors)

    return projected_data

# Mini-batch PCA settings: rows consumed per batch, and number of principal components (k) to keep
batch_size, n_components = 100, 20

# Run Mini-Batch PCA on the data matrix.
# NOTE(review): `X` is not assigned in any cell visible here — confirm which matrix is intended.
projected_data = mini_batch_pca(X, batch_size=batch_size, n_components=n_components)