# Code to unzip and preprocess the image dataset in the Colab repository

In [None]:
import zipfile
import os

# Archive to unpack, and the folder that will receive its contents
zip_file_name = "Classmates.zip"
extracted_folder = "Classmates"

# Ensure the destination folder is present before unpacking into it
os.makedirs(extracted_folder, exist_ok=True)

# Unpack every member of the archive into the destination folder;
# the context manager closes the archive handle afterwards
with zipfile.ZipFile(zip_file_name, mode='r') as archive:
    archive.extractall(path=extracted_folder)

# Report what now sits at the top level of the destination folder
extracted_contents = os.listdir(extracted_folder)
print("Extracted contents:", extracted_contents)


In [None]:
import cv2
import dlib
from skimage import exposure, img_as_ubyte
import os

# Load dlib's frontal face detector once; it is reused for every image
face_detector = dlib.get_frontal_face_detector()

# Directory containing the raw images, one sub-folder per class
raw_dataset_path = "/content/Classmates"

# Directory where the preprocessed face crops will be saved
preprocessed_dataset_path = "preprocessed_dataset"
os.makedirs(preprocessed_dataset_path, exist_ok=True)

# Iterate over class folders
for class_folder in os.listdir(raw_dataset_path):
    class_path = os.path.join(raw_dataset_path, class_folder)

    # Only directories are class folders; a stray top-level file would make
    # os.listdir(class_path) raise, so skip anything that is not a directory
    if not os.path.isdir(class_path):
        continue

    # Mirror the class folder inside the preprocessed dataset
    preprocessed_class_path = os.path.join(preprocessed_dataset_path, class_folder)
    os.makedirs(preprocessed_class_path, exist_ok=True)

    # Iterate over images in the class folder
    for image_name in os.listdir(class_path):
        image_path = os.path.join(class_path, image_name)

        # cv2.imread returns None (it does not raise) when the path cannot be
        # decoded as an image, e.g. a sub-folder or a non-image file
        img = cv2.imread(image_path)
        if img is None:
            print(f"Skipping unreadable image: {image_path}")
            continue

        # The detector is run on a grayscale copy of the image
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Detect faces using dlib
        faces = face_detector(gray)

        img_height, img_width = img.shape[:2]

        # Process each detected face
        for i, face in enumerate(faces):
            # Detector boxes may extend past the image border (negative
            # left/top); clamp them so the slice below cannot wrap around
            # or come out empty
            x1 = max(face.left(), 0)
            y1 = max(face.top(), 0)
            x2 = min(face.left() + face.width(), img_width)
            y2 = min(face.top() + face.height(), img_height)
            if x2 <= x1 or y2 <= y1:
                # Nothing of the box lies inside the image; cv2.resize would
                # fail on an empty crop
                continue

            # Crop the face from the original colour image
            cropped_face = img[y1:y2, x1:x2]

            # Resize the face to a standard size
            resized_face = cv2.resize(cropped_face, (224, 224))

            # Normalize pixel values to a standard range (0 to 1)
            normalized_face = resized_face / 255.0

            # Save the preprocessed face; img_as_ubyte converts the float
            # image back to 8-bit so it can be written as an image file.
            # The face index stays i+1 so file names match the detection order.
            preprocessed_image_name = f"{class_folder}_face{i+1}_{image_name}"
            preprocessed_image_path = os.path.join(preprocessed_class_path, preprocessed_image_name)
            cv2.imwrite(preprocessed_image_path, img_as_ubyte(normalized_face))

            # Augmentation: You can add additional data augmentation steps here if needed

print("Preprocessing complete.")

# Code to unzip and preprocess the image dataset within Google Drive

In [None]:
from google.colab import drive
import zipfile
import os
import cv2
import dlib
from skimage import exposure, img_as_ubyte

# shutil.rmtree replaces the IPython-only `!rm -r` shell escapes so this cell
# is plain Python and does not depend on a shell being available
import shutil

# Mount Google Drive
drive.mount('/content/drive')

# Path to the ZIP file in Google Drive
zip_file_path = '/content/drive/MyDrive/Project/Classmates.zip'

# Path for extracting contents on Google Drive
extracted_folder_path = '/content/drive/MyDrive/Project/Classmates'

# Path to the preprocessed dataset on Google Drive
preprocessed_dataset_path = '/content/drive/MyDrive/Project/preprocessed_dataset'

# Delete the existing extracted folder if it exists, so files from an earlier
# extraction do not linger alongside the new ones
if os.path.exists(extracted_folder_path):
    shutil.rmtree(extracted_folder_path)

# Extract the contents of the ZIP file to Google Drive
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extracted_folder_path)

# Delete the existing preprocessed dataset folder if it exists
if os.path.exists(preprocessed_dataset_path):
    shutil.rmtree(preprocessed_dataset_path)

# Create the target directory for preprocessed images on Google Drive
os.makedirs(preprocessed_dataset_path, exist_ok=True)

# Load dlib's frontal face detector once; it is reused for every image
face_detector = dlib.get_frontal_face_detector()

# Iterate over class folders
for class_folder in os.listdir(extracted_folder_path):
    class_path = os.path.join(extracted_folder_path, class_folder)

    # Only directories are class folders; a stray top-level file would make
    # os.listdir(class_path) raise, so skip anything that is not a directory
    if not os.path.isdir(class_path):
        continue

    # Mirror the class folder inside the preprocessed dataset
    preprocessed_class_path = os.path.join(preprocessed_dataset_path, class_folder)
    os.makedirs(preprocessed_class_path, exist_ok=True)

    # Iterate over images in the class folder
    for image_name in os.listdir(class_path):
        image_path = os.path.join(class_path, image_name)

        # cv2.imread returns None (it does not raise) when the path cannot be
        # decoded as an image, e.g. a sub-folder or a non-image file
        img = cv2.imread(image_path)
        if img is None:
            print(f"Skipping unreadable image: {image_path}")
            continue

        # The detector is run on a grayscale copy of the image
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Detect faces using dlib
        faces = face_detector(gray)

        img_height, img_width = img.shape[:2]

        # Process each detected face
        for i, face in enumerate(faces):
            # Detector boxes may extend past the image border (negative
            # left/top); clamp them so the slice below cannot wrap around
            # or come out empty
            x1 = max(face.left(), 0)
            y1 = max(face.top(), 0)
            x2 = min(face.left() + face.width(), img_width)
            y2 = min(face.top() + face.height(), img_height)
            if x2 <= x1 or y2 <= y1:
                # Nothing of the box lies inside the image; cv2.resize would
                # fail on an empty crop
                continue

            # Crop the face from the original colour image
            cropped_face = img[y1:y2, x1:x2]

            # Resize the face to a standard size
            resized_face = cv2.resize(cropped_face, (224, 224))

            # Normalize pixel values to a standard range (0 to 1)
            normalized_face = resized_face / 255.0

            # Save the preprocessed face on Google Drive; img_as_ubyte converts
            # the float image back to 8-bit so it can be written as an image.
            # The face index stays i+1 so file names match the detection order.
            preprocessed_image_name = f"{class_folder}_face{i+1}_{image_name}"
            preprocessed_image_path = os.path.join(preprocessed_class_path, preprocessed_image_name)
            cv2.imwrite(preprocessed_image_path, img_as_ubyte(normalized_face))

print("Preprocessing complete.")
