In [1]:
!git clone https://github.com/afeefjunaid/fyp.git  # Replace with your GitHub URL

# Step 2: Define the path to the dataset folder within the cloned repository
dataset_path = '/content/fyp/skinType'

Cloning into 'fyp'...
remote: Enumerating objects: 2737, done.[K
remote: Total 2737 (delta 0), reused 0 (delta 0), pack-reused 2737 (from 1)[K
Receiving objects: 100% (2737/2737), 107.49 MiB | 12.78 MiB/s, done.
Resolving deltas: 100% (2/2), done.
Updating files: 100% (3826/3826), done.


In [2]:
import os
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split

# Function to load and preprocess images
def preprocess_images(dataset_path, img_size=(16, 16)):
    X = []  # List to hold image data
    y = []  # List to hold labels
    class_names = []  # List to hold class names (dry, oily, normal)

    # Loop over each folder (class)
    for idx, class_folder in enumerate(os.listdir(dataset_path)):
        class_path = os.path.join(dataset_path, class_folder)
        class_names.append(class_folder)

        # Loop over each image in the class folder
        for img_name in os.listdir(class_path):
            img_path = os.path.join(class_path, img_name)

            # Open and resize image
            with Image.open(img_path) as img:
                img = img.resize(img_size)  # Resize image to 16x16 pixels
                img_array = np.array(img)  # Convert to numpy array

                # Normalize pixel values to [0, 1] range (optional)
                img_array = img_array / 255.0

                # Append data and label
                X.append(img_array)
                y.append(idx)  # Class label as index

    # Convert lists to numpy arrays
    X = np.array(X)
    y = np.array(y)

    return X, y, class_names

# Function to split the dataset into training and test sets
def create_dataset(X, y, test_size=0.2):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
    return X_train, X_test, y_train, y_test

# Main function to run the tool
def process_skin_type_dataset(dataset_path):
    print(f"Processing dataset from: {dataset_path}")
    X, y, class_names = preprocess_images(dataset_path)

    print(f"Total images: {len(X)}")
    print(f"Classes: {class_names}")

    X_train, X_test, y_train, y_test = create_dataset(X, y)

    # Print summary
    print(f"Training set size: {X_train.shape[0]}")
    print(f"Test set size: {X_test.shape[0]}")

    # Save preprocessed dataset for future use (optional)
    np.save("X_train.npy", X_train)
    np.save("X_test.npy", X_test)
    np.save("y_train.npy", y_train)
    np.save("y_test.npy", y_test)

    return X_train, X_test, y_train, y_test, class_names

# Example usage:
dataset_path = '/content/fyp/skinType'  # Update this to your dataset path
X_train, X_test, y_train, y_test, class_names = process_skin_type_dataset(dataset_path)


Processing dataset from: /content/fyp/skinType
Total images: 3824
Classes: ['dry', 'normal', 'oily']
Training set size: 3059
Test set size: 765


In [5]:
import os
from PIL import Image

# Function to create a directory if it doesn't exist
def create_directory(directory):
    if not os.path.exists(directory):
        os.makedirs(directory)

# Function to load, preprocess, and save images
def preprocess_and_save_images(dataset_path, output_path, img_size=(16, 16)):
    class_names = []  # List to hold class names (dry, oily, normal)

    # Loop over each folder (class)
    for idx, class_folder in enumerate(os.listdir(dataset_path)):
        class_path = os.path.join(dataset_path, class_folder)

        # Only process directories (skip files if any)
        if not os.path.isdir(class_path):
            continue

        class_names.append(class_folder)

        # Create output directories for each class
        output_class_dir = os.path.join(output_path, class_folder)
        create_directory(output_class_dir)

        # Loop over each image in the class folder
        for img_name in os.listdir(class_path):
            img_path = os.path.join(class_path, img_name)

            # Open and resize image
            try:
                with Image.open(img_path) as img:
                    img = img.resize(img_size)  # Resize image to 16x16 pixels

                    # Save the resized image to the output directory
                    output_img_path = os.path.join(output_class_dir, img_name)
                    img.save(output_img_path, "JPEG")

            except Exception as e:
                print(f"Error processing image {img_name}: {e}")

    print(f"Processed images saved to: {output_path}")
    return class_names

# Main function to run the tool
def process_skin_type_dataset(dataset_path, output_path):
    print(f"Processing dataset from: {dataset_path}")
    class_names = preprocess_and_save_images(dataset_path, output_path)

    print(f"Classes processed: {class_names}")

# Example usage:
dataset_path = '/content/fyp/skinType'  # Replace with the path to your dataset
output_path = '/content/fyp/skinType/UpdateImages'  # Replace with the path to save resized images
process_skin_type_dataset(dataset_path, output_path)


Processing dataset from: /content/fyp/skinType
Error processing image .ipynb_checkpoints: [Errno 21] Is a directory: '/content/fyp/skinType/skinType/.ipynb_checkpoints'
Error processing image dry: [Errno 21] Is a directory: '/content/fyp/skinType/skinType/dry'
Error processing image normal: [Errno 21] Is a directory: '/content/fyp/skinType/skinType/normal'
Error processing image oily: [Errno 21] Is a directory: '/content/fyp/skinType/skinType/oily'
Processed images saved to: /content/fyp/skinType/UpdateImages
Classes processed: ['.ipynb_checkpoints', 'skinType', 'dry', 'normal', 'oily']
