# Preprocessing for Fruit Image Classification Dataset

This notebook applies preprocessing steps to improve the quality of a fruit classification dataset: it detects and skips blurry images, resizes the remaining images to 128×128 pixels, and applies Canny edge detection before saving the results to a separate output folder.

## Step 1: Set Up Paths and Define Helper Functions

In [None]:

import os
import numpy as np
from PIL import Image, ImageOps
import cv2

# Dataset locations and preprocessing settings shared by the cells below.
dataset_path = 'train_data'        # root folder holding one sub-folder per class
output_path = 'processed_data'     # root folder that receives processed images
classes = ['tomato', 'cherry', 'strawberry']
target_size = (128, 128)           # (width, height) passed to the resize step

# Create the output root and one sub-folder per class; existing
# directories are left untouched thanks to exist_ok=True.
os.makedirs(output_path, exist_ok=True)
for cls in classes:
    class_output_dir = os.path.join(output_path, cls)
    os.makedirs(class_output_dir, exist_ok=True)

# Function to detect blur
def is_blurry(image, threshold=100):
    """Return True when *image* is considered blurry.

    Sharpness is scored as the variance of the Laplacian of the grayscale
    image; the image is reported as blurry when that score falls below
    *threshold*.

    Args:
        image: A PIL image (any object exposing ``convert('L')``).
        threshold: Minimum Laplacian variance for an image to count as
            sharp. Defaults to 100, the value previously hard-coded here.

    Returns:
        bool: True if the Laplacian variance is below *threshold*.
    """
    gray_image = np.array(image.convert('L'))
    # CV_64F keeps negative Laplacian responses instead of clipping them
    # to the uint8 range, which would distort the variance.
    laplacian_var = cv2.Laplacian(gray_image, cv2.CV_64F).var()
    # NOTE(review): the score is computed on whatever resolution the caller
    # passes in, so a single threshold may behave differently across image
    # sizes -- confirm this is intended.
    return bool(laplacian_var < threshold)  # plain bool rather than numpy.bool_

# Edge detection function
def apply_edge_detection(image):
    """Return a Canny edge map of *image* as a PIL image.

    The input is converted to grayscale, passed through ``cv2.Canny`` with
    thresholds 100 and 200, and the resulting array is wrapped back into a
    PIL image.
    """
    grayscale = image.convert('L')
    pixels = np.array(grayscale)
    edge_map = cv2.Canny(pixels, 100, 200)
    return Image.fromarray(edge_map)


## Step 2: Preprocess Images

In [None]:

# Loop through each class and preprocess images.
# For every class folder under dataset_path: skip blurry or unreadable
# images, resize the rest to target_size, run edge detection, and save the
# edge map under the same file name in the matching output_path sub-folder.
for fruit_class in classes:
    folder_path = os.path.join(dataset_path, fruit_class)
    output_folder_path = os.path.join(output_path, fruit_class)

    # A missing class folder should not abort the remaining classes.
    if not os.path.isdir(folder_path):
        print(f"Skipping missing class folder: {folder_path}")
        continue

    # Sorted so that runs are reproducible; os.listdir order is arbitrary.
    image_files = sorted(
        f for f in os.listdir(folder_path)
        if f.lower().endswith(('.png', '.jpg', '.jpeg'))
    )

    for image_file in image_files:
        image_path = os.path.join(folder_path, image_file)

        try:
            # The context manager guarantees the source file handle is closed.
            with Image.open(image_path) as image:
                # Step 1: Check for blur (on the original-resolution image)
                if is_blurry(image):
                    print(f"Skipping blurry image: {image_path}")
                    continue

                # Step 2: Resize to target size. ImageOps.fit scales and
                # center-crops to the requested aspect ratio; it does NOT pad.
                resized_image = ImageOps.fit(image, target_size, Image.LANCZOS)
        except OSError as exc:
            # Covers unidentified, truncated, or otherwise unreadable files
            # (PIL.UnidentifiedImageError is a subclass of OSError).
            print(f"Skipping unreadable image: {image_path} ({exc})")
            continue

        # Step 3: Apply edge detection
        edge_image = apply_edge_detection(resized_image)

        # Save processed image
        processed_image_path = os.path.join(output_folder_path, image_file)
        edge_image.save(processed_image_path)

print(f"Preprocessing completed. Processed images are saved in the '{output_path}' folder.")
