In [None]:
import os
import cv2
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import zipfile

# Path to the dataset archive. This is a machine-specific absolute path:
# edit it to point at your own copy of the zip file before running the notebook.
zip_file_path = r"C:\Users\prudh\Desktop\Datadl.zip"

In [45]:
# The archive is unpacked into a folder called "data" that sits next to the zip file.
new_directory_name = "data"
zip_file_directory = os.path.dirname(zip_file_path)
base_path = os.path.join(zip_file_directory, new_directory_name)

# Make sure the target folder is there; exist_ok keeps a re-run of this cell from failing.
os.makedirs(base_path, exist_ok=True)

try:
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        zip_ref.extractall(base_path)
except zipfile.BadZipFile:
    # The path exists but its content could not be read as a zip archive.
    print(f"The file {zip_file_path} is not a valid zip file.")
except FileNotFoundError:
    # Nothing was found at zip_file_path.
    print(f"The file {zip_file_path} does not exist.")
else:
    # Only reached when extraction finished without raising.
    print(f"Extracted all files to: {base_path}")


Extracted all files to: C:\Users\prudh\Desktop\data


## Image Preprocessing
#### Resizing Images to 128x128
#### Normalizing pixel values


In [63]:
# Directory whose sub-folders are the image classes; it is handed to
# load_and_preprocess_images, which lists it with os.listdir().
# "Datadl" is presumably the top-level folder inside the extracted archive —
# replace it with the actual folder name if yours differs.
data_file = os.path.join(base_path, "Datadl")


In [65]:
def load_and_preprocess_images(data_file, target_size=(128, 128)):
    """Load every image below ``data_file`` and preprocess it, grouped by class.

    ``data_file`` is expected to contain one sub-directory per class. Every
    regular file inside a class directory is opened with PIL and passed to
    ``preprocess_image`` (defined in another cell of this notebook).

    Args:
        data_file: Path of the directory that holds the class sub-directories.
        target_size: Size forwarded unchanged to ``preprocess_image``.
            Defaults to ``(128, 128)``.

    Returns:
        dict: Maps each class directory name to the list of values returned
        by ``preprocess_image`` for that class, in sorted file-name order.
        Entries of ``data_file`` that are not directories are ignored.

    Raises:
        Exceptions raised by ``os.listdir``, ``Image.open`` or
        ``preprocess_image`` are not caught here and propagate to the caller
        (for example when ``data_file`` does not exist or a file cannot be
        opened as an image).
    """
    preprocessed_data = {}

    # os.listdir() yields entries in arbitrary order; sorting keeps the
    # per-class lists (and any index-based file names derived from them)
    # reproducible from one run to the next.
    for class_folder in sorted(os.listdir(data_file)):
        class_path = os.path.join(data_file, class_folder)
        if not os.path.isdir(class_path):
            continue

        images = []
        for img_file in sorted(os.listdir(class_path)):
            img_path = os.path.join(class_path, img_file)

            # Nested folders cannot be opened as images; skip them.
            if not os.path.isfile(img_path):
                continue

            # Image.open() keeps the underlying file open; the context manager
            # releases it as soon as the preprocessed copy has been produced.
            with Image.open(img_path) as image:
                images.append(preprocess_image(image, target_size))

        preprocessed_data[class_folder] = images

    return preprocessed_data

In [67]:
def preprocess_image(image, target_size):
    """Resize an image and rescale its pixel values by 1/255.

    Args:
        image: Object exposing ``resize(size)`` whose result can be converted
            with ``np.array`` (a PIL image in this notebook).
        target_size: Size passed straight to ``image.resize``.

    Returns:
        numpy.ndarray: The resized pixel data divided by ``255.0``.

    Note:
        Assumes 8-bit channel values so the result lies in [0, 1] — TODO confirm
        for the dataset in use. The image mode is not changed here, so the
        number of channels in the output follows whatever the input provides.
    """
    # Step 1: bring the image to the requested dimensions.
    resized = image.resize(target_size)

    # Step 2: turn it into an array and scale by 255.0 (true division -> floats).
    pixels = np.array(resized)
    return pixels / 255.0

In [69]:
# Run the preprocessing over every class folder under data_file, using the
# function's default target size of 128x128. The result maps each class folder
# name to the list of processed image arrays for that class.
preprocessed_data = load_and_preprocess_images(data_file)

True


#### Saving preprocessed images

In [None]:
# Root folder for the processed dataset, placed inside base_path.
output_path = os.path.join(base_path, "preprocessed")
os.makedirs(output_path, exist_ok=True)  # no error if the folder is already there

print(f"Created directory for preprocessed data: {output_path}")

# Write each class into its own sub-folder, one PNG per image, named <class>_<index>.png.
for class_name in preprocessed_data:
    images = preprocessed_data[class_name]
    class_output_path = os.path.join(output_path, class_name)
    os.makedirs(class_output_path, exist_ok=True)

    for i, scaled in enumerate(images):
        # Undo the earlier division by 255 and cast to uint8 before handing the array to PIL.
        pixels = (scaled * 255).astype(np.uint8)
        png_path = os.path.join(class_output_path, f"{class_name}_{i}.png")
        Image.fromarray(pixels).save(png_path)


Created directory for preprocessed data: C:\Users\prudh\Desktop\data\preprocessed
