In [2]:
from PIL import Image
import numpy as np
import os
import h5py

def crop_and_resize(img, target_size=(256, 256)):
    """Center-crop ``img`` to the target aspect ratio, then resize it.

    The crop box is the largest centered rectangle with the same aspect
    ratio as ``target_size`` that fits inside the image, so the following
    resize scales the picture without stretching it. For a square
    ``target_size`` this is the centered square whose side is
    ``min(width, height)``.

    Args:
        img: Image object exposing ``size`` as ``(width, height)`` plus
            ``crop(box)`` and ``resize(size)`` methods; the caller in this
            file passes a PIL image.
        target_size: ``(width, height)`` of the returned image.

    Returns:
        The cropped image resized to ``target_size``.
    """
    width, height = img.size
    target_width, target_height = target_size

    # Compare aspect ratios by cross-multiplication to stay in integers.
    if width * target_height > height * target_width:
        # Image is relatively wider than the target: keep the full height
        # and trim the left/right edges.
        crop_width = max(1, height * target_width // target_height)
        crop_height = height
    else:
        # Image is relatively taller (or the same ratio): keep the full
        # width and trim the top/bottom edges.
        crop_width = width
        crop_height = max(1, width * target_height // target_width)

    # Center the crop box inside the original image.
    left = (width - crop_width) // 2
    top = (height - crop_height) // 2
    box = (left, top, left + crop_width, top + crop_height)

    return img.crop(box).resize(target_size)

def load_images_from_folder(folder, label, size=(256, 256)):
    """Load the supported images in ``folder`` and pair each with ``label``.

    Directory entries are visited in sorted filename order. Only names
    ending (case-insensitively) in ``.jpg``, ``.jpeg``, ``.png`` or
    ``.webp`` are read; everything else is skipped.

    Args:
        folder: Path of the directory to scan (not recursive).
        label: Value appended to the label list once per loaded image.
        size: ``(width, height)`` passed to ``crop_and_resize``.

    Returns:
        A tuple ``(images, labels)``: ``images`` is a list holding the
        ``np.asarray`` conversion of each cropped and resized RGB image,
        and ``labels`` is a list of the same length filled with ``label``.
    """
    images = []
    labels = []

    for filename in sorted(os.listdir(folder)):
        if not filename.lower().endswith((".jpg", ".jpeg", ".png", ".webp")):
            continue

        img_path = os.path.join(folder, filename)

        # Close the underlying file deterministically instead of relying on
        # garbage collection; convert() returns an independent image, so it
        # remains usable after the source file is closed.
        with Image.open(img_path) as source:
            img = source.convert('RGB')

        # Crop and resize image
        img = crop_and_resize(img, size)

        images.append(np.asarray(img))
        labels.append(label)

    return images, labels

# Source directories, one per class
rui_folder = "images/Rui"
not_rui_folder = "images/Non-Rui"

# Read both classes: label 1 for the Rui folder, label 0 for the Non-Rui folder
rui_images, rui_labels = load_images_from_folder(rui_folder, 1)
not_rui_images, not_rui_labels = load_images_from_folder(not_rui_folder, 0)

# Concatenate the per-class lists and convert them to arrays; images and
# labels stay aligned by index
all_images = np.array(rui_images + not_rui_images)
all_labels = np.array(rui_labels + not_rui_labels)

# Shuffle one index array and apply it to both arrays so that every image
# keeps its own label
indices = np.arange(len(all_images))
np.random.shuffle(indices)
all_images, all_labels = all_images[indices], all_labels[indices]

# Write the shuffled arrays to an HDF5 file (overwrites any existing file)
with h5py.File("rui_dataset.h5", "w") as f:
    for dataset_name, dataset_values in (
        ("images", all_images),
        ("labels", all_labels),
    ):
        f.create_dataset(dataset_name, data=dataset_values)

print("✅ rui_dataset.h5 created successfully!")


✅ rui_dataset.h5 created successfully!
