In [10]:
import os
from glob import glob

import cv2
import numpy as np
from tqdm import tqdm

### **Filter images: Size**

In [4]:
def filter_images_by_size(images, min_size=512):
    """
    Select the image paths whose decoded image is large enough.

    Args:
        images: Sequence of image file paths (its length is passed to tqdm).
        min_size: Threshold in pixels. A path is kept only when both the
            height and the width are strictly greater than this value.

    Returns:
        A new list with the accepted paths, in their original order.
        Paths that cv2.imread cannot decode (it returns None) are dropped.
    """
    def _is_large_enough(path):
        # Decode as a 3-channel colour image; None means the file was unreadable.
        pixels = cv2.imread(path, cv2.IMREAD_COLOR)
        if pixels is None:
            return False
        height, width, _ = pixels.shape
        # Both dimensions exceed the threshold iff the smaller one does.
        return min(height, width) > min_size

    progress = tqdm(images, total=len(images))
    return [path for path in progress if _is_large_enough(path)]

### **Filter Image: Portrait**

In [11]:
# NOTE: portrait filtering is currently disabled. Both this function and its
# call in the execution section are commented out; re-enable them together.
# def filter_images_by_portrait(images):
#     """
#     Filters portrait-oriented images.
#     Keeps only images where height > width.
#     """
#     output = []

#     for img in tqdm(images, total=len(images)):
#         x = cv2.imread(img, cv2.IMREAD_COLOR)

#         if x is not None:  # Ensure the image is loaded
#             h, w, c = x.shape
#             if h > w:
#                 output.append(img)

#     return output


### **Resizing and saving the images**


In [20]:
def save_images(images, save_dir, size=(512, 512)):
    """
    Resizes and saves images in .png format to the specified directory.

    Args:
        images: Sequence of source image paths.
        save_dir: Directory that receives the resized copies. It is created
            (including missing parents) if it does not exist yet.
        size: Target size passed to cv2.resize.

    Returns:
        None. Files are written as "<save_dir>/<NNNN>.png", where NNNN is the
        1-based position of the path in `images`, zero-padded to 4 digits.
        Paths that cv2.imread cannot decode are skipped, so the numbering can
        contain gaps.
    """
    # cv2.imwrite returns False instead of raising when the target directory is
    # missing, which silently produced no output on a fresh checkout.
    # An empty save_dir yields paths rooted at "/", so there is nothing to create.
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    for idx, path in enumerate(tqdm(images, total=len(images))):
        x = cv2.imread(path, cv2.IMREAD_COLOR)
        if x is None:  # unreadable or non-image file
            continue
        x = cv2.resize(x, size)  # Resize to model input size
        cv2.imwrite(f"{save_dir}/{idx + 1:04d}.png", x)  # Save as .png

### **Executing the program**

In [6]:
# Load raw images
# glob() returns paths in arbitrary, filesystem-dependent order. Sort them so
# the position-based file numbering assigned by save_images() is reproducible
# from run to run.
raw_images = sorted(glob("/home/ahsan/University/Thesis/UNet_Directory/Datasets/raw_images/*"))
print("Initial images:", len(raw_images))

initial images:  5


In [7]:
# Filter by size
# Keep only the paths accepted by filter_images_by_size(): readable images
# whose height and width both exceed min_size. The result is stored in
# `output`, which the later cells consume.
output = filter_images_by_size(raw_images, min_size=512)
print("Filtered by size:", len(output))

100%|██████████| 5/5 [00:01<00:00,  4.14it/s]

Filter by size:  5





In [15]:
# output = filter_images_by_portrait(output)
# print("Filter by potrait: ", len(output))

100%|██████████| 3/3 [00:00<00:00,  3.65it/s]

Filter by potrait:  3





In [21]:
# Save resized images
# Writes a resized .png copy of every readable path in `output` into the
# cleaned_images directory; no `size` is passed, so save_images() uses its
# default of (512, 512).
save_images(output, "/home/ahsan/University/Thesis/UNet_Directory/Datasets/cleaned_images")

100%|██████████| 3/3 [00:00<00:00,  3.25it/s]


In [22]:
# Count the files now present in the cleaned_images directory as a sanity
# check on the save step.
clean_images = glob("/home/ahsan/University/Thesis/UNet_Directory/Datasets/cleaned_images/*")
print("Cleaned images: ", len(clean_images))

Cleaned images:  3
