In [1]:
from typing import *
import numpy as np
from skimage import io
from skimage.transform import rescale, resize
import os
import matplotlib.pyplot as plt
from utils.common_functions import *
import cv2
from scipy import ndimage


### Reading images

In [8]:
def read_images(dataset_path: str = '../dataset') -> Tuple[List[np.ndarray], List[int]]:
    """Load every JPEG under ``dataset_path`` together with its integer label.

    The directory tree is walked recursively. A file's label is the first
    character of its file name interpreted as an integer
    (e.g. ``2_men (107).JPG`` -> ``2``).

    A file is skipped, with a message printed, when it
      * does not have a ``.jpg`` extension (matched case-insensitively),
      * does not start with a digit, or
      * cannot be decoded by ``cv2.imread`` (which returns ``None``).

    Args:
        dataset_path: Root directory to scan. Defaults to ``'../dataset'``.

    Returns:
        ``(images, labels)``: two parallel lists holding the arrays returned
        by ``cv2.imread`` and the integer label of each image.

    Note:
        Images are stored exactly as decoded (no downscaling), so memory use
        grows with the number and resolution of the files.
    """
    images = []
    labels = []
    for dirpath, _, filenames in os.walk(dataset_path):
        if not filenames:
            continue
        print(f'Processing directory {dirpath}...')
        for file in filenames:
            # Case-insensitive so that '.jpg', '.JPG', '.Jpg', ... are all accepted.
            if not file.lower().endswith('.jpg'):
                print(f'File {file} is not a jpg file. Skipping...')
                continue

            # Parse the label before decoding: a bad name is cheap to detect
            # and must not abort the whole load with a ValueError.
            try:
                label = int(file[0])
            except ValueError:
                print(f'File {file} does not start with a digit label. Skipping...')
                continue

            file_path = os.path.join(dirpath, file)

            # cv2.imread returns None for corrupted/unreadable files instead of raising.
            image = cv2.imread(file_path)
            if image is None:
                print(f'File {file} is not a valid image. Skipping...')
                continue

            images.append(image)
            labels.append(label)

    return images, labels

In [9]:
# Load the dataset: `images` holds the decoded images and `labels` the
# matching integer labels, in the same order.
images, labels = read_images()


Processing directory ../dataset\men\0...
File desktop.ini is not a jpg file. Skipping...
Processing directory ../dataset\men\1...
File desktop.ini is not a jpg file. Skipping...
Processing directory ../dataset\men\2...
File 2_men (107).JPG is not a valid image. Skipping...
File 2_men (108).JPG is not a valid image. Skipping...
File desktop.ini is not a jpg file. Skipping...
Processing directory ../dataset\men\3...
File 3_men (140).JPG is not a valid image. Skipping...
File 3_men (141).JPG is not a valid image. Skipping...
File desktop.ini is not a jpg file. Skipping...
Processing directory ../dataset\men\4...
File 4_men (5).JPG is not a valid image. Skipping...
File 4_men (6).JPG is not a valid image. Skipping...
File desktop.ini is not a jpg file. Skipping...
Processing directory ../dataset\men\5...
File desktop.ini is not a jpg file. Skipping...
Processing directory ../dataset\Women\0...
File desktop.ini is not a jpg file. Skipping...
Processing directory ../dataset\Women\1...


error: OpenCV(4.7.0) D:\a\opencv-python\opencv-python\opencv\modules\core\src\alloc.cpp:73: error: (-4:Insufficient memory) Failed to allocate 35831808 bytes in function 'cv::OutOfMemoryError'


In [None]:
# Sanity check: report how many images are currently held in `images`.
print(f'Number of images: {len(images)}')

Number of images: 59


### Preprocessing

In [None]:
def preprocess_image(image: np.ndarray, resize_ratio: float = 0.1, show_steps: bool = True) -> np.ndarray:
    """Turn a BGR image into a small binary mask.

    Pipeline:
      1. Downscale both dimensions by ``resize_ratio``.
      2. Convert BGR -> YCrCb.
      3. Keep pixels with Cr in [133, 173] and Cb in [77, 127] (any Y)
         via ``cv2.inRange``. NOTE(review): presumably a skin-tone range;
         confirm against the intended segmentation target.
      4. Erode once with a 5x5 kernel to remove small specks.
      5. Fill enclosed holes in the mask.

    Args:
        image: Input image; treated as BGR (the conversion code used is
            ``cv2.COLOR_BGR2YCR_CB``).
        resize_ratio: Scale factor applied to width and height. Must be > 0.
            Defaults to ``0.1``.
        show_steps: When True (default), every intermediate result is passed
            to ``show_images``. Set to False for batch runs to avoid
            producing five figures per image.

    Returns:
        A 2-D ``np.int8`` array with values 0 and 1.

    Raises:
        ValueError: If ``resize_ratio`` is not strictly positive.
    """
    if resize_ratio <= 0:
        raise ValueError(f'resize_ratio must be positive, got {resize_ratio}')

    def _show(step: np.ndarray) -> None:
        # `show_images` is not defined in this notebook; presumably it comes
        # from the `utils.common_functions` star import.
        if show_steps:
            show_images([step])

    # Clamp to at least one pixel so cv2.resize never receives a zero-sized target.
    new_width = max(1, int(image.shape[1] * resize_ratio))
    new_height = max(1, int(image.shape[0] * resize_ratio))
    image = cv2.resize(image, (new_width, new_height))
    _show(image)

    image = cv2.cvtColor(image, cv2.COLOR_BGR2YCR_CB)
    _show(image)

    # Channel order after conversion is (Y, Cr, Cb).
    lower_bound = np.array([0, 133, 77])
    upper_bound = np.array([255, 173, 127])
    image = cv2.inRange(image, lower_bound, upper_bound)
    _show(image)

    kernel = np.ones((5, 5), np.uint8)
    image = cv2.erode(image, kernel, iterations=1)
    _show(image)

    # binary_fill_holes returns a boolean array; cast to int8 to yield 0/1 values.
    image = ndimage.binary_fill_holes(image).astype(np.int8)
    _show(image)

    return image

In [None]:
# Try the pipeline on a single sample image.
sample_path = '../dataset/men/1/1_men (34).JPG'
sample_image = cv2.imread(sample_path)
# cv2.imread returns None (it does not raise) when the file is missing or
# cannot be decoded, so fail here with a clear message.
if sample_image is None:
    raise FileNotFoundError(f'Could not read sample image: {sample_path}')
preprocess_image(sample_image)

In [None]:
# Run the preprocessing pipeline over the whole dataset, replacing each entry
# of `images` with its processed result.
for i, raw_image in enumerate(images):
    images[i] = preprocess_image(raw_image.copy())