In [4]:
import numpy as np
import cv2
import glob
import os

In [5]:
def extract_contour_for_dim_image(image : np.ndarray,
                                  min_area_ratio : float = 0.48,
                                  max_area_ratio : float = 0.7) -> bool:
    """Tell whether an edge-based pipeline finds a card-sized contour.

    The image is converted to grayscale, blurred with a 3x3 Gaussian,
    binarised with a mean adaptive threshold and run through Canny. The
    inverted edge map is morphologically opened with a 7x7 rectangle and
    its contours are extracted. The result is True when at least one contour
    has an area within ``[min_area_ratio, max_area_ratio]`` of the image area
    (both bounds inclusive).

    Args:
        image: Colour image; it is converted with ``cv2.COLOR_RGB2GRAY``, so
            RGB channel order is expected.
        min_area_ratio: Smallest accepted contour area, as a fraction of the
            image area.
        max_area_ratio: Largest accepted contour area, as a fraction of the
            image area.

    Returns:
        True if any contour falls inside the accepted area window.

    Raises:
        cv2.error: Propagated from OpenCV when ``image`` is not a valid
            colour image (for example ``None``).
    """
    # to grayscale image
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # get edges
    # A non-positive sigma makes OpenCV derive it from the kernel size, so 0
    # states the intent explicitly (the previous value of -10 behaved the same).
    blur = cv2.GaussianBlur(gray, (3, 3), 0)
    adaptive_binary = cv2.adaptiveThreshold(
        blur, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 7, 3)
    edges = cv2.Canny(adaptive_binary, 100, 200)
    binarized_grad = 255 - edges

    # denoise again: opening removes white regions smaller than the kernel
    open_binarized_grad = cv2.morphologyEx(
        binarized_grad,
        cv2.MORPH_OPEN,
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7)))

    # get contours; index [-2] selects the contour list on both OpenCV 3.x
    # (three return values) and 4.x (two return values)
    contours = cv2.findContours(
        open_binarized_grad, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[-2]

    # accepted area window, in pixels
    height, width = binarized_grad.shape
    image_area = height * width
    smallest_area = min_area_ratio * image_area
    largest_area = max_area_ratio * image_area

    # Only existence matters, so no sorting or polygon approximation is needed.
    return any(
        smallest_area <= cv2.contourArea(contour) <= largest_area
        for contour in contours)

In [6]:
def extract_contour_for_pop_image(image : np.ndarray,
                                  min_area_ratio : float = 0.48,
                                  max_area_ratio : float = 0.6) -> bool:
    """Tell whether Otsu thresholding finds a card-sized contour.

    The image is converted to grayscale and binarised with Otsu's threshold.
    Each contour of the binary image is simplified with ``approxPolyDP``
    (epsilon = 0.1% of its perimeter) and the area of the simplified
    contour's axis-aligned bounding box is compared against the image area.

    Args:
        image: Colour image; it is converted with ``cv2.COLOR_RGB2GRAY``, so
            RGB channel order is expected.
        min_area_ratio: Smallest accepted bounding-box area, as a fraction of
            the image area.
        max_area_ratio: Largest accepted bounding-box area, as a fraction of
            the image area.

    Returns:
        True if any contour's bounding box falls inside the accepted area
        window (both bounds inclusive).

    Raises:
        cv2.error: Propagated from OpenCV when ``image`` is not a valid
            colour image (for example ``None``).
    """
    # to grayscale image
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # binarise; the threshold value chosen by Otsu is not needed
    _, otsu_grad = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # get contours; index [-2] selects the contour list on both OpenCV 3.x
    # (three return values) and 4.x (two return values)
    contours = cv2.findContours(
        otsu_grad, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # accepted area window, in pixels
    height, width = otsu_grad.shape
    image_area = height * width
    smallest_area = min_area_ratio * image_area
    largest_area = max_area_ratio * image_area

    # Only existence matters, so contours are checked in their natural order
    # and the scan stops at the first match.
    for contour in contours:
        perimeter = cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, 0.001 * perimeter, True)
        _, _, box_width, box_height = cv2.boundingRect(approx)
        if smallest_area <= box_width * box_height <= largest_area:
            return True
    return False

In [9]:
# Count, over every sub-directory of data/, how many back.jpg images yield a
# card-sized contour with either detector.
total_number_of_images = 0
contour_found = 0
skipped_image_paths = []  # images that could not be read or processed
for file_path in sorted(glob.glob(os.path.join('data','*'))):
    image_file_path = os.path.join(file_path, "back.jpg")

    # cv2.imread reports a missing/corrupt file by returning None, not by raising.
    image = cv2.imread(image_file_path)
    if image is None:
        skipped_image_paths.append(image_file_path)
        continue

    try:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # The Otsu-based detector runs first; the edge-based one is the fallback.
        card_found = (extract_contour_for_pop_image(image)
                      or extract_contour_for_dim_image(image))
    except cv2.error:
        # Only OpenCV failures are tolerated; programming errors and
        # KeyboardInterrupt are no longer silently swallowed.
        skipped_image_paths.append(image_file_path)
        continue

    total_number_of_images += 1
    if card_found:
        contour_found += 1

In [8]:
# Report how many images produced a card contour and how many were processed.
found_summary = f"Current images whose content found: {contour_found}"
processed_summary = f"Current processed images: {total_number_of_images}"
print(found_summary, processed_summary, sep="\n")

Current images whose content found: 7401
Current processed images: 8214
