**IMPORTS**

In [1]:
import os
import cv2
import numpy as np 
import matplotlib.pyplot as plt

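- UNDEFINED = BGR (200, 0, 255), which becomes grey value 99 after `cv2.COLOR_BGR2GRAY`; this colour marks pixels that were removed as background.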

**FUNCTION DEFINITIONS**

In [2]:
# Importing an image
def Init(path:str, grey:bool = False, blur:bool = False) -> np.ndarray:
    """Load an image from disk, optionally converting to grey and blurring it.

    Args:
        path: Location of the image file.
        grey: When True, convert the loaded BGR image to a single grey channel.
        blur: When True, smooth the image with a 3x3 Gaussian kernel.

    Returns:
        The loaded (and optionally converted / blurred) image.

    Raises:
        OSError: If OpenCV could not decode an image from ``path``.
    """
    img = cv2.imread(path)
    # cv2.imread reports failure by returning None instead of raising; stop
    # here with a clear message rather than failing later in an unrelated call.
    if img is None:
        raise OSError(f"Could not read image: {path}")
    if grey:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    if blur:
        # NOTE(review): the third positional argument of GaussianBlur is sigmaX,
        # not the border mode, so the numeric value of cv2.BORDER_DEFAULT is
        # used as the sigma here. Left unchanged to keep existing results;
        # confirm whether sigmaX=0 (derive sigma from the kernel) was intended.
        img = cv2.GaussianBlur(img, (3, 3), cv2.BORDER_DEFAULT)
    return img

# Displaying an image
def Display(img:np.ndarray) -> None:
    """Show ``img`` scaled to 600x600 in a window and block until a key press.

    The window is destroyed once a key is pressed; otherwise it would stay
    open after ``waitKey`` returns with nothing left to service its events.

    Args:
        img: Image to preview. The caller's array is not modified.
    """
    window_name = "wasu"
    preview = cv2.resize(img, (600, 600))
    cv2.imshow(window_name, preview)
    cv2.waitKey(0)
    cv2.destroyWindow(window_name)

# Displaying the rect
def DisplayRect(img:np.ndarray, rect:tuple) -> None:
    """Preview ``img`` with ``rect`` outlined in white (2 px).

    The outline is drawn on a copy, so the caller's image is left untouched.
    """
    WHITE = (255, 255, 255)
    outlined = img.copy()
    cv2.rectangle(outlined, rect, WHITE, 2)
    Display(outlined)

# Returns a cropped image based on rect
def CropRect(img:np.ndarray, rect:tuple) -> np.ndarray:
    """Cut the ``(x, y, w, h)`` region out of ``img`` and resize it to 200x200."""
    left, top, width, height = rect
    rows = slice(top, top + height)
    cols = slice(left, left + width)
    return cv2.resize(img[rows, cols], (200, 200))

def FillRect(grey:np.ndarray, rect:tuple) -> np.ndarray:
    """Return a copy of ``grey`` with ``rect`` filled by the image's mean value."""
    filled = grey.copy()
    fill_value = np.mean(filled)
    # A thickness of -1 asks OpenCV for a filled rectangle instead of an outline.
    cv2.rectangle(filled, rect, fill_value, -1)
    return filled

def Blank() -> np.ndarray:
    """Return a new 200x200 single-channel uint8 image filled with zeros."""
    side = 200
    return np.zeros(shape=(side, side), dtype=np.uint8)

In [3]:
# Returns a rect that contains all contours
def GetBiggestRect(contours:np.ndarray) -> tuple:
    """Return the ``(x, y, w, h)`` rect that encloses every given contour.

    The extremes are taken from the contours themselves, so the result is
    correct for images of any size (not only 200x200 ones).

    Args:
        contours: Sequence of OpenCV contours.

    Returns:
        ``(x, y, w, h)`` of the union of all contour bounding rects. A width
        or height of exactly 200 is reduced to 199 with the origin moved in
        by one pixel.

    Raises:
        ValueError: If ``contours`` is empty, since no rect can be derived.
    """
    if len(contours) == 0:
        raise ValueError("GetBiggestRect: no contours to enclose")

    boxes = [cv2.boundingRect(contour) for contour in contours]
    min_x = min(x for x, _, _, _ in boxes)
    min_y = min(y for _, y, _, _ in boxes)
    max_x = max(x + w for x, _, w, _ in boxes)
    max_y = max(y + h for _, y, _, h in boxes)
    max_w = max_x - min_x
    max_h = max_y - min_y

    # A rect spanning a full 200-pixel side is pulled in by one pixel --
    # presumably so GrabCut still has pixels outside the rect; TODO confirm.
    if max_w == 200:
        min_x += 1
        max_w -= 1
    if max_h == 200:
        min_y += 1
        max_h -= 1
    return (min_x, min_y, max_w, max_h)

# Returns a rect of no white space image
def RectWhiteSpace(grey:np.ndarray) -> tuple:
    """Return the rect enclosing everything in ``grey`` that is not near-white.

    Pixels above 250 are treated as white background; the remaining pixels are
    morphologically closed with a 5x5 kernel before their outer contours are
    merged into a single rect.
    """
    WHITE_MIN, WHITE_MAX, KERNEL_SIDE = 250, 255, 5
    _, non_white = cv2.threshold(grey, WHITE_MIN, WHITE_MAX, cv2.THRESH_BINARY_INV)
    closing_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (KERNEL_SIDE, KERNEL_SIDE))
    closed = cv2.morphologyEx(non_white, cv2.MORPH_CLOSE, closing_kernel)
    outlines, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    return GetBiggestRect(outlines)

# Returns a rect of contours
def RectContour(grey_blur:np.ndarray) -> tuple:
    """Return the rect enclosing all Canny-edge contours of ``grey_blur``.

    The Canny thresholds are derived from the image's standard deviation:
    ``2.7 * std`` and ``min(255, 3 * std)``.

    Args:
        grey_blur: Greyscale image to analyse (the name suggests it should
            already be blurred -- TODO confirm with callers).

    Returns:
        ``(x, y, w, h)`` as produced by ``GetBiggestRect``.
    """
    spread = np.std(grey_blur)
    edges = cv2.Canny(grey_blur, 2.7 * spread, min(255, 3 * spread))
    # The contour hierarchy is not needed; only the contours are used.
    contours, _ = cv2.findContours(edges, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
    return GetBiggestRect(contours)

In [4]:
def Cont(canny:np.ndarray) -> np.ndarray:
    """Return every contour found in the edge image ``canny``.

    Contours are retrieved as a flat list (``RETR_LIST``) with simple chain
    approximation; the hierarchy returned by OpenCV is discarded.
    """
    found = cv2.findContours(canny, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    contours, _hierarchy = found
    return contours

def ContFilter(contours:np.ndarray, threshold:int = 10) -> list:
    """Keep only the contours whose points are spread wider than ``threshold``.

    Spread is the root of the mean per-axis variance of the contour's points,
    i.e. the deviation of the points about the contour's own centroid. Taking
    the standard deviation of the flattened coordinates instead would also
    count the offset between the mean x and the mean y, letting tiny contours
    far from the image diagonal pass the filter.

    An alternative criterion tried earlier was keeping contours whose
    bounding-rect width or height exceeds ``threshold``.

    Args:
        contours: Sequence of contours, each an array of (x, y) points.
        threshold: Minimum spread, in pixels, for a contour to be kept.

    Returns:
        A list with the contours that passed, in their original order.
    """
    kept = []
    for cnt in contours:
        points = np.asarray(cnt).reshape(-1, 2)
        spread = np.sqrt(np.mean(np.var(points, axis=0)))
        if spread > threshold:
            kept.append(cnt)
    return kept


In [5]:
# GrabCut from selected rect
def GrabCut(img:np.ndarray, rect:tuple, iterations:int = 10) -> np.ndarray:
    """Segment ``img`` with GrabCut seeded by ``rect`` and mark the rest pink.

    Works on images of any size; the marker colour is applied with a single
    vectorised assignment rather than a per-pixel Python loop.

    Args:
        img: 3-channel image as loaded by OpenCV.
        rect: ``(x, y, w, h)`` region used to initialise GrabCut.
        iterations: Number of GrabCut iterations.

    Returns:
        A new image in which every pixel that is (0, 0, 0) after masking is
        replaced by the marker colour (200, 0, 255). ``img`` is not modified.
    """
    mask = np.zeros(img.shape[:2], np.uint8)
    bgdModel = np.zeros((1, 65), np.float64)
    fgdModel = np.zeros((1, 65), np.float64)
    cv2.grabCut(img, mask, rect, bgdModel, fgdModel, iterations, cv2.GC_INIT_WITH_RECT)

    # Mask labels 0 and 2 are zeroed out; everything else is kept.
    keep = np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8')
    img = img * keep[:, :, np.newaxis]

    # Every fully black pixel becomes the marker colour. This also catches
    # kept pixels that were pure black to begin with, as before.
    img[np.all(img == 0, axis=2)] = (200, 0, 255)
    return img

def GetNonPink(grey:np.ndarray) -> np.array:
    """Collect, in row-major order, every value of ``grey`` that is not 99.

    99 is the grey value this notebook uses for the pink marker colour.
    """
    PINK = 99
    return [value for row in grey for value in row if value != PINK]

def GetMiddle(grey:np.ndarray, area:int = 50) -> np.array:
    """Collect the non-marker values from the central square of ``grey``.

    The square is centred on the image's own midpoint (row ``height // 2``,
    column ``width // 2``) and clipped to the image bounds, so it works for
    any image size. For a 200x200 image this is the window around (100, 100).

    Args:
        grey: 2-D greyscale image.
        area: Side length of the sampled square. ``area // 2`` pixels are
            taken on each side of the centre, so an odd value covers
            ``area - 1`` pixels.

    Returns:
        A list, in row-major order, of the values in the window that differ
        from 99 (the grey value of the pink marker colour).
    """
    PINK = 99
    half = area // 2
    height, width = grey.shape[0], grey.shape[1]
    centre_row, centre_col = height // 2, width // 2
    rows = range(max(0, centre_row - half), min(height, centre_row + half))
    cols = range(max(0, centre_col - half), min(width, centre_col + half))
    return [grey[i][j] for i in rows for j in cols if grey[i][j] != PINK]

def RemoveBackground(img:np.ndarray) -> np.ndarray:
    """Paint pixels whose brightness is far from the centre's with marker pink.

    The accepted brightness band is ``mean(mid) +/- (std(all) + std(mid))``,
    where ``mid`` holds the non-marker grey values in the centre of the image
    and ``all`` holds every non-marker grey value. Pixels already carrying
    the marker (grey value 99) are left alone.

    Args:
        img: 3-channel BGR image. It is modified in place.

    Returns:
        The same ``img`` array, with out-of-band pixels set to (200, 0, 255).
    """
    PINK_GREY = 99
    PINK_BGR = (200, 0, 255)

    grey = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    arr = GetNonPink(grey)
    # Nothing but marker pixels: there is no band to compute, and averaging
    # an empty list would only produce NaN and a runtime warning.
    if len(arr) == 0:
        return img

    mid = GetMiddle(grey)
    # Fall back to the whole image when the centre holds only marker pixels.
    if len(mid) == 0:
        mid = arr

    centre = np.mean(mid)
    margin = np.std(arr) + (1 * np.std(mid))
    lt = centre - margin
    ut = centre + margin

    # Vectorised over the whole image, whatever its size.
    out_of_band = (grey != PINK_GREY) & ((grey > ut) | (grey < lt))
    img[out_of_band] = PINK_BGR
    return img

**MAIN**

*ideas*
- import the image
- remove whitespace
- remove text
- compute contours, using a threshold based on the std
- grabcut

*later*
- remove the head (kepala) after the whitespace step

In [6]:
# Sample image used to try the pipeline on a single file.
# NOTE(review): `id` shadows the Python builtin of the same name.
id = 888
path = "data/test/test/"
src = "{}{}.jpg".format(path, id)

def Process(src:str) -> np.ndarray:
    """Run the full pipeline on the image at ``src`` and return the result.

    Steps: crop away the white border, locate the subject from filtered Canny
    contours, segment it with one GrabCut iteration, then paint pixels whose
    brightness is out of band with the marker colour.
    """
    # Pass 1: grey + blurred copy, used only to locate the regions of interest.
    grey = Init(src, grey=True, blur=True)
    content_rect = RectWhiteSpace(grey)
    grey = CropRect(grey, content_rect)

    # Canny thresholds follow the image contrast: std and twice that, capped at 255.
    low = min(255, np.std(grey) * 1)
    high = min(255, 2 * low)
    edge_map = cv2.Canny(grey, low, high)
    subject_rect = GetBiggestRect(ContFilter(Cont(edge_map), 20))

    # Pass 2: colour copy cropped the same way, then segmented.
    colour = CropRect(Init(src), content_rect)
    segmented = GrabCut(colour, subject_rect, 1)
    return RemoveBackground(segmented)

# Run the pipeline on the sample image and preview the result in a window
# (Display blocks until a key is pressed).
img = Process(src)
Display(img)

In [10]:
# Batch-process a folder of images with Process() and save the results as JPEGs.
count = 0
error = []

path = "data/train/train/"
destination = ("processed_train/")

# Training-set pass (currently disabled). Uncomment to regenerate
# processed_train/.
# os.makedirs(destination, exist_ok=True)
# for f in os.listdir(path):
#     count += 1
#     try:
#         src = os.path.join(path, f)
#         ret = Process(src)
#         out_path = os.path.join(destination, os.path.splitext(f)[0] + ".jpg")
#         if not cv2.imwrite(out_path, ret):
#             raise OSError(f"Could not write {out_path}")
#     except Exception:
#         error.append(f)
# print(f"train Count: {count} | Error: {len(error)}, {error}\n")

#############################################

path2 = "data/test/test/"
destination = ("processed_test/")

# cv2.imwrite does not create missing folders; it just returns False.
os.makedirs(destination, exist_ok=True)

for f in os.listdir(path2):
    count += 1
    try:
        src = os.path.join(path2, f)
        ret = Process(src)
        # splitext keeps dots inside the file name ("a.b.png" -> "a.b.jpg").
        out_path = os.path.join(destination, os.path.splitext(f)[0] + ".jpg")
        if not cv2.imwrite(out_path, ret):
            raise OSError(f"Could not write {out_path}")
    except Exception:
        # Record the failing file and carry on; KeyboardInterrupt still stops the run.
        error.append(f)
print(f"test Count: {count} | Error: {len(error)}, {error}\n")


train Count: 334 | Error: 0, []



In [12]:
# Convert every processed training image to greyscale and save it under the
# same file name in grey_train/.
# NOTE(review): processed_train/ is only produced by the disabled training
# loop in the batch cell -- confirm it exists before running this cell.
path3 = "processed_train/"
dest = "grey_train/"

# cv2.imwrite does not create missing folders; it just returns False.
os.makedirs(dest, exist_ok=True)

for f in os.listdir(path3):
    img = Init(os.path.join(path3, f), grey=True, blur=False)
    target = os.path.join(dest, f)
    if not cv2.imwrite(target, img):
        raise OSError(f"Could not write {target}")