# Preparing the dataset
Objective: find the affected area in each image, then crop or pad the images so that they all have the same size.

In [92]:
import cv2 as cv
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import os 


def save_image(name, img):
    """Write ``img`` into the processed-images folder as ``<name>_2.jpg``.

    The path is assembled with ``os.path.join`` instead of a backslash-laden
    string literal: the old literal relied on invalid escape sequences
    (``\\d``, ``\\I``, ``\\T``, ``\\s``) being kept verbatim and only resolved
    to a real folder on Windows. On Windows the resulting path is unchanged.

    Args:
        name: File-name stem (no extension) of the image to write.
        img: Image array handed to ``cv.imwrite``.
    """
    out_dir = os.path.join(
        '..', 'datasets', 'ISIC2017', 'Training_data_processed', 'successfully_cutted'
    )
    cv.imwrite(os.path.join(out_dir, '{}_2.jpg'.format(name)), img)

def read_image(name):
    """Load ``<name>.jpg`` from the ISIC-2017 training-data folder.

    The path is assembled with ``os.path.join`` instead of a backslash-laden
    string literal: the old literal relied on invalid escape sequences being
    kept verbatim and only resolved to a real folder on Windows. On Windows
    the resulting path is unchanged.

    Args:
        name: File-name stem (no extension) of the image to read.

    Returns:
        Whatever ``cv.imread`` returns for that path (OpenCV documents this
        as an empty result / ``None`` when the file cannot be read).
    """
    source_dir = os.path.join('..', 'datasets', 'ISIC2017', 'ISIC-2017_Training_Data')
    return cv.imread(os.path.join(source_dir, '{}.jpg'.format(name)))




Obtain the names of all images

In [93]:
# Folder whose file names provide the image identifiers to process.
# Built with os.path.join so that no invalid backslash escapes are embedded
# in a string literal and the separator matches the host platform.
PROCESSED_DIR = os.path.join(
    '..', 'datasets', 'ISIC2017', 'Training_data_processed', 'successfully_cutted'
)

# Any file whose name contains one of these fragments is skipped.
EXCLUDED_MARKERS = ('superpixels', 'Training_Data_metadata', '_final_2', 'cut_coords.csv')

all_files = os.listdir(PROCESSED_DIR)

# Keep the part of each remaining file name that precedes '_final'
# (the whole name is kept when '_final' does not occur in it).
all_images = [
    file.split('_final')[0]
    for file in all_files
    if not any(marker in file for marker in EXCLUDED_MARKERS)
]

all_images


['ISIC_0000000',
 'ISIC_0000001',
 'ISIC_0000002',
 'ISIC_0000003',
 'ISIC_0000004',
 'ISIC_0000006',
 'ISIC_0000007',
 'ISIC_0000008',
 'ISIC_0000009',
 'ISIC_0000010',
 'ISIC_0000011',
 'ISIC_0000012',
 'ISIC_0000013',
 'ISIC_0000014',
 'ISIC_0000015',
 'ISIC_0000016',
 'ISIC_0000017',
 'ISIC_0000018',
 'ISIC_0000019',
 'ISIC_0000020',
 'ISIC_0000022',
 'ISIC_0000023',
 'ISIC_0000024',
 'ISIC_0000025',
 'ISIC_0000027',
 'ISIC_0000028',
 'ISIC_0000029',
 'ISIC_0000030',
 'ISIC_0000034',
 'ISIC_0000035',
 'ISIC_0000037',
 'ISIC_0000038',
 'ISIC_0000039',
 'ISIC_0000040',
 'ISIC_0000041',
 'ISIC_0000042',
 'ISIC_0000043',
 'ISIC_0000044',
 'ISIC_0000045',
 'ISIC_0000046',
 'ISIC_0000047',
 'ISIC_0000048',
 'ISIC_0000049',
 'ISIC_0000050',
 'ISIC_0000051',
 'ISIC_0000052',
 'ISIC_0000053',
 'ISIC_0000054',
 'ISIC_0000055',
 'ISIC_0000056',
 'ISIC_0000057',
 'ISIC_0000059',
 'ISIC_0000061',
 'ISIC_0000062',
 'ISIC_0000063',
 'ISIC_0000064',
 'ISIC_0000065',
 'ISIC_0000066',
 'ISIC_0000067

Obtain the thresholded image for the grayscale image and for the blue channel

In [94]:
def threshold_image(img):
    """Binarise an image with an inverted Otsu threshold and erode the result.

    Pipeline: 5x5 Gaussian blur, inverted binary threshold whose level is
    picked by Otsu's method, then one erosion pass with a 10x10 kernel of
    ones.

    Args:
        img: Image to binarise (in this notebook: the grayscale image or
            the blue channel).

    Returns:
        The eroded binary image produced by ``cv.erode``.
    """
    smoothed = cv.GaussianBlur(img, (5, 5), 0)
    _, binary = cv.threshold(
        smoothed, 0, 255, cv.THRESH_BINARY_INV + cv.THRESH_OTSU
    )
    structuring_element = np.ones((10, 10), np.uint8)
    return cv.erode(binary, structuring_element, iterations=1)

Obtain the threshold image to highlight the black corners

In [95]:
def highlight_black_corners(img):
    """Build a binary mask separating dark regions from the rest of the image.

    The image is blurred (5x5 Gaussian), a 5-pixel frame along all four
    edges is forced to 0, and a fixed binary threshold at 15 is applied, so
    pixels brighter than 15 become 255 and everything else becomes 0.

    Args:
        img: Two-dimensional (single-channel) image; its shape is unpacked
            into exactly two values.

    Returns:
        The thresholded mask.
    """
    blurred = cv.GaussianBlur(img, (5, 5), 0)
    height, width = blurred.shape

    # Blank out the outermost five rows and columns on every side.
    for offset in range(5):
        last_col = width - offset - 1
        last_row = height - offset - 1
        blurred[offset, :] = 0
        blurred[:, offset] = 0
        blurred[:, last_col] = 0
        blurred[last_row, :] = 0

    _, mask = cv.threshold(blurred, 15, 255, cv.THRESH_BINARY)
    return mask

Remove black corners remaining from the lens of the camera used to take the lesion image 

In [96]:
def remove_corners(img, patch_size=10):
    """Erase white regions that touch any of the four image corners.

    Every pixel inside a ``patch_size`` x ``patch_size`` square at each corner
    is inspected; when it is white (>= 255) the connected region it belongs
    to is flood-filled with 0.

    Fixes over the previous version:
      * the bottom/right patches were built with ``range(n - 11, n - 1)``,
        which skipped the outermost row/column (and therefore the actual
        corner pixel); all four patches now reach the image border;
      * patches are clamped to the image size, so images smaller than the
        patch no longer produce negative seed coordinates;
      * the loop variables no longer shadow the image dimensions.

    Args:
        img: Two-dimensional binary image. It is modified in place.
        patch_size: Side length, in pixels, of the square inspected at each
            corner. Defaults to 10, the size used previously.

    Returns:
        The same ``img`` object, after flood filling.
    """
    height, width = img.shape

    # Leading and trailing spans along each axis; their four combinations
    # are the top-left, top-right, bottom-left and bottom-right patches.
    row_spans = (
        range(0, min(patch_size, height)),
        range(max(height - patch_size, 0), height),
    )
    col_spans = (
        range(0, min(patch_size, width)),
        range(max(width - patch_size, 0), width),
    )

    for rows in row_spans:
        for cols in col_spans:
            for row in rows:
                for col in cols:
                    if img[row, col] >= 255:
                        # floodFill takes the seed as (x, y) = (col, row).
                        cv.floodFill(img, None, (col, row), 0)
    return img

Add gray and blue images

In [97]:
def add_gray_and_blue(img_1, img_2):
    """Combine two masks into one binary (0 / 255) mask.

    The pixel-wise sum is computed in a wider integer type and every pixel
    whose sum exceeds 125 becomes 255; all others become 0.

    Fixes over the previous version:
      * the sum was taken in the input dtype, so with uint8 images it wrapped
        around (e.g. 200 + 100 -> 44) and bright pixels could end up as 0;
      * a sum of exactly 125 was matched by neither comparison and was left
        as 125 instead of being binarised.
    For strictly binary 0/255 uint8 inputs the output is unchanged.

    Args:
        img_1: First mask (integer NumPy array).
        img_2: Second mask, same shape as ``img_1``.

    Returns:
        New array with values 0 or 255, in the dtype NumPy would use for
        ``img_1 + img_2`` (uint8 for uint8 inputs). Inputs are not modified.
    """
    # Widen before adding so that 255 + 255 cannot overflow.
    total = img_1.astype(np.int32) + img_2.astype(np.int32)
    combined = np.where(total > 125, 255, 0)
    return combined.astype(np.result_type(img_1, img_2))
    

Remove small blobs (contour area of 5000 or less) from the image

In [98]:
def remove_minimal_blobs(img):
    """Keep only the large blobs of a binary image and dilate the result.

    Contours are extracted from ``img``; each contour whose area exceeds
    5000 is painted, filled with 255, onto an initially black canvas of the
    same shape and dtype. The canvas is then dilated once with a 10x10
    kernel of ones.

    Args:
        img: Binary image passed to ``cv.findContours``.

    Returns:
        The dilated image containing only the large blobs.
    """
    found, _ = cv.findContours(img, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)
    large_blobs = [blob for blob in found if cv.contourArea(blob) > 5000]

    canvas = np.zeros_like(img)
    # Fill one contour per call: a single fillPoly call with several
    # polygons would apply its fill rule across overlapping contours.
    for blob in large_blobs:
        cv.fillPoly(canvas, [blob], 255)

    return cv.dilate(canvas, np.ones((10, 10), np.uint8), iterations=1)

Get coordinates of rectangle around ROI

In [99]:
def get_coordinates(img, gap):
    """Return the padded bounding box of everything found in a binary image.

    The box encloses every contour point of ``img``, is enlarged by ``gap``
    pixels on each side and is clamped to the image. The extremes are taken
    with vectorised NumPy min/max instead of a Python loop over every
    contour pixel; the values are the same for integer ``gap``.

    Args:
        img: Two-dimensional binary image passed to ``cv.findContours``.
        gap: Margin, in pixels, added around the box (an integer in this
            notebook).

    Returns:
        ``[]`` when no contour is found; otherwise ``[t, b, r, l]`` where

        * ``t`` is the largest row index + ``gap``, capped at the height,
        * ``b`` is the smallest row index - ``gap``, floored at 0,
        * ``r`` is the largest column index + ``gap``, capped at the width,
        * ``l`` is the smallest column index - ``gap``, floored at 0.

        Note that ``t`` is the numerically larger row bound, i.e. the lower
        edge of the box in image coordinates.
    """
    contours, _ = cv.findContours(img, cv.RETR_TREE, cv.CHAIN_APPROX_NONE)

    if len(contours) == 0:
        return []

    height, width = img.shape

    # Each contour is an (n_points, 1, 2) array of (x, y) pairs; flatten all
    # of them into a single (N, 2) table.
    points = np.concatenate(contours).reshape(-1, 2)
    xs = points[:, 0]
    ys = points[:, 1]

    # Plain ints keep the returned list free of NumPy scalar types.
    t, b = int(ys.max()), int(ys.min())
    r, l = int(xs.max()), int(xs.min())

    return [
        min(t + gap, height),
        max(b - gap, 0),
        min(r + gap, width),
        max(l - gap, 0),
    ]

Draw rectangle based on found coordinates

In [100]:
def draw_rectangle(img, coords):
    """Draw the bounding rectangle described by ``coords`` onto ``img``.

    Consistent with ``crop_image`` and ``get_max_coords``, a ``coords`` value
    that does not hold exactly four entries (for example the ``[]`` returned
    by ``get_coordinates`` when nothing is found) leaves the image untouched
    instead of raising on unpacking.

    Args:
        img: Image to draw on; ``cv.rectangle`` draws directly onto it.
        coords: ``[t, b, r, l]`` as produced by ``get_coordinates``.

    Returns:
        ``img`` unchanged when ``coords`` is not a 4-element sequence,
        otherwise the result of ``cv.rectangle`` (colour ``(255, 0, 0)``,
        thickness 3).
    """
    if len(coords) != 4:
        return img

    # NOTE(review): cast to built-in int as a precaution; some OpenCV builds
    # reportedly reject NumPy scalar types inside point tuples.
    t, b, r, l = (int(value) for value in coords)

    return cv.rectangle(img, (l, b), (r, t), (255, 0, 0), 3)

Crop the image to the given coordinates

In [101]:
def crop_image(img, coords):
    """Crop ``img`` to the box described by ``coords``.

    Only the first two axes are sliced, so the function works for colour
    images (rows, cols, channels) and also for two-dimensional arrays such as
    grayscale images or masks, which the previous explicit third-axis index
    rejected. For three-dimensional input the result is unchanged.

    Args:
        img: NumPy image array with at least two dimensions.
        coords: ``[t, b, r, l]`` as produced by ``get_coordinates``; rows
            ``b`` (inclusive) to ``t`` (exclusive) and columns ``l``
            (inclusive) to ``r`` (exclusive) are kept.

    Returns:
        ``img`` itself when ``coords`` does not hold exactly four entries,
        otherwise the sliced view.
    """
    if len(coords) != 4:
        return img

    t, b, r, l = coords
    return img[b:t, l:r]

Get maximized coordinates

In [102]:
def get_max_coords(max_coords, coords):
    """Widen the running extreme box so that it also covers ``coords``.

    Both arguments use the ``[t, b, r, l]`` layout of ``get_coordinates``:
    ``t`` and ``r`` are upper bounds (tracked with ``max``), ``b`` and ``l``
    are lower bounds (tracked with ``min``).

    Fix over the previous version: the comparisons for ``t`` and ``b`` were
    inverted, so starting from the seed ``[-1, 9999999, -1, 9999999]`` the
    first two entries could never change.

    Args:
        max_coords: Current accumulated ``[t, b, r, l]``.
        coords: Coordinates of one image, or any sequence that does not have
            exactly four entries (e.g. ``[]``) to leave the accumulator as is.

    Returns:
        ``max_coords`` itself when ``coords`` is not a 4-element sequence,
        otherwise a new ``[t, b, r, l]`` list.
    """
    if len(coords) != 4:
        return max_coords

    t_m, b_m, r_m, l_m = max_coords
    t, b, r, l = coords

    return [max(t_m, t), min(b_m, b), max(r_m, r), min(l_m, l)]

In [103]:
# Running extremes over all images, in [t, b, r, l] order: the upper bounds
# start below any real value, the lower bounds above any real value.
max_coords = [-1, 9999999, -1, 9999999]
# Kept outside the loop so the except block can report the failing image.
img_name = ''

# Column-wise records for the coordinates table; both lists must always have
# the same length, otherwise pd.DataFrame(data) raises.
data = {
    'name': [],
    'coords': []
}

try:
    for img_name in all_images:
        img_rgb = read_image(img_name)

        # Step 1: bounding box of everything brighter than the dark frame
        # (gap 0), used to cut the dark frame away.
        img_black_corners = highlight_black_corners(cv.cvtColor(img_rgb, cv.COLOR_BGR2GRAY))
        coords_1 = get_coordinates(img_black_corners, 0)
        crop = crop_image(img_rgb, coords_1)

        # Step 2: threshold the grayscale image and the blue channel
        # (index 0 of the array returned by cv.imread) separately.
        img_gray = cv.cvtColor(crop, cv.COLOR_BGR2GRAY)
        img_blue = crop[:,:,0]

        img_thresh_gray = threshold_image(img_gray)
        img_thresh_blue = threshold_image(img_blue)
        
        # save_image(img_name+'_thresh_gray.jpg', img_thresh_gray)
        # save_image(img_name+'_thresh_blue.jpg', img_thresh_blue)

        # Step 3: drop white regions attached to the image corners.
        img_gray_no_corners = remove_corners(img_thresh_gray)
        img_blue_no_corners = remove_corners(img_thresh_blue)

        # save_image(img_name+'_gray_no_corners.jpg', img_gray_no_corners)
        # save_image(img_name+'_blue_no_corners.jpg', img_blue_no_corners)

        # Step 4: merge both masks and keep only the large blobs.
        img_sum = add_gray_and_blue(img_gray_no_corners, img_blue_no_corners)
        img_clean = remove_minimal_blobs(img_sum)

        # Step 5: box around what is left, with a 100-pixel margin.
        coords = get_coordinates(img_clean, 100)
        
        img_final = crop_image(crop, coords)
        # print(coords, img_final.shape)
        max_coords = get_max_coords(max_coords, coords)

        # Record name and coordinates together. The name used to be appended
        # at the top of the iteration, so an exception in any step above left
        # 'name' one entry longer than 'coords' and building the DataFrame
        # from `data` failed afterwards.
        # NOTE(review): when no blob is found the row falls back to coords_1,
        # which is relative to the uncropped image, whereas coords is
        # relative to `crop` - confirm this is what consumers expect.
        data['name'].append(img_name)
        data['coords'].append(' '.join(map(str,coords if len(coords) > 0 else coords_1)))

        save_image(img_name+'_final', img_final)
except Exception as e: 
    # Processing stops at the first failing image; report which one it was.
    print(img_name)
    print(e)

# Final summary row. NOTE(review): this row is comma-separated while the
# per-image rows are space-separated; left unchanged to keep the file format.
data['name'].append('max_coords')
data['coords'].append(', '.join(map(str,max_coords)))

In [104]:
# Destination of the coordinates table. Built with os.path.join so that no
# invalid backslash escapes (\d, \I, \T, \s, \c) are embedded in a string
# literal and the separator matches the host platform; on Windows the
# resulting path is unchanged.
CUT_COORDS_CSV = os.path.join(
    '..', 'datasets', 'ISIC2017', 'Training_data_processed', 'successfully_cutted', 'cut_coords.csv'
)

# Drop exact duplicate rows, then write the table (index column included).
pd.DataFrame(data).drop_duplicates().to_csv(CUT_COORDS_CSV)