## Crop images and masks

In [18]:
import os
import pathlib as pt
import pandas as pd
import matplotlib.pyplot as plt
import cv2
from tqdm import tqdm

#### Define helper functions

In [2]:
def get_dim_tumor(input_mask, show=False):
    """
    Get the bounding box around the tumor
    
    Args:
        input_mask(numpy array): contains the mask image
        show(bool): whether to display the rectangle on the image or not
        
    """
    
    if len(input_mask.shape) == 3:
        input_mask = cv2.cvtColor(input_mask, cv2.COLOR_BGR2GRAY)
    white_area = cv2.findNonZero(input_mask)
    x, y, w, h = cv2.boundingRect(white_area)
    if show:
        print("[INFO] Bbox:", x, y, w, h)
        display_mask = cv2.cvtColor(input_mask, cv2.COLOR_GRAY2RGB)
        cv2.rectangle(display_mask, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.imwrite("bbox.jpg", display_mask)
        plt.imshow(display_mask)
    return x, y, w, h

In [3]:
def get_center_rect(x, y, w, h ):
    """
    Given a bounding box return its center
    Args:
        x(int): x-coordinate of the top left corner
        y(int): y-coordinate of the top left corner
        w(int): width
        h(int): height
    
    Returns:
        center(float): center of the rectangle
        
    """
    return (x + w * 0.5, y + h * 0.5)

In [4]:
def get_img_orientation(input_img, show=False):
    """
    Determine the breast position in the image: left or right
    
    Args:
        input_img(numpy array): the image scan
        show(bool): whether to display the image or not
        
    """
    
    width = 150
    pos_left = 10
    pos_right = input_img.shape[1] - 10
    
    rect_left = input_img[:, pos_left:width]
    rect_right = input_img[:, (pos_right - width):pos_right]
    
    m_left = rect_left.mean()
    m_right = rect_right.mean()
    
    if show:
        plt.imshow(rect_left, cmap="gray")
        plt.show()
        plt.imshow(rect_right, cmap="gray")
        plt.show()
        print(m_left, m_right)

    orientation = None
    if m_left > m_right:
        orientation = "left"
    else:
        orientation = "right"
        
    return orientation

In [21]:
def get_patches(img, mask, img_name, patch_dim=1024, debug=False):
    """
    Compute the patch enclosing the tumor
    Args:
        img(numpy array): the input image of the breast
        mask(numpy array): the mask of the tumor
        img_name(str): image filename
        patch_dim(int): dimension of the output patch, default 1024
        debug(bool): whether to save an image with the patch and the tumor center drawn on the mask
    
    Returns:
        roi_crop(numpy array): cropped input image
        mask_crop(numpy array): cropped input mask
        
    """
    
    # get bbox of the tumor
    x, y, w, h = get_dim_tumor(mask)
    center = get_center_rect(x, y, w, h )
    
    orientation = get_img_orientation(img)
    
    # construct patch around center
    patch_tl_x = int(center[0] - patch_dim * 0.5)
    patch_tl_y = int(center[1] - patch_dim * 0.5)
    patch_br_x = patch_tl_x + patch_dim
    patch_br_y = patch_tl_y + patch_dim

    if patch_tl_x < 0:
        if orientation == 'right':
            # length in pixels of the borders at each side of the image: top, bottom, left, right 
            left_pad = abs(patch_tl_x)
            img = cv2.copyMakeBorder(img, 0, 0, left_pad, 0, cv2.BORDER_CONSTANT, value=(0, 0, 0))
            mask = cv2.copyMakeBorder(mask, 0, 0, left_pad, 0, cv2.BORDER_CONSTANT, value=(0, 0, 0))
        patch_tl_x = 0
            

    if patch_tl_y < 0:
        patch_tl_y = 0

    if patch_br_x > mask.shape[1]:
        if orientation == 'left':
            right_pad = patch_br_x - mask.shape[1]
            img = cv2.copyMakeBorder(img, 0, 0, 0, right_pad, cv2.BORDER_CONSTANT, value=(0, 0, 0))
            mask = cv2.copyMakeBorder(mask, 0, 0, 0, right_pad, cv2.BORDER_CONSTANT, value=(0, 0, 0))
        patch_br_x = mask.shape[1]

    if patch_br_y > mask.shape[0]:
        patch_br_y = mask.shape[0]

#     print("Crop rect coord", patch_tl_x, patch_tl_y, patch_br_x, patch_br_y)
    if debug == True:
        mask2 = mask.copy()
        img2 = img.copy()
        preview_folder = pt.Path.cwd()/"preview"
        if not preview_folder.exists():
            os.mkdir(preview_folder)
            
        
        cv2.circle(mask2, (int(center[0]), int(center[1])), 10, (0, 0, 255), -1)
        cv2.rectangle(mask2, (patch_tl_x, patch_tl_y), (patch_br_x, patch_br_y), (0, 0, 255), 3)
        mask_path = preview_folder/"mask_{}.jpg".format(img_name)
        cv2.imwrite(str(mask_path), mask2)
        
        cv2.circle(img2, (int(center[0]), int(center[1])), 10, (0, 0, 255), -1)
        cv2.rectangle(img2, (patch_tl_x, patch_tl_y), (patch_br_x, patch_br_y), (0, 0, 255), 3)
        image_path = preview_folder/"img_{}".format(img_name)
        cv2.imwrite(str(image_path), img2)
        print("[INFO] {}: ".format(img_name))
        print("         bbox:", x, y, w, h)
        print("         patch:", patch_tl_x, patch_tl_y, patch_br_x, patch_br_y)
        
    roi_crop = img[patch_tl_y:patch_br_y, patch_tl_x:patch_br_x]
    mask_crop = mask[patch_tl_y:patch_br_y, patch_tl_x:patch_br_x]
    
    return roi_crop, mask_crop

#### Input paths to the data and the patch dimension

In [13]:
DATA_PATH   = pt.Path(r"D:\work\BreastTumorSegmentation\dataset\original_size")
OUTPUT_PATH = pt.Path(r"D:\work\BreastTumorSegmentation\dataset\cropped_size")
IMGS_DIR    = "images"
MASKS_DIR   = "masks"
IMGS_CSV    = DATA_PATH/"images_original.csv"
MASKS_CSV   = DATA_PATH/"masks_original.csv"
PATCH_DIM   = 1024

In [7]:
imgs_df = pd.read_csv(IMGS_CSV)
masks_df = pd.read_csv(MASKS_CSV)

In [22]:
assert len(imgs_df) == len(masks_df), "The number of images should be equal with the number of masks"

nr_removed = 0

with tqdm(desc="Cropping", unit="img", total=len(imgs_df), leave=True) as pbar:
    for img_row, mask_row in zip(imgs_df.iterrows(), masks_df.iterrows()):
        img_row = img_row[1]
        mask_row = mask_row[1]

        img_filename  = pt.Path(img_row['file_path']).name
        mask_filename = pt.Path(mask_row['file_path']).name
        
        assert img_filename == mask_filename
        
        _img = cv2.imread(img_row['file_path'])
        _mask = cv2.imread(mask_row['file_path'])  
        if abs(_img.shape[0] - _mask.shape[0]) > 1 or abs(_img.shape[1] - _mask.shape[1]) > 1:
            # ignore images and masks that do not have the same size
#             print(mask_filename)
            nr_removed += 1
            continue

        roi_crop, mask_crop = get_patches(_img, _mask, img_filename, patch_dim=PATCH_DIM, debug=False)

        roi_path = OUTPUT_PATH/IMGS_DIR/img_filename
        mask_path = OUTPUT_PATH/MASKS_DIR/mask_filename        

        cv2.imwrite(str(roi_path), roi_crop)
        cv2.imwrite(str(mask_path), mask_crop)
        
        pbar.update()

Cropping:   9%|█████▊                                                              | 305/3568 [01:43<16:29,  3.30img/s]

CALC_P_00353_LEFT_CC_M_2.jpg


Cropping:   9%|█████▊                                                              | 306/3568 [01:44<19:51,  2.74img/s]

CALC_P_00353_LEFT_MLO_M_2.jpg


Cropping:  54%|████████████████████████████████████                               | 1921/3568 [10:45<09:12,  2.98img/s]

MASS_P_00059_LEFT_CC_M_1.jpg
MASS_P_00059_LEFT_MLO_M_1.jpg


Cropping:  55%|█████████████████████████████████████                              | 1973/3568 [11:04<10:29,  2.53img/s]

MASS_P_00108_LEFT_CC_B_1.jpg
MASS_P_00108_LEFT_MLO_B_1.jpg


Cropping:  56%|█████████████████████████████████████▌                             | 2002/3568 [11:14<08:45,  2.98img/s]

MASS_P_00145_LEFT_CC_B_1.jpg
MASS_P_00145_LEFT_MLO_B_1.jpg


Cropping:  59%|███████████████████████████████████████▋                           | 2115/3568 [11:53<10:10,  2.38img/s]

MASS_P_00279_LEFT_CC_B_1.jpg


Cropping:  60%|████████████████████████████████████████                           | 2132/3568 [11:58<06:27,  3.71img/s]

MASS_P_00304_LEFT_MLO_B_1.jpg


Cropping:  62%|█████████████████████████████████████████▎                         | 2202/3568 [12:22<08:48,  2.58img/s]

MASS_P_00379_LEFT_MLO_B_1.jpg
MASS_P_00381_LEFT_CC_B_1.jpg
MASS_P_00381_LEFT_MLO_B_1.jpg


Cropping:  62%|█████████████████████████████████████████▎                         | 2203/3568 [12:23<15:12,  1.50img/s]

MASS_P_00384_RIGHT_CC_B_1.jpg
MASS_P_00384_RIGHT_MLO_B_1.jpg


Cropping:  63%|██████████████████████████████████████████▎                        | 2253/3568 [12:40<06:29,  3.37img/s]

MASS_P_00423_RIGHT_CC_M_1.jpg


Cropping:  64%|██████████████████████████████████████████▋                        | 2270/3568 [12:46<06:54,  3.13img/s]

MASS_P_00436_LEFT_CC_B_1.jpg
MASS_P_00436_LEFT_MLO_B_1.jpg


Cropping:  64%|██████████████████████████████████████████▉                        | 2284/3568 [12:51<07:54,  2.71img/s]

MASS_P_00453_LEFT_CC_B_1.jpg
MASS_P_00453_LEFT_MLO_B_1.jpg


Cropping:  70%|██████████████████████████████████████████████▌                    | 2481/3568 [14:02<07:27,  2.43img/s]

MASS_P_00687_LEFT_CC_B_1.jpg
MASS_P_00687_LEFT_MLO_B_1.jpg


Cropping:  70%|██████████████████████████████████████████████▋                    | 2483/3568 [14:03<07:58,  2.27img/s]

MASS_P_00694_RIGHT_CC_B_1.jpg
MASS_P_00694_RIGHT_MLO_B_1.jpg


Cropping:  70%|██████████████████████████████████████████████▋                    | 2487/3568 [14:05<08:04,  2.23img/s]

MASS_P_00699_RIGHT_CC_B_1.jpg
MASS_P_00699_RIGHT_MLO_B_1.jpg


Cropping:  70%|██████████████████████████████████████████████▊                    | 2491/3568 [14:07<09:04,  1.98img/s]

MASS_P_00703_LEFT_CC_B_1.jpg
MASS_P_00703_LEFT_MLO_B_1.jpg


Cropping:  70%|███████████████████████████████████████████████                    | 2506/3568 [14:12<07:15,  2.44img/s]

MASS_P_00715_RIGHT_CC_B_1.jpg
MASS_P_00715_RIGHT_MLO_B_1.jpg


Cropping:  71%|███████████████████████████████████████████████▊                   | 2548/3568 [14:27<06:36,  2.58img/s]

MASS_P_00765_RIGHT_CC_B_1.jpg
MASS_P_00765_RIGHT_MLO_B_1.jpg


Cropping:  73%|████████████████████████████████████████████████▊                  | 2600/3568 [14:43<04:02,  3.99img/s]

MASS_P_00826_LEFT_CC_B_1.jpg
MASS_P_00826_LEFT_MLO_B_1.jpg


Cropping:  73%|█████████████████████████████████████████████████▏                 | 2621/3568 [14:52<06:45,  2.33img/s]

MASS_P_00859_LEFT_CC_B_1.jpg
MASS_P_00859_LEFT_MLO_B_1.jpg


Cropping:  75%|██████████████████████████████████████████████████                 | 2668/3568 [15:08<06:38,  2.26img/s]

MASS_P_00915_RIGHT_CC_M_1.jpg
MASS_P_00915_RIGHT_MLO_M_1.jpg


Cropping:  75%|██████████████████████████████████████████████████▏                | 2672/3568 [15:10<06:30,  2.30img/s]

MASS_P_00922_RIGHT_CC_B_1.jpg
MASS_P_00922_RIGHT_MLO_B_1.jpg


Cropping:  75%|██████████████████████████████████████████████████▏                | 2674/3568 [15:11<06:46,  2.20img/s]

MASS_P_00927_LEFT_MLO_B_1.jpg


Cropping:  75%|██████████████████████████████████████████████████▌                | 2690/3568 [15:16<04:16,  3.42img/s]

MASS_P_00949_LEFT_CC_B_1.jpg
MASS_P_00949_LEFT_MLO_B_1.jpg


Cropping:  78%|████████████████████████████████████████████████████               | 2774/3568 [15:47<04:31,  2.93img/s]

MASS_P_01048_RIGHT_CC_B_1.jpg
MASS_P_01048_RIGHT_MLO_B_1.jpg


Cropping:  79%|█████████████████████████████████████████████████████              | 2828/3568 [16:07<04:24,  2.79img/s]

MASS_P_01115_RIGHT_CC_B_1.jpg
MASS_P_01115_RIGHT_MLO_B_1.jpg


Cropping:  81%|██████████████████████████████████████████████████████▎            | 2893/3568 [16:29<02:58,  3.78img/s]

MASS_P_01182_LEFT_CC_B_1.jpg
MASS_P_01182_LEFT_MLO_B_1.jpg


Cropping:  83%|███████████████████████████████████████████████████████▎           | 2946/3568 [16:48<02:51,  3.62img/s]

MASS_P_01243_LEFT_CC_B_1.jpg
MASS_P_01243_LEFT_MLO_B_1.jpg


Cropping:  86%|█████████████████████████████████████████████████████████▎         | 3053/3568 [17:26<03:43,  2.30img/s]

MASS_P_01363_RIGHT_CC_B_1.jpg
MASS_P_01363_RIGHT_MLO_B_1.jpg


Cropping:  86%|█████████████████████████████████████████████████████████▌         | 3064/3568 [17:30<02:44,  3.07img/s]

MASS_P_01378_RIGHT_CC_B_1.jpg


Cropping:  87%|██████████████████████████████████████████████████████████▎        | 3103/3568 [17:44<02:38,  2.94img/s]

MASS_P_01423_RIGHT_CC_B_1.jpg
MASS_P_01423_RIGHT_MLO_B_1.jpg


Cropping:  88%|███████████████████████████████████████████████████████████        | 3143/3568 [17:58<02:02,  3.46img/s]

MASS_P_01486_RIGHT_CC_B_1.jpg
MASS_P_01486_RIGHT_MLO_B_1.jpg


Cropping:  90%|███████████████████████████████████████████████████████████▉       | 3194/3568 [18:15<02:07,  2.94img/s]

MASS_P_01551_LEFT_CC_B_1.jpg


Cropping:  91%|████████████████████████████████████████████████████████████▋      | 3230/3568 [18:28<01:34,  3.58img/s]

MASS_P_01595_LEFT_CC_B_1.jpg
MASS_P_01595_LEFT_MLO_B_1.jpg


Cropping:  93%|██████████████████████████████████████████████████████████████▌    | 3331/3568 [19:01<01:03,  3.71img/s]

MASS_P_01686_RIGHT_CC_B_1.jpg
MASS_P_01686_RIGHT_MLO_B_1.jpg


Cropping:  94%|██████████████████████████████████████████████████████████████▉    | 3352/3568 [19:10<01:34,  2.28img/s]

MASS_P_01714_RIGHT_CC_B_1.jpg
MASS_P_01714_RIGHT_MLO_B_1.jpg


Cropping:  95%|███████████████████████████████████████████████████████████████▋   | 3389/3568 [19:23<01:14,  2.42img/s]

MASS_P_01757_RIGHT_CC_B_1.jpg
MASS_P_01757_RIGHT_MLO_B_1.jpg


Cropping:  97%|████████████████████████████████████████████████████████████████▊  | 3452/3568 [19:45<00:43,  2.70img/s]

MASS_P_01831_RIGHT_CC_B_1.jpg
MASS_P_01831_RIGHT_MLO_B_1.jpg


Cropping:  98%|█████████████████████████████████████████████████████████████████▍ | 3486/3568 [19:57<00:29,  2.79img/s]

MASS_P_01908_LEFT_CC_M_1.jpg


Cropping:  98%|█████████████████████████████████████████████████████████████████▍ | 3488/3568 [19:57<00:25,  3.16img/s]

MASS_P_01946_RIGHT_MLO_M_1.jpg
MASS_P_01981_RIGHT_CC_M_1.jpg
MASS_P_01981_RIGHT_MLO_M_1.jpg
MASS_P_01983_LEFT_MLO_M_1.jpg
MASS_P_02033_RIGHT_CC_M_1.jpg
MASS_P_02033_RIGHT_MLO_M_1.jpg
MASS_P_02079_RIGHT_CC_M_1.jpg
MASS_P_02079_RIGHT_MLO_M_1.jpg
MASS_P_02092_LEFT_CC_M_1.jpg


Cropping:  98%|█████████████████████████████████████████████████████████████████▍ | 3488/3568 [20:00<00:27,  2.91img/s]

MASS_P_02092_LEFT_MLO_M_1.jpg



