### The code below creates overlapping tiles so that more images can be generated from the limited training dataset. It is applied to the training images only (only the images with annotations, i.e. datasets 1 and 2, or WSI images #1, 2, 3, 4). This is necessary because boundary effects are inevitable when training any model; by creating overlapping tiles around all four corner quadrants of each image, the model sees each object at least once where it is not at the edge of the image. The steps to create the overlapping tiles are:

1. Let there be one image (size 512 by 512 in this case).
2. Find all four 256 by 256 quadrants of the image.
3. Place each quadrant in the opposite corner of the new overlapping tile (e.g. the top-left quadrant of the original image becomes the bottom-right quadrant of the new overlapping tile, and likewise for the other three quadrants); the remaining three quadrants of the new tile are filled from the neighboring images.
4. Repeat # 1~3 for all images.


In [1]:
import numpy as np
from PIL import Image
Image.MAX_IMAGE_PIXELS = None
from tqdm import tqdm
import os
import pandas as pd
import cv2

In [2]:
# --- Metadata spreadsheets -------------------------------------------------
# Per-tile metadata workbook (read below with pandas).
tile_meta_src = r"\\fatherserverdw\Kevin\hubmap\yolov8_v2\obj_detect_bv_glo.xlsx"
# Per-annotation metadata workbook (read below with pandas).
annotation_meta_src = r"\\fatherserverdw\Kevin\hubmap\annotation_meta.xlsx"

# --- Source image / mask folders -------------------------------------------
# Folder holding the original training tiles (all 1622 images).
original_image_src = r"\\fatherserverdw\Kevin\hubmap\train"
# Folders holding the per-tile segmentation masks, one folder per class.
original_bv_mask_src = r"\\fatherserverdw\Kevin\hubmap\unet++\masks\blood_vessel"
original_glom_mask_src = r"\\fatherserverdw\Kevin\hubmap\unet++\masks\glomerulus"

In [None]:
# ---------------------------------------------------------------------------
# Overlapping-tile generation for WSI #1-4.
#
# For every annotated tile, the original tile and its masks are copied to the
# output folders, and up to four extra 512x512 tiles are written, each one
# shifted by half a tile towards one corner (top_left, top_right, bot_right,
# bot_left).  A shifted tile is only written when all three neighboring tiles
# it needs are present in the same WSI.
# ---------------------------------------------------------------------------
tile_size = 512      # step between neighboring tiles in the 'i'/'j' columns
overlap_size = 256   # half a tile: the size of one quadrant

# Source folders (same locations as the constants defined in the cell above).
image_src = original_image_src
bv_mask_src = original_bv_mask_src      # include dilated or not dilated
glom_mask_src = original_glom_mask_src  # include dilated or not dilated

# Output folders.  cv2.imwrite does not create missing directories (it just
# returns False), so make sure they exist before writing anything.
image_save_dir = r"\\fatherserverdw\Kevin\hubmap\yolov8_v5_nodilate\images"
bv_mask_save_dir = r"\\fatherserverdw\Kevin\hubmap\yolov8_v5_nodilate\masks\blood_vessel"
gl_mask_save_dir = r"\\fatherserverdw\Kevin\hubmap\yolov8_v5_nodilate\masks\glomerulus"
for save_dir in (image_save_dir, bv_mask_save_dir, gl_mask_save_dir):
    os.makedirs(save_dir, exist_ok=True)

# Every overlap tile is stitched from a 2x2 group of adjacent tiles.  For each
# output suffix, list the (di, dj) offsets -- in whole tiles, relative to the
# current tile -- of the group's upper-left, upper-right, lower-left and
# lower-right member.  'i' is treated as the horizontal (x) coordinate and
# 'j' as the vertical (y) coordinate; smaller j means "top".
overlap_layouts = {
    "top_left": ((-1, -1), (0, -1), (-1, 0), (0, 0)),
    "top_right": ((0, -1), (1, -1), (0, 0), (1, 0)),
    "bot_right": ((0, 0), (1, 0), (0, 1), (1, 1)),
    "bot_left": ((-1, 0), (0, 0), (-1, 1), (0, 1)),
}


def _read_or_fail(path, flags):
    """Read an image with OpenCV, raising instead of returning None."""
    array = cv2.imread(path, flags)
    if array is None:
        # cv2.imread signals a missing/unreadable file by returning None,
        # which would otherwise only surface later as an obscure error.
        raise FileNotFoundError("Could not read image file: {}".format(path))
    return array


def _write_tile(path, array):
    """Write an image with OpenCV and report (rather than hide) a failure."""
    if not cv2.imwrite(path, array):
        print("Warning: failed to write {}".format(path))


def _stitch_quadrants(upper_left, upper_right, lower_left, lower_right, half=overlap_size):
    """Build the tile centered on the corner shared by a 2x2 group of tiles.

    Takes the quadrant of each tile that touches the shared corner (e.g. the
    bottom-right quadrant of the upper-left tile) and joins the four quadrants
    in their natural positions.  Works for 2-D masks and 3-D color images.
    """
    top = np.concatenate([upper_left[half:, half:], upper_right[half:, :half]], axis=1)
    bottom = np.concatenate([lower_left[:half, half:], lower_right[:half, :half]], axis=1)
    return np.concatenate([top, bottom], axis=0).astype(np.uint8, copy=False)


# The metadata does not change between WSIs, so read it once up front.
tile_df = pd.read_excel(tile_meta_src)
tmp_df = pd.read_excel(annotation_meta_src)
glom_df = tmp_df[tmp_df["type"] == "glomerulus"]
glom_df_ids = set(glom_df["id"].tolist())

for wsi_idx in tqdm(range(1, 5)):  # for wsi # 1,2,3,4:
    supervised_tile_df = tile_df[tile_df["source_wsi"] == wsi_idx]
    # unique tile ids that belong to this source_wsi
    supervised_tile_ids = [str(x) for x in np.unique(supervised_tile_df.id.tolist())]
    # subset of those tiles that have a glomerulus annotation (differs per wsi)
    glom_ids = [x for x in supervised_tile_ids if x in glom_df_ids]

    # load all the images and masks of this wsi into memory, keyed by tile id
    images = {}
    bv_masks = {}
    gl_masks = {}
    for tile_id in tqdm(supervised_tile_ids, total=len(supervised_tile_ids)):
        images[tile_id] = _read_or_fail(
            os.path.join(image_src, tile_id + ".tif"), cv2.IMREAD_COLOR
        )
        bv_masks[tile_id] = _read_or_fail(
            os.path.join(bv_mask_src, tile_id + ".png"), cv2.IMREAD_GRAYSCALE
        )
    for tile_id in tqdm(glom_ids, total=len(glom_ids)):
        gl_masks[tile_id] = _read_or_fail(
            os.path.join(glom_mask_src, tile_id + ".png"), cv2.IMREAD_GRAYSCALE
        )

    # one row per tile id with its (i, j) position, ordered by position
    supervised_tile_df = (
        supervised_tile_df.sort_values(by=["i", "j"])
        .groupby(["id"])
        .agg({"i": "first", "j": "first"})
        .reset_index()
        .sort_values(by=["i", "j"])
    )

    # Position -> tile id lookup.  This replaces per-neighbor DataFrame
    # filtering followed by `array in list` membership tests, which depended
    # on the truth value of (possibly empty) NumPy arrays.
    tile_id_at = {
        (i, j): tile_id
        for tile_id, i, j in zip(
            supervised_tile_df["id"], supervised_tile_df["i"], supervised_tile_df["j"]
        )
    }

    # iterate over each tile of this wsi
    for row in tqdm(supervised_tile_df.itertuples(index=False), total=supervised_tile_df.shape[0]):
        image_name = row.id
        # x,y coordinate of current image
        x = row.i
        y = row.j

        # save the original image and masks first
        _write_tile(os.path.join(image_save_dir, f"{image_name}.tif"), images[image_name])
        _write_tile(os.path.join(bv_mask_save_dir, f"{image_name}.tif"), bv_masks[image_name])
        if image_name in gl_masks:
            _write_tile(os.path.join(gl_mask_save_dir, f"{image_name}.tif"), gl_masks[image_name])

        overlaps_written = 0
        for suffix, offsets in overlap_layouts.items():
            # ids of the four tiles of the 2x2 group (None where no tile exists)
            group_ids = [
                tile_id_at.get((x + di * tile_size, y + dj * tile_size))
                for di, dj in offsets
            ]
            if any(tile_id is None for tile_id in group_ids):
                continue  # a needed neighbor is missing: skip this corner

            _write_tile(
                os.path.join(image_save_dir, f"{image_name}_{suffix}.tif"),
                _stitch_quadrants(*(images[tile_id] for tile_id in group_ids)),
            )
            _write_tile(
                os.path.join(bv_mask_save_dir, f"{image_name}_{suffix}.tif"),
                _stitch_quadrants(*(bv_masks[tile_id] for tile_id in group_ids)),
            )
            overlaps_written += 1

            # NOTE(review): a glomerulus overlap mask is only written when the
            # current tile AND all three neighbors have a glomerulus mask, so
            # an overlap tile can contain glomerulus pixels without getting a
            # glomerulus mask.  Kept as-is to avoid changing the generated
            # dataset -- confirm whether missing masks should count as empty.
            if all(tile_id in gl_masks for tile_id in group_ids):
                _write_tile(
                    os.path.join(gl_mask_save_dir, f"{image_name}_{suffix}.tif"),
                    _stitch_quadrants(*(gl_masks[tile_id] for tile_id in group_ids)),
                )

        # Report only tiles for which no overlap at all could be built.
        if not overlaps_written:
            print("No neighboring images found for {}, skipping to next image".format(image_name))