In [1]:
import numpy as np
import cv2
import xml.etree.ElementTree as ET
from imgaug.augmentables.polys import Polygon, PolygonsOnImage
from imgaug.augmentables.segmaps import SegmentationMapsOnImage
from imgaug import augmenters as iaa
import imgaug as ia
from scipy import ndimage
import imageio

import sys
# Print NumPy arrays in full instead of eliding the middle with "...";
# handy when inspecting small masks, but very verbose for large arrays.
np.set_printoptions(threshold=sys.maxsize)

# Seed imgaug's global random state so the augmentations below are reproducible.
ia.seed(1)

In [2]:
def extract_coords(segs):
    """Parse a comma-separated coordinate string into a list of (x, y) points.

    Args:
        segs: Text of the form ``"x1,y1,x2,y2,..."``. ``None`` or a blank
            string means "no points". Whitespace around values and
            leading/trailing commas are tolerated.

    Returns:
        A list of ``(x, y)`` integer tuples, in the order they appear.

    Raises:
        ValueError: If a field is not an integer, or if the number of values
            is odd and therefore cannot be split into X, Y pairs.
    """
    if segs is None:
        return []

    # Drop surrounding whitespace and stray commas so that "1,2,\n" parses
    # the same as "1,2" instead of failing on int('').
    text = segs.strip(" \t\r\n,")
    if not text:
        return []

    values = [int(token) for token in text.split(',')]
    if len(values) % 2 != 0:
        raise ValueError(
            f"Expected an even number of coordinate values, got {len(values)}: {segs!r}"
        )

    # Pair every even-indexed value (X) with the following odd-indexed one (Y).
    return list(zip(values[0::2], values[1::2]))

def make_text_mask_and_kernel_mask(image_path, label_path, target_size):
    """Load an image and rasterize its labelled text polygons into an instance mask.

    The image is resized to ``target_size x target_size``. For every ``<box>``
    element in the XML label file, the polygon described by its ``<segs>``
    child is scaled to the resized image and filled into the mask with the
    value ``box_index + 1`` (``0`` is background). Boxes that are skipped
    leave a gap in the label values.

    Args:
        image_path: Path of the image; read with OpenCV, so channels are BGR.
        label_path: Path of the XML annotation file.
        target_size: Side length, in pixels, of the square output image/mask.

    Returns:
        A tuple ``(image, text_mask, polys)`` where ``image`` is the resized
        image, ``text_mask`` is an ``int32`` array of shape
        ``(target_size, target_size)``, and ``polys`` is a list of polygons,
        each a list of ``[x, y]`` points in the ORIGINAL (unscaled) image
        coordinates.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    image = cv2.imread(image_path, flags=cv2.IMREAD_COLOR)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise FileNotFoundError(
            f"Could not read image (missing or unreadable file): {image_path}"
        )

    height, width = image.shape[:2]
    scale_h = target_size / height
    scale_w = target_size / width

    image = cv2.resize(image, dsize=(target_size, target_size))
    text_mask = np.zeros((target_size, target_size), dtype=np.int32)

    # Parse the XML label file
    root = ET.parse(label_path).getroot()

    polys = []
    for idx, box in enumerate(root.findall(".//box")):
        segs_node = box.find("segs")
        segs_text = segs_node.text if segs_node is not None else None
        if segs_text is None or not segs_text.strip():
            # Box without any polygon points: nothing to draw.
            continue

        try:
            coords = extract_coords(segs_text)
        except (ValueError, IndexError):
            # Malformed coordinate text; skip this box, keep the rest of the file.
            print(f"Found mis-label polygon in the file: {image_path}. Ignoring it!")
            continue
        if not coords:
            continue

        # polys keeps the points as labelled, i.e. before scaling.
        polys.append([list(coord) for coord in coords])

        scaled = [[x * scale_w, y * scale_h] for x, y in coords]
        try:
            pts = np.array(scaled, np.int32).reshape((-1, 1, 2))
            # A label mask must hold exact instance ids, so draw without
            # anti-aliasing (blended edge values would be meaningless ids).
            cv2.fillPoly(text_mask, [pts], color=(idx + 1), lineType=cv2.LINE_8)
        except Exception:
            print(f"Found mis-label polygon in the file: {image_path}. Ignoring it!")
            continue

    return image, text_mask, polys

In [3]:
if __name__ == "__main__":
    # Demo: augment one labelled sample several times and save a comparison grid.
    image_path = "../../data/train_images/0001.jpg"
    label_path = "../../data/train_labels/0001.xml"
    target_size = 640

    image, mask, polys = make_text_mask_and_kernel_mask(image_path=image_path, label_path=label_path, target_size=target_size)

    # The image was loaded with cv2.imread (BGR) but the grid is written with
    # imageio, which treats arrays as RGB. Convert once here so the saved
    # picture does not have its red and blue channels swapped.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Relabel the mask by connected components of its non-zero pixels.
    mask_labeled, _ = ndimage.label(mask)

    segmap = SegmentationMapsOnImage(mask_labeled, shape=mask_labeled.shape)

    seq = iaa.Sequential([
        iaa.Sometimes(0.5, iaa.Affine(scale=(0.9, 1.1))),
        iaa.Sometimes(0.5, iaa.Fliplr(p=1)),
        iaa.Sometimes(0.5, iaa.Affine(rotate=(0, 360))),
        # Crop size follows target_size instead of repeating the literal 640.
        iaa.Sometimes(0.5, iaa.CropToFixedSize(height=target_size, width=target_size))
    ], random_order=True)

    # Augment images and segmaps.
    images_aug = []
    segmaps_aug = []
    for _ in range(5):
        images_aug_i, segmaps_aug_i = seq(image=image, segmentation_maps=segmap)
        print(type(segmaps_aug_i))
        images_aug.append(images_aug_i)
        segmaps_aug.append(segmaps_aug_i)

    # The overlay of the un-augmented sample is identical on every row, so
    # render it once instead of once per augmentation.
    original_overlay = segmap.draw_on_image(image)[0]

    cells = []
    for image_aug, segmap_aug in zip(images_aug, segmaps_aug):
        cells.extend([
            image,                                           # column 1
            original_overlay,                                # column 2
            image_aug,                                       # column 3
            segmap_aug.draw_on_image(image_aug)[0],          # column 4
            segmap_aug.draw(size=image_aug.shape[:2])[0],    # column 5
        ])

    # Convert cells to a grid image and save.
    grid_image = ia.draw_grid(cells, cols=5)
    imageio.imwrite("example_segmaps.jpg", grid_image)

<class 'imgaug.augmentables.segmaps.SegmentationMapsOnImage'>
<class 'imgaug.augmentables.segmaps.SegmentationMapsOnImage'>
<class 'imgaug.augmentables.segmaps.SegmentationMapsOnImage'>
<class 'imgaug.augmentables.segmaps.SegmentationMapsOnImage'>
<class 'imgaug.augmentables.segmaps.SegmentationMapsOnImage'>
