In [1]:
# A set of ~200 training images were manually labeled using Anylabeling (https://anylabeling.nrl.ai/docs). This tool was chosen because it supports polygons.
#
# This notebook takes large images (e.g. 4000x6000) with corresponding labels and generates cropped 512x512 images and labels for each observation.
#
# Input images and labels are expected to reside in '../data/local/' and in subdirectories of '../data/web/'.
# Resulting cropped images are saved to '../data/512/'.

In [2]:
import os
import glob
import  json
import cv2
from PIL import Image
from tqdm.auto import tqdm
import numpy as np
from shapely.geometry import Polygon, box
import shutil

In [None]:
# Glob patterns that locate the annotation JSON files to process.
DATASET_LABEL_PATHS = [
    '../data/local/*.json',
    '../data/web/*/*.json',
]

# Edge length, in pixels, of the generated square crops (512 -> 512x512).
SIZE = 512

# Output directory is named after the crop size.
DATASET_PATH_CROPPED = '../data/{}/'.format(SIZE)
print(DATASET_PATH_CROPPED)

# Start every run from an empty output directory.
shutil.rmtree(DATASET_PATH_CROPPED, ignore_errors=True)
os.makedirs(DATASET_PATH_CROPPED, exist_ok=True)

In [4]:
# Enable to output annotated images for manual review.
# Turn this off when generating final training images.
DEBUG = False

def display_cv2_image(cv2_buffer):
    """Show an OpenCV image inline in the notebook.

    The buffer is converted with cv2.COLOR_BGR2RGB, wrapped in a PIL image,
    and handed to the notebook's display() function.
    """
    display(Image.fromarray(cv2.cvtColor(cv2_buffer, cv2.COLOR_BGR2RGB)))

# Get polygon center
def getCenter(points):
    """Return the mean of the polygon's vertices as an (x, y) tuple.

    points: sequence of vertices, each indexable as [x, y].
    Raises ZeroDivisionError when points is empty.
    """
    vertex_count = len(points)
    total_x = sum(vertex[0] for vertex in points)
    total_y = sum(vertex[1] for vertex in points)
    return (total_x / vertex_count, total_y / vertex_count)

# Adjust center point if ROI goes out of bounds.
def recenter(centerPoint, size, h, w):
    """Shift a crop center so a size x size window stays inside an h x w image.

    Parameters:
        centerPoint: (x, y) of the desired crop center.
        size: edge length of the square crop window.
        h, w: image height and width.

    Returns:
        [x, y] of the adjusted center. A coordinate that already fits is
        returned unchanged.

    Each axis is clamped to [size/2, extent - size/2]. When the image is
    smaller than the window on an axis, no center can satisfy both bounds;
    the lower bound wins so the window starts at 0 on that axis. This avoids
    the negative slice start the previous two-step correction could produce
    (it applied the upper-bound shift using bounds computed before the
    lower-bound shift).
    """
    half = size / 2

    def clamp(value, extent):
        # max() keeps the upper limit from dropping below the lower limit
        # when extent < size.
        upper = max(half, extent - half)
        return min(max(value, half), upper)

    return [clamp(centerPoint[0], w), clamp(centerPoint[1], h)]

In [None]:
# Collect the annotation files matched by every configured pattern.
# glob.glob returns matches in arbitrary order, and the output filenames below
# are derived from each file's position in this list, so every pattern's
# matches are sorted to keep the generated names stable between runs.
files = []
for labels_path in DATASET_LABEL_PATHS:
    files.extend(sorted(glob.glob(labels_path)))

# For every labelled shape in every annotation file, write one SIZE x SIZE crop
# centered on that shape (shifted to stay inside the image) together with a JSON
# label file holding all polygons that intersect the crop.

# The crop window in crop-local coordinates is the same for every crop.
roi_box = box(0, 0, SIZE, SIZE)
half_size = SIZE / 2

for file_idx, file in tqdm(enumerate(files), total=len(files)):
    # Context manager closes the annotation file as soon as it is parsed.
    with open(file) as label_file:
        data = json.load(label_file)

    image_path = os.path.join(os.path.dirname(file), data['imagePath'])
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None (it does not raise) when the image cannot be read.
        print("Skipping {}: could not read image {}".format(file, image_path))
        continue
    h, w = image.shape[:2]

    for label_idx, label in enumerate(data['shapes']):
        centerPoint = recenter(getCenter(label['points']), SIZE, h, w)

        # Copy only the crop (not the whole image) so DEBUG drawing below
        # never touches the source image.
        roi = image[
            int(centerPoint[1] - half_size):int(centerPoint[1] + half_size),
            int(centerPoint[0] - half_size):int(centerPoint[0] + half_size)
        ].copy()

        # Save the cropped image
        crop_image_filename = "{}-{}.jpg".format(file_idx, label_idx)
        cv2.imwrite(DATASET_PATH_CROPPED + crop_image_filename, roi)

        # Save adjusted labels for crop, including all of the polygons that intersect this ROI
        shapes = []
        for other_label in data['shapes']:
            # Translate the polygon into crop-local coordinates.
            shifted_points = [
                [
                    point[0] - centerPoint[0] + half_size,
                    point[1] - centerPoint[1] + half_size
                ]
                for point in other_label['points']
            ]

            try:
                uncropped_polygon = Polygon(shifted_points)
                # Rebuild from the exterior ring only (original note: intended
                # to handle the case where polygon segments cross).
                uncropped_polygon = Polygon(uncropped_polygon.exterior.coords)
                clipped_geometry = uncropped_polygon.intersection(roi_box)
            except Exception as e:
                # Intersection may fail if the polygon is invalid. Okay to drop the polygon and continue.
                print(file)
                print(e)
                continue

            # Clipping can split a polygon into several pieces (multi-part
            # result) or leave only a point/line where it merely touches the
            # window. Keep every polygon piece; ignore zero-area leftovers.
            for part in getattr(clipped_geometry, "geoms", [clipped_geometry]):
                if part.geom_type == "Polygon" and not part.is_empty:
                    shapes.append(list(part.exterior.coords))

        # Shallow copy keeps the other top-level fields of the source annotation;
        # the shapes and image fields are replaced for the crop.
        data_copy = dict(data)
        data_copy['shapes'] = [
            {
                "label": "streak",
                "points": clipped_points,
                "shape_type": "polygon"
            }
            for clipped_points in shapes
        ]

        if DEBUG:
            # Draw each clipped outline on the crop and overwrite the saved image.
            for clipped_points in shapes:
                for start, end in zip(clipped_points, clipped_points[1:]):
                    cv2.line(
                        roi,
                        (int(start[0]), int(start[1])),
                        (int(end[0]), int(end[1])),
                        (0, 255, 0),
                        1
                    )
            cv2.imwrite(DATASET_PATH_CROPPED + crop_image_filename, roi)

        data_copy['imagePath'] = crop_image_filename
        data_copy['imageWidth'] = SIZE
        data_copy['imageHeight'] = SIZE

        crop_label_filename = "{}-{}.json".format(file_idx, label_idx)
        # Context manager flushes and closes each label file.
        with open(DATASET_PATH_CROPPED + crop_label_filename, "w") as out_file:
            json.dump(data_copy, out_file, indent=4)