# Imports

In [None]:
import os
import re
import json
import itertools
import numpy as np
import cv2

# Setup

In [None]:
# Input root: every directory below it is treated as one experiment holding
# annotation JSON files and the source images they refer to.
DATA_DIR = '../data'
# Per-sample results are written to OUTPUT_DIR/<experiment>/<sample>/.
OUTPUT_DIR = '../processed'
# Flat directory that receives a copy of every cropped plant mask.
MASK_DIR = '../masks'
# Flat directory that receives a copy of every tile.
TILE_DIR = '../tiles'
# File extensions, without the leading dot, of annotation files and images.
JSON_EXT = 'json'
IMAGE_EXT = 'JPG'
# Only annotations whose class equals this label are processed.
PLANT_TYPE = 'ERFA'
# Side length, in pixels, of the square tiles that are written.
TILE_SIZE = 256
# Fraction of TILE_SIZE by which neighbouring tiles overlap.
TILE_OVERLAP = 0.25

In [None]:
# Map each experiment directory name to the annotation files found directly in
# it (file names with the extension stripped). DATA_DIR itself is not an
# experiment, so it is left out.
experiment_sample_names = {
    os.path.basename(experiment_path): [
        stem
        for stem, suffix in (os.path.splitext(file_name) for file_name in file_names)
        if suffix == f'.{JSON_EXT}'
    ]
    for experiment_path, _subdirs, file_names in os.walk(DATA_DIR)
    if experiment_path != DATA_DIR
}

# Data Processing

In [None]:
def get_sample_image(experiment_name, sample_name):
    """Load the source image that an annotation file belongs to.

    A sample name may end in a counter suffix such as ``' (2)'``; the image
    file name is the part of the sample name before that suffix.

    Args:
        experiment_name: Name of the experiment directory under DATA_DIR.
        sample_name: Annotation file name without its extension.

    Returns:
        Whatever ``cv2.imread`` returns for the image path. OpenCV signals a
        missing or unreadable file by returning ``None`` instead of raising,
        so callers must check for it.
    """
    # `\d+` instead of `\d`: a counter of 10 or more must be stripped as well,
    # otherwise the lookup targets a file such as 'name (10).JPG'.
    image_name = re.split(r' \(\d+\)', sample_name, maxsplit=1)[0]
    return cv2.imread(f'{DATA_DIR}/{experiment_name}/{image_name}.{IMAGE_EXT}')

def write_image(image, path):
    """Write `image` to `path` and print a message if OpenCV reports failure.

    Args:
        image: Image array handed to ``cv2.imwrite``.
        path: Destination file path; the extension selects the format.
    """
    if cv2.imwrite(path, image):
        return
    print(f'  Failed to write: {path}')

def get_sample_data(experiment_name, sample_name):
    """Read and parse the annotation JSON of one sample.

    Args:
        experiment_name: Name of the experiment directory under DATA_DIR.
        sample_name: Annotation file name without its extension.

    Returns:
        The decoded JSON document.
    """
    annotation_path = f'{DATA_DIR}/{experiment_name}/{sample_name}.{JSON_EXT}'
    with open(annotation_path) as annotation_file:
        return json.load(annotation_file)

def write_sample_data(output_data, output_dir, sample):
    """Serialise `output_data` as JSON into `<output_dir>/<sample>.<JSON_EXT>`.

    Args:
        output_data: JSON-serialisable object to store.
        output_dir: Existing directory that receives the file.
        sample: File name without extension.
    """
    destination = f'{output_dir}/{sample}.{JSON_EXT}'
    with open(destination, 'w') as out_file:
        json.dump(output_data, out_file)

In [None]:
def add_annotation(image, boundary, bbox, plant_id, color=(0, 0, 255), thickness=8):
    """Draw a plant's outline and its id onto `image`.

    Args:
        image: Image the annotation is drawn on (modified by the OpenCV calls).
        boundary: Polygon(s) in the form accepted by ``cv2.polylines``.
        bbox: ``[x_min, y_min, x_max, y_max]`` of the plant; the label is
            anchored at the integer midpoint of this box.
        plant_id: Identifier rendered as text.
        color: Colour used for both outline and text.
        thickness: Line thickness used for both outline and text.
    """
    left, top, right, bottom = bbox
    label_origin = ((right + left) // 2, (bottom + top) // 2)
    cv2.polylines(image, boundary, isClosed=True, color=color, thickness=thickness)
    cv2.putText(
        image,
        str(plant_id),
        label_origin,
        fontFace=0,
        fontScale=3,
        color=color,
        thickness=thickness,
    )

In [None]:
def get_boundary_polygon(plant_data):
    """Convert a flat ``[x0, y0, x1, y1, ...]`` segment into a polygon array.

    Args:
        plant_data: Annotation dict whose ``'segment'`` entry is the flat
            coordinate list.

    Returns:
        An int32 array of shape ``(1, n_points, 2)``; fractional coordinates
        are cast to int32.
    """
    flat_coords = plant_data['segment']
    point_count = len(flat_coords) // 2
    vertices = np.reshape(flat_coords, (point_count, 2))
    # Wrapped in a list so the result is a one-element batch of polygons.
    return np.int32([vertices])

def get_bounding_box(mask):
    """Return the tight bounding box of the non-zero entries of a 2-D mask.

    Args:
        mask: 2-D array; any non-zero entry counts as foreground.

    Returns:
        ``[x_min, y_min, x_max, y_max]`` as plain Python ints. The maxima are
        the indices of the last occupied column/row, i.e. inclusive.

    Raises:
        IndexError: If the mask has no non-zero entry.
    """
    # Adapted from https://stackoverflow.com/questions/31400769/bounding-box-of-numpy-array
    occupied_cols = np.nonzero(np.any(mask, axis=0))[0]
    occupied_rows = np.nonzero(np.any(mask, axis=1))[0]
    x_min, x_max = occupied_cols[[0, -1]]
    y_min, y_max = occupied_rows[[0, -1]]
    # NumPy integers are converted to plain ints.
    return list(map(int, (x_min, y_min, x_max, y_max)))

def mask_crop_image(image, mask, bbox):
    """Crop `mask` and `image` to `bbox` and zero the pixels outside the mask.

    Args:
        image: ``H x W x 3`` image array.
        mask: ``H x W`` array; it is divided by 255 to obtain the per-pixel
            weight, so it is expected to hold 255 inside the plant and 0
            elsewhere.
        bbox: ``[x_min, y_min, x_max, y_max]`` with inclusive maxima, as
            returned by ``get_bounding_box``.

    Returns:
        ``(mask_cropped, masked_cropped)``: the mask restricted to the box and
        the image restricted to the box, multiplied by the mask weight (a
        float array).
    """
    x_min, y_min, x_max, y_max = bbox
    # The bbox maxima are inclusive pixel indices while slice ends are
    # exclusive; without the +1 the last row and column of the plant are cut
    # off and a one-pixel-wide region yields an empty crop.
    rows = slice(y_min, y_max + 1)
    cols = slice(x_min, x_max + 1)
    mask_cropped = mask[rows, cols]
    # A trailing axis lets the single-channel weight broadcast over the three
    # colour channels.
    weight = mask_cropped[:, :, np.newaxis] / 255
    masked_cropped = image[rows, cols, :] * weight
    return mask_cropped, masked_cropped

In [None]:
def subdivide_region(region, size, overlap):
    """Split a rectangle into overlapping tiles of at most `size` x `size`.

    Tiles are laid out on a regular grid whose step is `size` minus the
    overlap; one extra tile per axis is placed flush with the far edge so the
    whole region is covered. Tiles are clamped to the region, so a region
    smaller than `size` yields a single tile equal to the region.

    Args:
        region: ``(x_min, y_min, x_max, y_max)``.
        size: Tile side length.
        overlap: Fraction of `size` shared by neighbouring tiles.

    Returns:
        List of ``(x_min, y_min, x_max, y_max)`` tuples, ordered row by row.
    """
    x_min, y_min, x_max, y_max = region
    stride = size - round(size * overlap)

    def tile_starts(lower, upper):
        # Regular grid positions followed by the edge-aligned final position.
        starts = list(range(lower, upper - size, stride))
        starts.append(upper - size)
        return starts

    row_starts = tile_starts(y_min, y_max)
    col_starts = tile_starts(x_min, x_max)
    return [
        (
            max(x_min, left),
            max(y_min, top),
            min(x_max, left + size),
            min(y_max, top + size),
        )
        for top, left in itertools.product(row_starts, col_starts)
    ]

def write_tiles(image, bbox, output_dir, plant_name, sample_name):
    """Cut a cropped plant image into fixed-size tiles and write each one twice.

    Every tile is a ``TILE_SIZE x TILE_SIZE`` black canvas with the tile's
    content centred on it. It is written to ``<output_dir>/<plant_name>/``
    and, prefixed with the sample and plant name, to the flat ``TILE_DIR``.
    File names carry the tile index and the canvas's top-left corner in
    full-image coordinates.

    Args:
        image: Plant image already cropped to `bbox` (its origin is the
            top-left corner of `bbox`).
        bbox: ``[x_min, y_min, x_max, y_max]`` of the plant in the full image.
        output_dir: Directory that receives the per-plant tile directory.
        plant_name: Name of the per-plant directory and part of the flat name.
        sample_name: Sample name used as prefix of the flat file name.
    """
    plant_dir = f'{output_dir}/{plant_name}'
    # exist_ok replaces the racy exists()-then-create check. TILE_DIR is
    # created here as well because no other visible code creates it, and
    # writes into a missing directory fail.
    for directory in (plant_dir, TILE_DIR):
        os.makedirs(directory, exist_ok=True)
    x_min, y_min, _, _ = bbox
    subregions = subdivide_region(bbox, TILE_SIZE, TILE_OVERLAP)
    for i, (x_min_sub, y_min_sub, x_max_sub, y_max_sub) in enumerate(subregions):
        # Shift from full-image coordinates to coordinates of the cropped image.
        x_min_image = x_min_sub - x_min
        y_min_image = y_min_sub - y_min
        x_max_image = x_max_sub - x_min
        y_max_image = y_max_sub - y_min
        content = image[y_min_image:y_max_image, x_min_image:x_max_image]
        content_w = x_max_image - x_min_image
        content_h = y_max_image - y_min_image
        # Subregions can be smaller than TILE_SIZE (small plants); centre them
        # on the canvas and leave the remainder black.
        content_x = round((TILE_SIZE - content_w) / 2)
        content_y = round((TILE_SIZE - content_h) / 2)
        tile = np.zeros((TILE_SIZE, TILE_SIZE, 3))
        tile[content_y:content_y + content_h, content_x:content_x + content_w] = content
        tile_name = f'{i}_{x_min_sub - content_x}_{y_min_sub - content_y}.jpg'
        write_image(tile, f'{plant_dir}/{tile_name}')
        write_image(tile, f'{TILE_DIR}/{sample_name}__{plant_name}_{tile_name}')

In [None]:
def process_samples(experiment_name, sample_names):
    """Extract masks, masked crops and tiles for every PLANT_TYPE annotation.

    For each sample that lists PLANT_TYPE among its classes, the following is
    written below ``OUTPUT_DIR/<experiment_name>/<sample_name>/``:

    * ``masks/``: cropped mask of each plant (also copied to ``MASK_DIR``)
    * ``masked/``: cropped image of each plant with the background zeroed
    * ``tiles/``: tiles of each masked crop
    * ``<sample_name>.json``: area and bounding box per plant
    * ``<sample_name>.jpg``: the source image with outlines and ids drawn on it

    Plant ids are positions in the sample's ``'labels'`` list, counting
    annotations of other classes too.

    Args:
        experiment_name: Name of the experiment directory under DATA_DIR.
        sample_names: Annotation file names (without extension) to process.
    """
    print(f'Processing experiment {experiment_name}...')
    for sample_name in sample_names:
        sample_data = get_sample_data(experiment_name, sample_name)
        if PLANT_TYPE not in sample_data['classes']:
            print(f'  No {PLANT_TYPE} in {sample_name}; skipping')
            continue
        sample_image = get_sample_image(experiment_name, sample_name)
        if sample_image is None:
            # An unreadable image comes back as None; skip this sample instead
            # of aborting the whole run with an AttributeError further down.
            print(f'  Could not read image for {sample_name}; skipping')
            continue
        output_dir = f'{OUTPUT_DIR}/{experiment_name}/{sample_name}'
        masks_dir = f'{output_dir}/masks'
        masked_dir = f'{output_dir}/masked'
        tiles_dir = f'{output_dir}/tiles'
        # MASK_DIR is included because masks are copied there below and no
        # other visible code creates it.
        for path in (output_dir, masks_dir, masked_dir, tiles_dir, MASK_DIR):
            os.makedirs(path, exist_ok=True)
        annotated_image = sample_image.copy()
        sample_image_dims = sample_image.shape[:2]
        output_data = {}
        for plant_id, plant_data in enumerate(sample_data['labels']):
            if plant_data['class'] != PLANT_TYPE:
                continue
            boundary = get_boundary_polygon(plant_data)
            # Rasterise the outline into a full-size mask (255 inside, 0 outside).
            mask = cv2.fillPoly(np.zeros(sample_image_dims), boundary, color=255)
            bbox = get_bounding_box(mask)
            x, y, _, _ = bbox
            # Cast to a plain int so the value is always JSON serialisable.
            area = int(np.count_nonzero(mask))
            output_data[plant_id] = {'area': area, 'bbox': bbox}
            mask_cropped, masked_cropped = mask_crop_image(sample_image, mask, bbox)
            plant_name = f'{plant_id}_{x}_{y}'
            write_image(mask_cropped, f'{masks_dir}/{plant_name}.png')
            write_image(mask_cropped, f'{MASK_DIR}/{sample_name}__{plant_name}.png')
            write_image(masked_cropped, f'{masked_dir}/{plant_name}.jpg')
            write_tiles(masked_cropped, bbox, tiles_dir, plant_name, sample_name)
            add_annotation(annotated_image, boundary, bbox, plant_id)
        write_sample_data(output_data, output_dir, sample_name)
        write_image(annotated_image, f'{output_dir}/{sample_name}.jpg')

In [None]:
# Run the pipeline once per experiment directory discovered under DATA_DIR.
for experiment_name, sample_names in experiment_sample_names.items():
    process_samples(experiment_name=experiment_name, sample_names=sample_names)