In [1]:
# Augments a set of yolov8 instance segmentation training data exported from Roboflow
# for our work extracting building outlines from scanned fire insurance plans.
# The data consists of images (640 x 640 pixels) in an 'images' folder, each of which have a corresponding
# label txt file in the 'labels' folder. Each label file contains coordinates defining
# a series of polygons to represent the building outlines.

# This script rotates, flips (mirrors), and adjusts the brightness of the images and
# creates updated annotation files for each.

import cv2
import numpy as np
import os
import random

In [2]:
def rotate_image_and_annotations(image, coords, angle):
    ''' Rotates an image as well as the coordinate data
    in its corresponding label file.
    '''
    # Get the image height and width
    h, w = image.shape[:2]
    
    # Calculate the center of the image
    # The // operator returns the largest integer that is less
    # than or equal to the result of the division
    center = (w // 2, h // 2)
    
    # Get the rotation matrix. This is used to rotate the image
    # takes the image center, an angle, and a scale ratio as inputs
    rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
    
    # Perform the rotation on the image. If the rotation results in the creation
    # of blank areas within the image, fill these in with black.
    rotated_image = cv2.warpAffine(image, rotation_matrix, (w, h), borderMode=cv2.BORDER_CONSTANT, borderValue=(0, 0, 0))
    
    # Rotate the coordinates
    # Reshape the lines from the label text file such that the data is organized into
    # two columns (x and y). The -1 means that the number of rows is inferred
    # automatically based on how many coordinate pairs are found.
    coords = np.array(coords).reshape(-1, 2)
    
    # Create an array of ones that has the same number of rows as the 'coords' array.
    ones = np.ones(shape=(len(coords), 1))
    
    # combine the coordinates with the ones to create homogeneous coordinates
    # (this format allows for matrix multiplication)
    coords_homogeneous = np.hstack([coords, ones])

    # Applies the rotation matrix to the transpose of the homogeneous coordinates.
    # The coordinate data is transposed because, in matrix multiplication, the number
    # of columns in the first matrix must match the number of rows in the second.
    # The rotation matrix has 3 columns. Because we have added that column of ones,
    # transposing the coordinates means there will now be three rows.
    rotated_coords = rotation_matrix.dot(coords_homogeneous.T).T

    # Remove coordinates that now fall outside the rotated image boundaries
    if any(coord[0] < 0 or coord[0] >= w or coord[1] < 0 or coord[1] >= h for coord in rotated_coords):
        return rotated_image, None
        
    return rotated_image, rotated_coords

In [3]:
def flip_image_and_annotations(image, coords, flip_code):
    ''' flips (mirrors) the image and its annotation file
    horizontally or vertically '''
    
    flipped_image = cv2.flip(image, flip_code)
    
    # Put the coordinates in a numpy array
    coords = np.array(coords).reshape(-1, 2)
    
    if flip_code == 1:  # Horizontal flip
        # subtract the image width (image.shape[1]) from each x value
        # of the coordinates for a horizontal flip
        coords[:, 0] = image.shape[1] - coords[:, 0]
        
    elif flip_code == 0:  # Vertical flip
        # subtract the image height from each y value
        # for a vertical flip
        coords[:, 1] = image.shape[0] - coords[:, 1]
    
    return flipped_image, coords

In [4]:
def normalize_global(polygons, image_size=640):
    # Normalize all polygons based on the image size
    normalized_polygons = [(polygon / image_size).flatten() for polygon in polygons]
    return normalized_polygons

In [5]:
def adjust_brightness(image, beta):
    # Adjust brightness (beta) using cv2.convertScaleAbs
    adjusted_image = cv2.convertScaleAbs(image, alpha=1.0, beta=beta)
    return adjusted_image

In [6]:
def process_files(image_dir, label_dir, output_image_dir, output_label_dir):
    os.makedirs(output_image_dir, exist_ok=True)
    os.makedirs(output_label_dir, exist_ok=True)

    # list of possible rotation angles
    angles = list(range(-10, 0)) + list(range(1, 11)) + [-90, 90, 180, 270]
    # list of brightness adjustments
    beta_values = [-60, -30, 30, 60]
    # filename extensions for images
    image_ext = ('.jpg', '.png', '.jpeg', '.tiff', '.tif')

    # search image_dir for any image files
    for image_filename in os.listdir(image_dir):
        if image_filename.endswith(image_ext):
            image_path = os.path.join(image_dir, image_filename)
            # the annotation file has the same filename as the image, just with a '.txt' extension
            annotation_filename = image_filename.replace(os.path.splitext(image_filename)[1], ".txt")
            annotation_path = os.path.join(label_dir, annotation_filename)

            # check that the annotation file is present and not empty
            if os.path.exists(annotation_path) and os.path.getsize(annotation_path) > 0:
                image = cv2.imread(image_path)
                
                polygons = []
                # read the data from the annotation file
                with open(annotation_path, 'r') as file:
                    for line in file:
                        parts = line.strip().split()
                        # each line of the file should start with '0 ' (this is a label
                        # for the building-footprints class).
                        # turn the line of coordinate values into a list of floats.
                        # the values in the annotation file have been normalized so they're
                        # between 0.0 and 1.0, so multiply them by 640 to match the image
                        # dimensions.
                        if parts[0] == '0':
                            coords = list(map(float, parts[1:]))
                            coords = [coord * 640 for coord in coords]
                            polygons.append(coords)

                # take a random sampling of 5 values from the list of angles
                # because 24 rotations for each image seems like a bit much
                for angle in random.choices(angles, k=5):
                    rotated_image, rotated_polygons = [], []
                    for polygon in polygons:
                        rotated_image, rotated_coords = rotate_image_and_annotations(image, polygon, angle)
                        if rotated_coords is not None:
                            rotated_polygons.append(rotated_coords)
                        else:
                            continue

                    # perform the horizontal and vertical flips
                    for flip_code, flip_name in [(1, 'horizontal'), (0, 'vertical')]:
                        flipped_image, flipped_polygons = None, []
                        
                        for polygon in rotated_polygons:
                            temp_flipped_image, flipped_coords = flip_image_and_annotations(rotated_image, polygon, flip_code)

                            if flipped_coords is not None:
                                flipped_image = temp_flipped_image
                            
                                flipped_polygons.append(flipped_coords)

                        # brighten and darken the images
                        for beta in beta_values:
                            brightened_image = adjust_brightness(flipped_image, beta)
                            normalized_polygons = normalize_global(flipped_polygons)

                            
                            output_image_path = os.path.join(output_image_dir, f"{os.path.splitext(image_filename)[0]}_{angle}_{flip_name}_brightness_{beta}.png")
                            output_annotation_path = os.path.join(output_label_dir, f"{os.path.splitext(annotation_filename)[0]}_{angle}_{flip_name}_brightness_{beta}.txt")

                            
                            try:
                                # save the modified image
                                cv2.imwrite(output_image_path, brightened_image)
                            except Exception as e:
                                print('error outputting image {}: {}'.format(output_image_path, e))

                            # save the modified annotation file
                            with open(output_annotation_path, 'w') as file:
                                for polygon in normalized_polygons:
                                    coords_str = ' '.join(map(str, polygon))
                                    file.write(f"0 {coords_str}\n")


In [9]:
# Define directories
image_dir = "Fire-Insurance-14/valid/images"
label_dir = "Fire-Insurance-14/valid/labels"
output_image_dir = image_dir
output_label_dir = label_dir

# Process all files in the directories
process_files(image_dir, label_dir, output_image_dir, output_label_dir)

error outputting image Fire-Insurance-14/valid/images\912-7135g57-2br-oos_17c00004_jpg.rf.e7aeaa37456f20a20f729455921d5020_270_horizontal_brightness_-60.png: OpenCV(4.10.0) D:\a\opencv-python\opencv-python\opencv\modules\imgcodecs\src\loadsave.cpp:798: error: (-215:Assertion failed) !_img.empty() in function 'cv::imwrite'

error outputting image Fire-Insurance-14/valid/images\912-7135g57-2br-oos_17c00004_jpg.rf.e7aeaa37456f20a20f729455921d5020_270_horizontal_brightness_-30.png: OpenCV(4.10.0) D:\a\opencv-python\opencv-python\opencv\modules\imgcodecs\src\loadsave.cpp:798: error: (-215:Assertion failed) !_img.empty() in function 'cv::imwrite'

error outputting image Fire-Insurance-14/valid/images\912-7135g57-2br-oos_17c00004_jpg.rf.e7aeaa37456f20a20f729455921d5020_270_horizontal_brightness_30.png: OpenCV(4.10.0) D:\a\opencv-python\opencv-python\opencv\modules\imgcodecs\src\loadsave.cpp:798: error: (-215:Assertion failed) !_img.empty() in function 'cv::imwrite'

error outputting image Fir